dpdk1702-rc2 upstream files unchanged + mlx5 driver rc3
author  Ido Barnea <[email protected]>
Sun, 5 Feb 2017 13:21:19 +0000 (15:21 +0200)
committer  Ido Barnea <[email protected]>
Mon, 13 Feb 2017 10:32:01 +0000 (12:32 +0200)
Signed-off-by: Ido Barnea <[email protected]>
323 files changed:
src/dpdk/drivers/net/af_packet/rte_eth_af_packet.c
src/dpdk/drivers/net/bnx2x/bnx2x.c
src/dpdk/drivers/net/bnx2x/bnx2x.h
src/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c
src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c
src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.h
src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.c
src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.h
src/dpdk/drivers/net/bnx2x/debug.c [deleted file]
src/dpdk/drivers/net/bnx2x/elink.c
src/dpdk/drivers/net/bnx2x/elink.h
src/dpdk/drivers/net/bonding/rte_eth_bond_api.c
src/dpdk/drivers/net/bonding/rte_eth_bond_args.c
src/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c
src/dpdk/drivers/net/bonding/rte_eth_bond_private.h
src/dpdk/drivers/net/cxgbe/base/adapter.h
src/dpdk/drivers/net/cxgbe/base/t4_hw.c
src/dpdk/drivers/net/cxgbe/cxgbe_compat.h
src/dpdk/drivers/net/cxgbe/cxgbe_ethdev.c
src/dpdk/drivers/net/cxgbe/cxgbe_main.c
src/dpdk/drivers/net/cxgbe/sge.c
src/dpdk/drivers/net/e1000/base/e1000_82575.c
src/dpdk/drivers/net/e1000/base/e1000_82575.h
src/dpdk/drivers/net/e1000/base/e1000_api.c
src/dpdk/drivers/net/e1000/base/e1000_defines.h
src/dpdk/drivers/net/e1000/base/e1000_hw.h
src/dpdk/drivers/net/e1000/base/e1000_ich8lan.c
src/dpdk/drivers/net/e1000/base/e1000_ich8lan.h
src/dpdk/drivers/net/e1000/base/e1000_mbx.c
src/dpdk/drivers/net/e1000/base/e1000_nvm.c
src/dpdk/drivers/net/e1000/base/e1000_osdep.h
src/dpdk/drivers/net/e1000/base/e1000_regs.h
src/dpdk/drivers/net/e1000/e1000_ethdev.h
src/dpdk/drivers/net/e1000/em_ethdev.c
src/dpdk/drivers/net/e1000/em_rxtx.c
src/dpdk/drivers/net/e1000/igb_ethdev.c
src/dpdk/drivers/net/e1000/igb_pf.c
src/dpdk/drivers/net/e1000/igb_rxtx.c
src/dpdk/drivers/net/enic/base/vnic_dev.c
src/dpdk/drivers/net/enic/base/vnic_rq.c
src/dpdk/drivers/net/enic/base/vnic_rq.h
src/dpdk/drivers/net/enic/enic.h
src/dpdk/drivers/net/enic/enic_clsf.c
src/dpdk/drivers/net/enic/enic_compat.h
src/dpdk/drivers/net/enic/enic_ethdev.c
src/dpdk/drivers/net/enic/enic_main.c
src/dpdk/drivers/net/enic/enic_res.c
src/dpdk/drivers/net/enic/enic_res.h
src/dpdk/drivers/net/enic/enic_rxtx.c
src/dpdk/drivers/net/fm10k/base/fm10k_osdep.h
src/dpdk/drivers/net/fm10k/fm10k.h
src/dpdk/drivers/net/fm10k/fm10k_ethdev.c
src/dpdk/drivers/net/fm10k/fm10k_rxtx.c
src/dpdk/drivers/net/fm10k/fm10k_rxtx_vec.c
src/dpdk/drivers/net/i40e/base/i40e_adminq.c
src/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h
src/dpdk/drivers/net/i40e/base/i40e_common.c
src/dpdk/drivers/net/i40e/base/i40e_devids.h
src/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c
src/dpdk/drivers/net/i40e/base/i40e_nvm.c
src/dpdk/drivers/net/i40e/base/i40e_osdep.h
src/dpdk/drivers/net/i40e/base/i40e_prototype.h
src/dpdk/drivers/net/i40e/base/i40e_register.h
src/dpdk/drivers/net/i40e/base/i40e_type.h
src/dpdk/drivers/net/i40e/base/i40e_virtchnl.h
src/dpdk/drivers/net/i40e/i40e_ethdev.c
src/dpdk/drivers/net/i40e/i40e_ethdev.h
src/dpdk/drivers/net/i40e/i40e_ethdev_vf.c
src/dpdk/drivers/net/i40e/i40e_fdir.c
src/dpdk/drivers/net/i40e/i40e_flow.c [new file with mode: 0644]
src/dpdk/drivers/net/i40e/i40e_pf.c
src/dpdk/drivers/net/i40e/i40e_pf.h
src/dpdk/drivers/net/i40e/i40e_rxtx.c
src/dpdk/drivers/net/i40e/i40e_rxtx.h
src/dpdk/drivers/net/i40e/i40e_rxtx_vec_common.h [new file with mode: 0644]
src/dpdk/drivers/net/i40e/i40e_rxtx_vec_neon.c [new file with mode: 0644]
src/dpdk/drivers/net/i40e/i40e_rxtx_vec_sse.c [moved from src/dpdk/drivers/net/i40e/i40e_rxtx_vec.c with 75% similarity]
src/dpdk/drivers/net/i40e/rte_pmd_i40e.h [new file with mode: 0644]
src/dpdk/drivers/net/ixgbe/base/ixgbe_82598.c
src/dpdk/drivers/net/ixgbe/base/ixgbe_82599.c
src/dpdk/drivers/net/ixgbe/base/ixgbe_api.c
src/dpdk/drivers/net/ixgbe/base/ixgbe_api.h
src/dpdk/drivers/net/ixgbe/base/ixgbe_common.c
src/dpdk/drivers/net/ixgbe/base/ixgbe_common.h
src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.c [new file with mode: 0644]
src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.h [new file with mode: 0644]
src/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.h
src/dpdk/drivers/net/ixgbe/base/ixgbe_osdep.h
src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.c
src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.h
src/dpdk/drivers/net/ixgbe/base/ixgbe_type.h
src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.c
src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.h
src/dpdk/drivers/net/ixgbe/base/ixgbe_x540.c
src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.c
src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.h
src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c
src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h
src/dpdk/drivers/net/ixgbe/ixgbe_fdir.c
src/dpdk/drivers/net/ixgbe/ixgbe_flow.c [new file with mode: 0644]
src/dpdk/drivers/net/ixgbe/ixgbe_pf.c
src/dpdk/drivers/net/ixgbe/ixgbe_regs.h
src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c
src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h
src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
src/dpdk/drivers/net/ixgbe/rte_pmd_ixgbe.h [new file with mode: 0644]
src/dpdk/drivers/net/mlx4/mlx4.c
src/dpdk/drivers/net/mlx4/mlx4.h
src/dpdk/drivers/net/mlx5/mlx5.c
src/dpdk/drivers/net/mlx5/mlx5.h
src/dpdk/drivers/net/mlx5/mlx5_defs.h
src/dpdk/drivers/net/mlx5/mlx5_ethdev.c
src/dpdk/drivers/net/mlx5/mlx5_fdir.c
src/dpdk/drivers/net/mlx5/mlx5_flow.c [new file with mode: 0644]
src/dpdk/drivers/net/mlx5/mlx5_prm.h
src/dpdk/drivers/net/mlx5/mlx5_rxq.c
src/dpdk/drivers/net/mlx5/mlx5_rxtx.c
src/dpdk/drivers/net/mlx5/mlx5_rxtx.h
src/dpdk/drivers/net/mlx5/mlx5_stats.c
src/dpdk/drivers/net/mlx5/mlx5_trigger.c
src/dpdk/drivers/net/mlx5/mlx5_txq.c
src/dpdk/drivers/net/null/rte_eth_null.c
src/dpdk/drivers/net/ring/rte_eth_ring.c
src/dpdk/drivers/net/szedata2/rte_eth_szedata2.c
src/dpdk/drivers/net/szedata2/rte_eth_szedata2.h
src/dpdk/drivers/net/virtio/virtio_ethdev.c
src/dpdk/drivers/net/virtio/virtio_ethdev.h
src/dpdk/drivers/net/virtio/virtio_pci.c
src/dpdk/drivers/net/virtio/virtio_pci.h
src/dpdk/drivers/net/virtio/virtio_rxtx.c
src/dpdk/drivers/net/virtio/virtio_rxtx.h
src/dpdk/drivers/net/virtio/virtio_rxtx_simple.c
src/dpdk/drivers/net/virtio/virtio_rxtx_simple.h [new file with mode: 0644]
src/dpdk/drivers/net/virtio/virtio_rxtx_simple_neon.c [new file with mode: 0644]
src/dpdk/drivers/net/virtio/virtio_rxtx_simple_sse.c [new file with mode: 0644]
src/dpdk/drivers/net/virtio/virtio_user/vhost.h [new file with mode: 0644]
src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel.c [new file with mode: 0644]
src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.c [new file with mode: 0644]
src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.h [new file with mode: 0644]
src/dpdk/drivers/net/virtio/virtio_user/vhost_user.c [new file with mode: 0644]
src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c [new file with mode: 0644]
src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.h [new file with mode: 0644]
src/dpdk/drivers/net/virtio/virtio_user_ethdev.c
src/dpdk/drivers/net/virtio/virtqueue.c
src/dpdk/drivers/net/virtio/virtqueue.h
src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.c
src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.h
src/dpdk/drivers/net/vmxnet3/vmxnet3_ring.h
src/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c
src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.c
src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.h
src/dpdk/lib/librte_acl/acl.h
src/dpdk/lib/librte_acl/acl_run.h
src/dpdk/lib/librte_acl/acl_run_altivec.c [moved from src/dpdk/lib/librte_eal/bsdapp/eal/eal_log.c with 76% similarity]
src/dpdk/lib/librte_acl/acl_run_altivec.h [new file with mode: 0644]
src/dpdk/lib/librte_acl/rte_acl.c
src/dpdk/lib/librte_acl/rte_acl.h
src/dpdk/lib/librte_cfgfile/rte_cfgfile.h
src/dpdk/lib/librte_eal/bsdapp/contigmem/contigmem.c
src/dpdk/lib/librte_eal/bsdapp/eal/eal.c
src/dpdk/lib/librte_eal/bsdapp/eal/eal_interrupts.c
src/dpdk/lib/librte_eal/bsdapp/eal/eal_pci.c
src/dpdk/lib/librte_eal/common/eal_common_bus.c [new file with mode: 0644]
src/dpdk/lib/librte_eal/common/eal_common_cpuflags.c
src/dpdk/lib/librte_eal/common/eal_common_dev.c
src/dpdk/lib/librte_eal/common/eal_common_devargs.c
src/dpdk/lib/librte_eal/common/eal_common_log.c
src/dpdk/lib/librte_eal/common/eal_common_memzone.c
src/dpdk/lib/librte_eal/common/eal_common_options.c
src/dpdk/lib/librte_eal/common/eal_common_pci.c
src/dpdk/lib/librte_eal/common/eal_common_timer.c
src/dpdk/lib/librte_eal/common/eal_common_vdev.c [new file with mode: 0644]
src/dpdk/lib/librte_eal/common/eal_filesystem.h
src/dpdk/lib/librte_eal/common/eal_hugepages.h
src/dpdk/lib/librte_eal/common/eal_private.h
src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic.h
src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
src/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder.h
src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h
src/dpdk/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h
src/dpdk/lib/librte_eal/common/include/arch/x86/rte_cycles.h
src/dpdk/lib/librte_eal/common/include/arch/x86/rte_io.h [new file with mode: 0644]
src/dpdk/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
src/dpdk/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
src/dpdk/lib/librte_eal/common/include/arch/x86/rte_rtm.h
src/dpdk/lib/librte_eal/common/include/arch/x86/rte_vect.h
src/dpdk/lib/librte_eal/common/include/generic/rte_atomic.h
src/dpdk/lib/librte_eal/common/include/generic/rte_byteorder.h
src/dpdk/lib/librte_eal/common/include/generic/rte_cpuflags.h
src/dpdk/lib/librte_eal/common/include/generic/rte_cycles.h
src/dpdk/lib/librte_eal/common/include/generic/rte_io.h [new file with mode: 0644]
src/dpdk/lib/librte_eal/common/include/generic/rte_memcpy.h
src/dpdk/lib/librte_eal/common/include/generic/rte_vect.h [new file with mode: 0644]
src/dpdk/lib/librte_eal/common/include/rte_bus.h [new file with mode: 0644]
src/dpdk/lib/librte_eal/common/include/rte_common.h
src/dpdk/lib/librte_eal/common/include/rte_dev.h
src/dpdk/lib/librte_eal/common/include/rte_devargs.h
src/dpdk/lib/librte_eal/common/include/rte_eal.h
src/dpdk/lib/librte_eal/common/include/rte_interrupts.h
src/dpdk/lib/librte_eal/common/include/rte_log.h
src/dpdk/lib/librte_eal/common/include/rte_malloc.h
src/dpdk/lib/librte_eal/common/include/rte_memory.h
src/dpdk/lib/librte_eal/common/include/rte_memzone.h
src/dpdk/lib/librte_eal/common/include/rte_pci.h
src/dpdk/lib/librte_eal/common/include/rte_pci_dev_ids.h [deleted file]
src/dpdk/lib/librte_eal/common/include/rte_tailq.h
src/dpdk/lib/librte_eal/common/include/rte_time.h
src/dpdk/lib/librte_eal/common/include/rte_vdev.h [new file with mode: 0644]
src/dpdk/lib/librte_eal/common/include/rte_version.h
src/dpdk/lib/librte_eal/common/malloc_heap.c
src/dpdk/lib/librte_eal/linuxapp/eal/eal.c
src/dpdk/lib/librte_eal/linuxapp/eal/eal_interrupts.c
src/dpdk/lib/librte_eal/linuxapp/eal/eal_ivshmem.c [deleted file]
src/dpdk/lib/librte_eal/linuxapp/eal/eal_log.c
src/dpdk/lib/librte_eal/linuxapp/eal/eal_memory.c
src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci.c
src/dpdk/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
src/dpdk/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
src/dpdk/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
src/dpdk/lib/librte_eal/linuxapp/kni/compat.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c [deleted file]
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c [deleted file]
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c [deleted file]
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c [deleted file]
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c [deleted file]
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c [deleted file]
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h [deleted file]
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c
src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h
src/dpdk/lib/librte_eal/linuxapp/kni/kni_dev.h
src/dpdk/lib/librte_eal/linuxapp/kni/kni_ethtool.c
src/dpdk/lib/librte_eal/linuxapp/kni/kni_fifo.h
src/dpdk/lib/librte_eal/linuxapp/kni/kni_misc.c
src/dpdk/lib/librte_eal/linuxapp/kni/kni_net.c
src/dpdk/lib/librte_eal/linuxapp/kni/kni_vhost.c
src/dpdk/lib/librte_ether/rte_dev_info.h
src/dpdk/lib/librte_ether/rte_eth_ctrl.h
src/dpdk/lib/librte_ether/rte_ethdev.c
src/dpdk/lib/librte_ether/rte_ethdev.h
src/dpdk/lib/librte_ether/rte_flow.c [new file with mode: 0644]
src/dpdk/lib/librte_ether/rte_flow.h [new file with mode: 0644]
src/dpdk/lib/librte_ether/rte_flow_driver.h [new file with mode: 0644]
src/dpdk/lib/librte_hash/rte_cuckoo_hash.c
src/dpdk/lib/librte_hash/rte_cuckoo_hash.h
src/dpdk/lib/librte_hash/rte_cuckoo_hash_x86.h
src/dpdk/lib/librte_hash/rte_fbk_hash.h
src/dpdk/lib/librte_hash/rte_thash.h
src/dpdk/lib/librte_kvargs/rte_kvargs.c
src/dpdk/lib/librte_kvargs/rte_kvargs.h
src/dpdk/lib/librte_mbuf/rte_mbuf.c
src/dpdk/lib/librte_mbuf/rte_mbuf.h
src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.c [new file with mode: 0644]
src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.h [new file with mode: 0644]
src/dpdk/lib/librte_mempool/rte_mempool.c
src/dpdk/lib/librte_mempool/rte_mempool.h
src/dpdk/lib/librte_mempool/rte_mempool_stack.c
src/dpdk/lib/librte_net/rte_ether.h [moved from src/dpdk/lib/librte_ether/rte_ether.h with 98% similarity]
src/dpdk/lib/librte_net/rte_gre.h [moved from src/dpdk/lib/librte_eal/common/include/rte_warnings.h with 60% similarity]
src/dpdk/lib/librte_net/rte_ip.h
src/dpdk/lib/librte_net/rte_net.c [new file with mode: 0644]
src/dpdk/lib/librte_net/rte_net.h [new file with mode: 0644]
src/dpdk/lib/librte_pipeline/rte_pipeline.h
src/dpdk/lib/librte_port/rte_port_fd.c [new file with mode: 0644]
src/dpdk/lib/librte_port/rte_port_fd.h [new file with mode: 0644]
src/dpdk/lib/librte_port/rte_port_source_sink.h
src/dpdk/lib/librte_ring/rte_ring.h
src/dpdk/lib/librte_table/rte_table_acl.c
src/dpdk/lib/librte_table/rte_table_hash.h
src/dpdk/lib/librte_table/rte_table_hash_cuckoo.c [new file with mode: 0644]
src/dpdk/lib/librte_table/rte_table_hash_key16.c
src/dpdk/lib/librte_table/rte_table_hash_key32.c
src/dpdk/lib/librte_table/rte_table_hash_key8.c

diff --git a/src/dpdk/drivers/net/af_packet/rte_eth_af_packet.c b/src/dpdk/drivers/net/af_packet/rte_eth_af_packet.c
index f795566..2f87553 100644
@@ -40,7 +40,7 @@
 #include <rte_ethdev.h>
 #include <rte_malloc.h>
 #include <rte_kvargs.h>
-#include <rte_dev.h>
+#include <rte_vdev.h>
 
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
@@ -83,6 +83,7 @@ struct pkt_rx_queue {
 
 struct pkt_tx_queue {
        int sockfd;
+       unsigned int frame_data_size;
 
        struct iovec *rd;
        uint8_t *map;
@@ -98,6 +99,7 @@ struct pmd_internals {
        unsigned nb_queues;
 
        int if_index;
+       char *if_name;
        struct ether_addr eth_addr;
 
        struct tpacket_req req;
@@ -115,8 +117,6 @@ static const char *valid_arguments[] = {
        NULL
 };
 
-static const char *drivername = "AF_PACKET PMD";
-
 static struct rte_eth_link pmd_link = {
        .link_speed = ETH_SPEED_NUM_10G,
        .link_duplex = ETH_LINK_FULL_DUPLEX,
@@ -161,6 +161,12 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                pbuf = (uint8_t *) ppd + ppd->tp_mac;
                memcpy(rte_pktmbuf_mtod(mbuf, void *), pbuf, rte_pktmbuf_data_len(mbuf));
 
+               /* check for vlan info */
+               if (ppd->tp_status & TP_STATUS_VLAN_VALID) {
+                       mbuf->vlan_tci = ppd->tp_vlan_tci;
+                       mbuf->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
+               }
+
                /* release incoming frame and advance ring buffer */
                ppd->tp_status = TP_STATUS_KERNEL;
                if (++framenum >= framecount)
@@ -206,13 +212,28 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
        framenum = pkt_q->framenum;
        ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;
        for (i = 0; i < nb_pkts; i++) {
+               mbuf = *bufs++;
+
+               /* drop oversized packets */
+               if (rte_pktmbuf_data_len(mbuf) > pkt_q->frame_data_size) {
+                       rte_pktmbuf_free(mbuf);
+                       continue;
+               }
+
+               /* insert vlan info if necessary */
+               if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
+                       if (rte_vlan_insert(&mbuf)) {
+                               rte_pktmbuf_free(mbuf);
+                               continue;
+                       }
+               }
+
                /* point at the next incoming frame */
                if ((ppd->tp_status != TP_STATUS_AVAILABLE) &&
                    (poll(&pfd, 1, -1) < 0))
-                               continue;
+                       break;
 
                /* copy the tx frame data */
-               mbuf = bufs[num_tx];
                pbuf = (uint8_t *) ppd + TPACKET2_HDRLEN -
                        sizeof(struct sockaddr_ll);
                memcpy(pbuf, rte_pktmbuf_mtod(mbuf, void*), rte_pktmbuf_data_len(mbuf));
@@ -231,13 +252,13 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
        /* kick-off transmits */
        if (sendto(pkt_q->sockfd, NULL, 0, MSG_DONTWAIT, NULL, 0) == -1)
-               return 0; /* error sending -- no packets transmitted */
+               num_tx = 0; /* error sending -- no packets transmitted */
 
        pkt_q->framenum = framenum;
        pkt_q->tx_pkts += num_tx;
-       pkt_q->err_pkts += nb_pkts - num_tx;
+       pkt_q->err_pkts += i - num_tx;
        pkt_q->tx_bytes += num_tx_bytes;
-       return num_tx;
+       return i;
 }
 
 static int
@@ -261,9 +282,16 @@ eth_dev_stop(struct rte_eth_dev *dev)
                sockfd = internals->rx_queue[i].sockfd;
                if (sockfd != -1)
                        close(sockfd);
-               sockfd = internals->tx_queue[i].sockfd;
-               if (sockfd != -1)
-                       close(sockfd);
+
+               /* Prevent use after free in case tx fd == rx fd */
+               if (sockfd != internals->tx_queue[i].sockfd) {
+                       sockfd = internals->tx_queue[i].sockfd;
+                       if (sockfd != -1)
+                               close(sockfd);
+               }
+
+               internals->rx_queue[i].sockfd = -1;
+               internals->tx_queue[i].sockfd = -1;
        }
 
        dev->data->dev_link.link_status = ETH_LINK_DOWN;
@@ -280,14 +308,12 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
        struct pmd_internals *internals = dev->data->dev_private;
 
-       dev_info->driver_name = drivername;
        dev_info->if_index = internals->if_index;
        dev_info->max_mac_addrs = 1;
        dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;
        dev_info->max_rx_queues = (uint16_t)internals->nb_queues;
        dev_info->max_tx_queues = (uint16_t)internals->nb_queues;
        dev_info->min_rx_bufsize = 0;
-       dev_info->pci_dev = NULL;
 }
 
 static void
@@ -370,18 +396,20 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 {
        struct pmd_internals *internals = dev->data->dev_private;
        struct pkt_rx_queue *pkt_q = &internals->rx_queue[rx_queue_id];
-       uint16_t buf_size;
+       unsigned int buf_size, data_size;
 
        pkt_q->mb_pool = mb_pool;
 
        /* Now get the space available for data in the mbuf */
-       buf_size = (uint16_t)(rte_pktmbuf_data_room_size(pkt_q->mb_pool) -
-               RTE_PKTMBUF_HEADROOM);
+       buf_size = rte_pktmbuf_data_room_size(pkt_q->mb_pool) -
+               RTE_PKTMBUF_HEADROOM;
+       data_size = internals->req.tp_frame_size;
+       data_size -= TPACKET2_HDRLEN - sizeof(struct sockaddr_ll);
 
-       if (ETH_FRAME_LEN > buf_size) {
+       if (data_size > buf_size) {
                RTE_LOG(ERR, PMD,
                        "%s: %d bytes will not fit in mbuf (%d bytes)\n",
-                       dev->data->name, ETH_FRAME_LEN, buf_size);
+                       dev->data->name, data_size, buf_size);
                return -ENOMEM;
        }
 
@@ -405,12 +433,80 @@ eth_tx_queue_setup(struct rte_eth_dev *dev,
        return 0;
 }
 
+static int
+eth_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+       struct pmd_internals *internals = dev->data->dev_private;
+       struct ifreq ifr = { .ifr_mtu = mtu };
+       int ret;
+       int s;
+       unsigned int data_size = internals->req.tp_frame_size -
+                                TPACKET2_HDRLEN -
+                                sizeof(struct sockaddr_ll);
+
+       if (mtu > data_size)
+               return -EINVAL;
+
+       s = socket(PF_INET, SOCK_DGRAM, 0);
+       if (s < 0)
+               return -EINVAL;
+
+       strncpy(ifr.ifr_name, internals->if_name, IFNAMSIZ);
+       ret = ioctl(s, SIOCSIFMTU, &ifr);
+       close(s);
+
+       if (ret < 0)
+               return -EINVAL;
+
+       return 0;
+}
+
+static void
+eth_dev_change_flags(char *if_name, uint32_t flags, uint32_t mask)
+{
+       struct ifreq ifr;
+       int s;
+
+       s = socket(PF_INET, SOCK_DGRAM, 0);
+       if (s < 0)
+               return;
+
+       strncpy(ifr.ifr_name, if_name, IFNAMSIZ);
+       if (ioctl(s, SIOCGIFFLAGS, &ifr) < 0)
+               goto out;
+       ifr.ifr_flags &= mask;
+       ifr.ifr_flags |= flags;
+       if (ioctl(s, SIOCSIFFLAGS, &ifr) < 0)
+               goto out;
+out:
+       close(s);
+}
+
+static void
+eth_dev_promiscuous_enable(struct rte_eth_dev *dev)
+{
+       struct pmd_internals *internals = dev->data->dev_private;
+
+       eth_dev_change_flags(internals->if_name, IFF_PROMISC, ~0);
+}
+
+static void
+eth_dev_promiscuous_disable(struct rte_eth_dev *dev)
+{
+       struct pmd_internals *internals = dev->data->dev_private;
+
+       eth_dev_change_flags(internals->if_name, 0, ~IFF_PROMISC);
+}
+
 static const struct eth_dev_ops ops = {
        .dev_start = eth_dev_start,
        .dev_stop = eth_dev_stop,
        .dev_close = eth_dev_close,
        .dev_configure = eth_dev_configure,
        .dev_infos_get = eth_dev_info,
+       .mtu_set = eth_dev_mtu_set,
+       .promiscuous_enable = eth_dev_promiscuous_enable,
+       .promiscuous_disable = eth_dev_promiscuous_disable,
        .rx_queue_setup = eth_rx_queue_setup,
        .tx_queue_setup = eth_tx_queue_setup,
        .rx_queue_release = eth_queue_release,
@@ -440,6 +536,8 @@ open_packet_iface(const char *key __rte_unused,
        return 0;
 }
 
+static struct rte_vdev_driver pmd_af_packet_drv;
+
 static int
 rte_pmd_init_internals(const char *name,
                        const int sockfd,
@@ -524,6 +622,7 @@ rte_pmd_init_internals(const char *name,
                        name);
                goto error_early;
        }
+       (*internals)->if_name = strdup(pair->value);
        (*internals)->if_index = ifr.ifr_ifindex;
 
        if (ioctl(sockfd, SIOCGIFHWADDR, &ifr) == -1) {
@@ -633,6 +732,9 @@ rte_pmd_init_internals(const char *name,
 
                tx_queue = &((*internals)->tx_queue[q]);
                tx_queue->framecount = req->tp_frame_nr;
+               tx_queue->frame_data_size = req->tp_frame_size;
+               tx_queue->frame_data_size -= TPACKET2_HDRLEN -
+                       sizeof(struct sockaddr_ll);
 
                tx_queue->map = rx_queue->map + req->tp_block_size * req->tp_block_nr;
 
@@ -666,7 +768,7 @@ rte_pmd_init_internals(const char *name,
        }
 
        /* reserve an ethdev entry */
-       *eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
+       *eth_dev = rte_eth_dev_allocate(name);
        if (*eth_dev == NULL)
                goto error;
 
@@ -693,7 +795,7 @@ rte_pmd_init_internals(const char *name,
        (*eth_dev)->dev_ops = &ops;
        (*eth_dev)->driver = NULL;
        (*eth_dev)->data->dev_flags = RTE_ETH_DEV_DETACHABLE;
-       (*eth_dev)->data->drv_name = drivername;
+       (*eth_dev)->data->drv_name = pmd_af_packet_drv.driver.name;
        (*eth_dev)->data->kdrv = RTE_KDRV_NONE;
        (*eth_dev)->data->numa_node = numa_node;
 
@@ -712,6 +814,7 @@ error:
                        ((*internals)->rx_queue[q].sockfd != qsockfd))
                        close((*internals)->rx_queue[q].sockfd);
        }
+       free((*internals)->if_name);
        rte_free(*internals);
 error_early:
        rte_free(data);
@@ -820,7 +923,7 @@ rte_eth_from_packet(const char *name,
 }
 
 static int
-rte_pmd_af_packet_devinit(const char *name, const char *params)
+rte_pmd_af_packet_probe(const char *name, const char *params)
 {
        unsigned numa_node;
        int ret = 0;
@@ -858,7 +961,7 @@ exit:
 }
 
 static int
-rte_pmd_af_packet_devuninit(const char *name)
+rte_pmd_af_packet_remove(const char *name)
 {
        struct rte_eth_dev *eth_dev = NULL;
        struct pmd_internals *internals;
@@ -880,6 +983,7 @@ rte_pmd_af_packet_devuninit(const char *name)
                rte_free(internals->rx_queue[q].rd);
                rte_free(internals->tx_queue[q].rd);
        }
+       free(internals->if_name);
 
        rte_free(eth_dev->data->dev_private);
        rte_free(eth_dev->data);
@@ -889,14 +993,14 @@ rte_pmd_af_packet_devuninit(const char *name)
        return 0;
 }
 
-static struct rte_driver pmd_af_packet_drv = {
-       .type = PMD_VDEV,
-       .init = rte_pmd_af_packet_devinit,
-       .uninit = rte_pmd_af_packet_devuninit,
+static struct rte_vdev_driver pmd_af_packet_drv = {
+       .probe = rte_pmd_af_packet_probe,
+       .remove = rte_pmd_af_packet_remove,
 };
 
-PMD_REGISTER_DRIVER(pmd_af_packet_drv, eth_af_packet);
-DRIVER_REGISTER_PARAM_STRING(eth_af_packet,
+RTE_PMD_REGISTER_VDEV(net_af_packet, pmd_af_packet_drv);
+RTE_PMD_REGISTER_ALIAS(net_af_packet, eth_af_packet);
+RTE_PMD_REGISTER_PARAM_STRING(net_af_packet,
        "iface=<string> "
        "qpairs=<int> "
        "blocksz=<int> "
diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x.c b/src/dpdk/drivers/net/bnx2x/bnx2x.c
index a49a07f..cc380bd 100644
@@ -178,7 +178,7 @@ bnx2x_dma_alloc(struct bnx2x_softc *sc, size_t size, struct bnx2x_dma *dma,
 
        /* Caller must take care that strlen(mz_name) < RTE_MEMZONE_NAMESIZE */
        z = rte_memzone_reserve_aligned(mz_name, (uint64_t) (size),
-                                       rte_lcore_to_socket_id(rte_lcore_id()),
+                                       SOCKET_ID_ANY,
                                        0, align);
        if (z == NULL) {
                PMD_DRV_LOG(ERR, "DMA alloc failed for %s", msg);
@@ -1397,10 +1397,10 @@ bnx2x_del_all_macs(struct bnx2x_softc *sc, struct ecore_vlan_mac_obj *mac_obj,
        return rc;
 }
 
-int
+static int
 bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
-                     unsigned long *rx_accept_flags,
-                     unsigned long *tx_accept_flags)
+                       unsigned long *rx_accept_flags,
+                       unsigned long *tx_accept_flags)
 {
        /* Clear the flags first */
        *rx_accept_flags = 0;
@@ -1438,6 +1438,7 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
 
                break;
 
+       case BNX2X_RX_MODE_ALLMULTI_PROMISC:
        case BNX2X_RX_MODE_PROMISC:
                /*
                 * According to deffinition of SI mode, iface in promisc mode
@@ -2219,7 +2220,7 @@ int bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf *m0)
        }
 
        PMD_TX_LOG(DEBUG,
-                  "start bd: nbytes %d flags %x vlan %x\n",
+                  "start bd: nbytes %d flags %x vlan %x",
                   tx_start_bd->nbytes,
                   tx_start_bd->bd_flags.as_bitfield,
                   tx_start_bd->vlan_or_ethertype);
@@ -7016,34 +7017,6 @@ static int bnx2x_initial_phy_init(struct bnx2x_softc *sc, int load_mode)
 
        bnx2x_set_requested_fc(sc);
 
-       if (CHIP_REV_IS_SLOW(sc)) {
-               uint32_t bond = CHIP_BOND_ID(sc);
-               uint32_t feat = 0;
-
-               if (CHIP_IS_E2(sc) && CHIP_IS_MODE_4_PORT(sc)) {
-                       feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC;
-               } else if (bond & 0x4) {
-                       if (CHIP_IS_E3(sc)) {
-                               feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC;
-                       } else {
-                               feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC;
-                       }
-               } else if (bond & 0x8) {
-                       if (CHIP_IS_E3(sc)) {
-                               feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_UMAC;
-                       } else {
-                               feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC;
-                       }
-               }
-
-/* disable EMAC for E3 and above */
-               if (bond & 0x2) {
-                       feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC;
-               }
-
-               sc->link_params.feature_config_flags |= feat;
-       }
-
        if (load_mode == LOAD_DIAG) {
                lp->loopback_mode = ELINK_LOOPBACK_XGXS;
 /* Prefer doing PHY loopback at 10G speed, if possible */
@@ -9556,8 +9529,8 @@ static void bnx2x_init_rte(struct bnx2x_softc *sc)
                sc->max_rx_queues = min(BNX2X_VF_MAX_QUEUES_PER_VF,
                                        sc->igu_sb_cnt);
        } else {
-               sc->max_tx_queues = 128;
-               sc->max_rx_queues = 128;
+               sc->max_rx_queues = BNX2X_MAX_RSS_COUNT(sc);
+               sc->max_tx_queues = sc->max_rx_queues;
        }
 }
 
diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x.h b/src/dpdk/drivers/net/bnx2x/bnx2x.h
index 78757a8..b3cd5fc 100644
@@ -17,6 +17,8 @@
 #define __BNX2X_H__
 
 #include <rte_byteorder.h>
+#include <rte_spinlock.h>
+#include <rte_io.h>
 
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 #ifndef __LITTLE_ENDIAN
@@ -304,10 +306,7 @@ struct bnx2x_device_type {
 /* TCP with Timestamp Option (32) + IPv6 (40) */
 
 /* max supported alignment is 256 (8 shift) */
-#define BNX2X_RX_ALIGN_SHIFT 8
-/* FW uses 2 cache lines alignment for start packet and size  */
-#define BNX2X_FW_RX_ALIGN_START (1 << BNX2X_RX_ALIGN_SHIFT)
-#define BNX2X_FW_RX_ALIGN_END   (1 << BNX2X_RX_ALIGN_SHIFT)
+#define BNX2X_RX_ALIGN_SHIFT   RTE_MAX(6, min(8, RTE_CACHE_LINE_SIZE_LOG2))
 
 #define BNX2X_PXP_DRAM_ALIGN (BNX2X_RX_ALIGN_SHIFT - 5)
 
@@ -1031,12 +1030,13 @@ struct bnx2x_softc {
        struct bnx2x_mac_ops mac_ops;
 
        /* structures for VF mbox/response/bulletin */
-       struct bnx2x_vf_mbx_msg *vf2pf_mbox;
-       struct bnx2x_dma                vf2pf_mbox_mapping;
-       struct vf_acquire_resp_tlv acquire_resp;
+       struct bnx2x_vf_mbx_msg         *vf2pf_mbox;
+       struct bnx2x_dma                 vf2pf_mbox_mapping;
+       struct vf_acquire_resp_tlv       acquire_resp;
        struct bnx2x_vf_bulletin        *pf2vf_bulletin;
-       struct bnx2x_dma                pf2vf_bulletin_mapping;
-       struct bnx2x_vf_bulletin        old_bulletin;
+       struct bnx2x_dma                 pf2vf_bulletin_mapping;
+       struct bnx2x_vf_bulletin         old_bulletin;
+       rte_spinlock_t                   vf2pf_lock;
 
        int             media;
 
@@ -1147,11 +1147,12 @@ struct bnx2x_softc {
 #define BNX2X_RECOVERY_NIC_LOADING 5
 
        uint32_t rx_mode;
-#define BNX2X_RX_MODE_NONE     0
-#define BNX2X_RX_MODE_NORMAL   1
-#define BNX2X_RX_MODE_ALLMULTI 2
-#define BNX2X_RX_MODE_PROMISC  3
-#define BNX2X_MAX_MULTICAST    64
+#define BNX2X_RX_MODE_NONE             0
+#define BNX2X_RX_MODE_NORMAL           1
+#define BNX2X_RX_MODE_ALLMULTI         2
+#define BNX2X_RX_MODE_ALLMULTI_PROMISC 3
+#define BNX2X_RX_MODE_PROMISC          4
+#define BNX2X_MAX_MULTICAST            64
 
        struct bnx2x_port port;
 
@@ -1415,34 +1416,90 @@ struct bnx2x_func_init_params {
 #define BAR1 2
 #define BAR2 4
 
+static inline void
+bnx2x_reg_write8(struct bnx2x_softc *sc, size_t offset, uint8_t val)
+{
+       PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%02x",
+                              (unsigned long)offset, val);
+       rte_write8(val, ((uint8_t *)sc->bar[BAR0].base_addr + offset));
+}
+
+static inline void
+bnx2x_reg_write16(struct bnx2x_softc *sc, size_t offset, uint16_t val)
+{
 #ifdef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC
-uint8_t bnx2x_reg_read8(struct bnx2x_softc *sc, size_t offset);
-uint16_t bnx2x_reg_read16(struct bnx2x_softc *sc, size_t offset);
-uint32_t bnx2x_reg_read32(struct bnx2x_softc *sc, size_t offset);
+       if ((offset % 2) != 0)
+               PMD_DRV_LOG(NOTICE, "Unaligned 16-bit write to 0x%08lx",
+                           (unsigned long)offset);
+#endif
+       PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%04x",
+                              (unsigned long)offset, val);
+       rte_write16(val, ((uint8_t *)sc->bar[BAR0].base_addr + offset));
 
-void bnx2x_reg_write8(struct bnx2x_softc *sc, size_t offset, uint8_t val);
-void bnx2x_reg_write16(struct bnx2x_softc *sc, size_t offset, uint16_t val);
-void bnx2x_reg_write32(struct bnx2x_softc *sc, size_t offset, uint32_t val);
-#else
-#define bnx2x_reg_write8(sc, offset, val)\
-       *((volatile uint8_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val
+}
 
-#define bnx2x_reg_write16(sc, offset, val)\
-       *((volatile uint16_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val
+static inline void
+bnx2x_reg_write32(struct bnx2x_softc *sc, size_t offset, uint32_t val)
+{
+#ifdef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC
+       if ((offset % 4) != 0)
+               PMD_DRV_LOG(NOTICE, "Unaligned 32-bit write to 0x%08lx",
+                           (unsigned long)offset);
+#endif
 
-#define bnx2x_reg_write32(sc, offset, val)\
-       *((volatile uint32_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val
+       PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x",
+                              (unsigned long)offset, val);
+       rte_write32(val, ((uint8_t *)sc->bar[BAR0].base_addr + offset));
+}
 
-#define bnx2x_reg_read8(sc, offset)\
-       (*((volatile uint8_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)))
+static inline uint8_t
+bnx2x_reg_read8(struct bnx2x_softc *sc, size_t offset)
+{
+       uint8_t val;
 
-#define bnx2x_reg_read16(sc, offset)\
-       (*((volatile uint16_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)))
+       val = rte_read8((uint8_t *)sc->bar[BAR0].base_addr + offset);
+       PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%02x",
+                              (unsigned long)offset, val);
 
-#define bnx2x_reg_read32(sc, offset)\
-       (*((volatile uint32_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)))
+       return val;
+}
+
+static inline uint16_t
+bnx2x_reg_read16(struct bnx2x_softc *sc, size_t offset)
+{
+       uint16_t val;
+
+#ifdef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC
+       if ((offset % 2) != 0)
+               PMD_DRV_LOG(NOTICE, "Unaligned 16-bit read from 0x%08lx",
+                           (unsigned long)offset);
+#endif
+
+       val = rte_read16(((uint8_t *)sc->bar[BAR0].base_addr + offset));
+       PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x",
+                              (unsigned long)offset, val);
+
+       return val;
+}
+
+static inline uint32_t
+bnx2x_reg_read32(struct bnx2x_softc *sc, size_t offset)
+{
+       uint32_t val;
+
+#ifdef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC
+       if ((offset % 4) != 0)
+               PMD_DRV_LOG(NOTICE, "Unaligned 32-bit read from 0x%08lx",
+                           (unsigned long)offset);
 #endif
 
+       val = rte_read32(((uint8_t *)sc->bar[BAR0].base_addr + offset));
+       PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x",
+                              (unsigned long)offset, val);
+
+       return val;
+}
+
 #define REG_ADDR(sc, offset) (((uint64_t)sc->bar[BAR0].base_addr) + (offset))
 
 #define REG_RD8(sc, offset)  bnx2x_reg_read8(sc, (offset))
@@ -1500,11 +1557,9 @@ void bnx2x_reg_write32(struct bnx2x_softc *sc, size_t offset, uint32_t val);
 #define DPM_TRIGGER_TYPE 0x40
 
 /* Doorbell macro */
-#define BNX2X_DB_WRITE(db_bar, val) \
-       *((volatile uint32_t *)(db_bar)) = (val)
+#define BNX2X_DB_WRITE(db_bar, val) rte_write32_relaxed((val), (db_bar))
 
-#define BNX2X_DB_READ(db_bar) \
-       *((volatile uint32_t *)(db_bar))
+#define BNX2X_DB_READ(db_bar) rte_read32_relaxed(db_bar)
 
 #define DOORBELL_ADDR(sc, offset) \
        (volatile uint32_t *)(((char *)(sc)->bar[BAR1].base_addr + (offset)))
@@ -1883,8 +1938,6 @@ int bnx2x_vf_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
        int leading);
 void bnx2x_free_hsi_mem(struct bnx2x_softc *sc);
 int bnx2x_vf_set_rx_mode(struct bnx2x_softc *sc);
-int bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode,
-       unsigned long *rx_accept_flags, unsigned long *tx_accept_flags);
 int bnx2x_check_bull(struct bnx2x_softc *sc);
 
 //#define BNX2X_PULSE
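
The register accessors above move bnx2x from raw volatile dereferences to the <rte_io.h> helpers introduced in DPDK 17.02 (rte_read8/16/32, rte_write8/16/32 and their _relaxed doorbell variants). A minimal sketch of the same pattern on a generic memory-mapped BAR, with an invented device struct purely for illustration:

#include <stddef.h>
#include <stdint.h>
#include <rte_io.h>

/* illustrative handle: base points at an already-mapped PCI BAR */
struct example_hw {
	void *base;
};

static inline void
example_reg_write32(struct example_hw *hw, size_t offset, uint32_t val)
{
	/* rte_write32() orders the store with an I/O write barrier */
	rte_write32(val, (uint8_t *)hw->base + offset);
}

static inline uint32_t
example_reg_read32(struct example_hw *hw, size_t offset)
{
	/* rte_read32() orders the load with an I/O read barrier */
	return rte_read32((uint8_t *)hw->base + offset);
}

static inline void
example_db_ring(void *db_bar, uint32_t val)
{
	/* relaxed variant: no barrier, mirroring the new BNX2X_DB_WRITE() */
	rte_write32_relaxed(val, db_bar);
}
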
diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c b/src/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c
index f3ab355..a0b0dfa 100644
@@ -17,7 +17,7 @@
  * The set of PCI devices this driver supports
  */
 #define BROADCOM_PCI_VENDOR_ID 0x14E4
-static struct rte_pci_id pci_id_bnx2x_map[] = {
+static const struct rte_pci_id pci_id_bnx2x_map[] = {
        { RTE_PCI_DEVICE(BROADCOM_PCI_VENDOR_ID, CHIP_NUM_57800) },
        { RTE_PCI_DEVICE(BROADCOM_PCI_VENDOR_ID, CHIP_NUM_57711) },
        { RTE_PCI_DEVICE(BROADCOM_PCI_VENDOR_ID, CHIP_NUM_57810) },
@@ -33,7 +33,7 @@ static struct rte_pci_id pci_id_bnx2x_map[] = {
        { .vendor_id = 0, }
 };
 
-static struct rte_pci_id pci_id_bnx2xvf_map[] = {
+static const struct rte_pci_id pci_id_bnx2xvf_map[] = {
        { RTE_PCI_DEVICE(BROADCOM_PCI_VENDOR_ID, CHIP_NUM_57800_VF) },
        { RTE_PCI_DEVICE(BROADCOM_PCI_VENDOR_ID, CHIP_NUM_57810_VF) },
        { RTE_PCI_DEVICE(BROADCOM_PCI_VENDOR_ID, CHIP_NUM_57811_VF) },
@@ -119,12 +119,12 @@ bnx2x_interrupt_action(struct rte_eth_dev *dev)
 }
 
 static __rte_unused void
-bnx2x_interrupt_handler(__rte_unused struct rte_intr_handle *handle, void *param)
+bnx2x_interrupt_handler(struct rte_intr_handle *handle, void *param)
 {
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
 
        bnx2x_interrupt_action(dev);
-       rte_intr_enable(&(dev->pci_dev->intr_handle));
+       rte_intr_enable(handle);
 }
 
 /*
@@ -187,10 +187,10 @@ bnx2x_dev_start(struct rte_eth_dev *dev)
        }
 
        if (IS_PF(sc)) {
-               rte_intr_callback_register(&(dev->pci_dev->intr_handle),
+               rte_intr_callback_register(&sc->pci_dev->intr_handle,
                                bnx2x_interrupt_handler, (void *)dev);
 
-               if(rte_intr_enable(&(dev->pci_dev->intr_handle)))
+               if (rte_intr_enable(&sc->pci_dev->intr_handle))
                        PMD_DRV_LOG(ERR, "rte_intr_enable failed");
        }
 
@@ -203,8 +203,6 @@ bnx2x_dev_start(struct rte_eth_dev *dev)
        /* Print important adapter info for the user. */
        bnx2x_print_adapter_info(sc);
 
-       DELAY_MS(2500);
-
        return ret;
 }
 
@@ -217,8 +215,8 @@ bnx2x_dev_stop(struct rte_eth_dev *dev)
        PMD_INIT_FUNC_TRACE();
 
        if (IS_PF(sc)) {
-               rte_intr_disable(&(dev->pci_dev->intr_handle));
-               rte_intr_callback_unregister(&(dev->pci_dev->intr_handle),
+               rte_intr_disable(&sc->pci_dev->intr_handle);
+               rte_intr_callback_unregister(&sc->pci_dev->intr_handle,
                                bnx2x_interrupt_handler, (void *)dev);
        }
 
@@ -258,6 +256,8 @@ bnx2x_promisc_enable(struct rte_eth_dev *dev)
 
        PMD_INIT_FUNC_TRACE();
        sc->rx_mode = BNX2X_RX_MODE_PROMISC;
+       if (rte_eth_allmulticast_get(dev->data->port_id) == 1)
+               sc->rx_mode = BNX2X_RX_MODE_ALLMULTI_PROMISC;
        bnx2x_set_rx_mode(sc);
 }
 
@@ -268,6 +268,8 @@ bnx2x_promisc_disable(struct rte_eth_dev *dev)
 
        PMD_INIT_FUNC_TRACE();
        sc->rx_mode = BNX2X_RX_MODE_NORMAL;
+       if (rte_eth_allmulticast_get(dev->data->port_id) == 1)
+               sc->rx_mode = BNX2X_RX_MODE_ALLMULTI;
        bnx2x_set_rx_mode(sc);
 }
 
@@ -278,6 +280,8 @@ bnx2x_dev_allmulticast_enable(struct rte_eth_dev *dev)
 
        PMD_INIT_FUNC_TRACE();
        sc->rx_mode = BNX2X_RX_MODE_ALLMULTI;
+       if (rte_eth_promiscuous_get(dev->data->port_id) == 1)
+               sc->rx_mode = BNX2X_RX_MODE_ALLMULTI_PROMISC;
        bnx2x_set_rx_mode(sc);
 }
 
@@ -288,6 +292,8 @@ bnx2x_dev_allmulticast_disable(struct rte_eth_dev *dev)
 
        PMD_INIT_FUNC_TRACE();
        sc->rx_mode = BNX2X_RX_MODE_NORMAL;
+       if (rte_eth_promiscuous_get(dev->data->port_id) == 1)
+               sc->rx_mode = BNX2X_RX_MODE_PROMISC;
        bnx2x_set_rx_mode(sc);
 }
 
@@ -424,6 +430,7 @@ bnx2x_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
                        xstats[num].value =
                                          *(uint64_t *)((char *)&sc->eth_stats +
                                          bnx2x_xstats_strings[num].offset_lo);
+               xstats[num].id = num;
        }
 
        return num;
@@ -433,6 +440,7 @@ static void
 bnx2x_dev_infos_get(struct rte_eth_dev *dev, __rte_unused struct rte_eth_dev_info *dev_info)
 {
        struct bnx2x_softc *sc = dev->data->dev_private;
+       dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device);
        dev_info->max_rx_queues  = sc->max_rx_queues;
        dev_info->max_tx_queues  = sc->max_tx_queues;
        dev_info->min_rx_bufsize = BNX2X_MIN_RX_BUF_SIZE;
@@ -518,7 +526,7 @@ bnx2x_common_dev_init(struct rte_eth_dev *eth_dev, int is_vf)
        PMD_INIT_FUNC_TRACE();
 
        eth_dev->dev_ops = is_vf ? &bnx2xvf_eth_dev_ops : &bnx2x_eth_dev_ops;
-       pci_dev = eth_dev->pci_dev;
+       pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
 
        rte_eth_copy_pci_info(eth_dev, pci_dev);
 
@@ -577,6 +585,8 @@ bnx2x_common_dev_init(struct rte_eth_dev *eth_dev, int is_vf)
                        eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id);
 
        if (IS_VF(sc)) {
+               rte_spinlock_init(&sc->vf2pf_lock);
+
                if (bnx2x_dma_alloc(sc, sizeof(struct bnx2x_vf_mbx_msg),
                                    &sc->vf2pf_mbox_mapping, "vf2pf_mbox",
                                    RTE_CACHE_LINE_SIZE) != 0)
@@ -618,9 +628,10 @@ eth_bnx2xvf_dev_init(struct rte_eth_dev *eth_dev)
 
 static struct eth_driver rte_bnx2x_pmd = {
        .pci_drv = {
-               .name = "rte_bnx2x_pmd",
                .id_table = pci_id_bnx2x_map,
                .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+               .probe = rte_eth_dev_pci_probe,
+               .remove = rte_eth_dev_pci_remove,
        },
        .eth_dev_init = eth_bnx2x_dev_init,
        .dev_private_size = sizeof(struct bnx2x_softc),
@@ -631,41 +642,18 @@ static struct eth_driver rte_bnx2x_pmd = {
  */
 static struct eth_driver rte_bnx2xvf_pmd = {
        .pci_drv = {
-               .name = "rte_bnx2xvf_pmd",
                .id_table = pci_id_bnx2xvf_map,
                .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
+               .probe = rte_eth_dev_pci_probe,
+               .remove = rte_eth_dev_pci_remove,
        },
        .eth_dev_init = eth_bnx2xvf_dev_init,
        .dev_private_size = sizeof(struct bnx2x_softc),
 };
 
-static int rte_bnx2x_pmd_init(const char *name __rte_unused, const char *params __rte_unused)
-{
-       PMD_INIT_FUNC_TRACE();
-       rte_eth_driver_register(&rte_bnx2x_pmd);
-
-       return 0;
-}
-
-static int rte_bnx2xvf_pmd_init(const char *name __rte_unused, const char *params __rte_unused)
-{
-       PMD_INIT_FUNC_TRACE();
-       rte_eth_driver_register(&rte_bnx2xvf_pmd);
-
-       return 0;
-}
-
-static struct rte_driver rte_bnx2x_driver = {
-       .type = PMD_PDEV,
-       .init = rte_bnx2x_pmd_init,
-};
-
-static struct rte_driver rte_bnx2xvf_driver = {
-       .type = PMD_PDEV,
-       .init = rte_bnx2xvf_pmd_init,
-};
-
-PMD_REGISTER_DRIVER(rte_bnx2x_driver, bnx2x);
-DRIVER_REGISTER_PCI_TABLE(bnx2x, pci_id_bnx2x_map);
-PMD_REGISTER_DRIVER(rte_bnx2xvf_driver, bnx2xvf);
-DRIVER_REGISTER_PCI_TABLE(bnx2xvf, pci_id_bnx2xvf_map);
+RTE_PMD_REGISTER_PCI(net_bnx2x, rte_bnx2x_pmd.pci_drv);
+RTE_PMD_REGISTER_PCI_TABLE(net_bnx2x, pci_id_bnx2x_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_bnx2x, "* igb_uio | uio_pci_generic | vfio");
+RTE_PMD_REGISTER_PCI(net_bnx2xvf, rte_bnx2xvf_pmd.pci_drv);
+RTE_PMD_REGISTER_PCI_TABLE(net_bnx2xvf, pci_id_bnx2xvf_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_bnx2xvf, "* igb_uio | vfio");
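
The registration tail of bnx2x_ethdev.c above is the PCI-side counterpart of the vdev change: the per-driver init shims and struct rte_driver blocks are gone, and the eth_driver now plugs rte_eth_dev_pci_probe()/rte_eth_dev_pci_remove() directly into its pci_drv. A hedged sketch of that shape for a hypothetical PCI PMD (vendor/device IDs and names are placeholders, not from this commit):

#include <rte_dev.h>
#include <rte_ethdev.h>
#include <rte_pci.h>

#define EXAMPLE_VENDOR_ID 0x1234   /* placeholder */
#define EXAMPLE_DEVICE_ID 0xabcd   /* placeholder */

static const struct rte_pci_id pci_id_example_map[] = {
	{ RTE_PCI_DEVICE(EXAMPLE_VENDOR_ID, EXAMPLE_DEVICE_ID) },
	{ .vendor_id = 0, }
};

struct example_adapter { int unused; };   /* stand-in for per-port private data */

static int
eth_example_dev_init(struct rte_eth_dev *eth_dev)
{
	/* a real init would set dev_ops, rx/tx burst handlers and the MAC address */
	(void)eth_dev;
	return 0;
}

static struct eth_driver rte_example_pmd = {
	.pci_drv = {
		.id_table = pci_id_example_map,
		.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
		.probe = rte_eth_dev_pci_probe,
		.remove = rte_eth_dev_pci_remove,
	},
	.eth_dev_init = eth_example_dev_init,
	.dev_private_size = sizeof(struct example_adapter),
};

RTE_PMD_REGISTER_PCI(net_example, rte_example_pmd.pci_drv);
RTE_PMD_REGISTER_PCI_TABLE(net_example, pci_id_example_map);
RTE_PMD_REGISTER_KMOD_DEP(net_example, "* igb_uio | uio_pci_generic | vfio");
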
diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c b/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c
index 0ec4f89..170e48f 100644
@@ -19,7 +19,8 @@ ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
        const struct rte_memzone *mz;
 
        snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
-                       dev->driver->pci_drv.name, ring_name, dev->data->port_id, queue_id);
+                       dev->driver->pci_drv.driver.name, ring_name,
+                       dev->data->port_id, queue_id);
 
        mz = rte_memzone_lookup(z_name);
        if (mz)
@@ -59,7 +60,7 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev,
                       uint16_t queue_idx,
                       uint16_t nb_desc,
                       unsigned int socket_id,
-                      const struct rte_eth_rxconf *rx_conf,
+                      __rte_unused const struct rte_eth_rxconf *rx_conf,
                       struct rte_mempool *mp)
 {
        uint16_t j, idx;
@@ -84,7 +85,6 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev,
        rxq->mb_pool = mp;
        rxq->queue_id = queue_idx;
        rxq->port_id = dev->data->port_id;
-       rxq->crc_len = (uint8_t)((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 : ETHER_CRC_LEN);
 
        rxq->nb_rx_pages = 1;
        while (USABLE_RX_BD(rxq) < nb_desc)
@@ -94,13 +94,9 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev,
        sc->rx_ring_size = USABLE_RX_BD(rxq);
        rxq->nb_cq_pages = RCQ_BD_PAGES(rxq);
 
-       rxq->rx_free_thresh = rx_conf->rx_free_thresh ?
-               rx_conf->rx_free_thresh : DEFAULT_RX_FREE_THRESH;
-
-       PMD_INIT_LOG(DEBUG, "fp[%02d] req_bd=%u, thresh=%u, usable_bd=%lu, "
+       PMD_INIT_LOG(DEBUG, "fp[%02d] req_bd=%u, usable_bd=%lu, "
                       "total_bd=%lu, rx_pages=%u, cq_pages=%u",
-                      queue_idx, nb_desc, rxq->rx_free_thresh,
-                      (unsigned long)USABLE_RX_BD(rxq),
+                      queue_idx, nb_desc, (unsigned long)USABLE_RX_BD(rxq),
                       (unsigned long)TOTAL_RX_BD(rxq), rxq->nb_rx_pages,
                       rxq->nb_cq_pages);
 
@@ -135,7 +131,6 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev,
        }
 
        /* Initialize software ring entries */
-       rxq->rx_mbuf_alloc = 0;
        for (idx = 0; idx < rxq->nb_rx_desc; idx = NEXT_RX_BD(idx)) {
                mbuf = rte_mbuf_raw_alloc(mp);
                if (NULL == mbuf) {
@@ -146,7 +141,6 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev,
                }
                rxq->sw_ring[idx] = mbuf;
                rxq->rx_ring[idx] = mbuf->buf_physaddr;
-               rxq->rx_mbuf_alloc++;
        }
        rxq->pkt_first_seg = NULL;
        rxq->pkt_last_seg = NULL;
diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.h b/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.h
index ccb22fc..dd251aa 100644
@@ -11,8 +11,6 @@
 #ifndef _BNX2X_RXTX_H_
 #define _BNX2X_RXTX_H_
 
-
-#define DEFAULT_RX_FREE_THRESH   0
 #define DEFAULT_TX_FREE_THRESH   512
 #define RTE_PMD_BNX2X_TX_MAX_BURST 1
 
@@ -42,13 +40,9 @@ struct bnx2x_rx_queue {
        uint16_t                   rx_bd_tail;           /**< Index of last rx bd. */
        uint16_t                   rx_cq_head;           /**< Index of current rcq bd. */
        uint16_t                   rx_cq_tail;           /**< Index of last rcq bd. */
-       uint16_t                   nb_rx_hold;           /**< number of held free RX desc. */
-       uint16_t                   rx_free_thresh;       /**< max free RX desc to hold. */
        uint16_t                   queue_id;             /**< RX queue index. */
        uint8_t                    port_id;              /**< Device port identifier. */
-       uint8_t                    crc_len;              /**< 0 if CRC stripped, 4 otherwise. */
        struct bnx2x_softc           *sc;                  /**< Ptr to dev_private data. */
-       uint64_t                   rx_mbuf_alloc;        /**< Number of allocated mbufs. */
 };
 
 /**
diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.c b/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.c
index 1c895f8..0ca0df8 100644
@@ -64,25 +64,46 @@ bnx2x_check_bull(struct bnx2x_softc *sc)
        return TRUE;
 }
 
-/* add tlv to a buffer */
-#define BNX2X_TLV_APPEND(_tlvs, _offset, _type, _length) \
-       ((struct vf_first_tlv *)((unsigned long)_tlvs + _offset))->type   = _type; \
-       ((struct vf_first_tlv *)((unsigned long)_tlvs + _offset))->length = _length
+/* place a given tlv on the tlv buffer at a given offset */
+static void
+bnx2x_add_tlv(__rte_unused struct bnx2x_softc *sc, void *tlvs_list,
+             uint16_t offset, uint16_t type, uint16_t length)
+{
+       struct channel_tlv *tl = (struct channel_tlv *)
+                                       ((unsigned long)tlvs_list + offset);
+
+       tl->type = type;
+       tl->length = length;
+}
 
 /* Initiliaze header of the first tlv and clear mailbox*/
 static void
-bnx2x_init_first_tlv(struct bnx2x_softc *sc, struct vf_first_tlv *tlv,
-       uint16_t type, uint16_t len)
+bnx2x_vf_prep(struct bnx2x_softc *sc, struct vf_first_tlv *first_tlv,
+             uint16_t type, uint16_t length)
 {
        struct bnx2x_vf_mbx_msg *mbox = sc->vf2pf_mbox;
+
+       rte_spinlock_lock(&sc->vf2pf_lock);
+
        PMD_DRV_LOG(DEBUG, "Preparing %d tlv for sending", type);
 
        memset(mbox, 0, sizeof(struct bnx2x_vf_mbx_msg));
 
-       BNX2X_TLV_APPEND(tlv, 0, type, len);
+       bnx2x_add_tlv(sc, &first_tlv->tl, 0, type, length);
 
        /* Initialize header of the first tlv */
-       tlv->reply_offset = sizeof(mbox->query);
+       first_tlv->reply_offset = sizeof(mbox->query);
+}
+
+/* releases the mailbox */
+static void
+bnx2x_vf_finalize(struct bnx2x_softc *sc,
+                 __rte_unused struct vf_first_tlv *first_tlv)
+{
+       PMD_DRV_LOG(DEBUG, "done sending [%d] tlv over vf pf channel",
+                   first_tlv->tl.type);
+
+       rte_spinlock_unlock(&sc->vf2pf_lock);
 }
 
 #define BNX2X_VF_CMD_ADDR_LO PXP_VF_ADDR_CSDM_GLOBAL_START
@@ -97,39 +118,36 @@ bnx2x_do_req4pf(struct bnx2x_softc *sc, phys_addr_t phys_addr)
        uint8_t *status = &sc->vf2pf_mbox->resp.common_reply.status;
        uint8_t i;
 
-       if (!*status) {
-               bnx2x_check_bull(sc);
-               if (sc->old_bulletin.valid_bitmap & (1 << CHANNEL_DOWN)) {
-                       PMD_DRV_LOG(ERR, "channel is down. Aborting message sending");
-                       *status = BNX2X_VF_STATUS_SUCCESS;
-                       return 0;
-               }
+       if (*status) {
+               PMD_DRV_LOG(ERR, "status should be zero before message"
+                                " to pf was sent");
+               return -EINVAL;
+       }
 
-               REG_WR(sc, BNX2X_VF_CMD_ADDR_LO, U64_LO(phys_addr));
-               REG_WR(sc, BNX2X_VF_CMD_ADDR_HI, U64_HI(phys_addr));
+       bnx2x_check_bull(sc);
+       if (sc->old_bulletin.valid_bitmap & (1 << CHANNEL_DOWN)) {
+               PMD_DRV_LOG(ERR, "channel is down. Aborting message sending");
+               return -EINVAL;
+       }
 
-               /* memory barrier to ensure that FW can read phys_addr */
-               wmb();
+       REG_WR(sc, BNX2X_VF_CMD_ADDR_LO, U64_LO(phys_addr));
+       REG_WR(sc, BNX2X_VF_CMD_ADDR_HI, U64_HI(phys_addr));
 
-               REG_WR8(sc, BNX2X_VF_CMD_TRIGGER, 1);
+       /* memory barrier to ensure that FW can read phys_addr */
+       wmb();
 
-               /* Do several attempts until PF completes
-                * "." is used to show progress
-                */
-               for (i = 0; i < BNX2X_VF_CHANNEL_TRIES; i++) {
-                       DELAY_MS(BNX2X_VF_CHANNEL_DELAY);
-                       if (*status)
-                               break;
-               }
+       REG_WR8(sc, BNX2X_VF_CMD_TRIGGER, 1);
 
-               if (!*status) {
-                       PMD_DRV_LOG(ERR, "Response from PF timed out");
-                       return -EAGAIN;
-               }
-       } else {
-               PMD_DRV_LOG(ERR, "status should be zero before message"
-                               "to pf was sent");
-               return -EINVAL;
+       /* Do several attempts until PF completes */
+       for (i = 0; i < BNX2X_VF_CHANNEL_TRIES; i++) {
+               DELAY_MS(BNX2X_VF_CHANNEL_DELAY);
+               if (*status)
+                       break;
+       }
+
+       if (!*status) {
+               PMD_DRV_LOG(ERR, "Response from PF timed out");
+               return -EAGAIN;
        }
 
        PMD_DRV_LOG(DEBUG, "Response from PF was received");
@@ -168,31 +186,23 @@ static inline int bnx2x_read_vf_id(struct bnx2x_softc *sc)
 #define BNX2X_VF_OBTAIN_MAC_FILTERS 1
 #define BNX2X_VF_OBTAIN_MC_FILTERS 10
 
-struct bnx2x_obtain_status {
-       int success;
-       int err_code;
-};
-
 static
-struct bnx2x_obtain_status bnx2x_loop_obtain_resources(struct bnx2x_softc *sc)
+int bnx2x_loop_obtain_resources(struct bnx2x_softc *sc)
 {
-       int tries = 0;
        struct vf_acquire_resp_tlv *resp = &sc->vf2pf_mbox->resp.acquire_resp,
-                                                                *sc_resp = &sc->acquire_resp;
-       struct vf_resource_query    *res_query;
-       struct vf_resc            *resc;
-       struct bnx2x_obtain_status     status;
+                                  *sc_resp = &sc->acquire_resp;
+       struct vf_resource_query   *res_query;
+       struct vf_resc             *resc;
        int res_obtained = false;
+       int tries = 0;
+       int rc;
 
        do {
                PMD_DRV_LOG(DEBUG, "trying to get resources");
 
-               if (bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr)) {
-                       /* timeout */
-                       status.success = 0;
-                       status.err_code = -EAGAIN;
-                       return status;
-               }
+               rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
+               if (rc)
+                       return rc;
 
                memcpy(sc_resp, resp, sizeof(sc->acquire_resp));
 
@@ -203,12 +213,12 @@ struct bnx2x_obtain_status bnx2x_loop_obtain_resources(struct bnx2x_softc *sc)
                        PMD_DRV_LOG(DEBUG, "resources obtained successfully");
                        res_obtained = true;
                } else if (sc_resp->status == BNX2X_VF_STATUS_NO_RESOURCES &&
-                       tries < BNX2X_VF_OBTAIN_MAX_TRIES) {
+                          tries < BNX2X_VF_OBTAIN_MAX_TRIES) {
                        PMD_DRV_LOG(DEBUG,
                           "PF cannot allocate requested amount of resources");
 
                        res_query = &sc->vf2pf_mbox->query[0].acquire.res_query;
-                       resc     = &sc_resp->resc;
+                       resc      = &sc_resp->resc;
 
                        /* PF refused our request. Try to decrease request params */
                        res_query->num_txqs         = min(res_query->num_txqs, resc->num_txqs);
@@ -220,30 +230,30 @@ struct bnx2x_obtain_status bnx2x_loop_obtain_resources(struct bnx2x_softc *sc)
 
                        memset(&sc->vf2pf_mbox->resp, 0, sizeof(union resp_tlvs));
                } else {
-                       PMD_DRV_LOG(ERR, "Resources cannot be obtained. Status of handling: %d. Aborting",
-                                       sc_resp->status);
-                       status.success = 0;
-                       status.err_code = -EAGAIN;
-                       return status;
+                       PMD_DRV_LOG(ERR, "Failed to get the requested "
+                                        "amount of resources: %d.",
+                                        sc_resp->status);
+                       return -EINVAL;
                }
        } while (!res_obtained);
 
-       status.success = 1;
-       return status;
+       return 0;
 }
 
 int bnx2x_vf_get_resources(struct bnx2x_softc *sc, uint8_t tx_count, uint8_t rx_count)
 {
        struct vf_acquire_tlv *acq = &sc->vf2pf_mbox->query[0].acquire;
        int vf_id;
-       struct bnx2x_obtain_status obtain_status;
+       int rc;
 
        bnx2x_vf_close(sc);
-       bnx2x_init_first_tlv(sc, &acq->first_tlv, BNX2X_VF_TLV_ACQUIRE, sizeof(*acq));
+       bnx2x_vf_prep(sc, &acq->first_tlv, BNX2X_VF_TLV_ACQUIRE, sizeof(*acq));
 
        vf_id = bnx2x_read_vf_id(sc);
-       if (vf_id < 0)
-               return -EAGAIN;
+       if (vf_id < 0) {
+               rc = -EAGAIN;
+               goto out;
+       }
 
        acq->vf_id = vf_id;
 
@@ -256,19 +266,19 @@ int bnx2x_vf_get_resources(struct bnx2x_softc *sc, uint8_t tx_count, uint8_t rx_
        acq->bulletin_addr = sc->pf2vf_bulletin_mapping.paddr;
 
        /* Request physical port identifier */
-       BNX2X_TLV_APPEND(acq, acq->first_tlv.length,
-                        BNX2X_VF_TLV_PHYS_PORT_ID,
-                        sizeof(struct channel_tlv));
+       bnx2x_add_tlv(sc, acq, acq->first_tlv.tl.length,
+                     BNX2X_VF_TLV_PHYS_PORT_ID,
+                     sizeof(struct channel_tlv));
 
-       BNX2X_TLV_APPEND(acq,
-                        (acq->first_tlv.length + sizeof(struct channel_tlv)),
-                        BNX2X_VF_TLV_LIST_END,
-                        sizeof(struct channel_list_end_tlv));
+       bnx2x_add_tlv(sc, acq,
+                     (acq->first_tlv.tl.length + sizeof(struct channel_tlv)),
+                     BNX2X_VF_TLV_LIST_END,
+                     sizeof(struct channel_list_end_tlv));
 
        /* requesting the resources in loop */
-       obtain_status = bnx2x_loop_obtain_resources(sc);
-       if (!obtain_status.success)
-               return obtain_status.err_code;
+       rc = bnx2x_loop_obtain_resources(sc);
+       if (rc)
+               goto out;
 
        struct vf_acquire_resp_tlv sc_resp = sc->acquire_resp;
 
@@ -299,7 +309,10 @@ int bnx2x_vf_get_resources(struct bnx2x_softc *sc, uint8_t tx_count, uint8_t rx_
        else
                eth_random_addr(sc->link_params.mac_addr);
 
-       return 0;
+out:
+       bnx2x_vf_finalize(sc, &acq->first_tlv);
+
+       return rc;
 }
 
 /* Ask PF to release VF's resources */
@@ -309,19 +322,23 @@ bnx2x_vf_close(struct bnx2x_softc *sc)
        struct vf_release_tlv *query;
        struct vf_common_reply_tlv *reply = &sc->vf2pf_mbox->resp.common_reply;
        int vf_id = bnx2x_read_vf_id(sc);
+       int rc;
 
        if (vf_id >= 0) {
                query = &sc->vf2pf_mbox->query[0].release;
-               bnx2x_init_first_tlv(sc, &query->first_tlv, BNX2X_VF_TLV_RELEASE,
-                               sizeof(*query));
+               bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_RELEASE,
+                             sizeof(*query));
 
                query->vf_id = vf_id;
-               BNX2X_TLV_APPEND(query, query->first_tlv.length, BNX2X_VF_TLV_LIST_END,
-                               sizeof(struct channel_list_end_tlv));
+               bnx2x_add_tlv(sc, query, query->first_tlv.tl.length,
+                             BNX2X_VF_TLV_LIST_END,
+                             sizeof(struct channel_list_end_tlv));
 
-               bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
-               if (reply->status != BNX2X_VF_STATUS_SUCCESS)
+               rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
+               if (rc || reply->status != BNX2X_VF_STATUS_SUCCESS)
                        PMD_DRV_LOG(ERR, "Failed to release VF");
+
+               bnx2x_vf_finalize(sc, &query->first_tlv);
        }
 }
 
@@ -331,11 +348,11 @@ bnx2x_vf_init(struct bnx2x_softc *sc)
 {
        struct vf_init_tlv *query;
        struct vf_common_reply_tlv *reply = &sc->vf2pf_mbox->resp.common_reply;
-       int i;
+       int i, rc;
 
        query = &sc->vf2pf_mbox->query[0].init;
-       bnx2x_init_first_tlv(sc, &query->first_tlv, BNX2X_VF_TLV_INIT,
-                       sizeof(*query));
+       bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_INIT,
+                     sizeof(*query));
 
        FOR_EACH_QUEUE(sc, i) {
                query->sb_addr[i] = (unsigned long)(sc->fp[i].sb_dma.paddr);
@@ -345,17 +362,23 @@ bnx2x_vf_init(struct bnx2x_softc *sc)
        query->stats_addr = sc->fw_stats_data_mapping +
                offsetof(struct bnx2x_fw_stats_data, queue_stats);
 
-       BNX2X_TLV_APPEND(query, query->first_tlv.length, BNX2X_VF_TLV_LIST_END,
-                       sizeof(struct channel_list_end_tlv));
+       bnx2x_add_tlv(sc, query, query->first_tlv.tl.length,
+                     BNX2X_VF_TLV_LIST_END,
+                     sizeof(struct channel_list_end_tlv));
 
-       bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
+       rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
+       if (rc)
+               goto out;
        if (reply->status != BNX2X_VF_STATUS_SUCCESS) {
                PMD_DRV_LOG(ERR, "Failed to init VF");
-               return -EINVAL;
+               rc = -EINVAL;
+               goto out;
        }
 
        PMD_DRV_LOG(DEBUG, "VF was initialized");
-       return 0;
+out:
+       bnx2x_vf_finalize(sc, &query->first_tlv);
+       return rc;
 }
 
 void
@@ -364,44 +387,49 @@ bnx2x_vf_unload(struct bnx2x_softc *sc)
        struct vf_close_tlv *query;
        struct vf_common_reply_tlv *reply = &sc->vf2pf_mbox->resp.common_reply;
        struct vf_q_op_tlv *query_op;
-       int i, vf_id;
+       int i, vf_id, rc;
 
        vf_id = bnx2x_read_vf_id(sc);
        if (vf_id > 0) {
                FOR_EACH_QUEUE(sc, i) {
                        query_op = &sc->vf2pf_mbox->query[0].q_op;
-                       bnx2x_init_first_tlv(sc, &query_op->first_tlv,
-                                       BNX2X_VF_TLV_TEARDOWN_Q,
-                                       sizeof(*query_op));
+                       bnx2x_vf_prep(sc, &query_op->first_tlv,
+                                     BNX2X_VF_TLV_TEARDOWN_Q,
+                                     sizeof(*query_op));
 
                        query_op->vf_qid = i;
 
-                       BNX2X_TLV_APPEND(query_op, query_op->first_tlv.length,
-                                       BNX2X_VF_TLV_LIST_END,
-                                       sizeof(struct channel_list_end_tlv));
+                       bnx2x_add_tlv(sc, query_op,
+                                     query_op->first_tlv.tl.length,
+                                     BNX2X_VF_TLV_LIST_END,
+                                     sizeof(struct channel_list_end_tlv));
 
-                       bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
-                       if (reply->status != BNX2X_VF_STATUS_SUCCESS)
+                       rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
+                       if (rc || reply->status != BNX2X_VF_STATUS_SUCCESS)
                                PMD_DRV_LOG(ERR,
                                            "Bad reply for vf_q %d teardown", i);
+
+                       bnx2x_vf_finalize(sc, &query_op->first_tlv);
                }
 
                bnx2x_vf_set_mac(sc, false);
 
                query = &sc->vf2pf_mbox->query[0].close;
-               bnx2x_init_first_tlv(sc, &query->first_tlv, BNX2X_VF_TLV_CLOSE,
-                               sizeof(*query));
+               bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_CLOSE,
+                             sizeof(*query));
 
                query->vf_id = vf_id;
 
-               BNX2X_TLV_APPEND(query, query->first_tlv.length,
-                               BNX2X_VF_TLV_LIST_END,
-                               sizeof(struct channel_list_end_tlv));
+               bnx2x_add_tlv(sc, query, query->first_tlv.tl.length,
+                             BNX2X_VF_TLV_LIST_END,
+                             sizeof(struct channel_list_end_tlv));
 
-               bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
-               if (reply->status != BNX2X_VF_STATUS_SUCCESS)
+               rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
+               if (rc || reply->status != BNX2X_VF_STATUS_SUCCESS)
                        PMD_DRV_LOG(ERR,
                                    "Bad reply from PF for close message");
+
+               bnx2x_vf_finalize(sc, &query->first_tlv);
        }
 }
 
@@ -466,10 +494,11 @@ bnx2x_vf_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, int lead
        struct vf_setup_q_tlv *query;
        struct vf_common_reply_tlv *reply = &sc->vf2pf_mbox->resp.common_reply;
        uint16_t flags = bnx2x_vf_q_flags(leading);
+       int rc;
 
        query = &sc->vf2pf_mbox->query[0].setup_q;
-       bnx2x_init_first_tlv(sc, &query->first_tlv, BNX2X_VF_TLV_SETUP_Q,
-                       sizeof(*query));
+       bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_SETUP_Q,
+                     sizeof(*query));
 
        query->vf_qid = fp->index;
        query->param_valid = VF_RXQ_VALID | VF_TXQ_VALID;
@@ -477,17 +506,22 @@ bnx2x_vf_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, int lead
        bnx2x_vf_rx_q_prep(sc, fp, &query->rxq, flags);
        bnx2x_vf_tx_q_prep(sc, fp, &query->txq, flags);
 
-       BNX2X_TLV_APPEND(query, query->first_tlv.length, BNX2X_VF_TLV_LIST_END,
-                       sizeof(struct channel_list_end_tlv));
+       bnx2x_add_tlv(sc, query, query->first_tlv.tl.length,
+                     BNX2X_VF_TLV_LIST_END,
+                     sizeof(struct channel_list_end_tlv));
 
-       bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
+       rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
+       if (rc)
+               goto out;
        if (reply->status != BNX2X_VF_STATUS_SUCCESS) {
                PMD_DRV_LOG(ERR, "Failed to setup VF queue[%d]",
                                 fp->index);
-               return -EINVAL;
+               rc = -EINVAL;
        }
+out:
+       bnx2x_vf_finalize(sc, &query->first_tlv);
 
-       return 0;
+       return rc;
 }
 
 int
@@ -495,9 +529,10 @@ bnx2x_vf_set_mac(struct bnx2x_softc *sc, int set)
 {
        struct vf_set_q_filters_tlv *query;
        struct vf_common_reply_tlv *reply;
+       int rc;
 
        query = &sc->vf2pf_mbox->query[0].set_q_filters;
-       bnx2x_init_first_tlv(sc, &query->first_tlv, BNX2X_VF_TLV_SET_Q_FILTERS,
+       bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_SET_Q_FILTERS,
                        sizeof(*query));
 
        query->vf_qid = sc->fp->index;
@@ -511,10 +546,13 @@ bnx2x_vf_set_mac(struct bnx2x_softc *sc, int set)
 
        rte_memcpy(query->filters[0].mac, sc->link_params.mac_addr, ETH_ALEN);
 
-       BNX2X_TLV_APPEND(query, query->first_tlv.length, BNX2X_VF_TLV_LIST_END,
-                       sizeof(struct channel_list_end_tlv));
+       bnx2x_add_tlv(sc, query, query->first_tlv.tl.length,
+                     BNX2X_VF_TLV_LIST_END,
+                     sizeof(struct channel_list_end_tlv));
 
-       bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
+       rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
+       if (rc)
+               goto out;
        reply = &sc->vf2pf_mbox->resp.common_reply;
 
        while (BNX2X_VF_STATUS_FAILURE == reply->status &&
@@ -525,16 +563,20 @@ bnx2x_vf_set_mac(struct bnx2x_softc *sc, int set)
                rte_memcpy(query->filters[0].mac, sc->pf2vf_bulletin->mac,
                                ETH_ALEN);
 
-               bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
+               rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
+               if (rc)
+                       goto out;
        }
 
        if (BNX2X_VF_STATUS_SUCCESS != reply->status) {
                PMD_DRV_LOG(ERR, "Bad reply from PF for SET MAC message: %d",
                                reply->status);
-               return -EINVAL;
+               rc = -EINVAL;
        }
+out:
+       bnx2x_vf_finalize(sc, &query->first_tlv);
 
-       return 0;
+       return rc;
 }
 
 int
@@ -543,15 +585,17 @@ bnx2x_vf_config_rss(struct bnx2x_softc *sc,
 {
        struct vf_rss_tlv *query;
        struct vf_common_reply_tlv *reply = &sc->vf2pf_mbox->resp.common_reply;
+       int rc;
 
        query = &sc->vf2pf_mbox->query[0].update_rss;
 
-       bnx2x_init_first_tlv(sc, &query->first_tlv, BNX2X_VF_TLV_UPDATE_RSS,
+       bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_UPDATE_RSS,
                        sizeof(*query));
 
        /* add list termination tlv */
-       BNX2X_TLV_APPEND(query, query->first_tlv.length, BNX2X_VF_TLV_LIST_END,
-                       sizeof(struct channel_list_end_tlv));
+       bnx2x_add_tlv(sc, query, query->first_tlv.tl.length,
+                     BNX2X_VF_TLV_LIST_END,
+                     sizeof(struct channel_list_end_tlv));
 
        rte_memcpy(query->rss_key, params->rss_key, sizeof(params->rss_key));
        query->rss_key_size = T_ETH_RSS_KEY;
@@ -562,13 +606,18 @@ bnx2x_vf_config_rss(struct bnx2x_softc *sc,
        query->rss_result_mask = params->rss_result_mask;
        query->rss_flags = params->rss_flags;
 
-       bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
+       rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
+       if (rc)
+               goto out;
+
        if (reply->status != BNX2X_VF_STATUS_SUCCESS) {
                PMD_DRV_LOG(ERR, "Failed to configure RSS");
-               return -EINVAL;
+               rc = -EINVAL;
        }
+out:
+       bnx2x_vf_finalize(sc, &query->first_tlv);
 
-       return 0;
+       return rc;
 }
 
 int
@@ -576,27 +625,56 @@ bnx2x_vf_set_rx_mode(struct bnx2x_softc *sc)
 {
        struct vf_set_q_filters_tlv *query;
        struct vf_common_reply_tlv *reply = &sc->vf2pf_mbox->resp.common_reply;
-       unsigned long tx_mask;
+       int rc;
 
        query = &sc->vf2pf_mbox->query[0].set_q_filters;
-       bnx2x_init_first_tlv(sc, &query->first_tlv, BNX2X_VF_TLV_SET_Q_FILTERS,
+       bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_SET_Q_FILTERS,
                        sizeof(*query));
 
        query->vf_qid = 0;
        query->flags = BNX2X_VF_RX_MASK_CHANGED;
 
-       if (bnx2x_fill_accept_flags(sc, sc->rx_mode, &query->rx_mask, &tx_mask)) {
-               return -EINVAL;
+       switch (sc->rx_mode) {
+       case BNX2X_RX_MODE_NONE: /* no Rx */
+               query->rx_mask = VFPF_RX_MASK_ACCEPT_NONE;
+               break;
+       case BNX2X_RX_MODE_NORMAL:
+               query->rx_mask = VFPF_RX_MASK_ACCEPT_MATCHED_MULTICAST;
+               query->rx_mask |= VFPF_RX_MASK_ACCEPT_MATCHED_UNICAST;
+               query->rx_mask |= VFPF_RX_MASK_ACCEPT_BROADCAST;
+               break;
+       case BNX2X_RX_MODE_ALLMULTI:
+               query->rx_mask = VFPF_RX_MASK_ACCEPT_ALL_MULTICAST;
+               query->rx_mask |= VFPF_RX_MASK_ACCEPT_MATCHED_UNICAST;
+               query->rx_mask |= VFPF_RX_MASK_ACCEPT_BROADCAST;
+               break;
+       case BNX2X_RX_MODE_ALLMULTI_PROMISC:
+       case BNX2X_RX_MODE_PROMISC:
+               query->rx_mask = VFPF_RX_MASK_ACCEPT_ALL_UNICAST;
+               query->rx_mask |= VFPF_RX_MASK_ACCEPT_ALL_MULTICAST;
+               query->rx_mask |= VFPF_RX_MASK_ACCEPT_BROADCAST;
+               break;
+       default:
+               PMD_DRV_LOG(ERR, "BAD rx mode (%d)", sc->rx_mode);
+               rc = -EINVAL;
+               goto out;
        }
 
-       BNX2X_TLV_APPEND(query, query->first_tlv.length, BNX2X_VF_TLV_LIST_END,
-                       sizeof(struct channel_list_end_tlv));
+       bnx2x_add_tlv(sc, query, query->first_tlv.tl.length,
+                     BNX2X_VF_TLV_LIST_END,
+                     sizeof(struct channel_list_end_tlv));
+
+       rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
+       if (rc)
+               goto out;
 
-       bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
        if (reply->status != BNX2X_VF_STATUS_SUCCESS) {
                PMD_DRV_LOG(ERR, "Failed to set RX mode");
-               return -EINVAL;
+               rc = -EINVAL;
        }
 
-       return 0;
+out:
+       bnx2x_vf_finalize(sc, &query->first_tlv);
+
+       return rc;
 }
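
Every converted function in the bnx2x_vfpf.c hunks above follows the same request lifecycle: bnx2x_vf_prep() builds the first TLV, bnx2x_add_tlv() appends the list terminator, bnx2x_do_req4pf() posts the mailbox request and polls for the PF reply, the caller checks both the returned rc and reply->status, and bnx2x_vf_finalize() releases the channel on every exit path. The sketch below only illustrates that lifecycle under the signatures visible above; the helper name bnx2x_vf_simple_req() is hypothetical, and it assumes the driver-internal declarations from bnx2x.h and bnx2x_vfpf.h.

/* Sketch only: the request lifecycle shared by the converted functions
 * above.  bnx2x_vf_simple_req() is not part of the patch; vf_close_tlv is
 * used purely as an example request type. */
static int
bnx2x_vf_simple_req(struct bnx2x_softc *sc, struct vf_close_tlv *query,
		    uint16_t tlv_type)
{
	struct vf_common_reply_tlv *reply = &sc->vf2pf_mbox->resp.common_reply;
	int rc;

	/* 1. First TLV, then the mandatory list terminator. */
	bnx2x_vf_prep(sc, &query->first_tlv, tlv_type, sizeof(*query));
	bnx2x_add_tlv(sc, query, query->first_tlv.tl.length,
		      BNX2X_VF_TLV_LIST_END,
		      sizeof(struct channel_list_end_tlv));

	/* 2. Post the mailbox request; bnx2x_do_req4pf() polls for the reply
	 *    and returns non-zero on channel or timeout errors. */
	rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
	if (rc)
		goto out;

	/* 3. Transport succeeded; the PF status still has to be checked. */
	if (reply->status != BNX2X_VF_STATUS_SUCCESS)
		rc = -EINVAL;
out:
	/* 4. Release the channel on every path. */
	bnx2x_vf_finalize(sc, &query->first_tlv);
	return rc;
}
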
diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.h b/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.h
index f854d81..955ea98 100644
@@ -40,6 +40,13 @@ struct vf_resource_query {
 
 #define TLV_BUFFER_SIZE                        1024
 
+#define VFPF_RX_MASK_ACCEPT_NONE               0x00000000
+#define VFPF_RX_MASK_ACCEPT_MATCHED_UNICAST    0x00000001
+#define VFPF_RX_MASK_ACCEPT_MATCHED_MULTICAST  0x00000002
+#define VFPF_RX_MASK_ACCEPT_ALL_UNICAST                0x00000004
+#define VFPF_RX_MASK_ACCEPT_ALL_MULTICAST      0x00000008
+#define VFPF_RX_MASK_ACCEPT_BROADCAST          0x00000010
+
 /* general tlv header (used for both vf->pf request and pf->vf response) */
 struct channel_tlv {
        uint16_t type;
@@ -47,8 +54,7 @@ struct channel_tlv {
 };
 
 struct vf_first_tlv {
-       uint16_t type;
-       uint16_t length;
+       struct channel_tlv tl;
        uint32_t reply_offset;
 };
 
@@ -58,16 +64,14 @@ struct tlv_buffer_size {
 
 /* tlv struct for all PF replies except acquire */
 struct vf_common_reply_tlv {
-       uint16_t type;
-       uint16_t length;
+       struct channel_tlv tl;
        uint8_t status;
        uint8_t pad[3];
 };
 
 /* used to terminate and pad a tlv list */
 struct channel_list_end_tlv {
-       uint16_t type;
-       uint16_t length;
+       struct channel_tlv tl;
        uint32_t pad;
 };
 
@@ -327,7 +331,6 @@ struct bnx2x_vf_mbx_msg {
        union resp_tlvs resp;
 };
 
-void bnx2x_add_tlv(void *tlvs_list, uint16_t offset, uint16_t type, uint16_t length);
 int bnx2x_vf_set_mac(struct bnx2x_softc *sc, int set);
 int bnx2x_vf_config_rss(struct bnx2x_softc *sc, struct ecore_config_rss_params *params);
 
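
Folding the open-coded type/length pair into the shared struct channel_tlv changes only the accessors (first_tlv.length becomes first_tlv.tl.length, as the bnx2x_vfpf.c hunks above already use); the on-wire layout should stay byte-identical. A minimal compile-time check of that assumption, using DPDK's RTE_BUILD_BUG_ON(); the check itself is illustrative and not part of the patch:

#include <rte_common.h> /* RTE_BUILD_BUG_ON() */

static inline void
vfpf_tlv_layout_check(void)
{
	/* struct channel_tlv is just the former uint16_t type/length pair. */
	RTE_BUILD_BUG_ON(sizeof(struct channel_tlv) != 2 * sizeof(uint16_t));
	/* Embedding it must not change the size of the first TLV header. */
	RTE_BUILD_BUG_ON(sizeof(struct vf_first_tlv) !=
			 sizeof(struct channel_tlv) + sizeof(uint32_t));
}
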
diff --git a/src/dpdk/drivers/net/bnx2x/debug.c b/src/dpdk/drivers/net/bnx2x/debug.c
deleted file mode 100644
index cc50845..0000000
--- a/src/dpdk/drivers/net/bnx2x/debug.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/*-
- * Copyright (c) 2007-2013 QLogic Corporation. All rights reserved.
- *
- * Eric Davis        <[email protected]>
- * David Christensen <[email protected]>
- * Gary Zambrano     <[email protected]>
- *
- * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
- * Copyright (c) 2015 QLogic Corporation.
- * All rights reserved.
- * www.qlogic.com
- *
- * See LICENSE.bnx2x_pmd for copyright and licensing details.
- */
-
-#include "bnx2x.h"
-
-
-/*
- * Debug versions of the 8/16/32 bit OS register read/write functions to
- * capture/display values read/written from/to the controller.
- */
-void
-bnx2x_reg_write8(struct bnx2x_softc *sc, size_t offset, uint8_t val)
-{
-       PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%02x", (unsigned long)offset, val);
-       *((volatile uint8_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val;
-}
-
-void
-bnx2x_reg_write16(struct bnx2x_softc *sc, size_t offset, uint16_t val)
-{
-       if ((offset % 2) != 0) {
-               PMD_DRV_LOG(NOTICE, "Unaligned 16-bit write to 0x%08lx",
-                           (unsigned long)offset);
-       }
-
-       PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%04x", (unsigned long)offset, val);
-       *((volatile uint16_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val;
-}
-
-void
-bnx2x_reg_write32(struct bnx2x_softc *sc, size_t offset, uint32_t val)
-{
-       if ((offset % 4) != 0) {
-               PMD_DRV_LOG(NOTICE, "Unaligned 32-bit write to 0x%08lx",
-                           (unsigned long)offset);
-       }
-
-       PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x", (unsigned long)offset, val);
-       *((volatile uint32_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val;
-}
-
-uint8_t
-bnx2x_reg_read8(struct bnx2x_softc *sc, size_t offset)
-{
-       uint8_t val;
-
-       val = (uint8_t)(*((volatile uint8_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)));
-       PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%02x", (unsigned long)offset, val);
-
-       return val;
-}
-
-uint16_t
-bnx2x_reg_read16(struct bnx2x_softc *sc, size_t offset)
-{
-       uint16_t val;
-
-       if ((offset % 2) != 0) {
-               PMD_DRV_LOG(NOTICE, "Unaligned 16-bit read from 0x%08lx",
-                           (unsigned long)offset);
-       }
-
-       val = (uint16_t)(*((volatile uint16_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)));
-       PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x", (unsigned long)offset, val);
-
-       return val;
-}
-
-uint32_t
-bnx2x_reg_read32(struct bnx2x_softc *sc, size_t offset)
-{
-       uint32_t val;
-
-       if ((offset % 4) != 0) {
-               PMD_DRV_LOG(NOTICE, "Unaligned 32-bit read from 0x%08lx",
-                           (unsigned long)offset);
-               return 0;
-       }
-
-       val = (uint32_t)(*((volatile uint32_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)));
-       PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x", (unsigned long)offset, val);
-
-       return val;
-}
diff --git a/src/dpdk/drivers/net/bnx2x/elink.c b/src/dpdk/drivers/net/bnx2x/elink.c
index 149cc97..5329396 100644
@@ -1586,26 +1586,6 @@ static elink_status_t elink_emac_enable(struct elink_params *params,
        /* enable emac and not bmac */
        REG_WR(sc, NIG_REG_EGRESS_EMAC0_PORT + port * 4, 1);
 
-#ifdef ELINK_INCLUDE_EMUL
-       /* for paladium */
-       if (CHIP_REV_IS_EMUL(sc)) {
-               /* Use lane 1 (of lanes 0-3) */
-               REG_WR(sc, NIG_REG_XGXS_LANE_SEL_P0 + port * 4, 1);
-               REG_WR(sc, NIG_REG_XGXS_SERDES0_MODE_SEL + port * 4, 1);
-       }
-       /* for fpga */
-       else
-#endif
-#ifdef ELINK_INCLUDE_FPGA
-       if (CHIP_REV_IS_FPGA(sc)) {
-               /* Use lane 1 (of lanes 0-3) */
-               PMD_DRV_LOG(DEBUG, "elink_emac_enable: Setting FPGA");
-
-               REG_WR(sc, NIG_REG_XGXS_LANE_SEL_P0 + port * 4, 1);
-               REG_WR(sc, NIG_REG_XGXS_SERDES0_MODE_SEL + port * 4, 0);
-       } else
-#endif
-               /* ASIC */
        if (vars->phy_flags & PHY_XGXS_FLAG) {
                uint32_t ser_lane = ((params->lane_config &
                                      PORT_HW_CFG_LANE_SWAP_CFG_MASTER_MASK) >>
@@ -1628,39 +1608,28 @@ static elink_status_t elink_emac_enable(struct elink_params *params,
        elink_bits_en(sc, emac_base + EMAC_REG_EMAC_TX_MODE,
                      EMAC_TX_MODE_RESET);
 
-#if defined(ELINK_INCLUDE_EMUL) || defined(ELINK_INCLUDE_FPGA)
-       if (CHIP_REV_IS_SLOW(sc)) {
-               /* config GMII mode */
-               val = REG_RD(sc, emac_base + EMAC_REG_EMAC_MODE);
-               elink_cb_reg_write(sc, emac_base + EMAC_REG_EMAC_MODE,
-                                  (val | EMAC_MODE_PORT_GMII));
-       } else {                /* ASIC */
-#endif
-               /* pause enable/disable */
-               elink_bits_dis(sc, emac_base + EMAC_REG_EMAC_RX_MODE,
-                              EMAC_RX_MODE_FLOW_EN);
+       /* pause enable/disable */
+       elink_bits_dis(sc, emac_base + EMAC_REG_EMAC_RX_MODE,
+                      EMAC_RX_MODE_FLOW_EN);
 
-               elink_bits_dis(sc, emac_base + EMAC_REG_EMAC_TX_MODE,
-                              (EMAC_TX_MODE_EXT_PAUSE_EN |
-                               EMAC_TX_MODE_FLOW_EN));
-               if (!(params->feature_config_flags &
-                     ELINK_FEATURE_CONFIG_PFC_ENABLED)) {
-                       if (vars->flow_ctrl & ELINK_FLOW_CTRL_RX)
-                               elink_bits_en(sc, emac_base +
-                                             EMAC_REG_EMAC_RX_MODE,
-                                             EMAC_RX_MODE_FLOW_EN);
-
-                       if (vars->flow_ctrl & ELINK_FLOW_CTRL_TX)
-                               elink_bits_en(sc, emac_base +
-                                             EMAC_REG_EMAC_TX_MODE,
-                                             (EMAC_TX_MODE_EXT_PAUSE_EN |
-                                              EMAC_TX_MODE_FLOW_EN));
-               } else
-                       elink_bits_en(sc, emac_base + EMAC_REG_EMAC_TX_MODE,
-                                     EMAC_TX_MODE_FLOW_EN);
-#if defined(ELINK_INCLUDE_EMUL) || defined(ELINK_INCLUDE_FPGA)
-       }
-#endif
+       elink_bits_dis(sc, emac_base + EMAC_REG_EMAC_TX_MODE,
+                      (EMAC_TX_MODE_EXT_PAUSE_EN |
+                       EMAC_TX_MODE_FLOW_EN));
+       if (!(params->feature_config_flags &
+             ELINK_FEATURE_CONFIG_PFC_ENABLED)) {
+               if (vars->flow_ctrl & ELINK_FLOW_CTRL_RX)
+                       elink_bits_en(sc, emac_base +
+                                     EMAC_REG_EMAC_RX_MODE,
+                                     EMAC_RX_MODE_FLOW_EN);
+
+               if (vars->flow_ctrl & ELINK_FLOW_CTRL_TX)
+                       elink_bits_en(sc, emac_base +
+                                     EMAC_REG_EMAC_TX_MODE,
+                                     (EMAC_TX_MODE_EXT_PAUSE_EN |
+                                      EMAC_TX_MODE_FLOW_EN));
+       } else
+               elink_bits_en(sc, emac_base + EMAC_REG_EMAC_TX_MODE,
+                             EMAC_TX_MODE_FLOW_EN);
 
        /* KEEP_VLAN_TAG, promiscuous */
        val = REG_RD(sc, emac_base + EMAC_REG_EMAC_RX_MODE);
@@ -1727,17 +1696,7 @@ static elink_status_t elink_emac_enable(struct elink_params *params,
        REG_WR(sc, NIG_REG_EMAC0_PAUSE_OUT_EN + port * 4, val);
        REG_WR(sc, NIG_REG_EGRESS_EMAC0_OUT_EN + port * 4, 0x1);
 
-#ifdef ELINK_INCLUDE_EMUL
-       if (CHIP_REV_IS_EMUL(sc)) {
-               /* Take the BigMac out of reset */
-               REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET,
-                      (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port));
-
-               /* Enable access for bmac registers */
-               REG_WR(sc, NIG_REG_BMAC0_REGS_OUT_EN + port * 4, 0x1);
-       } else
-#endif
-               REG_WR(sc, NIG_REG_BMAC0_REGS_OUT_EN + port * 4, 0x0);
+       REG_WR(sc, NIG_REG_BMAC0_REGS_OUT_EN + port * 4, 0x0);
 
        vars->mac_type = ELINK_MAC_TYPE_EMAC;
        return ELINK_STATUS_OK;
@@ -2137,15 +2096,6 @@ static elink_status_t elink_bmac1_enable(struct elink_params *params,
        wb_data[1] = 0;
        REG_WR_DMAE(sc, bmac_addr + BIGMAC_REGISTER_RX_LLFC_MSG_FLDS,
                    wb_data, 2);
-#ifdef ELINK_INCLUDE_EMUL
-       /* Fix for emulation */
-       if (CHIP_REV_IS_EMUL(sc)) {
-               wb_data[0] = 0xf000;
-               wb_data[1] = 0;
-               REG_WR_DMAE(sc, bmac_addr + BIGMAC_REGISTER_TX_PAUSE_THRESHOLD,
-                           wb_data, 2);
-       }
-#endif
 
        return ELINK_STATUS_OK;
 }
@@ -5922,11 +5872,6 @@ elink_status_t elink_set_led(struct elink_params *params,
                                                          params, mode);
                }
        }
-#ifdef ELINK_INCLUDE_EMUL
-       if (params->feature_config_flags &
-           ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC)
-               return rc;
-#endif
 
        switch (mode) {
        case ELINK_LED_MODE_FRONT_PANEL_OFF:
@@ -6645,7 +6590,7 @@ static elink_status_t elink_8073_8727_external_rom_boot(struct bnx2x_softc *sc,
                                                        uint8_t port)
 {
        uint32_t count = 0;
-       uint16_t fw_ver1, fw_msgout;
+       uint16_t fw_ver1 = 0, fw_msgout;
        elink_status_t rc = ELINK_STATUS_OK;
 
        /* Boot port from external ROM  */
@@ -11671,10 +11616,7 @@ elink_status_t elink_phy_probe(struct elink_params * params)
        struct elink_phy *phy;
        params->num_phys = 0;
        PMD_DRV_LOG(DEBUG, "Begin phy probe");
-#ifdef ELINK_INCLUDE_EMUL
-       if (CHIP_REV_IS_EMUL(sc))
-               return ELINK_STATUS_OK;
-#endif
+
        phy_config_swapped = params->multi_phy_config &
            PORT_HW_CFG_PHY_SWAPPED_ENABLED;
 
@@ -11739,182 +11681,6 @@ elink_status_t elink_phy_probe(struct elink_params * params)
        return ELINK_STATUS_OK;
 }
 
-#ifdef ELINK_INCLUDE_EMUL
-static elink_status_t elink_init_e3_emul_mac(struct elink_params *params,
-                                            struct elink_vars *vars)
-{
-       struct bnx2x_softc *sc = params->sc;
-       vars->line_speed = params->req_line_speed[0];
-       /* In case link speed is auto, set speed the highest as possible */
-       if (params->req_line_speed[0] == ELINK_SPEED_AUTO_NEG) {
-               if (params->feature_config_flags &
-                   ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC)
-                       vars->line_speed = ELINK_SPEED_2500;
-               else if (elink_is_4_port_mode(sc))
-                       vars->line_speed = ELINK_SPEED_10000;
-               else
-                       vars->line_speed = ELINK_SPEED_20000;
-       }
-       if (vars->line_speed < ELINK_SPEED_10000) {
-               if ((params->feature_config_flags &
-                    ELINK_FEATURE_CONFIG_EMUL_DISABLE_UMAC)) {
-                       PMD_DRV_LOG(DEBUG, "Invalid line speed %d while UMAC is"
-                                   " disabled!", params->req_line_speed[0]);
-                       return ELINK_STATUS_ERROR;
-               }
-               switch (vars->line_speed) {
-               case ELINK_SPEED_10:
-                       vars->link_status = ELINK_LINK_10TFD;
-                       break;
-               case ELINK_SPEED_100:
-                       vars->link_status = ELINK_LINK_100TXFD;
-                       break;
-               case ELINK_SPEED_1000:
-                       vars->link_status = ELINK_LINK_1000TFD;
-                       break;
-               case ELINK_SPEED_2500:
-                       vars->link_status = ELINK_LINK_2500TFD;
-                       break;
-               default:
-                       PMD_DRV_LOG(DEBUG, "Invalid line speed %d for UMAC",
-                                   vars->line_speed);
-                       return ELINK_STATUS_ERROR;
-               }
-               vars->link_status |= LINK_STATUS_LINK_UP;
-
-               if (params->loopback_mode == ELINK_LOOPBACK_UMAC)
-                       elink_umac_enable(params, vars, 1);
-               else
-                       elink_umac_enable(params, vars, 0);
-       } else {
-               /* Link speed >= 10000 requires XMAC enabled */
-               if (params->feature_config_flags &
-                   ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC) {
-                       PMD_DRV_LOG(DEBUG, "Invalid line speed %d while XMAC is"
-                                   " disabled!", params->req_line_speed[0]);
-                       return ELINK_STATUS_ERROR;
-               }
-               /* Check link speed */
-               switch (vars->line_speed) {
-               case ELINK_SPEED_10000:
-                       vars->link_status = ELINK_LINK_10GTFD;
-                       break;
-               case ELINK_SPEED_20000:
-                       vars->link_status = ELINK_LINK_20GTFD;
-                       break;
-               default:
-                       PMD_DRV_LOG(DEBUG, "Invalid line speed %d for XMAC",
-                                   vars->line_speed);
-                       return ELINK_STATUS_ERROR;
-               }
-               vars->link_status |= LINK_STATUS_LINK_UP;
-               if (params->loopback_mode == ELINK_LOOPBACK_XMAC)
-                       elink_xmac_enable(params, vars, 1);
-               else
-                       elink_xmac_enable(params, vars, 0);
-       }
-       return ELINK_STATUS_OK;
-}
-
-static elink_status_t elink_init_emul(struct elink_params *params,
-                                     struct elink_vars *vars)
-{
-       struct bnx2x_softc *sc = params->sc;
-       if (CHIP_IS_E3(sc)) {
-               if (elink_init_e3_emul_mac(params, vars) != ELINK_STATUS_OK)
-                       return ELINK_STATUS_ERROR;
-       } else {
-               if (params->feature_config_flags &
-                   ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC) {
-                       vars->line_speed = ELINK_SPEED_1000;
-                       vars->link_status = (LINK_STATUS_LINK_UP |
-                                            ELINK_LINK_1000XFD);
-                       if (params->loopback_mode == ELINK_LOOPBACK_EMAC)
-                               elink_emac_enable(params, vars, 1);
-                       else
-                               elink_emac_enable(params, vars, 0);
-               } else {
-                       vars->line_speed = ELINK_SPEED_10000;
-                       vars->link_status = (LINK_STATUS_LINK_UP |
-                                            ELINK_LINK_10GTFD);
-                       if (params->loopback_mode == ELINK_LOOPBACK_BMAC)
-                               elink_bmac_enable(params, vars, 1, 1);
-                       else
-                               elink_bmac_enable(params, vars, 0, 1);
-               }
-       }
-       vars->link_up = 1;
-       vars->duplex = DUPLEX_FULL;
-       vars->flow_ctrl = ELINK_FLOW_CTRL_NONE;
-
-       if (CHIP_IS_E1x(sc))
-               elink_pbf_update(params, vars->flow_ctrl, vars->line_speed);
-       /* Disable drain */
-       REG_WR(sc, NIG_REG_EGRESS_DRAIN0_MODE + params->port * 4, 0);
-
-       /* update shared memory */
-       elink_update_mng(params, vars->link_status);
-       return ELINK_STATUS_OK;
-}
-#endif
-#ifdef ELINK_INCLUDE_FPGA
-static elink_status_t elink_init_fpga(struct elink_params *params,
-                                     struct elink_vars *vars)
-{
-       /* Enable on E1.5 FPGA */
-       struct bnx2x_softc *sc = params->sc;
-       vars->duplex = DUPLEX_FULL;
-       vars->flow_ctrl = ELINK_FLOW_CTRL_NONE;
-       vars->flow_ctrl = (ELINK_FLOW_CTRL_TX | ELINK_FLOW_CTRL_RX);
-       vars->link_status |= (LINK_STATUS_TX_FLOW_CONTROL_ENABLED |
-                             LINK_STATUS_RX_FLOW_CONTROL_ENABLED);
-       if (CHIP_IS_E3(sc)) {
-               vars->line_speed = params->req_line_speed[0];
-               switch (vars->line_speed) {
-               case ELINK_SPEED_AUTO_NEG:
-                       vars->line_speed = ELINK_SPEED_2500;
-               case ELINK_SPEED_2500:
-                       vars->link_status = ELINK_LINK_2500TFD;
-                       break;
-               case ELINK_SPEED_1000:
-                       vars->link_status = ELINK_LINK_1000XFD;
-                       break;
-               case ELINK_SPEED_100:
-                       vars->link_status = ELINK_LINK_100TXFD;
-                       break;
-               case ELINK_SPEED_10:
-                       vars->link_status = ELINK_LINK_10TFD;
-                       break;
-               default:
-                       PMD_DRV_LOG(DEBUG, "Invalid link speed %d",
-                                   params->req_line_speed[0]);
-                       return ELINK_STATUS_ERROR;
-               }
-               vars->link_status |= LINK_STATUS_LINK_UP;
-               if (params->loopback_mode == ELINK_LOOPBACK_UMAC)
-                       elink_umac_enable(params, vars, 1);
-               else
-                       elink_umac_enable(params, vars, 0);
-       } else {
-               vars->line_speed = ELINK_SPEED_10000;
-               vars->link_status = (LINK_STATUS_LINK_UP | ELINK_LINK_10GTFD);
-               if (params->loopback_mode == ELINK_LOOPBACK_EMAC)
-                       elink_emac_enable(params, vars, 1);
-               else
-                       elink_emac_enable(params, vars, 0);
-       }
-       vars->link_up = 1;
-
-       if (CHIP_IS_E1x(sc))
-               elink_pbf_update(params, vars->flow_ctrl, vars->line_speed);
-       /* Disable drain */
-       REG_WR(sc, NIG_REG_EGRESS_DRAIN0_MODE + params->port * 4, 0);
-
-       /* Update shared memory */
-       elink_update_mng(params, vars->link_status);
-       return ELINK_STATUS_OK;
-}
-#endif
 static void elink_init_bmac_loopback(struct elink_params *params,
                                     struct elink_vars *vars)
 {
@@ -12236,12 +12002,8 @@ elink_status_t elink_phy_init(struct elink_params *params,
                        ELINK_NIG_MASK_XGXS0_LINK10G |
                        ELINK_NIG_MASK_SERDES0_LINK_STATUS |
                        ELINK_NIG_MASK_MI_INT));
-#ifdef ELINK_INCLUDE_EMUL
-       if (!(params->feature_config_flags &
-             ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC))
-#endif
 
-               elink_emac_init(params);
+       elink_emac_init(params);
 
        if (params->feature_config_flags & ELINK_FEATURE_CONFIG_PFC_ENABLED)
                vars->link_status |= LINK_STATUS_PFC_ENABLED;
@@ -12253,45 +12015,36 @@ elink_status_t elink_phy_init(struct elink_params *params,
        set_phy_vars(params, vars);
 
        PMD_DRV_LOG(DEBUG, "Num of phys on board: %d", params->num_phys);
-#ifdef ELINK_INCLUDE_FPGA
-       if (CHIP_REV_IS_FPGA(sc)) {
-               return elink_init_fpga(params, vars);
-       } else
-#endif
-#ifdef ELINK_INCLUDE_EMUL
-       if (CHIP_REV_IS_EMUL(sc)) {
-               return elink_init_emul(params, vars);
-       } else
-#endif
-               switch (params->loopback_mode) {
-               case ELINK_LOOPBACK_BMAC:
-                       elink_init_bmac_loopback(params, vars);
-                       break;
-               case ELINK_LOOPBACK_EMAC:
-                       elink_init_emac_loopback(params, vars);
-                       break;
-               case ELINK_LOOPBACK_XMAC:
-                       elink_init_xmac_loopback(params, vars);
-                       break;
-               case ELINK_LOOPBACK_UMAC:
-                       elink_init_umac_loopback(params, vars);
-                       break;
-               case ELINK_LOOPBACK_XGXS:
-               case ELINK_LOOPBACK_EXT_PHY:
-                       elink_init_xgxs_loopback(params, vars);
-                       break;
-               default:
-                       if (!CHIP_IS_E3(sc)) {
-                               if (params->switch_cfg == ELINK_SWITCH_CFG_10G)
-                                       elink_xgxs_deassert(params);
-                               else
-                                       elink_serdes_deassert(sc, params->port);
-                       }
-                       elink_link_initialize(params, vars);
-                       DELAY(1000 * 30);
-                       elink_link_int_enable(params);
-                       break;
+
+       switch (params->loopback_mode) {
+       case ELINK_LOOPBACK_BMAC:
+               elink_init_bmac_loopback(params, vars);
+               break;
+       case ELINK_LOOPBACK_EMAC:
+               elink_init_emac_loopback(params, vars);
+               break;
+       case ELINK_LOOPBACK_XMAC:
+               elink_init_xmac_loopback(params, vars);
+               break;
+       case ELINK_LOOPBACK_UMAC:
+               elink_init_umac_loopback(params, vars);
+               break;
+       case ELINK_LOOPBACK_XGXS:
+       case ELINK_LOOPBACK_EXT_PHY:
+               elink_init_xgxs_loopback(params, vars);
+               break;
+       default:
+               if (!CHIP_IS_E3(sc)) {
+                       if (params->switch_cfg == ELINK_SWITCH_CFG_10G)
+                               elink_xgxs_deassert(params);
+                       else
+                               elink_serdes_deassert(sc, params->port);
                }
+               elink_link_initialize(params, vars);
+               DELAY(1000 * 30);
+               elink_link_int_enable(params);
+               break;
+       }
        elink_update_mng(params, vars->link_status);
 
        elink_update_mng_eee(params, vars->eee_status);
@@ -12325,22 +12078,12 @@ static elink_status_t elink_link_reset(struct elink_params *params,
                REG_WR(sc, NIG_REG_BMAC0_OUT_EN + port * 4, 0);
                REG_WR(sc, NIG_REG_EGRESS_EMAC0_OUT_EN + port * 4, 0);
        }
-#ifdef ELINK_INCLUDE_EMUL
-       /* Stop BigMac rx */
-       if (!(params->feature_config_flags &
-             ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC))
-#endif
-               if (!CHIP_IS_E3(sc))
-                       elink_set_bmac_rx(sc, port, 0);
-#ifdef ELINK_INCLUDE_EMUL
-       /* Stop XMAC/UMAC rx */
-       if (!(params->feature_config_flags &
-             ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC))
-#endif
-               if (CHIP_IS_E3(sc) && !CHIP_REV_IS_FPGA(sc)) {
-                       elink_set_xmac_rxtx(params, 0);
-                       elink_set_umac_rxtx(params, 0);
-               }
+       if (!CHIP_IS_E3(sc))
+               elink_set_bmac_rx(sc, port, 0);
+       if (CHIP_IS_E3(sc) && !CHIP_REV_IS_FPGA(sc)) {
+               elink_set_xmac_rxtx(params, 0);
+               elink_set_umac_rxtx(params, 0);
+       }
        /* Disable emac */
        if (!CHIP_IS_E3(sc))
                REG_WR(sc, NIG_REG_NIG_EMAC0_EN + port * 4, 0);
@@ -12376,14 +12119,11 @@ static elink_status_t elink_link_reset(struct elink_params *params,
                elink_bits_dis(sc, NIG_REG_LATCH_BC_0 + port * 4,
                               1 << ELINK_NIG_LATCH_BC_ENABLE_MI_INT);
        }
-#if defined(ELINK_INCLUDE_EMUL) || defined(ELINK_INCLUDE_FPGA)
-       if (!CHIP_REV_IS_SLOW(sc))
-#endif
-               if (params->phy[ELINK_INT_PHY].link_reset)
-                       params->phy[ELINK_INT_PHY].link_reset(&params->
-                                                             phy
-                                                             [ELINK_INT_PHY],
-                                                             params);
+       if (params->phy[ELINK_INT_PHY].link_reset)
+               params->phy[ELINK_INT_PHY].link_reset(&params->
+                                                     phy
+                                                     [ELINK_INT_PHY],
+                                                     params);
 
        /* Disable nig ingress interface */
        if (!CHIP_IS_E3(sc)) {
@@ -12868,10 +12608,6 @@ elink_status_t elink_common_init_phy(struct bnx2x_softc * sc,
        uint32_t phy_ver, val;
        uint8_t phy_index = 0;
        uint32_t ext_phy_type, ext_phy_config;
-#if defined(ELINK_INCLUDE_EMUL) || defined(ELINK_INCLUDE_FPGA)
-       if (CHIP_REV_IS_EMUL(sc) || CHIP_REV_IS_FPGA(sc))
-               return ELINK_STATUS_OK;
-#endif
 
        elink_set_mdio_clk(sc, GRCBASE_EMAC0);
        elink_set_mdio_clk(sc, GRCBASE_EMAC1);
diff --git a/src/dpdk/drivers/net/bnx2x/elink.h b/src/dpdk/drivers/net/bnx2x/elink.h
index c4f886a..9401b7c 100644
@@ -359,10 +359,6 @@ struct elink_params {
 #define ELINK_FEATURE_CONFIG_PFC_ENABLED                       (1<<1)
 #define ELINK_FEATURE_CONFIG_BC_SUPPORTS_OPT_MDL_VRFY          (1<<2)
 #define ELINK_FEATURE_CONFIG_BC_SUPPORTS_DUAL_PHY_OPT_MDL_VRFY (1<<3)
-#define ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC                 (1<<4)
-#define ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC                 (1<<5)
-#define ELINK_FEATURE_CONFIG_EMUL_DISABLE_UMAC                 (1<<6)
-#define ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC                 (1<<7)
 #define ELINK_FEATURE_CONFIG_BC_SUPPORTS_AFEX                  (1<<8)
 #define ELINK_FEATURE_CONFIG_AUTOGREEEN_ENABLED                (1<<9)
 #define ELINK_FEATURE_CONFIG_BC_SUPPORTS_SFP_TX_DISABLED       (1<<10)
diff --git a/src/dpdk/drivers/net/bonding/rte_eth_bond_api.c b/src/dpdk/drivers/net/bonding/rte_eth_bond_api.c
index 203ebe9..f552d96 100644
@@ -37,6 +37,7 @@
 #include <rte_malloc.h>
 #include <rte_ethdev.h>
 #include <rte_tcp.h>
+#include <rte_vdev.h>
 
 #include "rte_eth_bond.h"
 #include "rte_eth_bond_private.h"
@@ -44,8 +45,6 @@
 
 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
 
-const char pmd_bond_driver_name[] = "rte_bond_pmd";
-
 int
 check_for_bonded_ethdev(const struct rte_eth_dev *eth_dev)
 {
@@ -54,7 +53,7 @@ check_for_bonded_ethdev(const struct rte_eth_dev *eth_dev)
                return -1;
 
        /* return 0 if driver name matches */
-       return eth_dev->data->drv_name != pmd_bond_driver_name;
+       return eth_dev->data->drv_name != pmd_bond_drv.driver.name;
 }
 
 int
@@ -166,6 +165,7 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
 {
        struct bond_dev_private *internals = NULL;
        struct rte_eth_dev *eth_dev = NULL;
+       uint32_t vlan_filter_bmp_size;
 
        /* now do all data allocation - for eth_dev structure, dummy pci driver
         * and internal (private) data
@@ -189,7 +189,7 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
        }
 
        /* reserve an ethdev entry */
-       eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
+       eth_dev = rte_eth_dev_allocate(name);
        if (eth_dev == NULL) {
                RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
                goto err;
@@ -199,10 +199,6 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
        eth_dev->data->nb_rx_queues = (uint16_t)1;
        eth_dev->data->nb_tx_queues = (uint16_t)1;
 
-       TAILQ_INIT(&(eth_dev->link_intr_cbs));
-
-       eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
-
        eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
                        socket_id);
        if (eth_dev->data->mac_addrs == NULL) {
@@ -210,17 +206,12 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
                goto err;
        }
 
-       eth_dev->data->dev_started = 0;
-       eth_dev->data->promiscuous = 0;
-       eth_dev->data->scattered_rx = 0;
-       eth_dev->data->all_multicast = 0;
-
        eth_dev->dev_ops = &default_dev_ops;
        eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
                RTE_ETH_DEV_DETACHABLE;
        eth_dev->driver = NULL;
        eth_dev->data->kdrv = RTE_KDRV_NONE;
-       eth_dev->data->drv_name = pmd_bond_driver_name;
+       eth_dev->data->drv_name = pmd_bond_drv.driver.name;
        eth_dev->data->numa_node =  socket_id;
 
        rte_spinlock_init(&internals->lock);
@@ -260,6 +251,27 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
                goto err;
        }
 
+       vlan_filter_bmp_size =
+               rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
+       internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
+                                                  RTE_CACHE_LINE_SIZE);
+       if (internals->vlan_filter_bmpmem == NULL) {
+               RTE_BOND_LOG(ERR,
+                            "Failed to allocate vlan bitmap for bonded device %u\n",
+                            eth_dev->data->port_id);
+               goto err;
+       }
+
+       internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
+                       internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
+       if (internals->vlan_filter_bmp == NULL) {
+               RTE_BOND_LOG(ERR,
+                            "Failed to init vlan bitmap for bonded device %u\n",
+                            eth_dev->data->port_id);
+               rte_free(internals->vlan_filter_bmpmem);
+               goto err;
+       }
+
        return eth_dev->data->port_id;
 
 err:
@@ -299,6 +311,9 @@ rte_eth_bond_free(const char *name)
        eth_dev->rx_pkt_burst = NULL;
        eth_dev->tx_pkt_burst = NULL;
 
+       internals = eth_dev->data->dev_private;
+       rte_bitmap_free(internals->vlan_filter_bmp);
+       rte_free(internals->vlan_filter_bmpmem);
        rte_free(eth_dev->data->dev_private);
        rte_free(eth_dev->data->mac_addrs);
 
@@ -307,6 +322,46 @@ rte_eth_bond_free(const char *name)
        return 0;
 }
 
+static int
+slave_vlan_filter_set(uint8_t bonded_port_id, uint8_t slave_port_id)
+{
+       struct rte_eth_dev *bonded_eth_dev;
+       struct bond_dev_private *internals;
+       int found;
+       int res = 0;
+       uint64_t slab = 0;
+       uint32_t pos = 0;
+       uint16_t first;
+
+       bonded_eth_dev = &rte_eth_devices[bonded_port_id];
+       if (bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter == 0)
+               return 0;
+
+       internals = bonded_eth_dev->data->dev_private;
+       found = rte_bitmap_scan(internals->vlan_filter_bmp, &pos, &slab);
+       first = pos;
+
+       if (!found)
+               return 0;
+
+       do {
+               uint32_t i;
+               uint64_t mask;
+
+               for (i = 0, mask = 1;
+                    i < RTE_BITMAP_SLAB_BIT_SIZE;
+                    i ++, mask <<= 1) {
+                       if (unlikely(slab & mask))
+                               res = rte_eth_dev_vlan_filter(slave_port_id,
+                                                             (uint16_t)pos, 1);
+               }
+               found = rte_bitmap_scan(internals->vlan_filter_bmp,
+                                       &pos, &slab);
+       } while (found && first != pos && res == 0);
+
+       return res;
+}
+
 static int
 __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
 {
@@ -373,21 +428,6 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
                internals->candidate_max_rx_pktlen = dev_info.max_rx_pktlen;
 
        } else {
-               /* Check slave link properties are supported if props are set,
-                * all slaves must be the same */
-               if (internals->link_props_set) {
-                       if (link_properties_valid(&(bonded_eth_dev->data->dev_link),
-                                                                         &(slave_eth_dev->data->dev_link))) {
-                               slave_eth_dev->data->dev_flags &= (~RTE_ETH_DEV_BONDED_SLAVE);
-                               RTE_BOND_LOG(ERR,
-                                               "Slave port %d link speed/duplex not supported",
-                                               slave_port_id);
-                               return -1;
-                       }
-               } else {
-                       link_properties_set(bonded_eth_dev,
-                                       &(slave_eth_dev->data->dev_link));
-               }
                internals->rx_offload_capa &= dev_info.rx_offload_capa;
                internals->tx_offload_capa &= dev_info.tx_offload_capa;
                internals->flow_type_rss_offloads &= dev_info.flow_type_rss_offloads;
@@ -442,6 +482,9 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
                                activate_slave(bonded_eth_dev, slave_port_id);
                }
        }
+
+       slave_vlan_filter_set(bonded_port_id, slave_port_id);
+
        return 0;
 
 }
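
Together with the bond_ethdev_vlan_filter_set() callback added in rte_eth_bond_pmd.c further below, the bitmap gives the bonded device persistent VLAN-filter state: VLAN ids set on the bond are recorded in internals->vlan_filter_bmp and pushed to the attached slaves, and slave_vlan_filter_set() replays them to any slave added later. A usage sketch from the application side, assuming the bonded device honours hw_vlan_filter and that bond_port and slave_port are hypothetical, already-created port ids:

#include <rte_ethdev.h>
#include <rte_eth_bond.h>

static int
bond_vlan_example(uint8_t bond_port, uint8_t slave_port)
{
	/* Enable VLAN filtering on the bonded port; slave_configure()
	 * propagates hw_vlan_filter to each slave. */
	struct rte_eth_conf conf = {
		.rxmode = { .hw_vlan_filter = 1 },
	};
	int ret;

	ret = rte_eth_dev_configure(bond_port, 1, 1, &conf);
	if (ret < 0)
		return ret;

	/* Recorded in the bond's VLAN bitmap and forwarded to current slaves. */
	ret = rte_eth_dev_vlan_filter(bond_port, 100, 1);
	if (ret < 0)
		return ret;

	/* A slave attached afterwards gets VLAN 100 replayed automatically. */
	return rte_eth_bond_slave_add(bond_port, slave_port);
}
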
diff --git a/src/dpdk/drivers/net/bonding/rte_eth_bond_args.c b/src/dpdk/drivers/net/bonding/rte_eth_bond_args.c
index 02ecde6..3dca273 100644
@@ -54,15 +54,23 @@ const char *pmd_bond_init_valid_arguments[] = {
 static inline int
 find_port_id_by_pci_addr(const struct rte_pci_addr *pci_addr)
 {
+       struct rte_pci_device *pci_dev;
        struct rte_pci_addr *eth_pci_addr;
        unsigned i;
 
        for (i = 0; i < rte_eth_dev_count(); i++) {
 
-               if (rte_eth_devices[i].pci_dev == NULL)
+               /* Currently populated by rte_eth_copy_pci_info().
+                *
+                * TODO: Once the PCI bus has arrived we should have a better
+                * way to test for being a PCI device or not.
+                */
+               if (rte_eth_devices[i].data->kdrv == RTE_KDRV_UNKNOWN ||
+                   rte_eth_devices[i].data->kdrv == RTE_KDRV_NONE)
                        continue;
 
-               eth_pci_addr = &(rte_eth_devices[i].pci_dev->addr);
+               pci_dev = RTE_DEV_TO_PCI(rte_eth_devices[i].device);
+               eth_pci_addr = &pci_dev->addr;
 
                if (pci_addr->bus == eth_pci_addr->bus &&
                        pci_addr->devid == eth_pci_addr->devid &&
diff --git a/src/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c b/src/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c
index b20a272..f3ac9e2 100644
@@ -42,7 +42,7 @@
 #include <rte_ip_frag.h>
 #include <rte_devargs.h>
 #include <rte_kvargs.h>
-#include <rte_dev.h>
+#include <rte_vdev.h>
 #include <rte_alarm.h>
 #include <rte_cycles.h>
 
@@ -122,6 +122,15 @@ bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                        bd_rx_q->queue_id, bufs, nb_pkts);
 }
 
+static inline uint8_t
+is_lacp_packets(uint16_t ethertype, uint8_t subtype, uint16_t vlan_tci)
+{
+       const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
+
+       return !vlan_tci && (ethertype == ether_type_slow_be &&
+               (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
+}
+
 static uint16_t
 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
@@ -141,6 +150,7 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t i, j, k;
+       uint8_t subtype;
 
        rte_eth_macaddr_get(internals->port_id, &bond_mac);
        /* Copy slave list to protect against slave up/down changes during tx
@@ -166,10 +176,12 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
 
                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
+                       subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
+
                        /* Remove packet from array if it is slow packet or slave is not
                         * in collecting state or bondign interface is not in promiscus
                         * mode and packet address does not match. */
-                       if (unlikely(hdr->ether_type == ether_type_slow_be ||
+                       if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]->vlan_tci) ||
                                !collecting || (!promisc &&
                                        !is_multicast_ether_addr(&hdr->d_addr) &&
                                        !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
@@ -888,7 +900,6 @@ bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                }
 
                num_tx_total += num_send;
-               num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send;
        }
 
        return num_tx_total;
@@ -1305,8 +1316,6 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
        struct bond_rx_queue *bd_rx_q;
        struct bond_tx_queue *bd_tx_q;
 
-       uint16_t old_nb_tx_queues = slave_eth_dev->data->nb_tx_queues;
-       uint16_t old_nb_rx_queues = slave_eth_dev->data->nb_rx_queues;
        int errval;
        uint16_t q_id;
 
@@ -1335,6 +1344,9 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
                                bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
        }
 
+       slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
+                       bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
+
        /* Configure device */
        errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
                        bonded_eth_dev->data->nb_rx_queues,
@@ -1347,9 +1359,7 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
        }
 
        /* Setup Rx Queues */
-       /* Use existing queues, if any */
-       for (q_id = old_nb_rx_queues;
-            q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
+       for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
                bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
 
                errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
@@ -1365,9 +1375,7 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
        }
 
        /* Setup Tx Queues */
-       /* Use existing queues, if any */
-       for (q_id = old_nb_tx_queues;
-            q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
+       for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
                bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
 
                errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
@@ -1439,6 +1447,9 @@ slave_remove(struct bond_dev_private *internals,
                                (internals->slave_count - i - 1));
 
        internals->slave_count--;
+
+       /* force reconfiguration of slave interfaces */
+       _rte_eth_dev_reset(slave_eth_dev);
 }
 
 static void
@@ -1637,7 +1648,10 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev)
 void
 bond_ethdev_close(struct rte_eth_dev *dev)
 {
+       struct bond_dev_private *internals = dev->data->dev_private;
+
        bond_ethdev_free_queues(dev);
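+       /* drop the VLAN filters recorded for this bonded device */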
+       rte_bitmap_reset(internals->vlan_filter_bmp);
 }
 
 /* forward declaration */
@@ -1657,7 +1671,6 @@ bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
        dev_info->max_tx_queues = (uint16_t)512;
 
        dev_info->min_rx_bufsize = 0;
-       dev_info->pci_dev = NULL;
 
        dev_info->rx_offload_capa = internals->rx_offload_capa;
        dev_info->tx_offload_capa = internals->tx_offload_capa;
@@ -1666,6 +1679,35 @@ bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
        dev_info->reta_size = internals->reta_size;
 }
 
+static int
+bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+{
+       int res;
+       uint8_t i;
+       struct bond_dev_private *internals = dev->data->dev_private;
+
+       /* don't do this while a slave is being added */
+       rte_spinlock_lock(&internals->lock);
+
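+       /* track the requested VLAN filter in the bonded device's bitmap */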
+       if (on)
+               rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
+       else
+               rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
+
+       for (i = 0; i < internals->slave_count; i++) {
+               uint8_t port_id = internals->slaves[i].port_id;
+
+               res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
+               if (res == ENOTSUP)
+                       RTE_LOG(WARNING, PMD,
+                               "Setting VLAN filter on slave port %u not supported.\n",
+                               port_id);
+       }
+
+       rte_spinlock_unlock(&internals->lock);
+       return 0;
+}
+
 static int
 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
                uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
@@ -1923,7 +1965,7 @@ bond_ethdev_delayed_lsc_propagation(void *arg)
                return;
 
        _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
-                       RTE_ETH_EVENT_INTR_LSC);
+                       RTE_ETH_EVENT_INTR_LSC, NULL);
 }
 
 void
@@ -1985,6 +2027,16 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
                        /* Inherit eth dev link properties from first active slave */
                        link_properties_set(bonded_eth_dev,
                                        &(slave_eth_dev->data->dev_link));
+               } else {
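+                       /* reject a slave whose speed/duplex does not match the
+                        * bonded device's current link */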
+                       if (link_properties_valid(
+                               &bonded_eth_dev->data->dev_link, &link) != 0) {
+                               slave_eth_dev->data->dev_flags &=
+                                       (~RTE_ETH_DEV_BONDED_SLAVE);
+                               RTE_LOG(ERR, PMD,
+                                       "port %u invalid speed/duplex\n",
+                                       port_id);
+                               return;
+                       }
                }
 
                activate_slave(bonded_eth_dev, port_id);
@@ -2034,7 +2086,7 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
                                                (void *)bonded_eth_dev);
                        else
                                _rte_eth_dev_callback_process(bonded_eth_dev,
-                                               RTE_ETH_EVENT_INTR_LSC);
+                                               RTE_ETH_EVENT_INTR_LSC, NULL);
 
                } else {
                        if (internals->link_down_delay_ms > 0)
@@ -2043,7 +2095,7 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
                                                (void *)bonded_eth_dev);
                        else
                                _rte_eth_dev_callback_process(bonded_eth_dev,
-                                               RTE_ETH_EVENT_INTR_LSC);
+                                               RTE_ETH_EVENT_INTR_LSC, NULL);
                }
        }
 }
@@ -2161,6 +2213,7 @@ const struct eth_dev_ops default_dev_ops = {
        .dev_close            = bond_ethdev_close,
        .dev_configure        = bond_ethdev_configure,
        .dev_infos_get        = bond_ethdev_info,
+       .vlan_filter_set      = bond_ethdev_vlan_filter_set,
        .rx_queue_setup       = bond_ethdev_rx_queue_setup,
        .tx_queue_setup       = bond_ethdev_tx_queue_setup,
        .rx_queue_release     = bond_ethdev_rx_queue_release,
@@ -2177,7 +2230,7 @@ const struct eth_dev_ops default_dev_ops = {
 };
 
 static int
-bond_init(const char *name, const char *params)
+bond_probe(const char *name, const char *params)
 {
        struct bond_dev_private *internals;
        struct rte_kvargs *kvlist;
@@ -2244,7 +2297,7 @@ parse_error:
 }
 
 static int
-bond_uninit(const char *name)
+bond_remove(const char *name)
 {
        int  ret;
 
@@ -2508,15 +2561,15 @@ bond_ethdev_configure(struct rte_eth_dev *dev)
        return 0;
 }
 
-static struct rte_driver bond_drv = {
-       .type = PMD_VDEV,
-       .init = bond_init,
-       .uninit = bond_uninit,
+struct rte_vdev_driver pmd_bond_drv = {
+       .probe = bond_probe,
+       .remove = bond_remove,
 };
 
-PMD_REGISTER_DRIVER(bond_drv, eth_bond);
+RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
+RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
 
-DRIVER_REGISTER_PARAM_STRING(eth_bond,
+RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
        "slave=<ifc> "
        "primary=<ifc> "
        "mode=[0-6] "
index 2bdc9ef..5a411e2 100644 (file)
@@ -36,6 +36,7 @@
 
 #include <rte_ethdev.h>
 #include <rte_spinlock.h>
+#include <rte_bitmap.h>
 
 #include "rte_eth_bond.h"
 #include "rte_eth_bond_8023ad_private.h"
@@ -62,7 +63,7 @@
 
 extern const char *pmd_bond_init_valid_arguments[];
 
-extern const char pmd_bond_driver_name[];
+extern struct rte_vdev_driver pmd_bond_drv;
 
 /** Port Queue Mapping Structure */
 struct bond_rx_queue {
@@ -172,6 +173,9 @@ struct bond_dev_private {
 
        uint32_t candidate_max_rx_pktlen;
        uint32_t max_rx_pktlen;
+
+       void *vlan_filter_bmpmem;               /* enabled vlan filter bitmap */
+       struct rte_bitmap *vlan_filter_bmp;
 };
 
 extern const struct eth_dev_ops default_dev_ops;
index 5e3bd50..beb1e3e 100644 (file)
@@ -37,6 +37,7 @@
 #define __T4_ADAPTER_H__
 
 #include <rte_mbuf.h>
+#include <rte_io.h>
 
 #include "cxgbe_compat.h"
 #include "t4_regs_values.h"
@@ -324,7 +325,7 @@ struct adapter {
        int use_unpacked_mode; /* unpacked rx mode state */
 };
 
-#define CXGBE_PCI_REG(reg) (*((volatile uint32_t *)(reg)))
+#define CXGBE_PCI_REG(reg) rte_read32(reg)
 
 static inline uint64_t cxgbe_read_addr64(volatile void *addr)
 {
@@ -350,16 +351,21 @@ static inline uint32_t cxgbe_read_addr(volatile void *addr)
 #define CXGBE_READ_REG64(adap, reg) \
        cxgbe_read_addr64(CXGBE_PCI_REG_ADDR((adap), (reg)))
 
-#define CXGBE_PCI_REG_WRITE(reg, value) ({ \
-       CXGBE_PCI_REG((reg)) = (value); })
+#define CXGBE_PCI_REG_WRITE(reg, value) rte_write32((value), (reg))
+
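+/* Relaxed write: no memory barrier is implied; callers must order doorbell
+ * writes explicitly where required. */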
+#define CXGBE_PCI_REG_WRITE_RELAXED(reg, value) \
+       rte_write32_relaxed((value), (reg))
 
 #define CXGBE_WRITE_REG(adap, reg, value) \
        CXGBE_PCI_REG_WRITE(CXGBE_PCI_REG_ADDR((adap), (reg)), (value))
 
+#define CXGBE_WRITE_REG_RELAXED(adap, reg, value) \
+       CXGBE_PCI_REG_WRITE_RELAXED(CXGBE_PCI_REG_ADDR((adap), (reg)), (value))
+
 static inline uint64_t cxgbe_write_addr64(volatile void *addr, uint64_t val)
 {
-       CXGBE_PCI_REG(addr) = val;
-       CXGBE_PCI_REG(((volatile uint8_t *)(addr) + 4)) = (val >> 32);
+       CXGBE_PCI_REG_WRITE(addr, val);
+       CXGBE_PCI_REG_WRITE(((volatile uint8_t *)(addr) + 4), (val >> 32));
        return val;
 }
 
@@ -383,7 +389,7 @@ static inline u32 t4_read_reg(struct adapter *adapter, u32 reg_addr)
 }
 
 /**
- * t4_write_reg - write a HW register
+ * t4_write_reg - write a HW register with barrier
  * @adapter: the adapter
  * @reg_addr: the register address
  * @val: the value to write
@@ -397,6 +403,22 @@ static inline void t4_write_reg(struct adapter *adapter, u32 reg_addr, u32 val)
        CXGBE_WRITE_REG(adapter, reg_addr, val);
 }
 
+/**
+ * t4_write_reg_relaxed - write a HW register with no barrier
+ * @adapter: the adapter
+ * @reg_addr: the register address
+ * @val: the value to write
+ *
+ * Write a 32-bit value into the given HW register.
+ */
+static inline void t4_write_reg_relaxed(struct adapter *adapter, u32 reg_addr,
+                                       u32 val)
+{
+       CXGBE_DEBUG_REG(adapter, "setting register 0x%x to 0x%x\n", reg_addr,
+                       val);
+       CXGBE_WRITE_REG_RELAXED(adapter, reg_addr, val);
+}
+
 /**
  * t4_read_reg64 - read a 64-bit HW register
  * @adapter: the adapter
index 7e79adf..c089b06 100644 (file)
@@ -1532,7 +1532,7 @@ int t4_seeprom_write(struct adapter *adapter, u32 addr, u32 data)
 {
        unsigned int base = adapter->params.pci.vpd_cap_addr;
        int ret;
-       u32 stats_reg;
+       u32 stats_reg = 0;
        int max_poll;
 
        /* VPD Accesses must always be 4-byte aligned!
index e68f8f5..1551cbf 100644 (file)
@@ -45,6 +45,7 @@
 #include <rte_cycles.h>
 #include <rte_spinlock.h>
 #include <rte_log.h>
+#include <rte_io.h>
 
 #define dev_printf(level, fmt, args...) \
        RTE_LOG(level, PMD, "rte_cxgbe_pmd: " fmt, ## args)
@@ -254,7 +255,7 @@ static inline unsigned long ilog2(unsigned long n)
 
 static inline void writel(unsigned int val, volatile void __iomem *addr)
 {
-       *(volatile unsigned int *)addr = val;
+       rte_write32(val, addr);
 }
 
 static inline void writeq(u64 val, volatile void __iomem *addr)
@@ -263,4 +264,9 @@ static inline void writeq(u64 val, volatile void __iomem *addr)
        writel(val >> 32, (void *)((uintptr_t)addr + 4));
 }
 
+static inline void writel_relaxed(unsigned int val, volatile void __iomem *addr)
+{
+       rte_write32_relaxed(val, addr);
+}
+
 #endif /* _CXGBE_COMPAT_H_ */
index 9208a61..4d543a7 100644 (file)
@@ -68,7 +68,7 @@
  * Macros needed to support the PCI Device ID Table ...
  */
 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
-       static struct rte_pci_id cxgb4_pci_tbl[] = {
+       static const struct rte_pci_id cxgb4_pci_tbl[] = {
 #define CH_PCI_DEVICE_ID_FUNCTION 0x4
 
 #define PCI_VENDOR_ID_CHELSIO 0x1425
@@ -147,6 +147,8 @@ static void cxgbe_dev_info_get(struct rte_eth_dev *eth_dev,
                .nb_align = 1,
        };
 
+       device_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
+
        device_info->min_rx_bufsize = CXGBE_MIN_RX_BUFSIZE;
        device_info->max_rx_pktlen = CXGBE_MAX_RX_PKTLEN;
        device_info->max_rx_queues = max_queues;
@@ -1005,7 +1007,7 @@ static int eth_cxgbe_dev_init(struct rte_eth_dev *eth_dev)
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return 0;
 
-       pci_dev = eth_dev->pci_dev;
+       pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
 
        snprintf(name, sizeof(name), "cxgbeadapter%d", eth_dev->data->port_id);
        adapter = rte_zmalloc(name, sizeof(*adapter), 0);
@@ -1039,33 +1041,15 @@ out_free_adapter:
 
 static struct eth_driver rte_cxgbe_pmd = {
        .pci_drv = {
-               .name = "rte_cxgbe_pmd",
                .id_table = cxgb4_pci_tbl,
                .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+               .probe = rte_eth_dev_pci_probe,
+               .remove = rte_eth_dev_pci_remove,
        },
        .eth_dev_init = eth_cxgbe_dev_init,
        .dev_private_size = sizeof(struct port_info),
 };
 
-/*
- * Driver initialization routine.
- * Invoked once at EAL init time.
- * Register itself as the [Poll Mode] Driver of PCI CXGBE devices.
- */
-static int rte_cxgbe_pmd_init(const char *name __rte_unused,
-                             const char *params __rte_unused)
-{
-       CXGBE_FUNC_TRACE();
-
-       rte_eth_driver_register(&rte_cxgbe_pmd);
-       return 0;
-}
-
-static struct rte_driver rte_cxgbe_driver = {
-       .type = PMD_PDEV,
-       .init = rte_cxgbe_pmd_init,
-};
-
-PMD_REGISTER_DRIVER(rte_cxgbe_driver, cxgb4);
-DRIVER_REGISTER_PCI_TABLE(cxgb4, cxgb4_pci_tbl);
-
+RTE_PMD_REGISTER_PCI(net_cxgbe, rte_cxgbe_pmd.pci_drv);
+RTE_PMD_REGISTER_PCI_TABLE(net_cxgbe, cxgb4_pci_tbl);
+RTE_PMD_REGISTER_KMOD_DEP(net_cxgbe, "* igb_uio | uio_pci_generic | vfio");
index ceaf5ab..541fc40 100644 (file)
@@ -959,7 +959,7 @@ int setup_rss(struct port_info *pi)
        dev_debug(adapter, "%s:  pi->rss_size = %u; pi->n_rx_qsets = %u\n",
                  __func__, pi->rss_size, pi->n_rx_qsets);
 
-       if (!pi->flags & PORT_RSS_DONE) {
+       if (!(pi->flags & PORT_RSS_DONE)) {
                if (adapter->flags & FULL_INIT_DONE) {
                        /* Fill default values with equal distribution */
                        for (j = 0; j < pi->rss_size; j++)
@@ -1150,7 +1150,7 @@ int cxgbe_probe(struct adapter *adapter)
                 */
 
                /* reserve an ethdev entry */
-               pi->eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_PCI);
+               pi->eth_dev = rte_eth_dev_allocate(name);
                if (!pi->eth_dev)
                        goto out_free;
 
@@ -1163,16 +1163,14 @@ int cxgbe_probe(struct adapter *adapter)
                pi->eth_dev->data = data;
 
 allocate_mac:
-               pi->eth_dev->pci_dev = adapter->pdev;
+               pi->eth_dev->device = &adapter->pdev->device;
                pi->eth_dev->data->dev_private = pi;
                pi->eth_dev->driver = adapter->eth_dev->driver;
                pi->eth_dev->dev_ops = adapter->eth_dev->dev_ops;
                pi->eth_dev->tx_pkt_burst = adapter->eth_dev->tx_pkt_burst;
                pi->eth_dev->rx_pkt_burst = adapter->eth_dev->rx_pkt_burst;
 
-               rte_eth_copy_pci_info(pi->eth_dev, pi->eth_dev->pci_dev);
-
-               TAILQ_INIT(&pi->eth_dev->link_intr_cbs);
+               rte_eth_copy_pci_info(pi->eth_dev, adapter->pdev);
 
                pi->eth_dev->data->mac_addrs = rte_zmalloc(name,
                                                           ETHER_ADDR_LEN, 0);
index ab5a842..37b6090 100644 (file)
@@ -338,12 +338,12 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
                 * mechanism.
                 */
                if (unlikely(!q->bar2_addr)) {
-                       t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL),
-                                    val | V_QID(q->cntxt_id));
+                       t4_write_reg_relaxed(adap, MYPF_REG(A_SGE_PF_KDOORBELL),
+                                            val | V_QID(q->cntxt_id));
                } else {
-                       writel(val | V_QID(q->bar2_qid),
-                              (void *)((uintptr_t)q->bar2_addr +
-                              SGE_UDB_KDOORBELL));
+                       writel_relaxed(val | V_QID(q->bar2_qid),
+                                      (void *)((uintptr_t)q->bar2_addr +
+                                      SGE_UDB_KDOORBELL));
 
                        /*
                         * This Write memory Barrier will force the write to
@@ -890,15 +890,11 @@ static inline int should_tx_packet_coalesce(struct sge_eth_txq *txq,
        struct sge_txq *q = &txq->q;
        unsigned int flits, ndesc;
        unsigned char type = 0;
-       int credits, hw_cidx = ntohs(q->stat->cidx);
-       int in_use = q->pidx - hw_cidx + flits_to_desc(q->coalesce.flits);
+       int credits;
 
        /* use coal WR type 1 when no frags are present */
        type = (mbuf->nb_segs == 1) ? 1 : 0;
 
-       if (in_use < 0)
-               in_use += q->size;
-
        if (unlikely(type != q->coalesce.type && q->coalesce.idx))
                ship_tx_pkt_coalesce_wr(adap, txq);
 
@@ -1645,7 +1641,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
        iq->size = cxgbe_roundup(iq->size, 16);
 
        snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
-                eth_dev->driver->pci_drv.name, fwevtq ? "fwq_ring" : "rx_ring",
+                eth_dev->driver->pci_drv.driver.name,
+                fwevtq ? "fwq_ring" : "rx_ring",
                 eth_dev->data->port_id, queue_id);
        snprintf(z_name_sw, sizeof(z_name_sw), "%s_sw_ring", z_name);
 
@@ -1697,7 +1694,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
                fl->size = cxgbe_roundup(fl->size, 8);
 
                snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
-                        eth_dev->driver->pci_drv.name,
+                        eth_dev->driver->pci_drv.driver.name,
                         fwevtq ? "fwq_ring" : "fl_ring",
                         eth_dev->data->port_id, queue_id);
                snprintf(z_name_sw, sizeof(z_name_sw), "%s_sw_ring", z_name);
@@ -1893,7 +1890,7 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
        nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
 
        snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
-                eth_dev->driver->pci_drv.name, "tx_ring",
+                eth_dev->driver->pci_drv.driver.name, "tx_ring",
                 eth_dev->data->port_id, queue_id);
        snprintf(z_name_sw, sizeof(z_name_sw), "%s_sw_ring", z_name);
 
index 723885d..c6400bd 100644 (file)
@@ -100,7 +100,6 @@ STATIC s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw,
                                                   u16 offset);
 STATIC s32 e1000_validate_nvm_checksum_i350(struct e1000_hw *hw);
 STATIC s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw);
-STATIC void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value);
 STATIC void e1000_clear_vfta_i350(struct e1000_hw *hw);
 
 STATIC void e1000_i2c_start(struct e1000_hw *hw);
index c498684..4133cdd 100644 (file)
@@ -492,6 +492,7 @@ enum e1000_promisc_type {
 void e1000_vfta_set_vf(struct e1000_hw *, u16, bool);
 void e1000_rlpml_set_vf(struct e1000_hw *, u16);
 s32 e1000_promisc_set_vf(struct e1000_hw *, enum e1000_promisc_type type);
+void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value);
 u16 e1000_rxpbs_adjust_82580(u32 data);
 s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data);
 s32 e1000_set_eee_i350(struct e1000_hw *hw, bool adv1G, bool adv100M);
index 22b96fd..f7cf83b 100644 (file)
@@ -298,6 +298,23 @@ s32 e1000_set_mac_type(struct e1000_hw *hw)
        case E1000_DEV_ID_PCH_I218_V3:
                mac->type = e1000_pch_lpt;
                break;
+       case E1000_DEV_ID_PCH_SPT_I219_LM:
+       case E1000_DEV_ID_PCH_SPT_I219_V:
+       case E1000_DEV_ID_PCH_SPT_I219_LM2:
+       case E1000_DEV_ID_PCH_SPT_I219_V2:
+       case E1000_DEV_ID_PCH_LBG_I219_LM3:
+       case E1000_DEV_ID_PCH_SPT_I219_LM4:
+       case E1000_DEV_ID_PCH_SPT_I219_V4:
+       case E1000_DEV_ID_PCH_SPT_I219_LM5:
+       case E1000_DEV_ID_PCH_SPT_I219_V5:
+               mac->type = e1000_pch_spt;
+               break;
+       case E1000_DEV_ID_PCH_CNP_I219_LM6:
+       case E1000_DEV_ID_PCH_CNP_I219_V6:
+       case E1000_DEV_ID_PCH_CNP_I219_LM7:
+       case E1000_DEV_ID_PCH_CNP_I219_V7:
+               mac->type = e1000_pch_cnp;
+               break;
        case E1000_DEV_ID_82575EB_COPPER:
        case E1000_DEV_ID_82575EB_FIBER_SERDES:
        case E1000_DEV_ID_82575GB_QUAD_COPPER:
@@ -448,6 +465,8 @@ s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device)
        case e1000_pchlan:
        case e1000_pch2lan:
        case e1000_pch_lpt:
+       case e1000_pch_spt:
+       case e1000_pch_cnp:
                e1000_init_function_pointers_ich8lan(hw);
                break;
        case e1000_82575:
@@ -632,8 +651,6 @@ s32 e1000_reset_hw(struct e1000_hw *hw)
        return -E1000_ERR_CONFIG;
 }
 
-//TREX_PATCH
-extern int eal_err_read_from_file_is_error;
 /**
  *  e1000_init_hw - Initialize hardware
  *  @hw: pointer to the HW structure
@@ -643,9 +660,6 @@ extern int eal_err_read_from_file_is_error;
  **/
 s32 e1000_init_hw(struct e1000_hw *hw)
 {
-    //TREX_PATCH
-    eal_err_read_from_file_is_error = 0;
-
        if (hw->mac.ops.init_hw)
                return hw->mac.ops.init_hw(hw);
 
index 69aa1f2..dbc2bbb 100644 (file)
@@ -198,6 +198,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define E1000_RCTL_DTYP_PS     0x00000400 /* Packet Split descriptor */
 #define E1000_RCTL_RDMTS_HALF  0x00000000 /* Rx desc min thresh size */
 #define E1000_RCTL_RDMTS_HEX   0x00010000
+#define E1000_RCTL_RDMTS1_HEX  E1000_RCTL_RDMTS_HEX
 #define E1000_RCTL_MO_SHIFT    12 /* multicast offset shift */
 #define E1000_RCTL_MO_3                0x00003000 /* multicast offset 15:4 */
 #define E1000_RCTL_BAM         0x00008000 /* broadcast enable */
@@ -468,6 +469,8 @@ POSSIBILITY OF SUCH DAMAGE.
 
 #define ETHERNET_FCS_SIZE              4
 #define MAX_JUMBO_FRAME_SIZE           0x3F00
+/* The datasheet maximum supported RX size is 9.5KB (9728 bytes) */
+#define MAX_RX_JUMBO_FRAME_SIZE                0x2600
 #define E1000_TX_PTR_GAP               0x1F
 
 /* Extended Configuration Control and Size */
@@ -751,6 +754,12 @@ POSSIBILITY OF SUCH DAMAGE.
 #define E1000_TSYNCTXCTL_VALID         0x00000001 /* Tx timestamp valid */
 #define E1000_TSYNCTXCTL_ENABLED       0x00000010 /* enable Tx timestamping */
 
+/* HH Time Sync */
+#define E1000_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK  0x0000F000 /* max delay */
+#define E1000_TSYNCTXCTL_SYNC_COMP_ERR         0x20000000 /* sync err */
+#define E1000_TSYNCTXCTL_SYNC_COMP             0x40000000 /* sync complete */
+#define E1000_TSYNCTXCTL_START_SYNC            0x80000000 /* initiate sync */
+
 #define E1000_TSYNCRXCTL_VALID         0x00000001 /* Rx timestamp valid */
 #define E1000_TSYNCRXCTL_TYPE_MASK     0x0000000E /* Rx type mask */
 #define E1000_TSYNCRXCTL_TYPE_L2_V2    0x00
index e4e4f76..d9de9fc 100644 (file)
@@ -136,6 +136,19 @@ struct e1000_hw;
 #define E1000_DEV_ID_PCH_I218_V2               0x15A1
 #define E1000_DEV_ID_PCH_I218_LM3              0x15A2 /* Wildcat Point PCH */
 #define E1000_DEV_ID_PCH_I218_V3               0x15A3 /* Wildcat Point PCH */
+#define E1000_DEV_ID_PCH_SPT_I219_LM           0x156F /* Sunrise Point PCH */
+#define E1000_DEV_ID_PCH_SPT_I219_V            0x1570 /* Sunrise Point PCH */
+#define E1000_DEV_ID_PCH_SPT_I219_LM2          0x15B7 /* Sunrise Point-H PCH */
+#define E1000_DEV_ID_PCH_SPT_I219_V2           0x15B8 /* Sunrise Point-H PCH */
+#define E1000_DEV_ID_PCH_LBG_I219_LM3          0x15B9 /* LEWISBURG PCH */
+#define E1000_DEV_ID_PCH_SPT_I219_LM4          0x15D7
+#define E1000_DEV_ID_PCH_SPT_I219_V4           0x15D8
+#define E1000_DEV_ID_PCH_SPT_I219_LM5          0x15E3
+#define E1000_DEV_ID_PCH_SPT_I219_V5           0x15D6
+#define E1000_DEV_ID_PCH_CNP_I219_LM6          0x15BD
+#define E1000_DEV_ID_PCH_CNP_I219_V6           0x15BE
+#define E1000_DEV_ID_PCH_CNP_I219_LM7          0x15BB
+#define E1000_DEV_ID_PCH_CNP_I219_V7           0x15BC
 #define E1000_DEV_ID_82576                     0x10C9
 #define E1000_DEV_ID_82576_FIBER               0x10E6
 #define E1000_DEV_ID_82576_SERDES              0x10E7
@@ -221,6 +234,8 @@ enum e1000_mac_type {
        e1000_pchlan,
        e1000_pch2lan,
        e1000_pch_lpt,
+       e1000_pch_spt,
+       e1000_pch_cnp,
        e1000_82575,
        e1000_82576,
        e1000_82580,
@@ -950,11 +965,15 @@ struct e1000_dev_spec_ich8lan {
        E1000_MUTEX nvm_mutex;
        E1000_MUTEX swflag_mutex;
        bool nvm_k1_enabled;
+       bool disable_k1_off;
        bool eee_disable;
        u16 eee_lp_ability;
 #ifdef ULP_SUPPORT
        enum e1000_ulp_state ulp_state;
-#endif /* NAHUM6LP_HW && ULP_SUPPORT */
+       bool ulp_capability_disabled;
+       bool during_suspend_flow;
+       bool during_dpg_exit;
+#endif /* ULP_SUPPORT */
        u16 lat_enc;
        u16 max_ltr_enc;
        bool smbus_disable;
index 89d07e9..6dd046d 100644 (file)
@@ -94,10 +94,13 @@ STATIC s32  e1000_set_d3_lplu_state_ich8lan(struct e1000_hw *hw,
                                            bool active);
 STATIC s32  e1000_read_nvm_ich8lan(struct e1000_hw *hw, u16 offset,
                                   u16 words, u16 *data);
+STATIC s32  e1000_read_nvm_spt(struct e1000_hw *hw, u16 offset, u16 words,
+                              u16 *data);
 STATIC s32  e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset,
                                    u16 words, u16 *data);
 STATIC s32  e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw);
 STATIC s32  e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw);
+STATIC s32  e1000_update_nvm_checksum_spt(struct e1000_hw *hw);
 STATIC s32  e1000_valid_led_default_ich8lan(struct e1000_hw *hw,
                                            u16 *data);
 STATIC s32 e1000_id_led_init_pchlan(struct e1000_hw *hw);
@@ -125,6 +128,14 @@ STATIC s32  e1000_read_flash_byte_ich8lan(struct e1000_hw *hw,
                                          u32 offset, u8 *data);
 STATIC s32  e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset,
                                          u8 size, u16 *data);
+STATIC s32  e1000_read_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset,
+                                           u32 *data);
+STATIC s32  e1000_read_flash_dword_ich8lan(struct e1000_hw *hw,
+                                          u32 offset, u32 *data);
+STATIC s32  e1000_write_flash_data32_ich8lan(struct e1000_hw *hw,
+                                            u32 offset, u32 data);
+STATIC s32  e1000_retry_write_flash_dword_ich8lan(struct e1000_hw *hw,
+                                                 u32 offset, u32 dword);
 STATIC s32  e1000_read_flash_word_ich8lan(struct e1000_hw *hw,
                                          u32 offset, u16 *data);
 STATIC s32  e1000_retry_write_flash_byte_ich8lan(struct e1000_hw *hw,
@@ -233,7 +244,7 @@ STATIC bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw)
        if (ret_val)
                return false;
 out:
-       if (hw->mac.type == e1000_pch_lpt) {
+       if (hw->mac.type >= e1000_pch_lpt) {
                /* Only unforce SMBus if ME is not active */
                if (!(E1000_READ_REG(hw, E1000_FWSM) &
                    E1000_ICH_FWSM_FW_VALID)) {
@@ -277,7 +288,7 @@ STATIC void e1000_toggle_lanphypc_pch_lpt(struct e1000_hw *hw)
        mac_reg &= ~E1000_CTRL_LANPHYPC_VALUE;
        E1000_WRITE_REG(hw, E1000_CTRL, mac_reg);
        E1000_WRITE_FLUSH(hw);
-       usec_delay(10);
+       msec_delay(1);
        mac_reg &= ~E1000_CTRL_LANPHYPC_OVERRIDE;
        E1000_WRITE_REG(hw, E1000_CTRL, mac_reg);
        E1000_WRITE_FLUSH(hw);
@@ -334,6 +345,8 @@ STATIC s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
         */
        switch (hw->mac.type) {
        case e1000_pch_lpt:
+       case e1000_pch_spt:
+       case e1000_pch_cnp:
                if (e1000_phy_is_accessible_pchlan(hw))
                        break;
 
@@ -481,6 +494,8 @@ STATIC s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw)
                        /* fall-through */
                case e1000_pch2lan:
                case e1000_pch_lpt:
+               case e1000_pch_spt:
+               case e1000_pch_cnp:
                        /* In case the PHY needs to be in mdio slow mode,
                         * set slow mode and try to get the PHY id again.
                         */
@@ -623,36 +638,57 @@ STATIC s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw)
        struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan;
        u32 gfpreg, sector_base_addr, sector_end_addr;
        u16 i;
+       u32 nvm_size;
 
        DEBUGFUNC("e1000_init_nvm_params_ich8lan");
 
-       /* Can't read flash registers if the register set isn't mapped. */
        nvm->type = e1000_nvm_flash_sw;
-       if (!hw->flash_address) {
-               DEBUGOUT("ERROR: Flash registers not mapped\n");
-               return -E1000_ERR_CONFIG;
-       }
 
-       gfpreg = E1000_READ_FLASH_REG(hw, ICH_FLASH_GFPREG);
+       if (hw->mac.type >= e1000_pch_spt) {
+               /* in SPT, gfpreg doesn't exist. NVM size is taken from the
+                * STRAP register. This is because in SPT the GbE Flash region
+                * is no longer accessed through the flash registers. Instead,
+                * the mechanism has changed, and the Flash region access
+                * registers are now implemented in GbE memory space.
+                */
+               nvm->flash_base_addr = 0;
+               nvm_size =
+                   (((E1000_READ_REG(hw, E1000_STRAP) >> 1) & 0x1F) + 1)
+                   * NVM_SIZE_MULTIPLIER;
+               nvm->flash_bank_size = nvm_size / 2;
+               /* Adjust to word count */
+               nvm->flash_bank_size /= sizeof(u16);
+               /* Set the base address for flash register access */
+               hw->flash_address = hw->hw_addr + E1000_FLASH_BASE_ADDR;
+       } else {
+               /* Can't read flash registers if register set isn't mapped. */
+               if (!hw->flash_address) {
+                       DEBUGOUT("ERROR: Flash registers not mapped\n");
+                       return -E1000_ERR_CONFIG;
+               }
 
-       /* sector_X_addr is a "sector"-aligned address (4096 bytes)
-        * Add 1 to sector_end_addr since this sector is included in
-        * the overall size.
-        */
-       sector_base_addr = gfpreg & FLASH_GFPREG_BASE_MASK;
-       sector_end_addr = ((gfpreg >> 16) & FLASH_GFPREG_BASE_MASK) + 1;
+               gfpreg = E1000_READ_FLASH_REG(hw, ICH_FLASH_GFPREG);
 
-       /* flash_base_addr is byte-aligned */
-       nvm->flash_base_addr = sector_base_addr << FLASH_SECTOR_ADDR_SHIFT;
+               /* sector_X_addr is a "sector"-aligned address (4096 bytes)
+                * Add 1 to sector_end_addr since this sector is included in
+                * the overall size.
+                */
+               sector_base_addr = gfpreg & FLASH_GFPREG_BASE_MASK;
+               sector_end_addr = ((gfpreg >> 16) & FLASH_GFPREG_BASE_MASK) + 1;
 
-       /* find total size of the NVM, then cut in half since the total
-        * size represents two separate NVM banks.
-        */
-       nvm->flash_bank_size = ((sector_end_addr - sector_base_addr)
-                               << FLASH_SECTOR_ADDR_SHIFT);
-       nvm->flash_bank_size /= 2;
-       /* Adjust to word count */
-       nvm->flash_bank_size /= sizeof(u16);
+               /* flash_base_addr is byte-aligned */
+               nvm->flash_base_addr = sector_base_addr
+                                      << FLASH_SECTOR_ADDR_SHIFT;
+
+               /* find total size of the NVM, then cut in half since the total
+                * size represents two separate NVM banks.
+                */
+               nvm->flash_bank_size = ((sector_end_addr - sector_base_addr)
+                                       << FLASH_SECTOR_ADDR_SHIFT);
+               nvm->flash_bank_size /= 2;
+               /* Adjust to word count */
+               nvm->flash_bank_size /= sizeof(u16);
+       }
 
        nvm->word_size = E1000_SHADOW_RAM_WORDS;
 
@@ -668,8 +704,13 @@ STATIC s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw)
        /* Function Pointers */
        nvm->ops.acquire        = e1000_acquire_nvm_ich8lan;
        nvm->ops.release        = e1000_release_nvm_ich8lan;
-       nvm->ops.read           = e1000_read_nvm_ich8lan;
-       nvm->ops.update         = e1000_update_nvm_checksum_ich8lan;
+       if (hw->mac.type >= e1000_pch_spt) {
+               nvm->ops.read   = e1000_read_nvm_spt;
+               nvm->ops.update = e1000_update_nvm_checksum_spt;
+       } else {
+               nvm->ops.read   = e1000_read_nvm_ich8lan;
+               nvm->ops.update = e1000_update_nvm_checksum_ich8lan;
+       }
        nvm->ops.valid_led_default = e1000_valid_led_default_ich8lan;
        nvm->ops.validate       = e1000_validate_nvm_checksum_ich8lan;
        nvm->ops.write          = e1000_write_nvm_ich8lan;
@@ -758,6 +799,8 @@ STATIC s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw)
                mac->ops.rar_set = e1000_rar_set_pch2lan;
                /* fall-through */
        case e1000_pch_lpt:
+       case e1000_pch_spt:
+       case e1000_pch_cnp:
 #ifndef NO_NON_BLOCKING_PHY_MTA_UPDATE_SUPPORT
                /* multicast address update for pch2 */
                mac->ops.update_mc_addr_list =
@@ -768,7 +811,13 @@ STATIC s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw)
 #if defined(QV_RELEASE) || !defined(NO_PCH_LPT_B0_SUPPORT)
                /* save PCH revision_id */
                e1000_read_pci_cfg(hw, E1000_PCI_REVISION_ID_REG, &pci_cfg);
-               hw->revision_id = (u8)(pci_cfg &= 0x000F);
+               /* SPT uses full byte for revision ID,
+                * as opposed to previous generations
+                */
+               if (hw->mac.type >= e1000_pch_spt)
+                       hw->revision_id = (u8)(pci_cfg &= 0x00FF);
+               else
+                       hw->revision_id = (u8)(pci_cfg &= 0x000F);
 #endif /* QV_RELEASE || !defined(NO_PCH_LPT_B0_SUPPORT) */
                /* check management mode */
                mac->ops.check_mng_mode = e1000_check_mng_mode_pchlan;
@@ -786,7 +835,7 @@ STATIC s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw)
                break;
        }
 
-       if (mac->type == e1000_pch_lpt) {
+       if (mac->type >= e1000_pch_lpt) {
                mac->rar_entry_count = E1000_PCH_LPT_RAR_ENTRIES;
                mac->ops.rar_set = e1000_rar_set_pch_lpt;
                mac->ops.setup_physical_interface = e1000_setup_copper_link_pch_lpt;
@@ -1015,8 +1064,9 @@ release:
                /* clear FEXTNVM6 bit 8 on link down or 10/100 */
                fextnvm6 &= ~E1000_FEXTNVM6_REQ_PLL_CLK;
 
-               if (!link || ((status & E1000_STATUS_SPEED_100) &&
-                             (status & E1000_STATUS_FD)))
+               if ((hw->phy.revision > 5) || !link ||
+                   ((status & E1000_STATUS_SPEED_100) &&
+                    (status & E1000_STATUS_FD)))
                        goto update_fextnvm6;
 
                ret_val = hw->phy.ops.read_reg(hw, I217_INBAND_CTRL, &reg);
@@ -1068,6 +1118,7 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
        u32 mac_reg;
        s32 ret_val = E1000_SUCCESS;
        u16 phy_reg;
+       u16 oem_reg = 0;
 
        if ((hw->mac.type < e1000_pch_lpt) ||
            (hw->device_id == E1000_DEV_ID_PCH_LPT_I217_LM) ||
@@ -1128,6 +1179,25 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
        mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
        E1000_WRITE_REG(hw, E1000_CTRL_EXT, mac_reg);
 
+       /* Si workaround for ULP entry flow on i217/rev6 h/w.  Enable
+        * LPLU and disable Gig speed when entering ULP
+        */
+       if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6)) {
+               ret_val = e1000_read_phy_reg_hv_locked(hw, HV_OEM_BITS,
+                                                      &oem_reg);
+               if (ret_val)
+                       goto release;
+
+               phy_reg = oem_reg;
+               phy_reg |= HV_OEM_BITS_LPLU | HV_OEM_BITS_GBE_DIS;
+
+               ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS,
+                                                       phy_reg);
+
+               if (ret_val)
+                       goto release;
+       }
+
 skip_smbus:
        if (!to_sx) {
                /* Change the 'Link Status Change' interrupt to trigger
@@ -1184,6 +1254,14 @@ skip_smbus:
                E1000_WRITE_REG(hw, E1000_TCTL, mac_reg);
        }
 
+       if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6) &&
+           to_sx && (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
+               ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS,
+                                                       oem_reg);
+               if (ret_val)
+                       goto release;
+       }
+
 release:
        hw->phy.ops.release(hw);
 out:
@@ -1240,10 +1318,10 @@ s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
                        E1000_WRITE_REG(hw, E1000_H2ME, mac_reg);
                }
 
-               /* Poll up to 100msec for ME to clear ULP_CFG_DONE */
+               /* Poll up to 300msec for ME to clear ULP_CFG_DONE. */
                while (E1000_READ_REG(hw, E1000_FWSM) &
                       E1000_FWSM_ULP_CFG_DONE) {
-                       if (i++ == 10) {
+                       if (i++ == 30) {
                                ret_val = -E1000_ERR_PHY;
                                goto out;
                        }
@@ -1343,6 +1421,8 @@ s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
                             I218_ULP_CONFIG1_RESET_TO_SMBUS |
                             I218_ULP_CONFIG1_WOL_HOST |
                             I218_ULP_CONFIG1_INBAND_EXIT |
+                            I218_ULP_CONFIG1_EN_ULP_LANPHYPC |
+                            I218_ULP_CONFIG1_DIS_CLR_STICKY_ON_PERST |
                             I218_ULP_CONFIG1_DISABLE_SMB_PERST);
                e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg);
 
@@ -1360,6 +1440,8 @@ s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
 
                        if (hw->mac.autoneg)
                                e1000_phy_setup_autoneg(hw);
+                       else
+                               e1000_setup_copper_link_generic(hw);
 
                        e1000_sw_lcd_config_ich8lan(hw);
 
@@ -1397,6 +1479,8 @@ out:
 }
 
 #endif /* ULP_SUPPORT */
+
+
 /**
  *  e1000_check_for_copper_link_ich8lan - Check for link (Copper)
  *  @hw: pointer to the HW structure
@@ -1456,8 +1540,7 @@ STATIC s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
         * aggressive resulting in many collisions. To avoid this, increase
         * the IPG and reduce Rx latency in the PHY.
         */
-       if (((hw->mac.type == e1000_pch2lan) ||
-            (hw->mac.type == e1000_pch_lpt)) && link) {
+       if ((hw->mac.type >= e1000_pch2lan) && link) {
                u16 speed, duplex;
 
                e1000_get_speed_and_duplex_copper_generic(hw, &speed, &duplex);
@@ -1468,6 +1551,10 @@ STATIC s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
                        tipg_reg |= 0xFF;
                        /* Reduce Rx latency in analog PHY */
                        emi_val = 0;
+               } else if (hw->mac.type >= e1000_pch_spt &&
+                          duplex == FULL_DUPLEX && speed != SPEED_1000) {
+                       tipg_reg |= 0xC;
+                       emi_val = 1;
                } else {
                        /* Roll back the default values */
                        tipg_reg |= 0x08;
@@ -1486,10 +1573,78 @@ STATIC s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
                        emi_addr = I217_RX_CONFIG;
                ret_val = e1000_write_emi_reg_locked(hw, emi_addr, emi_val);
 
+
+               if (hw->mac.type >= e1000_pch_lpt) {
+                       u16 phy_reg;
+
+                       hw->phy.ops.read_reg_locked(hw, I217_PLL_CLOCK_GATE_REG,
+                                                   &phy_reg);
+                       phy_reg &= ~I217_PLL_CLOCK_GATE_MASK;
+                       if (speed == SPEED_100 || speed == SPEED_10)
+                               phy_reg |= 0x3E8;
+                       else
+                               phy_reg |= 0xFA;
+                       hw->phy.ops.write_reg_locked(hw,
+                                                    I217_PLL_CLOCK_GATE_REG,
+                                                    phy_reg);
+
+                       if (speed == SPEED_1000) {
+                               hw->phy.ops.read_reg_locked(hw, HV_PM_CTRL,
+                                                           &phy_reg);
+
+                               phy_reg |= HV_PM_CTRL_K1_CLK_REQ;
+
+                               hw->phy.ops.write_reg_locked(hw, HV_PM_CTRL,
+                                                            phy_reg);
+                       }
+               }
                hw->phy.ops.release(hw);
 
                if (ret_val)
                        return ret_val;
+
+               if (hw->mac.type >= e1000_pch_spt) {
+                       u16 data;
+                       u16 ptr_gap;
+
+                       if (speed == SPEED_1000) {
+                               ret_val = hw->phy.ops.acquire(hw);
+                               if (ret_val)
+                                       return ret_val;
+
+                               ret_val = hw->phy.ops.read_reg_locked(hw,
+                                                             PHY_REG(776, 20),
+                                                             &data);
+                               if (ret_val) {
+                                       hw->phy.ops.release(hw);
+                                       return ret_val;
+                               }
+
+                               ptr_gap = (data & (0x3FF << 2)) >> 2;
+                               if (ptr_gap < 0x18) {
+                                       data &= ~(0x3FF << 2);
+                                       data |= (0x18 << 2);
+                                       ret_val =
+                                               hw->phy.ops.write_reg_locked(hw,
+                                                       PHY_REG(776, 20), data);
+                               }
+                               hw->phy.ops.release(hw);
+                               if (ret_val)
+                                       return ret_val;
+                       } else {
+                               ret_val = hw->phy.ops.acquire(hw);
+                               if (ret_val)
+                                       return ret_val;
+
+                               ret_val = hw->phy.ops.write_reg_locked(hw,
+                                                            PHY_REG(776, 20),
+                                                            0xC023);
+                               hw->phy.ops.release(hw);
+                               if (ret_val)
+                                       return ret_val;
+
+                       }
+               }
        }
 
        /* I217 Packet Loss issue:
@@ -1497,7 +1652,7 @@ STATIC s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
         * on power up.
         * Set the Beacon Duration for I217 to 8 usec
         */
-       if (hw->mac.type == e1000_pch_lpt) {
+       if (hw->mac.type >= e1000_pch_lpt) {
                u32 mac_reg;
 
                mac_reg = E1000_READ_REG(hw, E1000_FEXTNVM4);
@@ -1519,10 +1674,29 @@ STATIC s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
        hw->dev_spec.ich8lan.eee_lp_ability = 0;
 
        /* Configure K0s minimum time */
-       if (hw->mac.type == e1000_pch_lpt) {
+       if (hw->mac.type >= e1000_pch_lpt) {
                e1000_configure_k0s_lpt(hw, K1_ENTRY_LATENCY, K1_MIN_TIME);
        }
 
+       if (hw->mac.type >= e1000_pch_lpt) {
+               u32 fextnvm6 = E1000_READ_REG(hw, E1000_FEXTNVM6);
+
+               if (hw->mac.type == e1000_pch_spt) {
+                       /* FEXTNVM6 K1-off workaround - for SPT only */
+                       u32 pcieanacfg = E1000_READ_REG(hw, E1000_PCIEANACFG);
+
+                       if (pcieanacfg & E1000_FEXTNVM6_K1_OFF_ENABLE)
+                               fextnvm6 |= E1000_FEXTNVM6_K1_OFF_ENABLE;
+                       else
+                               fextnvm6 &= ~E1000_FEXTNVM6_K1_OFF_ENABLE;
+               }
+
+               if (hw->dev_spec.ich8lan.disable_k1_off == true)
+                       fextnvm6 &= ~E1000_FEXTNVM6_K1_OFF_ENABLE;
+
+               E1000_WRITE_REG(hw, E1000_FEXTNVM6, fextnvm6);
+       }
+
        if (!link)
                return E1000_SUCCESS; /* No link detected */
 
@@ -1616,6 +1790,8 @@ void e1000_init_function_pointers_ich8lan(struct e1000_hw *hw)
        case e1000_pchlan:
        case e1000_pch2lan:
        case e1000_pch_lpt:
+       case e1000_pch_spt:
+       case e1000_pch_cnp:
                hw->phy.ops.init_params = e1000_init_phy_params_pchlan;
                break;
        default:
@@ -2081,6 +2257,8 @@ STATIC s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw)
        case e1000_pchlan:
        case e1000_pch2lan:
        case e1000_pch_lpt:
+       case e1000_pch_spt:
+       case e1000_pch_cnp:
                sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M;
                break;
        default:
@@ -3204,6 +3382,41 @@ STATIC s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank)
        DEBUGFUNC("e1000_valid_nvm_bank_detect_ich8lan");
 
        switch (hw->mac.type) {
+       case e1000_pch_spt:
+       case e1000_pch_cnp:
+               bank1_offset = nvm->flash_bank_size;
+               act_offset = E1000_ICH_NVM_SIG_WORD;
+
+               /* set bank to 0 in case flash read fails */
+               *bank = 0;
+
+               /* Check bank 0 */
+               ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset,
+                                                        &nvm_dword);
+               if (ret_val)
+                       return ret_val;
+               sig_byte = (u8)((nvm_dword & 0xFF00) >> 8);
+               if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) ==
+                   E1000_ICH_NVM_SIG_VALUE) {
+                       *bank = 0;
+                       return E1000_SUCCESS;
+               }
+
+               /* Check bank 1 */
+               ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset +
+                                                        bank1_offset,
+                                                        &nvm_dword);
+               if (ret_val)
+                       return ret_val;
+               sig_byte = (u8)((nvm_dword & 0xFF00) >> 8);
+               if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) ==
+                   E1000_ICH_NVM_SIG_VALUE) {
+                       *bank = 1;
+                       return E1000_SUCCESS;
+               }
+
+               DEBUGOUT("ERROR: No valid NVM bank present\n");
+               return -E1000_ERR_NVM;
        case e1000_ich8lan:
        case e1000_ich9lan:
                eecd = E1000_READ_REG(hw, E1000_EECD);
@@ -3250,6 +3463,99 @@ STATIC s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank)
        }
 }
 
+/**
+ *  e1000_read_nvm_spt - NVM access for SPT
+ *  @hw: pointer to the HW structure
+ *  @offset: The offset (in bytes) of the word(s) to read.
+ *  @words: Size of data to read in words.
+ *  @data: pointer to the word(s) to read at offset.
+ *
+ *  Reads a word(s) from the NVM
+ **/
+STATIC s32 e1000_read_nvm_spt(struct e1000_hw *hw, u16 offset, u16 words,
+                             u16 *data)
+{
+       struct e1000_nvm_info *nvm = &hw->nvm;
+       struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan;
+       u32 act_offset;
+       s32 ret_val = E1000_SUCCESS;
+       u32 bank = 0;
+       u32 dword = 0;
+       u16 offset_to_read;
+       u16 i;
+
+       DEBUGFUNC("e1000_read_nvm_spt");
+
+       if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) ||
+           (words == 0)) {
+               DEBUGOUT("nvm parameter(s) out of bounds\n");
+               ret_val = -E1000_ERR_NVM;
+               goto out;
+       }
+
+       nvm->ops.acquire(hw);
+
+       ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank);
+       if (ret_val != E1000_SUCCESS) {
+               DEBUGOUT("Could not detect valid bank, assuming bank 0\n");
+               bank = 0;
+       }
+
+       act_offset = (bank) ? nvm->flash_bank_size : 0;
+       act_offset += offset;
+
+       ret_val = E1000_SUCCESS;
+
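+       /* Flash accesses are dword-wide on SPT: each pass services two 16-bit
+        * words; a trailing odd word is taken from the matching half of the
+        * dword. */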
+       for (i = 0; i < words; i += 2) {
+               if (words - i == 1) {
+                       if (dev_spec->shadow_ram[offset+i].modified) {
+                               data[i] = dev_spec->shadow_ram[offset+i].value;
+                       } else {
+                               offset_to_read = act_offset + i -
+                                                ((act_offset + i) % 2);
+                               ret_val =
+                                  e1000_read_flash_dword_ich8lan(hw,
+                                                                offset_to_read,
+                                                                &dword);
+                               if (ret_val)
+                                       break;
+                               if ((act_offset + i) % 2 == 0)
+                                       data[i] = (u16)(dword & 0xFFFF);
+                               else
+                                       data[i] = (u16)((dword >> 16) & 0xFFFF);
+                       }
+               } else {
+                       offset_to_read = act_offset + i;
+                       if (!(dev_spec->shadow_ram[offset+i].modified) ||
+                           !(dev_spec->shadow_ram[offset+i+1].modified)) {
+                               ret_val =
+                                  e1000_read_flash_dword_ich8lan(hw,
+                                                                offset_to_read,
+                                                                &dword);
+                               if (ret_val)
+                                       break;
+                       }
+                       if (dev_spec->shadow_ram[offset+i].modified)
+                               data[i] = dev_spec->shadow_ram[offset+i].value;
+                       else
+                               data[i] = (u16) (dword & 0xFFFF);
+                       if (dev_spec->shadow_ram[offset+i].modified)
+                               data[i+1] =
+                                  dev_spec->shadow_ram[offset+i+1].value;
+                       else
+                               data[i+1] = (u16) (dword >> 16 & 0xFFFF);
+               }
+       }
+
+       nvm->ops.release(hw);
+
+out:
+       if (ret_val)
+               DEBUGOUT1("NVM read error: %d\n", ret_val);
+
+       return ret_val;
+}
+
 /**
  *  e1000_read_nvm_ich8lan - Read word(s) from the NVM
  *  @hw: pointer to the HW structure
@@ -3337,7 +3643,11 @@ STATIC s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw)
        /* Clear FCERR and DAEL in hw status by writing 1 */
        hsfsts.hsf_status.flcerr = 1;
        hsfsts.hsf_status.dael = 1;
-       E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval);
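+       /* On SPT and newer, the flash registers are accessed through GbE
+        * memory space and only 32-bit accesses are supported. */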
+       if (hw->mac.type >= e1000_pch_spt)
+               E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS,
+                                     hsfsts.regval & 0xFFFF);
+       else
+               E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval);
 
        /* Either we should have a hardware SPI cycle in progress
         * bit to check against, in order to start a new cycle or
@@ -3353,7 +3663,12 @@ STATIC s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw)
                 * Begin by setting Flash Cycle Done.
                 */
                hsfsts.hsf_status.flcdone = 1;
-               E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval);
+               if (hw->mac.type >= e1000_pch_spt)
+                       E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS,
+                                             hsfsts.regval & 0xFFFF);
+               else
+                       E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS,
+                                               hsfsts.regval);
                ret_val = E1000_SUCCESS;
        } else {
                s32 i;
@@ -3375,8 +3690,12 @@ STATIC s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw)
                         * now set the Flash Cycle Done.
                         */
                        hsfsts.hsf_status.flcdone = 1;
-                       E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS,
-                                               hsfsts.regval);
+                       if (hw->mac.type >= e1000_pch_spt)
+                               E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS,
+                                                     hsfsts.regval & 0xFFFF);
+                       else
+                               E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS,
+                                                       hsfsts.regval);
                } else {
                        DEBUGOUT("Flash controller busy, cannot get access\n");
                }
@@ -3401,10 +3720,17 @@ STATIC s32 e1000_flash_cycle_ich8lan(struct e1000_hw *hw, u32 timeout)
        DEBUGFUNC("e1000_flash_cycle_ich8lan");
 
        /* Start a cycle by writing 1 in Flash Cycle Go in Hw Flash Control */
-       hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL);
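+       /* On SPT, HSFCTL is carried in the upper 16 bits of the 32-bit
+        * HSFSTS register. */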
+       if (hw->mac.type >= e1000_pch_spt)
+               hsflctl.regval = E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16;
+       else
+               hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL);
        hsflctl.hsf_ctrl.flcgo = 1;
 
-       E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval);
+       if (hw->mac.type >= e1000_pch_spt)
+               E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS,
+                                     hsflctl.regval << 16);
+       else
+               E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval);
 
        /* wait till FDONE bit is set to 1 */
        do {
@@ -3420,6 +3746,29 @@ STATIC s32 e1000_flash_cycle_ich8lan(struct e1000_hw *hw, u32 timeout)
        return -E1000_ERR_NVM;
 }
 
+/**
+ *  e1000_read_flash_dword_ich8lan - Read dword from flash
+ *  @hw: pointer to the HW structure
+ *  @offset: offset to data location
+ *  @data: pointer to the location for storing the data
+ *
+ *  Reads the flash dword at offset into data.  Offset is converted
+ *  to bytes before read.
+ **/
+STATIC s32 e1000_read_flash_dword_ich8lan(struct e1000_hw *hw, u32 offset,
+                                         u32 *data)
+{
+       DEBUGFUNC("e1000_read_flash_dword_ich8lan");
+
+       if (!data)
+               return -E1000_ERR_NVM;
+
+       /* Must convert word offset into bytes. */
+       offset <<= 1;
+
+       return e1000_read_flash_data32_ich8lan(hw, offset, data);
+}
+
 /**
  *  e1000_read_flash_word_ich8lan - Read word from flash
  *  @hw: pointer to the HW structure
@@ -3457,7 +3806,13 @@ STATIC s32 e1000_read_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset,
        s32 ret_val;
        u16 word = 0;
 
-       ret_val = e1000_read_flash_data_ich8lan(hw, offset, 1, &word);
+       /* In SPT, only 32 bits access is supported,
+        * so this function should not be called.
+        */
+       if (hw->mac.type >= e1000_pch_spt)
+               return -E1000_ERR_NVM;
+       else
+               ret_val = e1000_read_flash_data_ich8lan(hw, offset, 1, &word);
 
        if (ret_val)
                return ret_val;
@@ -3543,6 +3898,83 @@ STATIC s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset,
        return ret_val;
 }
 
+/**
+ *  e1000_read_flash_data32_ich8lan - Read dword from NVM
+ *  @hw: pointer to the HW structure
+ *  @offset: The offset (in bytes) of the dword to read.
+ *  @data: Pointer to the dword to store the value read.
+ *
+ *  Reads a dword from the NVM using the flash access registers.
+ **/
+STATIC s32 e1000_read_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset,
+                                          u32 *data)
+{
+       union ich8_hws_flash_status hsfsts;
+       union ich8_hws_flash_ctrl hsflctl;
+       u32 flash_linear_addr;
+       s32 ret_val = -E1000_ERR_NVM;
+       u8 count = 0;
+
+       DEBUGFUNC("e1000_read_flash_data32_ich8lan");
+
+       if (offset > ICH_FLASH_LINEAR_ADDR_MASK ||
+           hw->mac.type < e1000_pch_spt)
+               return -E1000_ERR_NVM;
+       flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) +
+                            hw->nvm.flash_base_addr);
+
+       do {
+               usec_delay(1);
+               /* Steps */
+               ret_val = e1000_flash_cycle_init_ich8lan(hw);
+               if (ret_val != E1000_SUCCESS)
+                       break;
+               /* In SPT, This register is in Lan memory space, not flash.
+                * Therefore, only 32 bit access is supported
+                */
+               hsflctl.regval = E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16;
+
+               /* fldbcount of sizeof(u32) - 1 selects a 4 byte (dword) transfer. */
+               hsflctl.hsf_ctrl.fldbcount = sizeof(u32) - 1;
+               hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_READ;
+               /* In SPT, This register is in Lan memory space, not flash.
+                * Therefore, only 32 bit access is supported
+                */
+               E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS,
+                                     (u32)hsflctl.regval << 16);
+               E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr);
+
+               ret_val = e1000_flash_cycle_ich8lan(hw,
+                                               ICH_FLASH_READ_COMMAND_TIMEOUT);
+
+               /* Check if FCERR is set to 1, if set to 1, clear it
+                * and try the whole sequence a few more times, else
+                * read in (shift in) the Flash Data0, the order is
+                * least significant byte first msb to lsb
+                */
+               if (ret_val == E1000_SUCCESS) {
+                       *data = E1000_READ_FLASH_REG(hw, ICH_FLASH_FDATA0);
+                       break;
+               } else {
+                       /* If we've gotten here, then things are probably
+                        * completely hosed, but if the error condition is
+                        * detected, it won't hurt to give it another try...
+                        * ICH_FLASH_CYCLE_REPEAT_COUNT times.
+                        */
+                       hsfsts.regval = E1000_READ_FLASH_REG16(hw,
+                                                             ICH_FLASH_HSFSTS);
+                       if (hsfsts.hsf_status.flcerr) {
+                               /* Repeat for some time before giving up. */
+                               continue;
+                       } else if (!hsfsts.hsf_status.flcdone) {
+                               DEBUGOUT("Timeout error - flash cycle did not complete.\n");
+                               break;
+                       }
+               }
+       } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT);
+
+       return ret_val;
+}
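The `>>16` and `<<16` shifts in this function (and throughout the write and erase paths below) all follow from the detail the inline comments mention: starting with the Sunrise Point (SPT) PCH the flash registers sit in LAN memory space, HSFCTL occupies bits 31:16 of the same 32-bit word as HSFSTS, and only 32-bit accesses are allowed. A minimal sketch of that read-modify-write pattern, modelling the combined register as a plain uint32_t instead of the driver's register unions, looks like this:

#include <stdint.h>

/* Sketch only: "reg" stands in for the mapped ICH_FLASH_HSFSTS location. */
static uint16_t spt_read_hsflctl(volatile uint32_t *reg)
{
        return (uint16_t)(*reg >> 16);          /* control half in bits 31:16 */
}

static void spt_write_hsflctl(volatile uint32_t *reg, uint16_t ctrl)
{
        /* One 32-bit store; the status half receives zeros, which leaves
         * its write-1-to-clear bits (FLCDONE/FLCERR) untouched. */
        *reg = (uint32_t)ctrl << 16;
}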
 
 /**
  *  e1000_write_nvm_ich8lan - Write word(s) to the NVM
@@ -3580,6 +4012,175 @@ STATIC s32 e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words,
        return E1000_SUCCESS;
 }
 
+/**
+ *  e1000_update_nvm_checksum_spt - Update the checksum for NVM
+ *  @hw: pointer to the HW structure
+ *
+ *  The NVM checksum is updated by calling the generic update_nvm_checksum,
+ *  which writes the checksum to the shadow ram.  The changes in the shadow
+ *  ram are then committed to the EEPROM by processing each bank at a time
+ *  checking for the modified bit and writing only the pending changes.
+ *  After a successful commit, the shadow ram is cleared and is ready for
+ *  future writes.
+ **/
+STATIC s32 e1000_update_nvm_checksum_spt(struct e1000_hw *hw)
+{
+       struct e1000_nvm_info *nvm = &hw->nvm;
+       struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan;
+       u32 i, act_offset, new_bank_offset, old_bank_offset, bank;
+       s32 ret_val;
+       u32 dword = 0;
+
+       DEBUGFUNC("e1000_update_nvm_checksum_spt");
+
+       ret_val = e1000_update_nvm_checksum_generic(hw);
+       if (ret_val)
+               goto out;
+
+       if (nvm->type != e1000_nvm_flash_sw)
+               goto out;
+
+       nvm->ops.acquire(hw);
+
+       /* We're writing to the opposite bank so if we're on bank 1,
+        * write to bank 0 etc.  We also need to erase the segment that
+        * is going to be written
+        */
+       ret_val =  e1000_valid_nvm_bank_detect_ich8lan(hw, &bank);
+       if (ret_val != E1000_SUCCESS) {
+               DEBUGOUT("Could not detect valid bank, assuming bank 0\n");
+               bank = 0;
+       }
+
+       if (bank == 0) {
+               new_bank_offset = nvm->flash_bank_size;
+               old_bank_offset = 0;
+               ret_val = e1000_erase_flash_bank_ich8lan(hw, 1);
+               if (ret_val)
+                       goto release;
+       } else {
+               old_bank_offset = nvm->flash_bank_size;
+               new_bank_offset = 0;
+               ret_val = e1000_erase_flash_bank_ich8lan(hw, 0);
+               if (ret_val)
+                       goto release;
+       }
+       for (i = 0; i < E1000_SHADOW_RAM_WORDS; i += 2) {
+               /* Determine whether to write the value stored
+                * in the other NVM bank or a modified value stored
+                * in the shadow RAM
+                */
+               ret_val = e1000_read_flash_dword_ich8lan(hw,
+                                                        i + old_bank_offset,
+                                                        &dword);
+
+               if (dev_spec->shadow_ram[i].modified) {
+                       dword &= 0xffff0000;
+                       dword |= (dev_spec->shadow_ram[i].value & 0xffff);
+               }
+               if (dev_spec->shadow_ram[i + 1].modified) {
+                       dword &= 0x0000ffff;
+                       dword |= ((dev_spec->shadow_ram[i + 1].value & 0xffff)
+                                 << 16);
+               }
+               if (ret_val)
+                       break;
+
+               /* If the word is 0x13, then make sure the signature bits
+                * (15:14) are 11b until the commit has completed.
+                * This will allow us to write 10b which indicates the
+                * signature is valid.  We want to do this after the write
+                * has completed so that we don't mark the segment valid
+                * while the write is still in progress
+                */
+               if (i == E1000_ICH_NVM_SIG_WORD - 1)
+                       dword |= E1000_ICH_NVM_SIG_MASK << 16;
+
+               /* Convert offset to bytes. */
+               act_offset = (i + new_bank_offset) << 1;
+
+               usec_delay(100);
+
+               /* Write the data to the new bank. Offset in words*/
+               act_offset = i + new_bank_offset;
+               ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset,
+                                                               dword);
+               if (ret_val)
+                       break;
+       }
+
+       /* Don't bother writing the segment valid bits if sector
+        * programming failed.
+        */
+       if (ret_val) {
+               DEBUGOUT("Flash commit failed.\n");
+               goto release;
+       }
+
+       /* Finally validate the new segment by setting bit 15:14
+        * to 10b in word 0x13 , this can be done without an
+        * erase as well since these bits are 11 to start with
+        * and we need to change bit 14 to 0b
+        */
+       act_offset = new_bank_offset + E1000_ICH_NVM_SIG_WORD;
+
+       /*offset in words but we read dword*/
+       --act_offset;
+       ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset, &dword);
+
+       if (ret_val)
+               goto release;
+
+       dword &= 0xBFFFFFFF;
+       ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset, dword);
+
+       if (ret_val)
+               goto release;
+
+       /* And invalidate the previously valid segment by setting
+        * its signature word (0x13) high_byte to 0b. This can be
+        * done without an erase because flash erase sets all bits
+        * to 1's. We can write 1's to 0's without an erase
+        */
+       act_offset = (old_bank_offset + E1000_ICH_NVM_SIG_WORD) * 2 + 1;
+
+       /* offset in words but we read dword*/
+       act_offset = old_bank_offset + E1000_ICH_NVM_SIG_WORD - 1;
+       ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset, &dword);
+
+       if (ret_val)
+               goto release;
+
+       dword &= 0x00FFFFFF;
+       ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset, dword);
+
+       if (ret_val)
+               goto release;
+
+       /* Great!  Everything worked, we can now clear the cached entries. */
+       for (i = 0; i < E1000_SHADOW_RAM_WORDS; i++) {
+               dev_spec->shadow_ram[i].modified = false;
+               dev_spec->shadow_ram[i].value = 0xFFFF;
+       }
+
+release:
+       nvm->ops.release(hw);
+
+       /* Reload the EEPROM, or else modifications will not appear
+        * until after the next adapter reset.
+        */
+       if (!ret_val) {
+               nvm->ops.reload(hw);
+               msec_delay(10);
+       }
+
+out:
+       if (ret_val)
+               DEBUGOUT1("NVM update error: %d\n", ret_val);
+
+       return ret_val;
+}
+
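The signature handling at the end of the function is easier to follow as raw bit arithmetic. Word 0x13 is read back in the upper half of the dword at `E1000_ICH_NVM_SIG_WORD - 1`, so its signature bits 15:14 land at dword bits 31:30; erased flash reads 11b there, 10b marks a bank valid, and clearing the whole high byte invalidates it. The snippet below applies both masks to a single value purely to show the bit effect (in the function they act on the new and the old bank respectively):

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint32_t dword = 0xFFFFFFFFu;           /* erased: signature bits 11b */

        dword &= 0xBFFFFFFF;                    /* validate new bank: 11b -> 10b */
        assert(((dword >> 30) & 0x3u) == 0x2u);

        dword &= 0x00FFFFFF;                    /* invalidate old bank: high byte -> 0 */
        assert(((dword >> 30) & 0x3u) == 0x0u);
        return 0;
}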
 /**
  *  e1000_update_nvm_checksum_ich8lan - Update the checksum for NVM
  *  @hw: pointer to the HW structure
@@ -3757,6 +4358,8 @@ STATIC s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw)
         */
        switch (hw->mac.type) {
        case e1000_pch_lpt:
+       case e1000_pch_spt:
+       case e1000_pch_cnp:
                word = NVM_COMPAT;
                valid_csum_mask = NVM_COMPAT_VALID_CSUM;
                break;
@@ -3804,8 +4407,13 @@ STATIC s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset,
 
        DEBUGFUNC("e1000_write_ich8_data");
 
-       if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK)
-               return -E1000_ERR_NVM;
+       if (hw->mac.type >= e1000_pch_spt) {
+               if (size != 4 || offset > ICH_FLASH_LINEAR_ADDR_MASK)
+                       return -E1000_ERR_NVM;
+       } else {
+               if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK)
+                       return -E1000_ERR_NVM;
+       }
 
        flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) +
                             hw->nvm.flash_base_addr);
@@ -3816,12 +4424,29 @@ STATIC s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset,
                ret_val = e1000_flash_cycle_init_ich8lan(hw);
                if (ret_val != E1000_SUCCESS)
                        break;
-               hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL);
+               /* In SPT, This register is in Lan memory space, not
+                * flash.  Therefore, only 32 bit access is supported
+                */
+               if (hw->mac.type >= e1000_pch_spt)
+                       hsflctl.regval =
+                           E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16;
+               else
+                       hsflctl.regval =
+                           E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL);
 
                /* 0b/1b corresponds to 1 or 2 byte size, respectively. */
                hsflctl.hsf_ctrl.fldbcount = size - 1;
                hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_WRITE;
-               E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval);
+               /* In SPT, This register is in Lan memory space,
+                * not flash.  Therefore, only 32 bit access is
+                * supported
+                */
+               if (hw->mac.type >= e1000_pch_spt)
+                       E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS,
+                                             hsflctl.regval << 16);
+               else
+                       E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL,
+                                               hsflctl.regval);
 
                E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr);
 
@@ -3859,6 +4484,94 @@ STATIC s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset,
        return ret_val;
 }
 
+/**
+*  e1000_write_flash_data32_ich8lan - Writes 4 bytes to the NVM
+*  @hw: pointer to the HW structure
+*  @offset: The offset (in bytes) of the dword to write.
+*  @data: The 4 bytes to write to the NVM.
+*
+*  Writes a dword (4 bytes) to the NVM using the flash access registers.
+**/
+STATIC s32 e1000_write_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset,
+                                           u32 data)
+{
+       union ich8_hws_flash_status hsfsts;
+       union ich8_hws_flash_ctrl hsflctl;
+       u32 flash_linear_addr;
+       s32 ret_val;
+       u8 count = 0;
+
+       DEBUGFUNC("e1000_write_flash_data32_ich8lan");
+
+       if (hw->mac.type >= e1000_pch_spt) {
+               if (offset > ICH_FLASH_LINEAR_ADDR_MASK)
+                       return -E1000_ERR_NVM;
+       }
+       flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) +
+                            hw->nvm.flash_base_addr);
+       do {
+               usec_delay(1);
+               /* Steps */
+               ret_val = e1000_flash_cycle_init_ich8lan(hw);
+               if (ret_val != E1000_SUCCESS)
+                       break;
+
+               /* In SPT, This register is in Lan memory space, not
+                * flash.  Therefore, only 32 bit access is supported
+                */
+               if (hw->mac.type >= e1000_pch_spt)
+                       hsflctl.regval = E1000_READ_FLASH_REG(hw,
+                                                             ICH_FLASH_HSFSTS)
+                                        >> 16;
+               else
+                       hsflctl.regval = E1000_READ_FLASH_REG16(hw,
+                                                             ICH_FLASH_HSFCTL);
+
+               hsflctl.hsf_ctrl.fldbcount = sizeof(u32) - 1;
+               hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_WRITE;
+
+               /* In SPT, This register is in Lan memory space,
+                * not flash.  Therefore, only 32 bit access is
+                * supported
+                */
+               if (hw->mac.type >= e1000_pch_spt)
+                       E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS,
+                                             hsflctl.regval << 16);
+               else
+                       E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL,
+                                               hsflctl.regval);
+
+               E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr);
+
+               E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FDATA0, data);
+
+               /* check if FCERR is set to 1 , if set to 1, clear it
+                * and try the whole sequence a few more times else done
+                */
+               ret_val = e1000_flash_cycle_ich8lan(hw,
+                                              ICH_FLASH_WRITE_COMMAND_TIMEOUT);
+
+               if (ret_val == E1000_SUCCESS)
+                       break;
+
+               /* If we're here, then things are most likely
+                * completely hosed, but if the error condition
+                * is detected, it won't hurt to give it another
+                * try...ICH_FLASH_CYCLE_REPEAT_COUNT times.
+                */
+               hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS);
+
+               if (hsfsts.hsf_status.flcerr)
+                       /* Repeat for some time before giving up. */
+                       continue;
+               if (!hsfsts.hsf_status.flcdone) {
+                       DEBUGOUT("Timeout error - flash cycle did not complete.\n");
+                       break;
+               }
+       } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT);
+
+       return ret_val;
+}
 
 /**
  *  e1000_write_flash_byte_ich8lan - Write a single byte to NVM
@@ -3878,7 +4591,42 @@ STATIC s32 e1000_write_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset,
        return e1000_write_flash_data_ich8lan(hw, offset, 1, word);
 }
 
+/**
+*  e1000_retry_write_flash_dword_ich8lan - Writes a dword to NVM
+*  @hw: pointer to the HW structure
+*  @offset: The offset of the word to write.
+*  @dword: The dword to write to the NVM.
+*
+*  Writes a single dword to the NVM using the flash access registers.
+*  Goes through a retry algorithm before giving up.
+**/
+STATIC s32 e1000_retry_write_flash_dword_ich8lan(struct e1000_hw *hw,
+                                                u32 offset, u32 dword)
+{
+       s32 ret_val;
+       u16 program_retries;
 
+       DEBUGFUNC("e1000_retry_write_flash_dword_ich8lan");
+
+       /* Must convert word offset into bytes. */
+       offset <<= 1;
+
+       ret_val = e1000_write_flash_data32_ich8lan(hw, offset, dword);
+
+       if (!ret_val)
+               return ret_val;
+       for (program_retries = 0; program_retries < 100; program_retries++) {
+               DEBUGOUT2("Retrying Dword %8.8X at offset %u\n", dword, offset);
+               usec_delay(100);
+               ret_val = e1000_write_flash_data32_ich8lan(hw, offset, dword);
+               if (ret_val == E1000_SUCCESS)
+                       break;
+       }
+       if (program_retries == 100)
+               return -E1000_ERR_NVM;
+
+       return E1000_SUCCESS;
+}
 
 /**
  *  e1000_retry_write_flash_byte_ich8lan - Writes a single byte to NVM
@@ -3988,12 +4736,22 @@ STATIC s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank)
                        /* Write a value 11 (block Erase) in Flash
                         * Cycle field in hw flash control
                         */
-                       hsflctl.regval =
-                           E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL);
+                       if (hw->mac.type >= e1000_pch_spt)
+                               hsflctl.regval =
+                                   E1000_READ_FLASH_REG(hw,
+                                                        ICH_FLASH_HSFSTS)>>16;
+                       else
+                               hsflctl.regval =
+                                   E1000_READ_FLASH_REG16(hw,
+                                                          ICH_FLASH_HSFCTL);
 
                        hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_ERASE;
-                       E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL,
-                                               hsflctl.regval);
+                       if (hw->mac.type >= e1000_pch_spt)
+                               E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS,
+                                                     hsflctl.regval << 16);
+                       else
+                               E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL,
+                                                       hsflctl.regval);
 
                        /* Write the last 24 bits of an index within the
                         * block into Flash Linear address field in Flash
@@ -4426,7 +5184,7 @@ STATIC void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw)
        E1000_WRITE_REG(hw, E1000_RFCTL, reg);
 
        /* Enable ECC on Lynxpoint */
-       if (hw->mac.type == e1000_pch_lpt) {
+       if (hw->mac.type >= e1000_pch_lpt) {
                reg = E1000_READ_REG(hw, E1000_PBECCSTS);
                reg |= E1000_PBECCSTS_ECC_ENABLE;
                E1000_WRITE_REG(hw, E1000_PBECCSTS, reg);
@@ -4858,7 +5616,8 @@ void e1000_suspend_workarounds_ich8lan(struct e1000_hw *hw)
                if ((device_id == E1000_DEV_ID_PCH_LPTLP_I218_LM) ||
                    (device_id == E1000_DEV_ID_PCH_LPTLP_I218_V) ||
                    (device_id == E1000_DEV_ID_PCH_I218_LM3) ||
-                   (device_id == E1000_DEV_ID_PCH_I218_V3)) {
+                   (device_id == E1000_DEV_ID_PCH_I218_V3) ||
+                   (hw->mac.type >= e1000_pch_spt)) {
                        u32 fextnvm6 = E1000_READ_REG(hw, E1000_FEXTNVM6);
 
                        E1000_WRITE_REG(hw, E1000_FEXTNVM6,
index 33e77fb..bc4ed1d 100644 (file)
@@ -121,6 +121,18 @@ POSSIBILITY OF SUCH DAMAGE.
 #if !defined(EXTERNAL_RELEASE) || defined(ULP_SUPPORT)
 #define E1000_FEXTNVM7_DISABLE_SMB_PERST       0x00000020
 #endif /* !EXTERNAL_RELEASE || ULP_SUPPORT */
+#define E1000_FEXTNVM9_IOSFSB_CLKGATE_DIS      0x00000800
+#define E1000_FEXTNVM9_IOSFSB_CLKREQ_DIS       0x00001000
+#define E1000_FEXTNVM11_DISABLE_PB_READ                0x00000200
+#define E1000_FEXTNVM11_DISABLE_MULR_FIX       0x00002000
+
+/* bit24: RXDCTL thresholds granularity: 0 - cache lines, 1 - descriptors */
+#define E1000_RXDCTL_THRESH_UNIT_DESC  0x01000000
+
+#define NVM_SIZE_MULTIPLIER 4096  /*multiplier for NVMS field*/
+#define E1000_FLASH_BASE_ADDR 0xE000 /*offset of NVM access regs*/
+#define E1000_CTRL_EXT_NVMVS 0x3 /*NVM valid sector */
+#define E1000_TARC0_CB_MULTIQ_3_REQ    (1 << 28 | 1 << 29)
 #define PCIE_ICH8_SNOOP_ALL    PCIE_NO_SNOOP_ALL
 
 #define E1000_ICH_RAR_ENTRIES  7
@@ -198,6 +210,10 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I218_ULP_CONFIG1_INBAND_EXIT   0x0020 /* Inband on ULP exit */
 #define I218_ULP_CONFIG1_WOL_HOST      0x0040 /* WoL Host on ULP exit */
 #define I218_ULP_CONFIG1_RESET_TO_SMBUS        0x0100 /* Reset to SMBus mode */
+/* enable ULP even if when phy powered down via lanphypc */
+#define I218_ULP_CONFIG1_EN_ULP_LANPHYPC       0x0400
+/* disable clear of sticky ULP on PERST */
+#define I218_ULP_CONFIG1_DIS_CLR_STICKY_ON_PERST       0x0800
 #define I218_ULP_CONFIG1_DISABLE_SMB_PERST     0x1000 /* Disable on PERST# */
 
 #endif /* !EXTERNAL_RELEASE || ULP_SUPPORT */
@@ -234,9 +250,12 @@ POSSIBILITY OF SUCH DAMAGE.
 
 /* PHY Power Management Control */
 #define HV_PM_CTRL             PHY_REG(770, 17)
-#define HV_PM_CTRL_PLL_STOP_IN_K1_GIGA 0x100
+#define HV_PM_CTRL_K1_CLK_REQ          0x200
 #define HV_PM_CTRL_K1_ENABLE           0x4000
 
+#define I217_PLL_CLOCK_GATE_REG        PHY_REG(772, 28)
+#define I217_PLL_CLOCK_GATE_MASK       0x07FF
+
 #define SW_FLAG_TIMEOUT                1000 /* SW Semaphore flag timeout in ms */
 
 /* Inband Control */
index 6daf16b..a92fd22 100644 (file)
@@ -430,15 +430,21 @@ STATIC s32 e1000_check_for_rst_vf(struct e1000_hw *hw,
 STATIC s32 e1000_obtain_mbx_lock_vf(struct e1000_hw *hw)
 {
        s32 ret_val = -E1000_ERR_MBX;
+       int count = 10;
 
        DEBUGFUNC("e1000_obtain_mbx_lock_vf");
 
-       /* Take ownership of the buffer */
-       E1000_WRITE_REG(hw, E1000_V2PMAILBOX(0), E1000_V2PMAILBOX_VFU);
+       do {
+               /* Take ownership of the buffer */
+               E1000_WRITE_REG(hw, E1000_V2PMAILBOX(0), E1000_V2PMAILBOX_VFU);
 
-       /* reserve mailbox for vf use */
-       if (e1000_read_v2p_mailbox(hw) & E1000_V2PMAILBOX_VFU)
-               ret_val = E1000_SUCCESS;
+               /* reserve mailbox for vf use */
+               if (e1000_read_v2p_mailbox(hw) & E1000_V2PMAILBOX_VFU) {
+                       ret_val = E1000_SUCCESS;
+                       break;
+               }
+               usec_delay(1000);
+       } while (count-- > 0);
 
        return ret_val;
 }
@@ -645,18 +651,26 @@ STATIC s32 e1000_obtain_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number)
 {
        s32 ret_val = -E1000_ERR_MBX;
        u32 p2v_mailbox;
+       int count = 10;
 
        DEBUGFUNC("e1000_obtain_mbx_lock_pf");
 
-       /* Take ownership of the buffer */
-       E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU);
+       do {
+               /* Take ownership of the buffer */
+               E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number),
+                               E1000_P2VMAILBOX_PFU);
 
-       /* reserve mailbox for vf use */
-       p2v_mailbox = E1000_READ_REG(hw, E1000_P2VMAILBOX(vf_number));
-       if (p2v_mailbox & E1000_P2VMAILBOX_PFU)
-               ret_val = E1000_SUCCESS;
+               /* reserve mailbox for pf use */
+               p2v_mailbox = E1000_READ_REG(hw, E1000_P2VMAILBOX(vf_number));
+               if (p2v_mailbox & E1000_P2VMAILBOX_PFU) {
+                       ret_val = E1000_SUCCESS;
+                       break;
+               }
+               usec_delay(1000);
+       } while (count-- > 0);
 
        return ret_val;
+
 }
 
 /**
index 762acd1..75c2282 100644 (file)
@@ -1295,6 +1295,7 @@ void e1000_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers)
        case e1000_82575:
        case e1000_82576:
        case e1000_82580:
+       case e1000_i354:
                hw->nvm.ops.read(hw, NVM_ETRACK_HIWORD, 1, &etrack_test);
                /* Use this format, unless EETRACK ID exists,
                 * then use alternate format
index 47a1948..b886804 100644 (file)
@@ -44,6 +44,7 @@
 #include <rte_log.h>
 #include <rte_debug.h>
 #include <rte_byteorder.h>
+#include <rte_io.h>
 
 #include "../e1000_logs.h"
 
@@ -94,17 +95,18 @@ typedef int         bool;
 
 #define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, E1000_STATUS)
 
-#define E1000_PCI_REG(reg) (*((volatile uint32_t *)(reg)))
+#define E1000_PCI_REG(reg)     rte_read32(reg)
 
-#define E1000_PCI_REG16(reg) (*((volatile uint16_t *)(reg)))
+#define E1000_PCI_REG16(reg)   rte_read16(reg)
 
-#define E1000_PCI_REG_WRITE(reg, value) do { \
-       E1000_PCI_REG((reg)) = (rte_cpu_to_le_32(value)); \
-} while (0)
+#define E1000_PCI_REG_WRITE(reg, value)                        \
+       rte_write32((rte_cpu_to_le_32(value)), reg)
 
-#define E1000_PCI_REG_WRITE16(reg, value) do { \
-       E1000_PCI_REG16((reg)) = (rte_cpu_to_le_16(value)); \
-} while (0)
+#define E1000_PCI_REG_WRITE_RELAXED(reg, value)                \
+       rte_write32_relaxed((rte_cpu_to_le_32(value)), reg)
+
+#define E1000_PCI_REG_WRITE16(reg, value)              \
+       rte_write16((rte_cpu_to_le_16(value)), reg)
 
 #define E1000_PCI_REG_ADDR(hw, reg) \
        ((volatile uint32_t *)((char *)(hw)->hw_addr + (reg)))
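The switch from raw pointer dereferences to the rte_io.h accessors above is not purely cosmetic: in the generic rte_io.h implementation, rte_write32() issues an I/O write barrier before the store, while rte_write32_relaxed() is a plain store. A rough standalone model of the difference (the model_* names are illustrative; the real definitions come from rte_io.h):

#include <stdint.h>

static inline void model_write32_relaxed(uint32_t value, volatile void *addr)
{
        *(volatile uint32_t *)addr = value;     /* plain MMIO store */
}

static inline void model_write32(uint32_t value, volatile void *addr)
{
        __sync_synchronize();                   /* stands in for rte_io_wmb() */
        model_write32_relaxed(value, addr);
}

The new E1000_PCI_REG_WRITE_RELAXED macro exposes the relaxed form so hot paths such as the Tx tail update in em_rxtx.c further down can avoid a second barrier when ordering is already handled elsewhere.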
index 84531a9..364a726 100644 (file)
@@ -66,6 +66,8 @@ POSSIBILITY OF SUCH DAMAGE.
 #define E1000_FEXTNVM4 0x00024  /* Future Extended NVM 4 - RW */
 #define E1000_FEXTNVM6 0x00010  /* Future Extended NVM 6 - RW */
 #define E1000_FEXTNVM7 0x000E4  /* Future Extended NVM 7 - RW */
+#define E1000_FEXTNVM9 0x5BB4  /* Future Extended NVM 9 - RW */
+#define E1000_FEXTNVM11        0x5BBC  /* Future Extended NVM 11 - RW */
 #define E1000_PCIEANACFG       0x00F18 /* PCIE Analog Config */
 #define E1000_FCT      0x00030  /* Flow Control Type - RW */
 #define E1000_CONNSW   0x00034  /* Copper/Fiber switch control - RW */
@@ -109,6 +111,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define E1000_PBA      0x01000  /* Packet Buffer Allocation - RW */
 #define E1000_PBS      0x01008  /* Packet Buffer Size */
 #define E1000_PBECCSTS 0x0100C  /* Packet Buffer ECC Status - RW */
+#define E1000_IOSFPC   0x00F28  /* TX corrupted data  */
 #define E1000_EEMNGCTL 0x01010  /* MNG EEprom Control */
 #define E1000_EEMNGCTL_I210    0x01010  /* i210 MNG EEprom Mode Control */
 #define E1000_EEARBC   0x01024  /* EEPROM Auto Read Bus Control */
@@ -591,6 +594,10 @@ POSSIBILITY OF SUCH DAMAGE.
 #define E1000_TIMADJL  0x0B60C /* Time sync time adjustment offset Low - RW */
 #define E1000_TIMADJH  0x0B610 /* Time sync time adjustment offset High - RW */
 #define E1000_TSAUXC   0x0B640 /* Timesync Auxiliary Control register */
+#define        E1000_SYSSTMPL  0x0B648 /* HH Timesync system stamp low register */
+#define        E1000_SYSSTMPH  0x0B64C /* HH Timesync system stamp hi register */
+#define        E1000_PLTSTMPL  0x0B640 /* HH Timesync platform stamp low register */
+#define        E1000_PLTSTMPH  0x0B644 /* HH Timesync platform stamp hi register */
 #define E1000_SYSTIMR  0x0B6F8 /* System time register Residue */
 #define E1000_TSICR    0x0B66C /* Interrupt Cause Register */
 #define E1000_TSIM     0x0B674 /* Interrupt Mask Register */
index 6c25c8d..81a6dbb 100644 (file)
 #define E1000_MISC_VEC_ID               RTE_INTR_VEC_ZERO_OFFSET
 #define E1000_RX_VEC_START              RTE_INTR_VEC_RXTX_OFFSET
 
+#define IGB_TX_MAX_SEG     UINT8_MAX
+#define IGB_TX_MAX_MTU_SEG UINT8_MAX
+#define EM_TX_MAX_SEG      UINT8_MAX
+#define EM_TX_MAX_MTU_SEG  UINT8_MAX
+
 /* structure for interrupt relative data */
 struct e1000_interrupt {
        uint32_t flags;
@@ -286,6 +291,8 @@ struct e1000_adapter {
 #define E1000_DEV_PRIVATE_TO_FILTER_INFO(adapter) \
        (&((struct e1000_adapter *)adapter)->filter)
 
+#define E1000_DEV_TO_PCI(eth_dev) \
+       RTE_DEV_TO_PCI((eth_dev)->device)
 /*
  * RX/TX IGB function prototypes
  */
@@ -315,6 +322,9 @@ void eth_igb_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts,
                uint16_t nb_pkts);
 
+uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+               uint16_t nb_pkts);
+
 uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
                uint16_t nb_pkts);
 
@@ -376,6 +386,9 @@ void eth_em_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                uint16_t nb_pkts);
 
+uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+               uint16_t nb_pkts);
+
 uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                uint16_t nb_pkts);
 
index ad104ed..d778785 100644 (file)
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -83,7 +83,8 @@ static int eth_em_flow_ctrl_set(struct rte_eth_dev *dev,
 static int eth_em_interrupt_setup(struct rte_eth_dev *dev);
 static int eth_em_rxq_interrupt_setup(struct rte_eth_dev *dev);
 static int eth_em_interrupt_get_status(struct rte_eth_dev *dev);
-static int eth_em_interrupt_action(struct rte_eth_dev *dev);
+static int eth_em_interrupt_action(struct rte_eth_dev *dev,
+                                  struct rte_intr_handle *handle);
 static void eth_em_interrupt_handler(struct rte_intr_handle *handle,
                                                        void *param);
 
@@ -168,6 +169,19 @@ static const struct rte_pci_id pci_id_em_map[] = {
        { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_I218_V2) },
        { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_I218_LM3) },
        { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_I218_V3) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_LM) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_V) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_LM2) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_V2) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_LBG_I219_LM3) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_LM4) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_V4) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_LM5) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_V5) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_CNP_I219_LM6) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_CNP_I219_V6) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_CNP_I219_LM7) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_CNP_I219_V7) },
        { .vendor_id = 0, /* sentinel */ },
 };
 
@@ -278,6 +292,19 @@ eth_em_dev_is_ich8(struct e1000_hw *hw)
        case E1000_DEV_ID_PCH_I218_LM2:
        case E1000_DEV_ID_PCH_I218_V3:
        case E1000_DEV_ID_PCH_I218_LM3:
+       case E1000_DEV_ID_PCH_SPT_I219_LM:
+       case E1000_DEV_ID_PCH_SPT_I219_V:
+       case E1000_DEV_ID_PCH_SPT_I219_LM2:
+       case E1000_DEV_ID_PCH_SPT_I219_V2:
+       case E1000_DEV_ID_PCH_LBG_I219_LM3:
+       case E1000_DEV_ID_PCH_SPT_I219_LM4:
+       case E1000_DEV_ID_PCH_SPT_I219_V4:
+       case E1000_DEV_ID_PCH_SPT_I219_LM5:
+       case E1000_DEV_ID_PCH_SPT_I219_V5:
+       case E1000_DEV_ID_PCH_CNP_I219_LM6:
+       case E1000_DEV_ID_PCH_CNP_I219_V6:
+       case E1000_DEV_ID_PCH_CNP_I219_LM7:
+       case E1000_DEV_ID_PCH_CNP_I219_V7:
                return 1;
        default:
                return 0;
@@ -287,7 +314,8 @@ eth_em_dev_is_ich8(struct e1000_hw *hw)
 static int
 eth_em_dev_init(struct rte_eth_dev *eth_dev)
 {
-       struct rte_pci_device *pci_dev;
+       struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(eth_dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct e1000_adapter *adapter =
                E1000_DEV_PRIVATE(eth_dev->data->dev_private);
        struct e1000_hw *hw =
@@ -295,11 +323,10 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev)
        struct e1000_vfta * shadow_vfta =
                E1000_DEV_PRIVATE_TO_VFTA(eth_dev->data->dev_private);
 
-       pci_dev = eth_dev->pci_dev;
-
        eth_dev->dev_ops = &eth_em_ops;
        eth_dev->rx_pkt_burst = (eth_rx_burst_t)&eth_em_recv_pkts;
        eth_dev->tx_pkt_burst = (eth_tx_burst_t)&eth_em_xmit_pkts;
+       eth_dev->tx_pkt_prepare = (eth_tx_prep_t)&eth_em_prep_pkts;
 
        /* for secondary processes, we don't initialise any further as primary
         * has already done this work. Only check we don't need a different
@@ -312,6 +339,7 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev)
        }
 
        rte_eth_copy_pci_info(eth_dev, pci_dev);
+       eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE;
 
        hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
        hw->device_id = pci_dev->id.device_id;
@@ -351,8 +379,8 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev)
                     eth_dev->data->port_id, pci_dev->id.vendor_id,
                     pci_dev->id.device_id);
 
-       rte_intr_callback_register(&(pci_dev->intr_handle),
-               eth_em_interrupt_handler, (void *)eth_dev);
+       rte_intr_callback_register(intr_handle,
+                                  eth_em_interrupt_handler, eth_dev);
 
        return 0;
 }
@@ -360,17 +388,16 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev)
 static int
 eth_em_dev_uninit(struct rte_eth_dev *eth_dev)
 {
-       struct rte_pci_device *pci_dev;
+       struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(eth_dev);
        struct e1000_adapter *adapter =
                E1000_DEV_PRIVATE(eth_dev->data->dev_private);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
        PMD_INIT_FUNC_TRACE();
 
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return -EPERM;
 
-       pci_dev = eth_dev->pci_dev;
-
        if (adapter->stopped == 0)
                eth_em_close(eth_dev);
 
@@ -382,32 +409,25 @@ eth_em_dev_uninit(struct rte_eth_dev *eth_dev)
        eth_dev->data->mac_addrs = NULL;
 
        /* disable uio intr before callback unregister */
-       rte_intr_disable(&(pci_dev->intr_handle));
-       rte_intr_callback_unregister(&(pci_dev->intr_handle),
-               eth_em_interrupt_handler, (void *)eth_dev);
+       rte_intr_disable(intr_handle);
+       rte_intr_callback_unregister(intr_handle,
+                                    eth_em_interrupt_handler, eth_dev);
 
        return 0;
 }
 
 static struct eth_driver rte_em_pmd = {
        .pci_drv = {
-               .name = "rte_em_pmd",
                .id_table = pci_id_em_map,
-               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
-                       RTE_PCI_DRV_DETACHABLE,
+               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+               .probe = rte_eth_dev_pci_probe,
+               .remove = rte_eth_dev_pci_remove,
        },
        .eth_dev_init = eth_em_dev_init,
        .eth_dev_uninit = eth_em_dev_uninit,
        .dev_private_size = sizeof(struct e1000_adapter),
 };
 
-static int
-rte_em_pmd_init(const char *name __rte_unused, const char *params __rte_unused)
-{
-       rte_eth_driver_register(&rte_em_pmd);
-       return 0;
-}
-
 static int
 em_hw_init(struct e1000_hw *hw)
 {
@@ -546,6 +566,8 @@ em_set_pba(struct e1000_hw *hw)
                case e1000_pchlan:
                case e1000_pch2lan:
                case e1000_pch_lpt:
+               case e1000_pch_spt:
+               case e1000_pch_cnp:
                        pba = E1000_PBA_26K;
                        break;
                default:
@@ -562,7 +584,9 @@ eth_em_start(struct rte_eth_dev *dev)
                E1000_DEV_PRIVATE(dev->data->dev_private);
        struct e1000_hw *hw =
                E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev =
+               E1000_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        int ret, mask;
        uint32_t intr_vector = 0;
        uint32_t *speeds;
@@ -615,7 +639,7 @@ eth_em_start(struct rte_eth_dev *dev)
                                        dev->data->nb_rx_queues * sizeof(int), 0);
                if (intr_handle->intr_vec == NULL) {
                        PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues"
-                                               " intr_vec\n", dev->data->nb_rx_queues);
+                                               " intr_vec", dev->data->nb_rx_queues);
                        return -ENOMEM;
                }
 
@@ -645,6 +669,7 @@ eth_em_start(struct rte_eth_dev *dev)
        speeds = &dev->data->dev_conf.link_speeds;
        if (*speeds == ETH_LINK_SPEED_AUTONEG) {
                hw->phy.autoneg_advertised = E1000_ALL_SPEED_DUPLEX;
+               hw->mac.autoneg = 1;
        } else {
                num_speeds = 0;
                autoneg = (*speeds & ETH_LINK_SPEED_FIXED) == 0;
@@ -680,6 +705,17 @@ eth_em_start(struct rte_eth_dev *dev)
                }
                if (num_speeds == 0 || (!autoneg && (num_speeds > 1)))
                        goto error_invalid_config;
+
+               /* Set/reset the mac.autoneg based on the link speed,
+                * fixed or not
+                */
+               if (!autoneg) {
+                       hw->mac.autoneg = 0;
+                       hw->mac.forced_speed_duplex =
+                                       hw->phy.autoneg_advertised;
+               } else {
+                       hw->mac.autoneg = 1;
+               }
        }
 
        e1000_setup_link(hw);
@@ -700,7 +736,7 @@ eth_em_start(struct rte_eth_dev *dev)
                                                (void *)dev);
                if (dev->data->dev_conf.intr_conf.lsc != 0)
                        PMD_INIT_LOG(INFO, "lsc won't enable because of"
-                                    " no intr multiplex\n");
+                                    " no intr multiplex");
        }
        /* check if rxq interrupt is enabled */
        if (dev->data->dev_conf.intr_conf.rxq != 0)
@@ -732,7 +768,8 @@ eth_em_stop(struct rte_eth_dev *dev)
 {
        struct rte_eth_link link;
        struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
        em_rxq_intr_disable(hw);
        em_lsc_intr_disable(hw);
@@ -847,7 +884,9 @@ em_hardware_init(struct e1000_hw *hw)
                hw->fc.low_water = 0x5048;
                hw->fc.pause_time = 0x0650;
                hw->fc.refresh_time = 0x0400;
-       } else if (hw->mac.type == e1000_pch_lpt) {
+       } else if (hw->mac.type == e1000_pch_lpt ||
+                  hw->mac.type == e1000_pch_spt ||
+                  hw->mac.type == e1000_pch_cnp) {
                hw->fc.requested_mode = e1000_fc_full;
        }
 
@@ -993,9 +1032,11 @@ static int
 eth_em_rx_queue_intr_enable(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id)
 {
        struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
        em_rxq_intr_enable(hw);
-       rte_intr_enable(&dev->pci_dev->intr_handle);
+       rte_intr_enable(intr_handle);
 
        return 0;
 }
@@ -1020,6 +1061,8 @@ em_get_max_pktlen(const struct e1000_hw *hw)
        case e1000_ich10lan:
        case e1000_pch2lan:
        case e1000_pch_lpt:
+       case e1000_pch_spt:
+       case e1000_pch_cnp:
        case e1000_82574:
        case e1000_80003es2lan: /* 9K Jumbo Frame size */
        case e1000_82583:
@@ -1039,6 +1082,7 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
        struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
+       dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device);
        dev_info->min_rx_bufsize = 256; /* See BSIZE field of RCTL register. */
        dev_info->max_rx_pktlen = em_get_max_pktlen(hw);
        dev_info->max_mac_addrs = hw->mac.rar_entry_count;
@@ -1073,6 +1117,8 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                .nb_max = E1000_MAX_RING_DESC,
                .nb_min = E1000_MIN_RING_DESC,
                .nb_align = EM_TXD_ALIGN,
+               .nb_seg_max = EM_TX_MAX_SEG,
+               .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG,
        };
 
        dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
@@ -1530,8 +1576,10 @@ eth_em_interrupt_get_status(struct rte_eth_dev *dev)
  *  - On failure, a negative value.
  */
 static int
-eth_em_interrupt_action(struct rte_eth_dev *dev)
+eth_em_interrupt_action(struct rte_eth_dev *dev,
+                       struct rte_intr_handle *intr_handle)
 {
+       struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev);
        struct e1000_hw *hw =
                E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct e1000_interrupt *intr =
@@ -1544,7 +1592,7 @@ eth_em_interrupt_action(struct rte_eth_dev *dev)
                return -1;
 
        intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE;
-       rte_intr_enable(&(dev->pci_dev->intr_handle));
+       rte_intr_enable(intr_handle);
 
        /* set get_link_status to check register later */
        hw->mac.get_link_status = 1;
@@ -1565,8 +1613,8 @@ eth_em_interrupt_action(struct rte_eth_dev *dev)
                PMD_INIT_LOG(INFO, " Port %d: Link Down", dev->data->port_id);
        }
        PMD_INIT_LOG(DEBUG, "PCI Address: %04d:%02d:%02d:%d",
-                    dev->pci_dev->addr.domain, dev->pci_dev->addr.bus,
-                    dev->pci_dev->addr.devid, dev->pci_dev->addr.function);
+                    pci_dev->addr.domain, pci_dev->addr.bus,
+                    pci_dev->addr.devid, pci_dev->addr.function);
 
        tctl = E1000_READ_REG(hw, E1000_TCTL);
        rctl = E1000_READ_REG(hw, E1000_RCTL);
@@ -1598,14 +1646,14 @@ eth_em_interrupt_action(struct rte_eth_dev *dev)
  *  void
  */
 static void
-eth_em_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
-                                                       void *param)
+eth_em_interrupt_handler(struct rte_intr_handle *handle,
+                        void *param)
 {
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
 
        eth_em_interrupt_get_status(dev);
-       eth_em_interrupt_action(dev);
-       _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC);
+       eth_em_interrupt_action(dev, handle);
+       _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
 }
 
 static int
@@ -1799,10 +1847,6 @@ eth_em_set_mc_addr_list(struct rte_eth_dev *dev,
        return 0;
 }
 
-struct rte_driver em_pmd_drv = {
-       .type = PMD_PDEV,
-       .init = rte_em_pmd_init,
-};
-
-PMD_REGISTER_DRIVER(em_pmd_drv, em);
-DRIVER_REGISTER_PCI_TABLE(em, pci_id_em_map);
+RTE_PMD_REGISTER_PCI(net_e1000_em, rte_em_pmd.pci_drv);
+RTE_PMD_REGISTER_PCI_TABLE(net_e1000_em, pci_id_em_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_e1000_em, "* igb_uio | uio_pci_generic | vfio");
index 6d8750a..d099d6a 100644 (file)
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -56,7 +56,6 @@
 #include <rte_lcore.h>
 #include <rte_atomic.h>
 #include <rte_branch_prediction.h>
-#include <rte_ring.h>
 #include <rte_mempool.h>
 #include <rte_malloc.h>
 #include <rte_mbuf.h>
@@ -67,6 +66,7 @@
 #include <rte_udp.h>
 #include <rte_tcp.h>
 #include <rte_sctp.h>
+#include <rte_net.h>
 #include <rte_string_fns.h>
 
 #include "e1000_logs.h"
 
 #define E1000_RXDCTL_GRAN      0x01000000 /* RXDCTL Granularity */
 
+#define E1000_TX_OFFLOAD_MASK ( \
+               PKT_TX_IP_CKSUM |       \
+               PKT_TX_L4_MASK |        \
+               PKT_TX_VLAN_PKT)
+
+#define E1000_TX_OFFLOAD_NOTSUP_MASK \
+               (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
+
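Because E1000_TX_OFFLOAD_MASK is a subset of PKT_TX_OFFLOAD_MASK, the XOR above is equivalent to PKT_TX_OFFLOAD_MASK & ~E1000_TX_OFFLOAD_MASK, i.e. every mbuf Tx offload flag this PMD does not handle. With made-up 4-bit masks for illustration: all = 1111b and supported = 0101b give all ^ supported = 1010b, exactly the unsupported set that eth_em_prep_pkts rejects below.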
 /**
  * Structure associated with each descriptor of the RX ring of a RX queue.
  */
@@ -611,12 +619,49 @@ end_of_tx:
        PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
                (unsigned) txq->port_id, (unsigned) txq->queue_id,
                (unsigned) tx_id, (unsigned) nb_tx);
-       E1000_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
+       E1000_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
        txq->tx_tail = tx_id;
 
        return nb_tx;
 }
 
+/*********************************************************************
+ *
+ *  TX prep functions
+ *
+ **********************************************************************/
+uint16_t
+eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+               uint16_t nb_pkts)
+{
+       int i, ret;
+       struct rte_mbuf *m;
+
+       for (i = 0; i < nb_pkts; i++) {
+               m = tx_pkts[i];
+
+               if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) {
+                       rte_errno = -ENOTSUP;
+                       return i;
+               }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+               ret = rte_validate_tx_offload(m);
+               if (ret != 0) {
+                       rte_errno = ret;
+                       return i;
+               }
+#endif
+               ret = rte_net_intel_cksum_prepare(m);
+               if (ret != 0) {
+                       rte_errno = ret;
+                       return i;
+               }
+       }
+
+       return i;
+}
+
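With eth_em_prep_pkts wired into tx_pkt_prepare in eth_em_dev_init above, applications can run the generic DPDK 17.02 Tx preparation step before the burst. A minimal usage sketch follows; the send_burst() helper and its parameters are illustrative and not part of this patch:

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Illustrative helper: validate offload flags (and let the PMD patch
 * pseudo-header checksums) for a burst, then transmit what passed. */
static uint16_t
send_burst(uint8_t port_id, uint16_t queue_id,
           struct rte_mbuf **pkts, uint16_t n)
{
        /* Dispatches to eth_em_prep_pkts on this PMD; stops at the first
         * packet it rejects and sets rte_errno. */
        uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, n);

        return rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
}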
 /*********************************************************************
  *
  *  RX functions
index fbf4d09..d939774 100644 (file)
@@ -120,6 +120,8 @@ static int eth_igb_xstats_get_names(struct rte_eth_dev *dev,
                                    unsigned limit);
 static void eth_igb_stats_reset(struct rte_eth_dev *dev);
 static void eth_igb_xstats_reset(struct rte_eth_dev *dev);
+static int eth_igb_fw_version_get(struct rte_eth_dev *dev,
+                                  char *fw_version, size_t fw_size);
 static void eth_igb_infos_get(struct rte_eth_dev *dev,
                              struct rte_eth_dev_info *dev_info);
 static const uint32_t *eth_igb_supported_ptypes_get(struct rte_eth_dev *dev);
@@ -132,7 +134,8 @@ static int  eth_igb_flow_ctrl_set(struct rte_eth_dev *dev,
 static int eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev);
 static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev);
 static int eth_igb_interrupt_get_status(struct rte_eth_dev *dev);
-static int eth_igb_interrupt_action(struct rte_eth_dev *dev);
+static int eth_igb_interrupt_action(struct rte_eth_dev *dev,
+                                   struct rte_intr_handle *handle);
 static void eth_igb_interrupt_handler(struct rte_intr_handle *handle,
                                                        void *param);
 static int  igb_hardware_init(struct e1000_hw *hw);
@@ -306,22 +309,57 @@ static enum e1000_fc_mode igb_fc_setting = e1000_fc_full;
  * The set of PCI devices this driver supports
  */
 static const struct rte_pci_id pci_id_igb_map[] = {
-
-#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) {RTE_PCI_DEVICE(vend, dev)},
-#include "rte_pci_dev_ids.h"
-
-{0},
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD) },
+
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER) },
+
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER) },
+
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_DA4) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP) },
+       { .vendor_id = 0, /* sentinel */ },
 };
 
 /*
  * The set of PCI devices this driver supports (for 82576&I350 VF)
  */
 static const struct rte_pci_id pci_id_igbvf_map[] = {
-
-#define RTE_PCI_DEV_ID_DECL_IGBVF(vend, dev) {RTE_PCI_DEVICE(vend, dev)},
-#include "rte_pci_dev_ids.h"
-
-{0},
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF_HV) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF) },
+       { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF_HV) },
+       { .vendor_id = 0, /* sentinel */ },
 };
 
 static const struct rte_eth_desc_lim rx_desc_lim = {
@@ -334,6 +372,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = {
        .nb_max = E1000_MAX_RING_DESC,
        .nb_min = E1000_MIN_RING_DESC,
        .nb_align = IGB_RXD_ALIGN,
+       .nb_seg_max = IGB_TX_MAX_SEG,
+       .nb_mtu_seg_max = IGB_TX_MAX_MTU_SEG,
 };
 
 static const struct eth_dev_ops eth_igb_ops = {
@@ -353,6 +393,7 @@ static const struct eth_dev_ops eth_igb_ops = {
        .xstats_get_names     = eth_igb_xstats_get_names,
        .stats_reset          = eth_igb_stats_reset,
        .xstats_reset         = eth_igb_xstats_reset,
+       .fw_version_get       = eth_igb_fw_version_get,
        .dev_infos_get        = eth_igb_infos_get,
        .dev_supported_ptypes_get = eth_igb_supported_ptypes_get,
        .mtu_set              = eth_igb_mtu_set,
@@ -633,15 +674,16 @@ igb_pf_reset_hw(struct e1000_hw *hw)
 }
 
 static void
-igb_identify_hardware(struct rte_eth_dev *dev)
+igb_identify_hardware(struct rte_eth_dev *dev, struct rte_pci_device *pci_dev)
 {
        struct e1000_hw *hw =
                E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
-       hw->vendor_id = dev->pci_dev->id.vendor_id;
-       hw->device_id = dev->pci_dev->id.device_id;
-       hw->subsystem_vendor_id = dev->pci_dev->id.subsystem_vendor_id;
-       hw->subsystem_device_id = dev->pci_dev->id.subsystem_device_id;
+
+       hw->vendor_id = pci_dev->id.vendor_id;
+       hw->device_id = pci_dev->id.device_id;
+       hw->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id;
+       hw->subsystem_device_id = pci_dev->id.subsystem_device_id;
 
        e1000_set_mac_type(hw);
 
@@ -708,7 +750,7 @@ static int
 eth_igb_dev_init(struct rte_eth_dev *eth_dev)
 {
        int error = 0;
-       struct rte_pci_device *pci_dev;
+       struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(eth_dev);
        struct e1000_hw *hw =
                E1000_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
        struct e1000_vfta * shadow_vfta =
@@ -720,11 +762,10 @@ eth_igb_dev_init(struct rte_eth_dev *eth_dev)
 
        uint32_t ctrl_ext;
 
-       pci_dev = eth_dev->pci_dev;
-
        eth_dev->dev_ops = &eth_igb_ops;
        eth_dev->rx_pkt_burst = &eth_igb_recv_pkts;
        eth_dev->tx_pkt_burst = &eth_igb_xmit_pkts;
+       eth_dev->tx_pkt_prepare = &eth_igb_prep_pkts;
 
        /* for secondary processes, we don't initialise any further as primary
         * has already done this work. Only check we don't need a different
@@ -736,10 +777,11 @@ eth_igb_dev_init(struct rte_eth_dev *eth_dev)
        }
 
        rte_eth_copy_pci_info(eth_dev, pci_dev);
+       eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE;
 
        hw->hw_addr= (void *)pci_dev->mem_resource[0].addr;
 
-       igb_identify_hardware(eth_dev);
+       igb_identify_hardware(eth_dev, pci_dev);
        if (e1000_setup_init_funcs(hw, FALSE) != E1000_SUCCESS) {
                error = -EIO;
                goto err_late;
@@ -873,6 +915,7 @@ static int
 eth_igb_dev_uninit(struct rte_eth_dev *eth_dev)
 {
        struct rte_pci_device *pci_dev;
+       struct rte_intr_handle *intr_handle;
        struct e1000_hw *hw;
        struct e1000_adapter *adapter =
                E1000_DEV_PRIVATE(eth_dev->data->dev_private);
@@ -883,7 +926,8 @@ eth_igb_dev_uninit(struct rte_eth_dev *eth_dev)
                return -EPERM;
 
        hw = E1000_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
-       pci_dev = eth_dev->pci_dev;
+       pci_dev = E1000_DEV_TO_PCI(eth_dev);
+       intr_handle = &pci_dev->intr_handle;
 
        if (adapter->stopped == 0)
                eth_igb_close(eth_dev);
@@ -902,9 +946,9 @@ eth_igb_dev_uninit(struct rte_eth_dev *eth_dev)
        igb_pf_host_uninit(eth_dev);
 
        /* disable uio intr before callback unregister */
-       rte_intr_disable(&(pci_dev->intr_handle));
-       rte_intr_callback_unregister(&(pci_dev->intr_handle),
-               eth_igb_interrupt_handler, (void *)eth_dev);
+       rte_intr_disable(intr_handle);
+       rte_intr_callback_unregister(intr_handle,
+                                    eth_igb_interrupt_handler, eth_dev);
 
        return 0;
 }
@@ -916,6 +960,7 @@ static int
 eth_igbvf_dev_init(struct rte_eth_dev *eth_dev)
 {
        struct rte_pci_device *pci_dev;
+       struct rte_intr_handle *intr_handle;
        struct e1000_adapter *adapter =
                E1000_DEV_PRIVATE(eth_dev->data->dev_private);
        struct e1000_hw *hw =
@@ -928,6 +973,7 @@ eth_igbvf_dev_init(struct rte_eth_dev *eth_dev)
        eth_dev->dev_ops = &igbvf_eth_dev_ops;
        eth_dev->rx_pkt_burst = &eth_igb_recv_pkts;
        eth_dev->tx_pkt_burst = &eth_igb_xmit_pkts;
+       eth_dev->tx_pkt_prepare = &eth_igb_prep_pkts;
 
        /* for secondary processes, we don't initialise any further as primary
         * has already done this work. Only check we don't need a different
@@ -938,9 +984,9 @@ eth_igbvf_dev_init(struct rte_eth_dev *eth_dev)
                return 0;
        }
 
-       pci_dev = eth_dev->pci_dev;
-
+       pci_dev = E1000_DEV_TO_PCI(eth_dev);
        rte_eth_copy_pci_info(eth_dev, pci_dev);
+       eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE;
 
        hw->device_id = pci_dev->id.device_id;
        hw->vendor_id = pci_dev->id.vendor_id;
@@ -1003,9 +1049,9 @@ eth_igbvf_dev_init(struct rte_eth_dev *eth_dev)
                     eth_dev->data->port_id, pci_dev->id.vendor_id,
                     pci_dev->id.device_id, "igb_mac_82576_vf");
 
-       rte_intr_callback_register(&pci_dev->intr_handle,
-                                  eth_igbvf_interrupt_handler,
-                                  (void *)eth_dev);
+       intr_handle = &pci_dev->intr_handle;
+       rte_intr_callback_register(intr_handle,
+                                  eth_igbvf_interrupt_handler, eth_dev);
 
        return 0;
 }
@@ -1015,7 +1061,7 @@ eth_igbvf_dev_uninit(struct rte_eth_dev *eth_dev)
 {
        struct e1000_adapter *adapter =
                E1000_DEV_PRIVATE(eth_dev->data->dev_private);
-       struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+       struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(eth_dev);
 
        PMD_INIT_FUNC_TRACE();
 
@@ -1043,10 +1089,10 @@ eth_igbvf_dev_uninit(struct rte_eth_dev *eth_dev)
 
 static struct eth_driver rte_igb_pmd = {
        .pci_drv = {
-               .name = "rte_igb_pmd",
                .id_table = pci_id_igb_map,
-               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
-                       RTE_PCI_DRV_DETACHABLE,
+               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+               .probe = rte_eth_dev_pci_probe,
+               .remove = rte_eth_dev_pci_remove,
        },
        .eth_dev_init = eth_igb_dev_init,
        .eth_dev_uninit = eth_igb_dev_uninit,
@@ -1058,22 +1104,16 @@ static struct eth_driver rte_igb_pmd = {
  */
 static struct eth_driver rte_igbvf_pmd = {
        .pci_drv = {
-               .name = "rte_igbvf_pmd",
                .id_table = pci_id_igbvf_map,
-               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_DETACHABLE,
+               .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
+               .probe = rte_eth_dev_pci_probe,
+               .remove = rte_eth_dev_pci_remove,
        },
        .eth_dev_init = eth_igbvf_dev_init,
        .eth_dev_uninit = eth_igbvf_dev_uninit,
        .dev_private_size = sizeof(struct e1000_adapter),
 };
 
-static int
-rte_igb_pmd_init(const char *name __rte_unused, const char *params __rte_unused)
-{
-       rte_eth_driver_register(&rte_igb_pmd);
-       return 0;
-}
-
 static void
 igb_vmdq_vlan_hw_filter_enable(struct rte_eth_dev *dev)
 {
@@ -1085,20 +1125,6 @@ igb_vmdq_vlan_hw_filter_enable(struct rte_eth_dev *dev)
        E1000_WRITE_REG(hw, E1000_RCTL, rctl);
 }
 
-/*
- * VF Driver initialization routine.
- * Invoked one at EAL init time.
- * Register itself as the [Virtual Poll Mode] Driver of PCI IGB devices.
- */
-static int
-rte_igbvf_pmd_init(const char *name __rte_unused, const char *params __rte_unused)
-{
-       PMD_INIT_FUNC_TRACE();
-
-       rte_eth_driver_register(&rte_igbvf_pmd);
-       return 0;
-}
-
 static int
 igb_check_mq_mode(struct rte_eth_dev *dev)
 {
@@ -1201,7 +1227,8 @@ eth_igb_start(struct rte_eth_dev *dev)
                E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct e1000_adapter *adapter =
                E1000_DEV_PRIVATE(dev->data->dev_private);
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        int ret, mask;
        uint32_t intr_vector = 0;
        uint32_t ctrl_ext;
@@ -1265,7 +1292,7 @@ eth_igb_start(struct rte_eth_dev *dev)
                                    dev->data->nb_rx_queues * sizeof(int), 0);
                if (intr_handle->intr_vec == NULL) {
                        PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues"
-                                    " intr_vec\n", dev->data->nb_rx_queues);
+                                    " intr_vec", dev->data->nb_rx_queues);
                        return -ENOMEM;
                }
        }
@@ -1311,6 +1338,7 @@ eth_igb_start(struct rte_eth_dev *dev)
        speeds = &dev->data->dev_conf.link_speeds;
        if (*speeds == ETH_LINK_SPEED_AUTONEG) {
                hw->phy.autoneg_advertised = E1000_ALL_SPEED_DUPLEX;
+               hw->mac.autoneg = 1;
        } else {
                num_speeds = 0;
                autoneg = (*speeds & ETH_LINK_SPEED_FIXED) == 0;
@@ -1346,6 +1374,17 @@ eth_igb_start(struct rte_eth_dev *dev)
                }
                if (num_speeds == 0 || (!autoneg && (num_speeds > 1)))
                        goto error_invalid_config;
+
+               /* Set/reset the mac.autoneg based on the link speed,
+                * fixed or not
+                */
+               if (!autoneg) {
+                       hw->mac.autoneg = 0;
+                       hw->mac.forced_speed_duplex =
+                                       hw->phy.autoneg_advertised;
+               } else {
+                       hw->mac.autoneg = 1;
+               }
        }
 
        e1000_setup_link(hw);
@@ -1360,7 +1399,7 @@ eth_igb_start(struct rte_eth_dev *dev)
                                             (void *)dev);
                if (dev->data->dev_conf.intr_conf.lsc != 0)
                        PMD_INIT_LOG(INFO, "lsc won't enable because of"
-                                    " no intr multiplex\n");
+                                    " no intr multiplex");
        }
 
        /* check if rxq interrupt is enabled */
@@ -1397,11 +1436,12 @@ eth_igb_stop(struct rte_eth_dev *dev)
        struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct e1000_filter_info *filter_info =
                E1000_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
+       struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev);
        struct rte_eth_link link;
        struct e1000_flex_filter *p_flex;
        struct e1000_5tuple_filter *p_5tuple, *p_5tuple_next;
        struct e1000_2tuple_filter *p_2tuple, *p_2tuple_next;
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
        igb_intr_disable(hw);
 
@@ -1501,7 +1541,8 @@ eth_igb_close(struct rte_eth_dev *dev)
        struct e1000_adapter *adapter =
                E1000_DEV_PRIVATE(dev->data->dev_private);
        struct rte_eth_link link;
-       struct rte_pci_device *pci_dev;
+       struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
        eth_igb_stop(dev);
        adapter->stopped = 1;
@@ -1521,10 +1562,9 @@ eth_igb_close(struct rte_eth_dev *dev)
 
        igb_dev_free_queues(dev);
 
-       pci_dev = dev->pci_dev;
-       if (pci_dev->intr_handle.intr_vec) {
-               rte_free(pci_dev->intr_handle.intr_vec);
-               pci_dev->intr_handle.intr_vec = NULL;
+       if (intr_handle->intr_vec) {
+               rte_free(intr_handle->intr_vec);
+               intr_handle->intr_vec = NULL;
        }
 
        memset(&link, 0, sizeof(link));
@@ -1948,11 +1988,64 @@ eth_igbvf_stats_reset(struct rte_eth_dev *dev)
               offsetof(struct e1000_vf_stats, gprc));
 }
 
+static int
+eth_igb_fw_version_get(struct rte_eth_dev *dev, char *fw_version,
+                      size_t fw_size)
+{
+       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct e1000_fw_version fw;
+       int ret;
+
+       e1000_get_fw_version(hw, &fw);
+
+       switch (hw->mac.type) {
+       case e1000_i210:
+       case e1000_i211:
+               if (!(e1000_get_flash_presence_i210(hw))) {
+                       ret = snprintf(fw_version, fw_size,
+                                "%2d.%2d-%d",
+                                fw.invm_major, fw.invm_minor,
+                                fw.invm_img_type);
+                       break;
+               }
+               /* fall through */
+       default:
+               /* if option rom is valid, display its version too */
+               if (fw.or_valid) {
+                       ret = snprintf(fw_version, fw_size,
+                                "%d.%d, 0x%08x, %d.%d.%d",
+                                fw.eep_major, fw.eep_minor, fw.etrack_id,
+                                fw.or_major, fw.or_build, fw.or_patch);
+               /* no option rom */
+               } else {
+                       if (fw.etrack_id != 0X0000) {
+                               ret = snprintf(fw_version, fw_size,
+                                        "%d.%d, 0x%08x",
+                                        fw.eep_major, fw.eep_minor,
+                                        fw.etrack_id);
+                       } else {
+                               ret = snprintf(fw_version, fw_size,
+                                        "%d.%d.%d",
+                                        fw.eep_major, fw.eep_minor,
+                                        fw.eep_build);
+                       }
+               }
+               break;
+       }
+
+       ret += 1; /* add the size of '\0' */
+       if (fw_size < (u32)ret)
+               return ret;
+       else
+               return 0;
+}
+
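
[Annotation, not part of the patch] The new eth_igb_fw_version_get() op above returns 0 when the string fits and the required length (including the terminating NUL) when fw_size is too small. A minimal caller sketch, assuming the rte_eth_dev_fw_version_get() prototype dpdk-17.02 adds alongside this op; print_fw_version() is a hypothetical helper:

    #include <stdio.h>
    #include <rte_ethdev.h>

    /* Hypothetical application helper, not part of the driver. */
    static void print_fw_version(uint8_t port_id)
    {
            char fw[64];
            int ret = rte_eth_dev_fw_version_get(port_id, fw, sizeof(fw));

            if (ret > 0)
                    printf("buffer too small, need %d bytes\n", ret);
            else if (ret == 0)
                    printf("port %u firmware: %s\n", port_id, fw);
    }
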
 static void
 eth_igb_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
        struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
+       dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device);
        dev_info->min_rx_bufsize = 256; /* See BSIZE field of RCTL register. */
        dev_info->max_rx_pktlen  = 0x3FFF; /* See RLPML register. */
        dev_info->max_mac_addrs = hw->mac.rar_entry_count;
@@ -2081,6 +2174,7 @@ eth_igbvf_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
        struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
+       dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device);
        dev_info->min_rx_bufsize = 256; /* See BSIZE field of RCTL register. */
        dev_info->max_rx_pktlen  = 0x3FFF; /* See RLPML register. */
        dev_info->max_mac_addrs = hw->mac.rar_entry_count;
@@ -2605,12 +2699,14 @@ eth_igb_interrupt_get_status(struct rte_eth_dev *dev)
  *  - On failure, a negative value.
  */
 static int
-eth_igb_interrupt_action(struct rte_eth_dev *dev)
+eth_igb_interrupt_action(struct rte_eth_dev *dev,
+                        struct rte_intr_handle *intr_handle)
 {
        struct e1000_hw *hw =
                E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct e1000_interrupt *intr =
                E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+       struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev);
        uint32_t tctl, rctl;
        struct rte_eth_link link;
        int ret;
@@ -2621,7 +2717,7 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev)
        }
 
        igb_intr_enable(dev);
-       rte_intr_enable(&(dev->pci_dev->intr_handle));
+       rte_intr_enable(intr_handle);
 
        if (intr->flags & E1000_FLAG_NEED_LINK_UPDATE) {
                intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE;
@@ -2649,10 +2745,10 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev)
                }
 
                PMD_INIT_LOG(DEBUG, "PCI Address: %04d:%02d:%02d:%d",
-                            dev->pci_dev->addr.domain,
-                            dev->pci_dev->addr.bus,
-                            dev->pci_dev->addr.devid,
-                            dev->pci_dev->addr.function);
+                            pci_dev->addr.domain,
+                            pci_dev->addr.bus,
+                            pci_dev->addr.devid,
+                            pci_dev->addr.function);
                tctl = E1000_READ_REG(hw, E1000_TCTL);
                rctl = E1000_READ_REG(hw, E1000_RCTL);
                if (link.link_status) {
@@ -2667,7 +2763,7 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev)
                E1000_WRITE_REG(hw, E1000_TCTL, tctl);
                E1000_WRITE_REG(hw, E1000_RCTL, rctl);
                E1000_WRITE_FLUSH(hw);
-               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC);
+               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
        }
 
        return 0;
@@ -2685,13 +2781,12 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev)
  *  void
  */
 static void
-eth_igb_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
-                                                       void *param)
+eth_igb_interrupt_handler(struct rte_intr_handle *handle, void *param)
 {
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
 
        eth_igb_interrupt_get_status(dev);
-       eth_igb_interrupt_action(dev);
+       eth_igb_interrupt_action(dev, handle);
 }
 
 static int
@@ -2727,11 +2822,11 @@ void igbvf_mbx_process(struct rte_eth_dev *dev)
 
        /* PF reset VF event */
        if (in_msg == E1000_PF_CONTROL_MSG)
-               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET);
+               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, NULL);
 }
 
 static int
-eth_igbvf_interrupt_action(struct rte_eth_dev *dev)
+eth_igbvf_interrupt_action(struct rte_eth_dev *dev, struct rte_intr_handle *intr_handle)
 {
        struct e1000_interrupt *intr =
                E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
@@ -2742,19 +2837,19 @@ eth_igbvf_interrupt_action(struct rte_eth_dev *dev)
        }
 
        igbvf_intr_enable(dev);
-       rte_intr_enable(&dev->pci_dev->intr_handle);
+       rte_intr_enable(intr_handle);
 
        return 0;
 }
 
 static void
-eth_igbvf_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
+eth_igbvf_interrupt_handler(struct rte_intr_handle *handle,
                            void *param)
 {
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
 
        eth_igbvf_interrupt_get_status(dev);
-       eth_igbvf_interrupt_action(dev);
+       eth_igbvf_interrupt_action(dev, handle);
 }
 
 static int
@@ -3027,8 +3122,9 @@ igbvf_dev_start(struct rte_eth_dev *dev)
                E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct e1000_adapter *adapter =
                E1000_DEV_PRIVATE(dev->data->dev_private);
+       struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        int ret;
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
        uint32_t intr_vector = 0;
 
        PMD_INIT_FUNC_TRACE();
@@ -3063,7 +3159,7 @@ igbvf_dev_start(struct rte_eth_dev *dev)
                                    dev->data->nb_rx_queues * sizeof(int), 0);
                if (!intr_handle->intr_vec) {
                        PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues"
-                                    " intr_vec\n", dev->data->nb_rx_queues);
+                                    " intr_vec", dev->data->nb_rx_queues);
                        return -ENOMEM;
                }
        }
@@ -3082,7 +3178,8 @@ igbvf_dev_start(struct rte_eth_dev *dev)
 static void
 igbvf_dev_stop(struct rte_eth_dev *dev)
 {
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -3281,7 +3378,7 @@ eth_igb_rss_reta_update(struct rte_eth_dev *dev,
        if (reta_size != ETH_RSS_RETA_SIZE_128) {
                PMD_DRV_LOG(ERR, "The size of hash lookup table configured "
                        "(%d) doesn't match the number hardware can supported "
-                       "(%d)\n", reta_size, ETH_RSS_RETA_SIZE_128);
+                       "(%d)", reta_size, ETH_RSS_RETA_SIZE_128);
                return -EINVAL;
        }
 
@@ -3322,7 +3419,7 @@ eth_igb_rss_reta_query(struct rte_eth_dev *dev,
        if (reta_size != ETH_RSS_RETA_SIZE_128) {
                PMD_DRV_LOG(ERR, "The size of hash lookup table configured "
                        "(%d) doesn't match the number hardware can supported "
-                       "(%d)\n", reta_size, ETH_RSS_RETA_SIZE_128);
+                       "(%d)", reta_size, ETH_RSS_RETA_SIZE_128);
                return -EINVAL;
        }
 
@@ -3443,7 +3540,7 @@ eth_igb_syn_filter_handle(struct rte_eth_dev *dev,
                                (struct rte_eth_syn_filter *)arg);
                break;
        default:
-               PMD_DRV_LOG(ERR, "unsupported operation %u\n", filter_op);
+               PMD_DRV_LOG(ERR, "unsupported operation %u", filter_op);
                ret = -EINVAL;
                break;
        }
@@ -5049,16 +5146,6 @@ eth_igb_set_eeprom(struct rte_eth_dev *dev,
        return nvm->ops.write(hw,  first, length, data);
 }
 
-static struct rte_driver pmd_igb_drv = {
-       .type = PMD_PDEV,
-       .init = rte_igb_pmd_init,
-};
-
-static struct rte_driver pmd_igbvf_drv = {
-       .type = PMD_PDEV,
-       .init = rte_igbvf_pmd_init,
-};
-
 static int
 eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
 {
@@ -5077,6 +5164,8 @@ eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
 {
        struct e1000_hw *hw =
                E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        uint32_t mask = 1 << queue_id;
        uint32_t regval;
 
@@ -5084,7 +5173,7 @@ eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
        E1000_WRITE_REG(hw, E1000_EIMS, regval | mask);
        E1000_WRITE_FLUSH(hw);
 
-       rte_intr_enable(&dev->pci_dev->intr_handle);
+       rte_intr_enable(intr_handle);
 
        return 0;
 }
@@ -5148,8 +5237,8 @@ eth_igb_configure_msix_intr(struct rte_eth_dev *dev)
        uint32_t vec = E1000_MISC_VEC_ID;
        uint32_t base = E1000_MISC_VEC_ID;
        uint32_t misc_shift = 0;
-
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
        /* won't configure msix register if no mapping is done
         * between intr vector and event fd
@@ -5220,7 +5309,9 @@ eth_igb_configure_msix_intr(struct rte_eth_dev *dev)
        E1000_WRITE_FLUSH(hw);
 }
 
-PMD_REGISTER_DRIVER(pmd_igb_drv, igb);
-DRIVER_REGISTER_PCI_TABLE(igb, pci_id_igb_map);
-PMD_REGISTER_DRIVER(pmd_igbvf_drv, igbvf);
-DRIVER_REGISTER_PCI_TABLE(igbvf, pci_id_igbvf_map);
+RTE_PMD_REGISTER_PCI(net_e1000_igb, rte_igb_pmd.pci_drv);
+RTE_PMD_REGISTER_PCI_TABLE(net_e1000_igb, pci_id_igb_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_e1000_igb, "* igb_uio | uio_pci_generic | vfio");
+RTE_PMD_REGISTER_PCI(net_e1000_igb_vf, rte_igbvf_pmd.pci_drv);
+RTE_PMD_REGISTER_PCI_TABLE(net_e1000_igb_vf, pci_id_igbvf_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_e1000_igb_vf, "* igb_uio | vfio");
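
[Annotation, not part of the patch] The igb hunks above all follow the same pattern: rte_eth_dev no longer exposes a pci_dev pointer, so the driver derives the PCI device (and its interrupt handle) through an accessor built on RTE_DEV_TO_PCI(), as seen in eth_igb_infos_get(). A sketch of that pattern under the assumed dpdk-17.02 definitions; example_dev_op() is hypothetical:

    #include <rte_ethdev.h>
    #include <rte_pci.h>
    #include <rte_interrupts.h>

    /* e1000_ethdev.h is assumed to provide this accessor in rc2. */
    #define E1000_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device)

    static void example_dev_op(struct rte_eth_dev *dev)
    {
            struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev);
            struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;

            /* replaces rte_intr_enable(&dev->pci_dev->intr_handle) */
            rte_intr_enable(intr_handle);
    }
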
index 5845bc2..67da3c2 100644 (file)
@@ -57,7 +57,9 @@
 static inline uint16_t
 dev_num_vf(struct rte_eth_dev *eth_dev)
 {
-       return eth_dev->pci_dev->max_vfs;
+       struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(eth_dev);
+
+       return pci_dev->max_vfs;
 }
 
 static inline
index 9d80a0b..45f3f24 100644 (file)
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -56,7 +56,6 @@
 #include <rte_lcore.h>
 #include <rte_atomic.h>
 #include <rte_branch_prediction.h>
-#include <rte_ring.h>
 #include <rte_mempool.h>
 #include <rte_malloc.h>
 #include <rte_mbuf.h>
@@ -66,6 +65,7 @@
 #include <rte_udp.h>
 #include <rte_tcp.h>
 #include <rte_sctp.h>
+#include <rte_net.h>
 #include <rte_string_fns.h>
 
 #include "e1000_logs.h"
@@ -79,6 +79,9 @@
                PKT_TX_L4_MASK |                 \
                PKT_TX_TCP_SEG)
 
+#define IGB_TX_OFFLOAD_NOTSUP_MASK \
+               (PKT_TX_OFFLOAD_MASK ^ IGB_TX_OFFLOAD_MASK)
+
 /**
  * Structure associated with each descriptor of the RX ring of a RX queue.
  */
@@ -606,7 +609,7 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
        /*
         * Set the Transmit Descriptor Tail (TDT).
         */
-       E1000_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
+       E1000_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
        PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
                   (unsigned) txq->port_id, (unsigned) txq->queue_id,
                   (unsigned) tx_id, (unsigned) nb_tx);
@@ -615,6 +618,52 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
        return nb_tx;
 }
 
+/*********************************************************************
+ *
+ *  TX prep functions
+ *
+ **********************************************************************/
+uint16_t
+eth_igb_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+               uint16_t nb_pkts)
+{
+       int i, ret;
+       struct rte_mbuf *m;
+
+       for (i = 0; i < nb_pkts; i++) {
+               m = tx_pkts[i];
+
+               /* Check some limitations for TSO in hardware */
+               if (m->ol_flags & PKT_TX_TCP_SEG)
+                       if ((m->tso_segsz > IGB_TSO_MAX_MSS) ||
+                                       (m->l2_len + m->l3_len + m->l4_len >
+                                       IGB_TSO_MAX_HDRLEN)) {
+                               rte_errno = -EINVAL;
+                               return i;
+                       }
+
+               if (m->ol_flags & IGB_TX_OFFLOAD_NOTSUP_MASK) {
+                       rte_errno = -ENOTSUP;
+                       return i;
+               }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+               ret = rte_validate_tx_offload(m);
+               if (ret != 0) {
+                       rte_errno = ret;
+                       return i;
+               }
+#endif
+               ret = rte_net_intel_cksum_prepare(m);
+               if (ret != 0) {
+                       rte_errno = ret;
+                       return i;
+               }
+       }
+
+       return i;
+}
+
 /*********************************************************************
  *
  *  RX functions
@@ -748,7 +797,9 @@ rx_desc_error_to_pkt_flags(uint32_t rx_status)
         */
 
        static uint64_t error_to_pkt_flags_map[4] = {
-               0,  PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
+               PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
+               PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
+               PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
                PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
        };
        return error_to_pkt_flags_map[(rx_status >>
@@ -1363,6 +1414,7 @@ eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
 
        igb_reset_tx_queue(txq, dev);
        dev->tx_pkt_burst = eth_igb_xmit_pkts;
+       dev->tx_pkt_prepare = &eth_igb_prep_pkts;
        dev->data->tx_queues[queue_idx] = txq;
 
        return 0;
@@ -1528,7 +1580,7 @@ eth_igb_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
                                desc - rxq->nb_rx_desc]);
        }
 
-       return 0;
+       return desc;
 }
 
 int
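
[Annotation, not part of the patch] eth_igb_prep_pkts() above is wired into dev->tx_pkt_prepare, so the expected application flow is to validate a burst with rte_eth_tx_prepare() and inspect rte_errno for the first rejected mbuf before transmitting. A usage sketch, assuming the dpdk-17.02 prototypes; handle_bad_offload() is a hypothetical application callback:

    #include <rte_ethdev.h>
    #include <rte_errno.h>

    extern void handle_bad_offload(struct rte_mbuf *m, int err);

    static uint16_t
    send_burst(uint8_t port_id, uint16_t queue_id,
               struct rte_mbuf **pkts, uint16_t nb_pkts)
    {
            uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id,
                                                  pkts, nb_pkts);

            if (nb_prep < nb_pkts)
                    handle_bad_offload(pkts[nb_prep], rte_errno);

            return rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
    }
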
index dddb1dc..84e4840 100644 (file)
@@ -266,32 +266,35 @@ void vnic_dev_clear_desc_ring(struct vnic_dev_ring *ring)
        memset(ring->descs, 0, ring->size);
 }
 
-int vnic_dev_alloc_desc_ring(__attribute__((unused)) struct vnic_dev *vdev,
+int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev,
        struct vnic_dev_ring *ring,
-       unsigned int desc_count, unsigned int desc_size, unsigned int socket_id,
+       unsigned int desc_count, unsigned int desc_size,
+       __attribute__((unused)) unsigned int socket_id,
        char *z_name)
 {
-       const struct rte_memzone *rz;
+       void *alloc_addr = NULL;
+       dma_addr_t alloc_pa = 0;
 
        vnic_dev_desc_ring_size(ring, desc_count, desc_size);
-
-       rz = rte_memzone_reserve_aligned(z_name,
-               ring->size_unaligned, socket_id,
-               0, ENIC_ALIGN);
-       if (!rz) {
+       alloc_addr = vdev->alloc_consistent(vdev->priv,
+                                           ring->size_unaligned,
+                                           &alloc_pa, (u8 *)z_name);
+       if (!alloc_addr) {
                pr_err("Failed to allocate ring (size=%d), aborting\n",
                        (int)ring->size);
                return -ENOMEM;
        }
-
-       ring->descs_unaligned = rz->addr;
-       if (!ring->descs_unaligned) {
+       ring->descs_unaligned = alloc_addr;
+       if (!alloc_pa) {
                pr_err("Failed to map allocated ring (size=%d), aborting\n",
                        (int)ring->size);
+               vdev->free_consistent(vdev->priv,
+                                     ring->size_unaligned,
+                                     alloc_addr,
+                                     alloc_pa);
                return -ENOMEM;
        }
-
-       ring->base_addr_unaligned = (dma_addr_t)rz->phys_addr;
+       ring->base_addr_unaligned = alloc_pa;
 
        ring->base_addr = VNIC_ALIGN(ring->base_addr_unaligned,
                ring->base_align);
@@ -308,8 +311,13 @@ int vnic_dev_alloc_desc_ring(__attribute__((unused)) struct vnic_dev *vdev,
 void vnic_dev_free_desc_ring(__attribute__((unused))  struct vnic_dev *vdev,
        struct vnic_dev_ring *ring)
 {
-       if (ring->descs)
+       if (ring->descs) {
+               vdev->free_consistent(vdev->priv,
+                                     ring->size_unaligned,
+                                     ring->descs_unaligned,
+                                     ring->base_addr_unaligned);
                ring->descs = NULL;
+       }
 }
 
 static int _vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
@@ -668,7 +676,6 @@ int vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast,
             (allmulti ? CMD_PFILTER_ALL_MULTICAST : 0);
 
        err = vnic_dev_cmd(vdev, CMD_PACKET_FILTER, &a0, &a1, wait);
-
        if (err)
                pr_err("Can't set packet filter\n");
 
index 0e700a1..10a40c1 100644 (file)
@@ -87,9 +87,11 @@ void vnic_rq_init_start(struct vnic_rq *rq, unsigned int cq_index,
        iowrite32(0, &rq->ctrl->error_status);
        iowrite32(fetch_index, &rq->ctrl->fetch_index);
        iowrite32(posted_index, &rq->ctrl->posted_index);
-       if (rq->is_sop)
-               iowrite32(((rq->is_sop << 10) | rq->data_queue_idx),
+       if (rq->data_queue_enable)
+               iowrite32(((1 << 10) | rq->data_queue_idx),
                          &rq->ctrl->data_ring);
+       else
+               iowrite32(0, &rq->ctrl->data_ring);
 }
 
 void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index,
index fd9e170..f3fd39f 100644 (file)
@@ -91,11 +91,13 @@ struct vnic_rq {
        uint16_t rxst_idx;
        uint32_t tot_pkts;
        uint16_t data_queue_idx;
+       uint8_t data_queue_enable;
        uint8_t is_sop;
        uint8_t in_use;
        struct rte_mbuf *pkt_first_seg;
        struct rte_mbuf *pkt_last_seg;
        unsigned int max_mbufs_per_pkt;
+       uint16_t tot_nb_desc;
 };
 
 static inline unsigned int vnic_rq_desc_avail(struct vnic_rq *rq)
index 9117cc7..a454017 100644 (file)
@@ -60,6 +60,7 @@
 #define ENIC_RQ_MAX            16
 #define ENIC_CQ_MAX            (ENIC_WQ_MAX + (ENIC_RQ_MAX / 2))
 #define ENIC_INTR_MAX          (ENIC_CQ_MAX + 2)
+#define ENIC_MAX_MAC_ADDR      64
 
 #define VLAN_ETH_HLEN           18
 
@@ -97,13 +98,11 @@ struct enic_fdir {
        void (*copy_fltr_fn)(struct filter_v2 *filt,
                             struct rte_eth_fdir_input *input,
                             struct rte_eth_fdir_masks *masks);
-
 };
 
 struct enic_soft_stats {
        rte_atomic64_t rx_nombuf;
        rte_atomic64_t rx_packet_errors;
-    rte_atomic64_t tx_oversized;
 };
 
 struct enic_memzone_entry {
@@ -168,17 +167,32 @@ struct enic {
        /* linked list storing memory allocations */
        LIST_HEAD(enic_memzone_list, enic_memzone_entry) memzone_list;
        rte_spinlock_t memzone_list_lock;
+       rte_spinlock_t mtu_lock;
 
 };
 
-static inline unsigned int enic_sop_rq(unsigned int rq)
+/* Get the CQ index from a Start of Packet(SOP) RQ index */
+static inline unsigned int enic_sop_rq_idx_to_cq_idx(unsigned int sop_idx)
 {
-       return rq * 2;
+       return sop_idx / 2;
 }
 
-static inline unsigned int enic_data_rq(unsigned int rq)
+/* Get the RTE RQ index from a Start of Packet(SOP) RQ index */
+static inline unsigned int enic_sop_rq_idx_to_rte_idx(unsigned int sop_idx)
 {
-       return rq * 2 + 1;
+       return sop_idx / 2;
+}
+
+/* Get the Start of Packet(SOP) RQ index from a RTE RQ index */
+static inline unsigned int enic_rte_rq_idx_to_sop_idx(unsigned int rte_idx)
+{
+       return rte_idx * 2;
+}
+
+/* Get the Data RQ index from a RTE RQ index */
+static inline unsigned int enic_rte_rq_idx_to_data_idx(unsigned int rte_idx)
+{
+       return rte_idx * 2 + 1;
 }
 
 static inline unsigned int enic_vnic_rq_count(struct enic *enic)
@@ -252,7 +266,7 @@ extern int enic_stop_rq(struct enic *enic, uint16_t queue_idx);
 extern void enic_free_rq(void *rxq);
 extern int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
        unsigned int socket_id, struct rte_mempool *mp,
-       uint16_t nb_desc);
+       uint16_t nb_desc, uint16_t free_thresh);
 extern int enic_set_rss_nic_cfg(struct enic *enic);
 extern int enic_set_vnic_res(struct enic *enic);
 extern void enic_set_hdr_split_size(struct enic *enic, u16 split_hdr_size);
@@ -264,8 +278,8 @@ extern void enic_dev_stats_get(struct enic *enic,
        struct rte_eth_stats *r_stats);
 extern void enic_dev_stats_clear(struct enic *enic);
 extern void enic_add_packet_filter(struct enic *enic);
-extern void enic_set_mac_address(struct enic *enic, uint8_t *mac_addr);
-extern void enic_del_mac_address(struct enic *enic);
+void enic_set_mac_address(struct enic *enic, uint8_t *mac_addr);
+void enic_del_mac_address(struct enic *enic, int mac_index);
 extern unsigned int enic_cleanup_wq(struct enic *enic, struct vnic_wq *wq);
 extern void enic_send_pkt(struct enic *enic, struct vnic_wq *wq,
                          struct rte_mbuf *tx_pkt, unsigned short len,
@@ -278,14 +292,17 @@ extern int enic_clsf_init(struct enic *enic);
 extern void enic_clsf_destroy(struct enic *enic);
 uint16_t enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                        uint16_t nb_pkts);
+uint16_t enic_dummy_recv_pkts(__rte_unused void *rx_queue,
+                             __rte_unused struct rte_mbuf **rx_pkts,
+                             __rte_unused uint16_t nb_pkts);
 uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                               uint16_t nb_pkts);
 int enic_set_mtu(struct enic *enic, uint16_t new_mtu);
+int enic_link_update(struct enic *enic);
 void enic_fdir_info(struct enic *enic);
 void enic_fdir_info_get(struct enic *enic, struct rte_eth_fdir_info *stats);
 void copy_fltr_v1(struct filter_v2 *fltr, struct rte_eth_fdir_input *input,
-                 struct rte_eth_fdir_masks *masks);
-void copy_fltr_v2(__rte_unused struct filter_v2 *fltr,
-                 __rte_unused struct rte_eth_fdir_input *input,
                  __rte_unused struct rte_eth_fdir_masks *masks);
+void copy_fltr_v2(struct filter_v2 *fltr, struct rte_eth_fdir_input *input,
+                 struct rte_eth_fdir_masks *masks);
 #endif /* _ENIC_H_ */
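
[Annotation, not part of the patch] The renamed helpers spell out the fixed SOP/data RQ layout: RTE RX queue i owns vnic RQ 2*i (start of packet) and RQ 2*i+1 (data), and both complete on CQ i. Worked through for queue 1, using only the inline definitions above:

    #include <assert.h>
    /* with the inline helpers from enic.h above */
    assert(enic_rte_rq_idx_to_sop_idx(1)  == 2);
    assert(enic_rte_rq_idx_to_data_idx(1) == 3);
    assert(enic_sop_rq_idx_to_cq_idx(2)   == 1);
    assert(enic_sop_rq_idx_to_rte_idx(2)  == 1);
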
index 1610951..bcf479a 100644 (file)
@@ -74,7 +74,7 @@ void enic_fdir_stats_get(struct enic *enic, struct rte_eth_fdir_stats *stats)
 
 void enic_fdir_info_get(struct enic *enic, struct rte_eth_fdir_info *info)
 {
-       info->mode = enic->fdir.modes;
+       info->mode = (enum rte_fdir_mode)enic->fdir.modes;
        info->flow_types_mask[0] = enic->fdir.types_mask;
 }
 
@@ -107,7 +107,6 @@ enic_set_layer(struct filter_generic_1 *gp, unsigned int flag,
        memcpy(gp->layer[layer].val, val, len);
 }
 
-
 /* Copy Flow Director filter to a VIC ipv4 filter (for Cisco VICs
  * without advanced filter support.
  */
@@ -133,28 +132,6 @@ copy_fltr_v1(struct filter_v2 *fltr, struct rte_eth_fdir_input *input,
        fltr->u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE;
 }
 
-#define TREX_PATCH
-#ifdef TREX_PATCH
-void
-copy_fltr_recv_all(struct filter_v2 *fltr, struct rte_eth_fdir_input *input,
-             struct rte_eth_fdir_masks *masks) {
-       struct filter_generic_1 *gp = &fltr->u.generic_1;
-       memset(gp, 0, sizeof(*gp));
-
-    struct ether_hdr eth_mask, eth_val;
-    memset(&eth_mask, 0, sizeof(eth_mask));
-    memset(&eth_val, 0, sizeof(eth_val));
-
-    eth_val.ether_type = 0xdead;
-    eth_mask.ether_type = 0;
-
-    gp->position = 0;
-    enic_set_layer(gp, 0, FILTER_GENERIC_1_L2,
-                              &eth_mask, &eth_val, sizeof(struct ether_hdr));
-
-}
-#endif
-
 /* Copy Flow Director filter to a VIC generic filter (requires advanced
  * filter support.
  */
@@ -165,15 +142,8 @@ copy_fltr_v2(struct filter_v2 *fltr, struct rte_eth_fdir_input *input,
        struct filter_generic_1 *gp = &fltr->u.generic_1;
        int i;
 
-       RTE_ASSERT(enic->adv_filters);
-
        fltr->type = FILTER_DPDK_1;
        memset(gp, 0, sizeof(*gp));
-#ifdef TREX_PATCH
-    // important for this to be below 2.
-    // If added with position 2, IPv6 UDP and ICMP seems to be caught by some other rule
-    gp->position = 1;
-#endif
 
        if (input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_UDP) {
                struct udp_hdr udp_mask, udp_val;
@@ -185,7 +155,7 @@ copy_fltr_v2(struct filter_v2 *fltr, struct rte_eth_fdir_input *input,
                        udp_val.src_port = input->flow.udp4_flow.src_port;
                }
                if (input->flow.udp4_flow.dst_port) {
-                       udp_mask.src_port = masks->dst_port_mask;
+                       udp_mask.dst_port = masks->dst_port_mask;
                        udp_val.dst_port = input->flow.udp4_flow.dst_port;
                }
 
@@ -241,13 +211,9 @@ copy_fltr_v2(struct filter_v2 *fltr, struct rte_eth_fdir_input *input,
                memset(&ip4_val, 0, sizeof(struct ipv4_hdr));
 
                if (input->flow.ip4_flow.tos) {
-                       ip4_mask.type_of_service = masks->ipv4_mask.tos;
+                       ip4_mask.type_of_service = 0xff;
                        ip4_val.type_of_service = input->flow.ip4_flow.tos;
                }
-               if (input->flow.ip4_flow.ip_id) {
-                       ip4_mask.packet_id = 0xffff;
-                       ip4_val.packet_id = input->flow.ip4_flow.ip_id;
-               }
                if (input->flow.ip4_flow.ttl) {
                        ip4_mask.time_to_live = 0xff;
                        ip4_val.time_to_live = input->flow.ip4_flow.ttl;
@@ -333,7 +299,7 @@ copy_fltr_v2(struct filter_v2 *fltr, struct rte_eth_fdir_input *input,
                memset(&ipv6_val, 0, sizeof(struct ipv6_hdr));
 
                if (input->flow.ipv6_flow.proto) {
-                       ipv6_mask.proto = masks->ipv6_mask.proto;
+                       ipv6_mask.proto = 0xff;
                        ipv6_val.proto = input->flow.ipv6_flow.proto;
                }
                for (i = 0; i < 4; i++) {
@@ -349,8 +315,8 @@ copy_fltr_v2(struct filter_v2 *fltr, struct rte_eth_fdir_input *input,
                                        input->flow.ipv6_flow.dst_ip[i];
                }
                if (input->flow.ipv6_flow.tc) {
-                       ipv6_mask.vtc_flow = ((uint32_t)masks->ipv6_mask.tc<<12);
-                       ipv6_val.vtc_flow = input->flow.ipv6_flow.tc << 12;
+                       ipv6_mask.vtc_flow = 0x00ff0000;
+                       ipv6_val.vtc_flow = input->flow.ipv6_flow.tc << 16;
                }
                if (input->flow.ipv6_flow.hop_limits) {
                        ipv6_mask.hop_limits = 0xff;
@@ -372,11 +338,7 @@ int enic_fdir_del_fltr(struct enic *enic, struct rte_eth_fdir_filter *params)
        case -EINVAL:
        case -ENOENT:
                enic->fdir.stats.f_remove++;
-#ifdef TREX_PATCH
-               return pos;
-#else
                return -EINVAL;
-#endif
        default:
                /* The entry is present in the table */
                key = enic->fdir.nodes[pos];
@@ -420,7 +382,7 @@ int enic_fdir_add_fltr(struct enic *enic, struct rte_eth_fdir_filter *params)
        }
 
        /* Get the enicpmd RQ from the DPDK Rx queue */
-       queue = enic_sop_rq(params->action.rx_queue);
+       queue = enic_rte_rq_idx_to_sop_idx(params->action.rx_queue);
 
        if (!enic->rq[queue].in_use)
                return -EINVAL;
@@ -487,18 +449,8 @@ int enic_fdir_add_fltr(struct enic *enic, struct rte_eth_fdir_filter *params)
        key->filter = *params;
        key->rq_index = queue;
 
-#ifdef TREX_PATCH
-    switch (params->soft_id) {
-    case 100:
-        copy_fltr_recv_all(&fltr, &params->input, &enic->rte_dev->data->dev_conf.fdir_conf.mask);
-        break;
-    default:
-#endif
-        enic->fdir.copy_fltr_fn(&fltr, &params->input,
-                                &enic->rte_dev->data->dev_conf.fdir_conf.mask);
-#ifdef TREX_PATCH
-    }
-#endif
+       enic->fdir.copy_fltr_fn(&fltr, &params->input,
+                               &enic->rte_dev->data->dev_conf.fdir_conf.mask);
 
        if (!vnic_dev_classifier(enic->vdev, CLSF_ADD, &queue, &fltr)) {
                key->fltr_id = queue;
index 5dbd983..fc58bb4 100644 (file)
@@ -41,6 +41,7 @@
 #include <rte_atomic.h>
 #include <rte_malloc.h>
 #include <rte_log.h>
+#include <rte_io.h>
 
 #define ENIC_PAGE_ALIGN 4096UL
 #define ENIC_ALIGN      ENIC_PAGE_ALIGN
@@ -95,42 +96,52 @@ typedef         unsigned long long  dma_addr_t;
 
 static inline uint32_t ioread32(volatile void *addr)
 {
-       return *(volatile uint32_t *)addr;
+       return rte_read32(addr);
 }
 
 static inline uint16_t ioread16(volatile void *addr)
 {
-       return *(volatile uint16_t *)addr;
+       return rte_read16(addr);
 }
 
 static inline uint8_t ioread8(volatile void *addr)
 {
-       return *(volatile uint8_t *)addr;
+       return rte_read8(addr);
 }
 
 static inline void iowrite32(uint32_t val, volatile void *addr)
 {
-       *(volatile uint32_t *)addr = val;
+       rte_write32(val, addr);
+}
+
+static inline void iowrite32_relaxed(uint32_t val, volatile void *addr)
+{
+       rte_write32_relaxed(val, addr);
 }
 
 static inline void iowrite16(uint16_t val, volatile void *addr)
 {
-       *(volatile uint16_t *)addr = val;
+       rte_write16(val, addr);
 }
 
 static inline void iowrite8(uint8_t val, volatile void *addr)
 {
-       *(volatile uint8_t *)addr = val;
+       rte_write8(val, addr);
 }
 
 static inline unsigned int readl(volatile void __iomem *addr)
 {
-       return *(volatile unsigned int *)addr;
+       return rte_read32(addr);
+}
+
+static inline unsigned int readl_relaxed(volatile void __iomem *addr)
+{
+       return rte_read32_relaxed(addr);
 }
 
 static inline void writel(unsigned int val, volatile void __iomem *addr)
 {
-       *(volatile unsigned int *)addr = val;
+       rte_write32(val, addr);
 }
 
 #define min_t(type, x, y) ({                    \
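
[Annotation, not part of the patch] The io accessors now route through the rte_io.h helpers introduced in 17.02, and the new *_relaxed variants drop the ordering barrier. A sketch of the assumed relationship between the two forms (the generic rte_io.h layout; architectures may specialize it), which is why a tail/doorbell write can use the relaxed store once the descriptor writes are already ordered:

    #include <rte_io.h>

    /* Assumed shape of the non-relaxed write the wrappers above map onto. */
    static inline void sketch_write32(uint32_t value, volatile void *addr)
    {
            rte_io_wmb();                     /* order prior descriptor stores */
            rte_write32_relaxed(value, addr); /* then post, e.g. a doorbell */
    }
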
index 6a86e23..bffa870 100644 (file)
@@ -154,7 +154,7 @@ static int enicpmd_dev_setup_intr(struct enic *enic)
                return 0;
        /* check start of packet (SOP) RQs only in case scatter is disabled. */
        for (index = 0; index < enic->rq_count; index++) {
-               if (!enic->rq[enic_sop_rq(index)].ctrl)
+               if (!enic->rq[enic_rte_rq_idx_to_sop_idx(index)].ctrl)
                        break;
        }
        if (enic->rq_count != index)
@@ -262,6 +262,35 @@ static void enicpmd_dev_rx_queue_release(void *rxq)
        enic_free_rq(rxq);
 }
 
+static uint32_t enicpmd_dev_rx_queue_count(struct rte_eth_dev *dev,
+                                          uint16_t rx_queue_id)
+{
+       struct enic *enic = pmd_priv(dev);
+       uint32_t queue_count = 0;
+       struct vnic_cq *cq;
+       uint32_t cq_tail;
+       uint16_t cq_idx;
+       int rq_num;
+
+       if (rx_queue_id >= dev->data->nb_rx_queues) {
+               dev_err(enic, "Invalid RX queue id=%d", rx_queue_id);
+               return 0;
+       }
+
+       rq_num = enic_rte_rq_idx_to_sop_idx(rx_queue_id);
+       cq = &enic->cq[enic_cq_rq(enic, rq_num)];
+       cq_idx = cq->to_clean;
+
+       cq_tail = ioread32(&cq->ctrl->cq_tail);
+
+       if (cq_tail < cq_idx)
+               cq_tail += cq->ring.desc_count;
+
+       queue_count = cq_tail - cq_idx;
+
+       return queue_count;
+}
+
 static int enicpmd_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
        uint16_t queue_idx,
        uint16_t nb_desc,
@@ -284,18 +313,15 @@ static int enicpmd_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
        }
 
        eth_dev->data->rx_queues[queue_idx] =
-               (void *)&enic->rq[enic_sop_rq(queue_idx)];
+               (void *)&enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
 
-       ret = enic_alloc_rq(enic, queue_idx, socket_id, mp, nb_desc);
+       ret = enic_alloc_rq(enic, queue_idx, socket_id, mp, nb_desc,
+                           rx_conf->rx_free_thresh);
        if (ret) {
                dev_err(enic, "error in allocating rq\n");
                return ret;
        }
 
-       enic->rq[queue_idx].rx_free_thresh = rx_conf->rx_free_thresh;
-       dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
-                       enic->rq[queue_idx].rx_free_thresh);
-
        return enicpmd_dev_setup_intr(enic);
 }
 
@@ -405,17 +431,9 @@ static int enicpmd_dev_link_update(struct rte_eth_dev *eth_dev,
        __rte_unused int wait_to_complete)
 {
        struct enic *enic = pmd_priv(eth_dev);
-       int ret;
-       int link_status = 0;
 
        ENICPMD_FUNC_TRACE();
-       link_status = enic_get_link_status(enic);
-       ret = (link_status == enic->link_status);
-       enic->link_status = link_status;
-       eth_dev->data->dev_link.link_status = link_status;
-       eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
-       eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
-       return ret;
+       return enic_link_update(enic);
 }
 
 static void enicpmd_dev_stats_get(struct rte_eth_dev *eth_dev,
@@ -435,22 +453,19 @@ static void enicpmd_dev_stats_reset(struct rte_eth_dev *eth_dev)
        enic_dev_stats_clear(enic);
 }
 
-
-
-
 static void enicpmd_dev_info_get(struct rte_eth_dev *eth_dev,
        struct rte_eth_dev_info *device_info)
 {
        struct enic *enic = pmd_priv(eth_dev);
 
        ENICPMD_FUNC_TRACE();
+       device_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
        /* Scattered Rx uses two receive queues per rx queue exposed to dpdk */
        device_info->max_rx_queues = enic->conf_rq_count / 2;
        device_info->max_tx_queues = enic->conf_wq_count;
        device_info->min_rx_bufsize = ENIC_MIN_MTU;
-       device_info->max_rx_pktlen = enic->rte_dev->data->mtu
-                                  + ETHER_HDR_LEN + 4;
-       device_info->max_mac_addrs = 1;
+       device_info->max_rx_pktlen = enic->max_mtu + ETHER_HDR_LEN + 4;
+       device_info->max_mac_addrs = ENIC_MAX_MAC_ADDR;
        device_info->rx_offload_capa =
                DEV_RX_OFFLOAD_VLAN_STRIP |
                DEV_RX_OFFLOAD_IPV4_CKSUM |
@@ -460,17 +475,18 @@ static void enicpmd_dev_info_get(struct rte_eth_dev *eth_dev,
                DEV_TX_OFFLOAD_VLAN_INSERT |
                DEV_TX_OFFLOAD_IPV4_CKSUM  |
                DEV_TX_OFFLOAD_UDP_CKSUM   |
-               DEV_TX_OFFLOAD_TCP_CKSUM;
+               DEV_TX_OFFLOAD_TCP_CKSUM   |
+               DEV_TX_OFFLOAD_TCP_TSO;
        device_info->default_rxconf = (struct rte_eth_rxconf) {
                .rx_free_thresh = ENIC_DEFAULT_RX_FREE_THRESH
        };
-
-    device_info->speed_capa = ETH_LINK_SPEED_40G;
 }
 
 static const uint32_t *enicpmd_dev_supported_ptypes_get(struct rte_eth_dev *dev)
 {
        static const uint32_t ptypes[] = {
+               RTE_PTYPE_L2_ETHER,
+               RTE_PTYPE_L2_ETHER_VLAN,
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
                RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
                RTE_PTYPE_L4_TCP,
@@ -531,12 +547,12 @@ static void enicpmd_add_mac_addr(struct rte_eth_dev *eth_dev,
        enic_set_mac_address(enic, mac_addr->addr_bytes);
 }
 
-static void enicpmd_remove_mac_addr(struct rte_eth_dev *eth_dev, __rte_unused uint32_t index)
+static void enicpmd_remove_mac_addr(struct rte_eth_dev *eth_dev, uint32_t index)
 {
        struct enic *enic = pmd_priv(eth_dev);
 
        ENICPMD_FUNC_TRACE();
-       enic_del_mac_address(enic);
+       enic_del_mac_address(enic, index);
 }
 
 static int enicpmd_mtu_set(struct rte_eth_dev *eth_dev, uint16_t mtu)
@@ -575,7 +591,7 @@ static const struct eth_dev_ops enicpmd_eth_dev_ops = {
        .tx_queue_stop        = enicpmd_dev_tx_queue_stop,
        .rx_queue_setup       = enicpmd_dev_rx_queue_setup,
        .rx_queue_release     = enicpmd_dev_rx_queue_release,
-       .rx_queue_count       = NULL,
+       .rx_queue_count       = enicpmd_dev_rx_queue_count,
        .rx_descriptor_done   = NULL,
        .tx_queue_setup       = enicpmd_dev_tx_queue_setup,
        .tx_queue_release     = enicpmd_dev_tx_queue_release,
@@ -607,7 +623,7 @@ static int eth_enicpmd_dev_init(struct rte_eth_dev *eth_dev)
        eth_dev->rx_pkt_burst = &enic_recv_pkts;
        eth_dev->tx_pkt_burst = &enic_xmit_pkts;
 
-       pdev = eth_dev->pci_dev;
+       pdev = RTE_DEV_TO_PCI(eth_dev->device);
        rte_eth_copy_pci_info(eth_dev, pdev);
        enic->pdev = pdev;
        addr = &pdev->addr;
@@ -620,32 +636,15 @@ static int eth_enicpmd_dev_init(struct rte_eth_dev *eth_dev)
 
 static struct eth_driver rte_enic_pmd = {
        .pci_drv = {
-               .name = "rte_enic_pmd",
                .id_table = pci_id_enic_map,
-               .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
+               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+               .probe = rte_eth_dev_pci_probe,
+               .remove = rte_eth_dev_pci_remove,
        },
        .eth_dev_init = eth_enicpmd_dev_init,
        .dev_private_size = sizeof(struct enic),
 };
 
-/* Driver initialization routine.
- * Invoked once at EAL init time.
- * Register as the [Poll Mode] Driver of Cisco ENIC device.
- */
-static int
-rte_enic_pmd_init(__rte_unused const char *name,
-        __rte_unused const char *params)
-{
-       ENICPMD_FUNC_TRACE();
-
-       rte_eth_driver_register(&rte_enic_pmd);
-       return 0;
-}
-
-static struct rte_driver rte_enic_driver = {
-       .type = PMD_PDEV,
-       .init = rte_enic_pmd_init,
-};
-
-PMD_REGISTER_DRIVER(rte_enic_driver, enic);
-DRIVER_REGISTER_PCI_TABLE(enic, pci_id_enic_map);
+RTE_PMD_REGISTER_PCI(net_enic, rte_enic_pmd.pci_drv);
+RTE_PMD_REGISTER_PCI_TABLE(net_enic, pci_id_enic_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_enic, "* igb_uio | uio_pci_generic | vfio");
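
[Annotation, not part of the patch] enicpmd_dev_rx_queue_count() above reports completions the PMD has not yet drained by diffing the hardware CQ tail against the software to_clean index, with a wrap correction. Worked example for a 64-descriptor CQ:

    to_clean (cq_idx) = 60, hardware cq_tail = 4
    cq_tail < cq_idx, so cq_tail += desc_count  ->  4 + 64 = 68
    queue_count = 68 - 60 = 8 descriptors pending in the queue
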
index 4530dcf..21e8ede 100644 (file)
@@ -137,7 +137,6 @@ static void enic_clear_soft_stats(struct enic *enic)
        struct enic_soft_stats *soft_stats = &enic->soft_stats;
        rte_atomic64_clear(&soft_stats->rx_nombuf);
        rte_atomic64_clear(&soft_stats->rx_packet_errors);
-    rte_atomic64_clear(&soft_stats->tx_oversized);
 }
 
 static void enic_init_soft_stats(struct enic *enic)
@@ -145,7 +144,6 @@ static void enic_init_soft_stats(struct enic *enic)
        struct enic_soft_stats *soft_stats = &enic->soft_stats;
        rte_atomic64_init(&soft_stats->rx_nombuf);
        rte_atomic64_init(&soft_stats->rx_packet_errors);
-    rte_atomic64_init(&soft_stats->tx_oversized);
        enic_clear_soft_stats(enic);
 }
 
@@ -168,7 +166,6 @@ void enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
                return;
        }
 
-
        /* The number of truncated packets can only be calculated by
         * subtracting a hardware counter from error packets received by
         * the driver. Note: this causes transient inaccuracies in the
@@ -177,26 +174,28 @@ void enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
         * which can make ibytes be slightly higher than it should be.
         */
        rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
-       rx_truncated = rx_packet_errors - stats->rx.rx_errors -
-               stats->rx.rx_no_bufs;
+       rx_truncated = rx_packet_errors - stats->rx.rx_errors;
 
        r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
        r_stats->opackets = stats->tx.tx_frames_ok;
 
-       r_stats->ibytes = stats->rx.rx_unicast_bytes_ok+stats->rx.rx_multicast_bytes_ok+stats->rx.rx_broadcast_bytes_ok;
+       r_stats->ibytes = stats->rx.rx_bytes_ok;
        r_stats->obytes = stats->tx.tx_bytes_ok;
 
        r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
-       r_stats->oerrors = stats->tx.tx_errors + rte_atomic64_read(&soft_stats->tx_oversized);
+       r_stats->oerrors = stats->tx.tx_errors;
 
        r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
 
        r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
 }
 
-void enic_del_mac_address(struct enic *enic)
+void enic_del_mac_address(struct enic *enic, int mac_index)
 {
-       if (vnic_dev_del_addr(enic->vdev, enic->mac_addr))
+       struct rte_eth_dev *eth_dev = enic->rte_dev;
+       uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
+
+       if (vnic_dev_del_addr(enic->vdev, mac_addr))
                dev_err(enic, "del mac addr failed\n");
 }
 
@@ -209,15 +208,6 @@ void enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
                return;
        }
 
-       err = vnic_dev_del_addr(enic->vdev, enic->mac_addr);
-       if (err) {
-               dev_err(enic, "del mac addr failed\n");
-               return;
-       }
-
-       ether_addr_copy((struct ether_addr *)mac_addr,
-               (struct ether_addr *)enic->mac_addr);
-
        err = vnic_dev_add_addr(enic->vdev, mac_addr);
        if (err) {
                dev_err(enic, "add mac addr failed\n");
@@ -244,14 +234,14 @@ void enic_init_vnic_resources(struct enic *enic)
        struct vnic_rq *data_rq;
 
        for (index = 0; index < enic->rq_count; index++) {
-               cq_idx = enic_cq_rq(enic, enic_sop_rq(index));
+               cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
 
-               vnic_rq_init(&enic->rq[enic_sop_rq(index)],
+               vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
                        cq_idx,
                        error_interrupt_enable,
                        error_interrupt_offset);
 
-               data_rq = &enic->rq[enic_data_rq(index)];
+               data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index)];
                if (data_rq->in_use)
                        vnic_rq_init(data_rq,
                                     cq_idx,
@@ -414,14 +404,32 @@ enic_free_consistent(void *priv,
        rte_free(mze);
 }
 
+int enic_link_update(struct enic *enic)
+{
+       struct rte_eth_dev *eth_dev = enic->rte_dev;
+       int ret;
+       int link_status = 0;
+
+       link_status = enic_get_link_status(enic);
+       ret = (link_status == enic->link_status);
+       enic->link_status = link_status;
+       eth_dev->data->dev_link.link_status = link_status;
+       eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
+       eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
+       return ret;
+}
+
 static void
 enic_intr_handler(__rte_unused struct rte_intr_handle *handle,
        void *arg)
 {
-       struct enic *enic = pmd_priv((struct rte_eth_dev *)arg);
+       struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
+       struct enic *enic = pmd_priv(dev);
 
        vnic_intr_return_all_credits(&enic->intr);
 
+       enic_link_update(enic);
+       _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
        enic_log_q_error(enic);
 }
 
@@ -433,7 +441,13 @@ int enic_enable(struct enic *enic)
 
        eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
        eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
-       vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
+
+       /* vnic notification of link status has already been turned on in
+        * enic_dev_init() which is called during probe time.  Here we are
+        * just turning on interrupt vector 0 if needed.
+        */
+       if (eth_dev->data->dev_conf.intr_conf.lsc)
+               vnic_dev_notify_set(enic->vdev, 0);
 
        if (enic_clsf_init(enic))
                dev_warning(enic, "Init of hash table for clsf failed."\
@@ -441,17 +455,17 @@ int enic_enable(struct enic *enic)
 
        for (index = 0; index < enic->rq_count; index++) {
                err = enic_alloc_rx_queue_mbufs(enic,
-                       &enic->rq[enic_sop_rq(index)]);
+                       &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
                if (err) {
                        dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
                        return err;
                }
                err = enic_alloc_rx_queue_mbufs(enic,
-                       &enic->rq[enic_data_rq(index)]);
+                       &enic->rq[enic_rte_rq_idx_to_data_idx(index)]);
                if (err) {
                        /* release the allocated mbufs for the sop rq*/
                        enic_rxmbuf_queue_release(enic,
-                               &enic->rq[enic_sop_rq(index)]);
+                               &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
 
                        dev_err(enic, "Failed to alloc data RX queue mbufs\n");
                        return err;
@@ -520,7 +534,10 @@ void enic_free_rq(void *rxq)
        if (rq_data->in_use)
                vnic_rq_free(rq_data);
 
-       vnic_cq_free(&enic->cq[rq_sop->index]);
+       vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
+
+       rq_sop->in_use = 0;
+       rq_data->in_use = 0;
 }
 
 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
@@ -545,8 +562,10 @@ int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
 
 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
 {
-       struct vnic_rq *rq_sop = &enic->rq[enic_sop_rq(queue_idx)];
-       struct vnic_rq *rq_data = &enic->rq[rq_sop->data_queue_idx];
+       struct vnic_rq *rq_sop;
+       struct vnic_rq *rq_data;
+       rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
+       rq_data = &enic->rq[rq_sop->data_queue_idx];
        struct rte_eth_dev *eth_dev = enic->rte_dev;
 
        if (rq_data->in_use)
@@ -560,8 +579,10 @@ int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
 {
        int ret1 = 0, ret2 = 0;
        struct rte_eth_dev *eth_dev = enic->rte_dev;
-       struct vnic_rq *rq_sop = &enic->rq[enic_sop_rq(queue_idx)];
-       struct vnic_rq *rq_data = &enic->rq[rq_sop->data_queue_idx];
+       struct vnic_rq *rq_sop;
+       struct vnic_rq *rq_data;
+       rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
+       rq_data = &enic->rq[rq_sop->data_queue_idx];
 
        ret2 = vnic_rq_disable(rq_sop);
        rte_mb();
@@ -579,16 +600,17 @@ int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
 
 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
        unsigned int socket_id, struct rte_mempool *mp,
-       uint16_t nb_desc)
+       uint16_t nb_desc, uint16_t free_thresh)
 {
        int rc;
-       uint16_t sop_queue_idx = enic_sop_rq(queue_idx);
-       uint16_t data_queue_idx = enic_data_rq(queue_idx);
+       uint16_t sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
+       uint16_t data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx);
        struct vnic_rq *rq_sop = &enic->rq[sop_queue_idx];
        struct vnic_rq *rq_data = &enic->rq[data_queue_idx];
        unsigned int mbuf_size, mbufs_per_pkt;
        unsigned int nb_sop_desc, nb_data_desc;
        uint16_t min_sop, max_sop, min_data, max_data;
+       uint16_t mtu = enic->rte_dev->data->mtu;
 
        rq_sop->is_sop = 1;
        rq_sop->data_queue_idx = data_queue_idx;
@@ -599,14 +621,18 @@ int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
        rq_data->socket_id = socket_id;
        rq_data->mp = mp;
        rq_sop->in_use = 1;
+       rq_sop->rx_free_thresh = free_thresh;
+       rq_data->rx_free_thresh = free_thresh;
+       dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
+                 free_thresh);
 
        mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
                               RTE_PKTMBUF_HEADROOM);
 
        if (enic->rte_dev->data->dev_conf.rxmode.enable_scatter) {
-               dev_info(enic, "Scatter rx mode enabled\n");
+               dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
                /* ceil((mtu + ETHER_HDR_LEN + 4)/mbuf_size) */
-               mbufs_per_pkt = ((enic->config.mtu + ETHER_HDR_LEN + 4) +
+               mbufs_per_pkt = ((mtu + ETHER_HDR_LEN + 4) +
                                 (mbuf_size - 1)) / mbuf_size;
        } else {
                dev_info(enic, "Scatter rx mode disabled\n");
@@ -614,10 +640,13 @@ int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
        }
 
        if (mbufs_per_pkt > 1) {
-               dev_info(enic, "Scatter rx mode in use\n");
+               dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
+               rq_sop->data_queue_enable = 1;
                rq_data->in_use = 1;
        } else {
-               dev_info(enic, "Scatter rx mode not being used\n");
+               dev_info(enic, "Rq %u Scatter rx mode not being used\n",
+                        queue_idx);
+               rq_sop->data_queue_enable = 0;
                rq_data->in_use = 0;
        }
 
@@ -654,7 +683,7 @@ int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
        }
        if (mbufs_per_pkt > 1) {
                dev_info(enic, "For mtu %d and mbuf size %d valid rx descriptor range is %d to %d\n",
-                        enic->config.mtu, mbuf_size, min_sop + min_data,
+                        mtu, mbuf_size, min_sop + min_data,
                         max_sop + max_data);
        }
        dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
@@ -705,6 +734,8 @@ int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
                        goto err_free_sop_mbuf;
        }
 
+       rq_sop->tot_nb_desc = nb_desc; /* squirrel away for MTU update function */
+
        return 0;
 
 err_free_sop_mbuf:
@@ -801,6 +832,10 @@ int enic_disable(struct enic *enic)
 
        vnic_intr_mask(&enic->intr);
        (void)vnic_intr_masked(&enic->intr); /* flush write */
+       rte_intr_disable(&enic->pdev->intr_handle);
+       rte_intr_callback_unregister(&enic->pdev->intr_handle,
+                                    enic_intr_handler,
+                                    (void *)enic->rte_dev);
 
        vnic_dev_disable(enic->vdev);
 
@@ -822,8 +857,14 @@ int enic_disable(struct enic *enic)
                }
        }
 
+       /* If we were using interrupts, set the interrupt vector to -1
+        * to disable interrupts.  We are not disabling link notifications,
+        * though, as we want the polling of link status to continue working.
+        */
+       if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
+               vnic_dev_notify_set(enic->vdev, -1);
+
        vnic_dev_set_reset_flag(enic->vdev, 1);
-       vnic_dev_notify_unset(enic->vdev);
 
        for (i = 0; i < enic->wq_count; i++)
                vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
@@ -925,7 +966,7 @@ static int enic_set_rsscpu(struct enic *enic, u8 rss_hash_bits)
 
        for (i = 0; i < (1 << rss_hash_bits); i++)
                (*rss_cpu_buf_va).cpu[i / 4].b[i % 4] =
-                       enic_sop_rq(i % enic->rq_count);
+                       enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
 
        err = enic_set_rss_cpu(enic,
                rss_cpu_buf_pa,
@@ -1025,6 +1066,9 @@ static void enic_dev_deinit(struct enic *enic)
 {
        struct rte_eth_dev *eth_dev = enic->rte_dev;
 
+       /* stop link status checking */
+       vnic_dev_notify_unset(enic->vdev);
+
        rte_free(eth_dev->data->mac_addrs);
 }
 
@@ -1066,6 +1110,56 @@ int enic_set_vnic_res(struct enic *enic)
        return rc;
 }
 
+/* Reinitialize an RQ and its associated completion queue */
+static int
+enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
+{
+       struct vnic_rq *sop_rq, *data_rq;
+       unsigned int cq_idx = enic_cq_rq(enic, rq_idx);
+       int rc = 0;
+
+       sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
+       data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx)];
+
+       vnic_cq_clean(&enic->cq[cq_idx]);
+       vnic_cq_init(&enic->cq[cq_idx],
+                    0 /* flow_control_enable */,
+                    1 /* color_enable */,
+                    0 /* cq_head */,
+                    0 /* cq_tail */,
+                    1 /* cq_tail_color */,
+                    0 /* interrupt_enable */,
+                    1 /* cq_entry_enable */,
+                    0 /* cq_message_enable */,
+                    0 /* interrupt offset */,
+                    0 /* cq_message_addr */);
+
+
+       vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
+                          enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
+                          sop_rq->ring.desc_count - 1, 1, 0);
+       if (data_rq->in_use) {
+               vnic_rq_init_start(data_rq,
+                                  enic_cq_rq(enic,
+                                  enic_rte_rq_idx_to_data_idx(rq_idx)), 0,
+                                  data_rq->ring.desc_count - 1, 1, 0);
+       }
+
+       rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
+       if (rc)
+               return rc;
+
+       if (data_rq->in_use) {
+               rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
+               if (rc) {
+                       enic_rxmbuf_queue_release(enic, sop_rq);
+                       return rc;
+               }
+       }
+
+       return 0;
+}
+
 /* The Cisco NIC can send and receive packets up to a max packet size
  * determined by the NIC type and firmware. There is also an MTU
  * configured into the NIC via the CIMC/UCSM management interface
@@ -1075,6 +1169,9 @@ int enic_set_vnic_res(struct enic *enic)
  */
 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
 {
+       unsigned int rq_idx;
+       struct vnic_rq *rq;
+       int rc = 0;
        uint16_t old_mtu;       /* previous setting */
        uint16_t config_mtu;    /* Value configured into NIC via CIMC/UCSM */
        struct rte_eth_dev *eth_dev = enic->rte_dev;
@@ -1082,10 +1179,6 @@ int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
        old_mtu = eth_dev->data->mtu;
        config_mtu = enic->config.mtu;
 
-       /* only works with Rx scatter disabled */
-       if (enic->rte_dev->data->dev_conf.rxmode.enable_scatter)
-               return -ENOTSUP;
-
        if (new_mtu > enic->max_mtu) {
                dev_err(enic,
                        "MTU not updated: requested (%u) greater than max (%u)\n",
@@ -1103,11 +1196,83 @@ int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
                        "MTU (%u) is greater than value configured in NIC (%u)\n",
                        new_mtu, config_mtu);
 
+       /* The easy case is when scatter is disabled. However, if the MTU
+        * becomes greater than the mbuf data size, packet drops will ensue.
+        */
+       if (!enic->rte_dev->data->dev_conf.rxmode.enable_scatter) {
+               eth_dev->data->mtu = new_mtu;
+               goto set_mtu_done;
+       }
+
+       /* Rx scatter is enabled, so reconfigure the RQs on the fly. The point
+        * is to change the Rx scatter mode if necessary for better
+        * performance. I.e. if the MTU was greater than the mbuf size and now
+        * it's less, scatter Rx doesn't have to be used and vice versa.
+        */
+       rte_spinlock_lock(&enic->mtu_lock);
+
+       /* Stop traffic on all RQs */
+       for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
+               rq = &enic->rq[rq_idx];
+               if (rq->is_sop && rq->in_use) {
+                       rc = enic_stop_rq(enic,
+                                         enic_sop_rq_idx_to_rte_idx(rq_idx));
+                       if (rc) {
+                               dev_err(enic, "Failed to stop Rq %u\n", rq_idx);
+                               goto set_mtu_done;
+                       }
+               }
+       }
+
+       /* replace the Rx function with a no-op to avoid getting stale pkts */
+       eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
+       rte_mb();
+
+       /* Allow time for threads to exit the real Rx function. */
+       usleep(100000);
+
+       /* now it is safe to reconfigure the RQs */
+
        /* update the mtu */
        eth_dev->data->mtu = new_mtu;
 
+       /* free and reallocate RQs with the new MTU */
+       for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
+               rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
+
+               enic_free_rq(rq);
+               rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
+                                  rq->tot_nb_desc, rq->rx_free_thresh);
+               if (rc) {
+                       dev_err(enic,
+                               "Fatal MTU alloc error- No traffic will pass\n");
+                       goto set_mtu_done;
+               }
+
+               rc = enic_reinit_rq(enic, rq_idx);
+               if (rc) {
+                       dev_err(enic,
+                               "Fatal MTU RQ reinit- No traffic will pass\n");
+                       goto set_mtu_done;
+               }
+       }
+
+       /* put back the real receive function */
+       rte_mb();
+       eth_dev->rx_pkt_burst = enic_recv_pkts;
+       rte_mb();
+
+       /* restart Rx traffic */
+       for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
+               rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
+               if (rq->is_sop && rq->in_use)
+                       enic_start_rq(enic, rq_idx);
+       }
+
+set_mtu_done:
        dev_info(enic, "MTU changed from %u to %u\n",  old_mtu, new_mtu);
-       return 0;
+       rte_spinlock_unlock(&enic->mtu_lock);
+       return rc;
 }
 
 static int enic_dev_init(struct enic *enic)
@@ -1125,24 +1290,32 @@ static int enic_dev_init(struct enic *enic)
                return err;
        }
 
+       /* Get available resource counts */
+       enic_get_res_counts(enic);
+       if (enic->conf_rq_count == 1) {
+               dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
+               dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
+               dev_err(enic, "See the ENIC PMD guide for more information.\n");
+               return -EINVAL;
+       }
+
        /* Get the supported filters */
        enic_fdir_info(enic);
 
-       eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr", ETH_ALEN, 0);
+       eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr", ETH_ALEN
+                                               * ENIC_MAX_MAC_ADDR, 0);
        if (!eth_dev->data->mac_addrs) {
                dev_err(enic, "mac addr storage alloc failed, aborting.\n");
                return -1;
        }
        ether_addr_copy((struct ether_addr *) enic->mac_addr,
-               &eth_dev->data->mac_addrs[0]);
-
-
-       /* Get available resource counts
-       */
-       enic_get_res_counts(enic);
+                       eth_dev->data->mac_addrs);
 
        vnic_dev_set_reset_flag(enic->vdev, 0);
 
+       /* set up link status checking */
+       vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
+
        return 0;
 
 }
index 140c6ef..8a230a1 100644 (file)
@@ -89,11 +89,10 @@ int enic_get_vnic_config(struct enic *enic)
        /* max packet size is only defined in newer VIC firmware
         * and will be 0 for legacy firmware and VICs
         */
-       if (c->max_pkt_size > ENIC_DEFAULT_RX_MAX_PKT_SIZE)
+       if (c->max_pkt_size > ENIC_DEFAULT_MAX_PKT_SIZE)
                enic->max_mtu = c->max_pkt_size - (ETHER_HDR_LEN + 4);
        else
-               enic->max_mtu = ENIC_DEFAULT_RX_MAX_PKT_SIZE 
-                        - (ETHER_HDR_LEN + 4);
+               enic->max_mtu = ENIC_DEFAULT_MAX_PKT_SIZE - (ETHER_HDR_LEN + 4);
        if (c->mtu == 0)
                c->mtu = 1500;
 
index cda2da1..303530e 100644 (file)
 #define ENIC_MIN_MTU                   68
 
 /* Does not include (possible) inserted VLAN tag and FCS */
-#define ENIC_DEFAULT_RX_MAX_PKT_SIZE   9022
-
-/* Does not include (possible) inserted VLAN tag and FCS */
-#define ENIC_TX_MAX_PKT_SIZE               9208
-
+#define ENIC_DEFAULT_MAX_PKT_SIZE      9022
 
 #define ENIC_MULTICAST_PERFECT_FILTERS 32
 #define ENIC_UNICAST_PERFECT_FILTERS   32
index cd15502..26b83ae 100644 (file)
@@ -37,6 +37,9 @@
 #include "enic_compat.h"
 #include "rq_enet_desc.h"
 #include "enic.h"
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
 
 #define RTE_PMD_USE_PREFETCH
 
@@ -129,6 +132,60 @@ enic_cq_rx_desc_n_bytes(struct cq_desc *cqd)
                CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
 }
 
+/* Find the offset to L5. This is needed by the enic TSO implementation.
+ * Return 0 if the packet is not TCP or the header length can't be determined.
+ */
+static inline uint8_t tso_header_len(struct rte_mbuf *mbuf)
+{
+       struct ether_hdr *eh;
+       struct vlan_hdr *vh;
+       struct ipv4_hdr *ip4;
+       struct ipv6_hdr *ip6;
+       struct tcp_hdr *th;
+       uint8_t hdr_len;
+       uint16_t ether_type;
+
+       /* offset past Ethernet header */
+       eh = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
+       ether_type = eh->ether_type;
+       hdr_len = sizeof(struct ether_hdr);
+       if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) {
+               vh = rte_pktmbuf_mtod_offset(mbuf, struct vlan_hdr *, hdr_len);
+               ether_type = vh->eth_proto;
+               hdr_len += sizeof(struct vlan_hdr);
+       }
+
+       /* offset past IP header */
+       switch (rte_be_to_cpu_16(ether_type)) {
+       case ETHER_TYPE_IPv4:
+               ip4 = rte_pktmbuf_mtod_offset(mbuf, struct ipv4_hdr *, hdr_len);
+               if (ip4->next_proto_id != IPPROTO_TCP)
+                       return 0;
+               hdr_len += (ip4->version_ihl & 0xf) * 4;
+               break;
+       case ETHER_TYPE_IPv6:
+               ip6 = rte_pktmbuf_mtod_offset(mbuf, struct ipv6_hdr *, hdr_len);
+               if (ip6->proto != IPPROTO_TCP)
+                       return 0;
+               hdr_len += sizeof(struct ipv6_hdr);
+               break;
+       default:
+               return 0;
+       }
+
+       if ((hdr_len + sizeof(struct tcp_hdr)) > mbuf->pkt_len)
+               return 0;
+
+       /* offset past TCP header */
+       th = rte_pktmbuf_mtod_offset(mbuf, struct tcp_hdr *, hdr_len);
+       hdr_len += (th->data_off >> 4) * 4;
+
+       if (hdr_len > mbuf->pkt_len)
+               return 0;
+
+       return hdr_len;
+}
+
 static inline uint8_t
 enic_cq_rx_check_err(struct cq_desc *cqd)
 {
@@ -149,30 +206,18 @@ enic_cq_rx_flags_to_pkt_type(struct cq_desc *cqd)
        uint8_t cqrd_flags = cqrd->flags;
        static const uint32_t cq_type_table[128] __rte_cache_aligned = {
                [0x00] = RTE_PTYPE_UNKNOWN,
-               [0x20] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN
-                         | RTE_PTYPE_L4_NONFRAG,
-               [0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN
-                         | RTE_PTYPE_L4_UDP,
-               [0x24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN
-                         | RTE_PTYPE_L4_TCP,
-               [0x60] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN
-                         | RTE_PTYPE_L4_FRAG,
-               [0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN
-                         | RTE_PTYPE_L4_UDP,
-               [0x64] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN
-                         | RTE_PTYPE_L4_TCP,
-               [0x10] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN
-                         | RTE_PTYPE_L4_NONFRAG,
-               [0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN
-                         | RTE_PTYPE_L4_UDP,
-               [0x14] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN
-                         | RTE_PTYPE_L4_TCP,
-               [0x50] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN
-                         | RTE_PTYPE_L4_FRAG,
-               [0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN
-                         | RTE_PTYPE_L4_UDP,
-               [0x54] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN
-                         | RTE_PTYPE_L4_TCP,
+               [0x20] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG,
+               [0x22] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP,
+               [0x24] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP,
+               [0x60] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
+               [0x62] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP,
+               [0x64] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP,
+               [0x10] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG,
+               [0x12] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP,
+               [0x14] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP,
+               [0x50] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
+               [0x52] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP,
+               [0x54] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP,
                /* All others reserved */
        };
        cqrd_flags &= CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT
@@ -185,9 +230,10 @@ static inline void
 enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf)
 {
        struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
-       uint16_t ciflags, bwflags, pkt_flags = 0;
+       uint16_t ciflags, bwflags, pkt_flags = 0, vlan_tci;
        ciflags = enic_cq_rx_desc_ciflags(cqrd);
        bwflags = enic_cq_rx_desc_bwflags(cqrd);
+       vlan_tci = enic_cq_rx_desc_vlan(cqrd);
 
        mbuf->ol_flags = 0;
 
@@ -195,13 +241,17 @@ enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf)
        if (unlikely(!enic_cq_rx_desc_eop(ciflags)))
                goto mbuf_flags_done;
 
-       /* VLAN stripping */
+       /* VLAN STRIPPED flag. The L2 packet type is also updated here. */
        if (bwflags & CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) {
                pkt_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
-               mbuf->vlan_tci = enic_cq_rx_desc_vlan(cqrd);
+               mbuf->packet_type |= RTE_PTYPE_L2_ETHER;
        } else {
-               mbuf->vlan_tci = 0;
+               if (vlan_tci != 0)
+                       mbuf->packet_type |= RTE_PTYPE_L2_ETHER_VLAN;
+               else
+                       mbuf->packet_type |= RTE_PTYPE_L2_ETHER;
        }
+       mbuf->vlan_tci = vlan_tci;
 
        /* RSS flag */
        if (enic_cq_rx_desc_rss_type(cqrd)) {
@@ -210,13 +260,25 @@ enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf)
        }
 
        /* checksum flags */
-       if (!enic_cq_rx_desc_csum_not_calc(cqrd) &&
-               (mbuf->packet_type & RTE_PTYPE_L3_IPV4)) {
-               if (unlikely(!enic_cq_rx_desc_ipv4_csum_ok(cqrd)))
-                       pkt_flags |= PKT_RX_IP_CKSUM_BAD;
-               if (mbuf->packet_type & (RTE_PTYPE_L4_UDP | RTE_PTYPE_L4_TCP)) {
-                       if (unlikely(!enic_cq_rx_desc_tcp_udp_csum_ok(cqrd)))
-                               pkt_flags |= PKT_RX_L4_CKSUM_BAD;
+       if (mbuf->packet_type & RTE_PTYPE_L3_IPV4) {
+               if (enic_cq_rx_desc_csum_not_calc(cqrd))
+                       pkt_flags |= (PKT_RX_IP_CKSUM_UNKNOWN &
+                                    PKT_RX_L4_CKSUM_UNKNOWN);
+               else {
+                       uint32_t l4_flags;
+                       l4_flags = mbuf->packet_type & RTE_PTYPE_L4_MASK;
+
+                       if (enic_cq_rx_desc_ipv4_csum_ok(cqrd))
+                               pkt_flags |= PKT_RX_IP_CKSUM_GOOD;
+                       else
+                               pkt_flags |= PKT_RX_IP_CKSUM_BAD;
+
+                       if (l4_flags & (RTE_PTYPE_L4_UDP | RTE_PTYPE_L4_TCP)) {
+                               if (enic_cq_rx_desc_tcp_udp_csum_ok(cqrd))
+                                       pkt_flags |= PKT_RX_L4_CKSUM_GOOD;
+                               else
+                                       pkt_flags |= PKT_RX_L4_CKSUM_BAD;
+                       }
                }
        }
 
@@ -224,6 +286,17 @@ enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf)
        mbuf->ol_flags = pkt_flags;
 }
 
+/* Dummy receive function that replaces the actual receive function so
+ * that reconfiguration operations can be performed safely.
+ */
+uint16_t
+enic_dummy_recv_pkts(__rte_unused void *rx_queue,
+                    __rte_unused struct rte_mbuf **rx_pkts,
+                    __rte_unused uint16_t nb_pkts)
+{
+       return 0;
+}
+
 uint16_t
 enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
               uint16_t nb_pkts)
@@ -373,10 +446,11 @@ enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 
                rte_mb();
                if (data_rq->in_use)
-                       iowrite32(data_rq->posted_index,
-                                 &data_rq->ctrl->posted_index);
+                       iowrite32_relaxed(data_rq->posted_index,
+                                         &data_rq->ctrl->posted_index);
                rte_compiler_barrier();
-               iowrite32(sop_rq->posted_index, &sop_rq->ctrl->posted_index);
+               iowrite32_relaxed(sop_rq->posted_index,
+                                 &sop_rq->ctrl->posted_index);
        }
 
 
@@ -459,6 +533,8 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
        uint8_t vlan_tag_insert;
        uint8_t eop;
        uint64_t bus_addr;
+       uint8_t offload_mode;
+       uint16_t header_len;
 
        enic_cleanup_wq(enic, wq);
        wq_desc_avail = vnic_wq_desc_avail(wq);
@@ -470,23 +546,16 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
        for (index = 0; index < nb_pkts; index++) {
                tx_pkt = *tx_pkts++;
-               pkt_len = tx_pkt->pkt_len;
-               data_len = tx_pkt->data_len;
-               ol_flags = tx_pkt->ol_flags;
                nb_segs = tx_pkt->nb_segs;
-
-               if (pkt_len > ENIC_TX_MAX_PKT_SIZE) {
-                       rte_pktmbuf_free(tx_pkt);
-                       rte_atomic64_inc(&enic->soft_stats.tx_oversized);
-                       continue;
-               }
-
                if (nb_segs > wq_desc_avail) {
                        if (index > 0)
                                goto post;
                        goto done;
                }
 
+               pkt_len = tx_pkt->pkt_len;
+               data_len = tx_pkt->data_len;
+               ol_flags = tx_pkt->ol_flags;
                mss = 0;
                vlan_id = 0;
                vlan_tag_insert = 0;
@@ -497,13 +566,17 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                desc_p = descs + head_idx;
 
                eop = (data_len == pkt_len);
-
-               if (ol_flags & ol_flags_mask) {
-                       if (ol_flags & PKT_TX_VLAN_PKT) {
-                               vlan_tag_insert = 1;
-                               vlan_id = tx_pkt->vlan_tci;
+               offload_mode = WQ_ENET_OFFLOAD_MODE_CSUM;
+               header_len = 0;
+
+               if (tx_pkt->tso_segsz) {
+                       header_len = tso_header_len(tx_pkt);
+                       if (header_len) {
+                               offload_mode = WQ_ENET_OFFLOAD_MODE_TSO;
+                               mss = tx_pkt->tso_segsz;
                        }
-
+               }
+               if ((ol_flags & ol_flags_mask) && (header_len == 0)) {
                        if (ol_flags & PKT_TX_IP_CKSUM)
                                mss |= ENIC_CALC_IP_CKSUM;
 
@@ -516,8 +589,14 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                        }
                }
 
-               wq_enet_desc_enc(&desc_tmp, bus_addr, data_len, mss, 0, 0, eop,
-                                eop, 0, vlan_tag_insert, vlan_id, 0);
+               if (ol_flags & PKT_TX_VLAN_PKT) {
+                       vlan_tag_insert = 1;
+                       vlan_id = tx_pkt->vlan_tci;
+               }
+
+               wq_enet_desc_enc(&desc_tmp, bus_addr, data_len, mss, header_len,
+                                offload_mode, eop, eop, 0, vlan_tag_insert,
+                                vlan_id, 0);
 
                *desc_p = desc_tmp;
                buf = &wq->bufs[head_idx];
@@ -537,8 +616,9 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                                           + tx_pkt->data_off);
                                wq_enet_desc_enc((struct wq_enet_desc *)
                                                 &desc_tmp, bus_addr, data_len,
-                                                mss, 0, 0, eop, eop, 0,
-                                                vlan_tag_insert, vlan_id, 0);
+                                                mss, 0, offload_mode, eop, eop,
+                                                0, vlan_tag_insert, vlan_id,
+                                                0);
 
                                *desc_p = desc_tmp;
                                buf = &wq->bufs[head_idx];
@@ -550,7 +630,7 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
        }
  post:
        rte_wmb();
-       iowrite32(head_idx, &wq->ctrl->posted_index);
+       iowrite32_relaxed(head_idx, &wq->ctrl->posted_index);
  done:
        wq->ring.desc_avail = wq_desc_avail;
        wq->head_idx = head_idx;
index a21daa2..f07b678 100644 (file)
@@ -39,6 +39,8 @@ POSSIBILITY OF SUCH DAMAGE.
 #include <rte_atomic.h>
 #include <rte_byteorder.h>
 #include <rte_cycles.h>
+#include <rte_io.h>
+
 #include "../fm10k_logs.h"
 
 /* TODO: this does not look like it should be used... */
@@ -88,17 +90,16 @@ typedef int        bool;
 #endif
 
 /* offsets are WORD offsets, not BYTE offsets */
-#define FM10K_WRITE_REG(hw, reg, val)    \
-       ((((volatile uint32_t *)(hw)->hw_addr)[(reg)]) = ((uint32_t)(val)))
-#define FM10K_READ_REG(hw, reg)          \
-       (((volatile uint32_t *)(hw)->hw_addr)[(reg)])
+#define FM10K_WRITE_REG(hw, reg, val)          \
+       rte_write32((val), ((hw)->hw_addr + (reg)))
+
+#define FM10K_READ_REG(hw, reg) rte_read32(((hw)->hw_addr + (reg)))
+
 #define FM10K_WRITE_FLUSH(a) FM10K_READ_REG(a, FM10K_CTRL)
 
-#define FM10K_PCI_REG(reg) (*((volatile uint32_t *)(reg)))
+#define FM10K_PCI_REG(reg) rte_read32(reg)
 
-#define FM10K_PCI_REG_WRITE(reg, value) do { \
-       FM10K_PCI_REG((reg)) = (value); \
-} while (0)
+#define FM10K_PCI_REG_WRITE(reg, value) rte_write32((value), (reg))
 
 /* not implemented */
 #define FM10K_READ_PCI_WORD(hw, reg)     0
index 05aa1a2..c6fed21 100644 (file)
@@ -69,6 +69,9 @@
 #define FM10K_MAX_RX_DESC  (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc))
 #define FM10K_MAX_TX_DESC  (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc))
 
+#define FM10K_TX_MAX_SEG     UINT8_MAX
+#define FM10K_TX_MAX_MTU_SEG UINT8_MAX
+
 /*
  * byte alignment for HW RX data buffer
  * Datasheet requires RX buffer addresses shall either be 512-byte aligned or
@@ -356,6 +359,9 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
 uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
        uint16_t nb_pkts);
 
+uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+       uint16_t nb_pkts);
+
 int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq);
 int fm10k_rx_vec_condition_check(struct rte_eth_dev *);
 void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq);
index 01f4a72..dd021e4 100644 (file)
 #define MAX_QUERY_SWITCH_STATE_TIMES 10
 /* Wait interval to get switch status */
 #define WAIT_SWITCH_MSG_US    100000
+/* A period of quiescence for the switch */
+#define FM10K_SWITCH_QUIESCE_US 10000
 /* Number of chars per uint32 type */
 #define CHARS_PER_UINT32 (sizeof(uint32_t))
 #define BIT_MASK_PER_UINT32 ((1 << CHARS_PER_UINT32) - 1)
 
 /* default 1:1 map from queue ID to interrupt vector ID */
-#define Q2V(dev, queue_id) (dev->pci_dev->intr_handle.intr_vec[queue_id])
+#define Q2V(pci_dev, queue_id) ((pci_dev)->intr_handle.intr_vec[queue_id])
 
 /* First 64 Logical ports for PF/VMDQ, second 64 for Flow director */
 #define MAX_LPORT_NUM    128
@@ -675,7 +677,7 @@ fm10k_dev_tx_init(struct rte_eth_dev *dev)
                /* Enable use of FTAG bit in TX descriptor, PFVTCTL
                 * register is read-only for VF.
                 */
-               if (fm10k_check_ftag(dev->pci_dev->devargs)) {
+               if (fm10k_check_ftag(dev->device->devargs)) {
                        if (hw->mac.type == fm10k_mac_pf) {
                                FM10K_WRITE_REG(hw, FM10K_PFVTCTL(i),
                                                FM10K_PFVTCTL_FTAG_DESC_ENABLE);
@@ -693,8 +695,9 @@ fm10k_dev_tx_init(struct rte_eth_dev *dev)
                                base_addr >> (CHAR_BIT * sizeof(uint32_t)));
                FM10K_WRITE_REG(hw, FM10K_TDLEN(i), size);
 
-               /* assign default SGLORT for each TX queue */
-               FM10K_WRITE_REG(hw, FM10K_TX_SGLORT(i), hw->mac.dglort_map);
+               /* assign default SGLORT for each TX queue (PF only) */
+               if (hw->mac.type == fm10k_mac_pf)
+                       FM10K_WRITE_REG(hw, FM10K_TX_SGLORT(i), hw->mac.dglort_map);
        }
 
        /* set up vector or scalar TX function as appropriate */
@@ -708,7 +711,8 @@ fm10k_dev_rx_init(struct rte_eth_dev *dev)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct fm10k_macvlan_filter_info *macvlan;
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device);
+       struct rte_intr_handle *intr_handle = &pdev->intr_handle;
        int i, ret;
        struct fm10k_rx_queue *rxq;
        uint64_t base_addr;
@@ -722,13 +726,13 @@ fm10k_dev_rx_init(struct rte_eth_dev *dev)
        i = 0;
        if (rte_intr_dp_is_en(intr_handle)) {
                for (; i < dev->data->nb_rx_queues; i++) {
-                       FM10K_WRITE_REG(hw, FM10K_RXINT(i), Q2V(dev, i));
+                       FM10K_WRITE_REG(hw, FM10K_RXINT(i), Q2V(pdev, i));
                        if (hw->mac.type == fm10k_mac_pf)
-                               FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(dev, i)),
+                               FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(pdev, i)),
                                        FM10K_ITR_AUTOMASK |
                                        FM10K_ITR_MASK_CLEAR);
                        else
-                               FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(dev, i)),
+                               FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, i)),
                                        FM10K_ITR_AUTOMASK |
                                        FM10K_ITR_MASK_CLEAR);
                }
@@ -1168,7 +1172,8 @@ static void
 fm10k_dev_stop(struct rte_eth_dev *dev)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device);
+       struct rte_intr_handle *intr_handle = &pdev->intr_handle;
        int i;
 
        PMD_INIT_FUNC_TRACE();
@@ -1187,10 +1192,10 @@ fm10k_dev_stop(struct rte_eth_dev *dev)
                        FM10K_WRITE_REG(hw, FM10K_RXINT(i),
                                3 << FM10K_RXINT_TIMER_SHIFT);
                        if (hw->mac.type == fm10k_mac_pf)
-                               FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(dev, i)),
+                               FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(pdev, i)),
                                        FM10K_ITR_MASK_SET);
                        else
-                               FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(dev, i)),
+                               FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, i)),
                                        FM10K_ITR_MASK_SET);
                }
        }
@@ -1233,6 +1238,9 @@ fm10k_dev_close(struct rte_eth_dev *dev)
                MAX_LPORT_NUM, false);
        fm10k_mbx_unlock(hw);
 
+       /* allow 10ms for device to quiesce */
+       rte_delay_us(FM10K_SWITCH_QUIESCE_US);
+
        /* Stop mailbox service first */
        fm10k_close_mbx_service(hw);
        fm10k_dev_stop(dev);
@@ -1309,6 +1317,7 @@ fm10k_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
        for (i = 0; i < FM10K_NB_HW_XSTATS; i++) {
                xstats[count].value = *(uint64_t *)(((char *)hw_stats) +
                        fm10k_hw_stats_strings[count].offset);
+               xstats[count].id = count;
                count++;
        }
 
@@ -1318,12 +1327,14 @@ fm10k_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
                        xstats[count].value =
                                *(uint64_t *)(((char *)&hw_stats->q[q]) +
                                fm10k_hw_stats_rx_q_strings[i].offset);
+                       xstats[count].id = count;
                        count++;
                }
                for (i = 0; i < FM10K_NB_TX_Q_XSTATS; i++) {
                        xstats[count].value =
                                *(uint64_t *)(((char *)&hw_stats->q[q]) +
                                fm10k_hw_stats_tx_q_strings[i].offset);
+                       xstats[count].id = count;
                        count++;
                }
        }
@@ -1381,16 +1392,18 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev,
        struct rte_eth_dev_info *dev_info)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device);
 
        PMD_INIT_FUNC_TRACE();
 
+       dev_info->pci_dev            = pdev;
        dev_info->min_rx_bufsize     = FM10K_MIN_RX_BUF_SIZE;
        dev_info->max_rx_pktlen      = FM10K_MAX_PKT_SIZE;
        dev_info->max_rx_queues      = hw->mac.max_queues;
        dev_info->max_tx_queues      = hw->mac.max_queues;
        dev_info->max_mac_addrs      = FM10K_MAX_MACADDR_NUM;
        dev_info->max_hash_mac_addrs = 0;
-       dev_info->max_vfs            = dev->pci_dev->max_vfs;
+       dev_info->max_vfs            = pdev->max_vfs;
        dev_info->vmdq_pool_base     = 0;
        dev_info->vmdq_queue_base    = 0;
        dev_info->max_vmdq_pools     = ETH_32_POOLS;
@@ -1441,6 +1454,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev,
                .nb_max = FM10K_MAX_TX_DESC,
                .nb_min = FM10K_MIN_TX_DESC,
                .nb_align = FM10K_MULT_TX_DESC,
+               .nb_seg_max = FM10K_TX_MAX_SEG,
+               .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG,
        };
 
        dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G |
@@ -2327,15 +2342,16 @@ static int
 fm10k_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device);
 
        /* Enable ITR */
        if (hw->mac.type == fm10k_mac_pf)
-               FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(dev, queue_id)),
+               FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(pdev, queue_id)),
                        FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR);
        else
-               FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(dev, queue_id)),
+               FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, queue_id)),
                        FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR);
-       rte_intr_enable(&dev->pci_dev->intr_handle);
+       rte_intr_enable(&pdev->intr_handle);
        return 0;
 }
 
@@ -2343,13 +2359,14 @@ static int
 fm10k_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device);
 
        /* Disable ITR */
        if (hw->mac.type == fm10k_mac_pf)
-               FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(dev, queue_id)),
+               FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(pdev, queue_id)),
                        FM10K_ITR_MASK_SET);
        else
-               FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(dev, queue_id)),
+               FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, queue_id)),
                        FM10K_ITR_MASK_SET);
        return 0;
 }
@@ -2358,7 +2375,8 @@ static int
 fm10k_dev_rxq_interrupt_setup(struct rte_eth_dev *dev)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device);
+       struct rte_intr_handle *intr_handle = &pdev->intr_handle;
        uint32_t intr_vector, vec;
        uint16_t queue_id;
        int result = 0;
@@ -2374,7 +2392,7 @@ fm10k_dev_rxq_interrupt_setup(struct rte_eth_dev *dev)
        intr_vector = dev->data->nb_rx_queues;
 
        /* disable interrupt first */
-       rte_intr_disable(&dev->pci_dev->intr_handle);
+       rte_intr_disable(intr_handle);
        if (hw->mac.type == fm10k_mac_pf)
                fm10k_dev_disable_intr_pf(dev);
        else
@@ -2409,7 +2427,7 @@ fm10k_dev_rxq_interrupt_setup(struct rte_eth_dev *dev)
                fm10k_dev_enable_intr_pf(dev);
        else
                fm10k_dev_enable_intr_vf(dev);
-       rte_intr_enable(&dev->pci_dev->intr_handle);
+       rte_intr_enable(intr_handle);
        hw->mac.ops.update_int_moderator(hw);
        return result;
 }
@@ -2524,7 +2542,7 @@ error:
  */
 static void
 fm10k_dev_interrupt_handler_pf(
-                       __rte_unused struct rte_intr_handle *handle,
+                       struct rte_intr_handle *handle,
                        void *param)
 {
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
@@ -2575,7 +2593,7 @@ fm10k_dev_interrupt_handler_pf(
        FM10K_WRITE_REG(hw, FM10K_ITR(0), FM10K_ITR_AUTOMASK |
                                        FM10K_ITR_MASK_CLEAR);
        /* Re-enable interrupt from host side */
-       rte_intr_enable(&(dev->pci_dev->intr_handle));
+       rte_intr_enable(handle);
 }
 
 /**
@@ -2591,7 +2609,7 @@ fm10k_dev_interrupt_handler_pf(
  */
 static void
 fm10k_dev_interrupt_handler_vf(
-                       __rte_unused struct rte_intr_handle *handle,
+                       struct rte_intr_handle *handle,
                        void *param)
 {
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
@@ -2609,7 +2627,7 @@ fm10k_dev_interrupt_handler_vf(
        FM10K_WRITE_REG(hw, FM10K_VFITR(0), FM10K_ITR_AUTOMASK |
                                        FM10K_ITR_MASK_CLEAR);
        /* Re-enable interrupt from host side */
-       rte_intr_enable(&(dev->pci_dev->intr_handle));
+       rte_intr_enable(handle);
 }
 
 /* Mailbox message handler in VF */
@@ -2731,7 +2749,7 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
        int use_sse = 1;
        uint16_t tx_ftag_en = 0;
 
-       if (fm10k_check_ftag(dev->pci_dev->devargs))
+       if (fm10k_check_ftag(dev->device->devargs))
                tx_ftag_en = 1;
 
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
@@ -2749,8 +2767,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
                        fm10k_txq_vec_setup(txq);
                }
                dev->tx_pkt_burst = fm10k_xmit_pkts_vec;
+               dev->tx_pkt_prepare = NULL;
        } else {
                dev->tx_pkt_burst = fm10k_xmit_pkts;
+               dev->tx_pkt_prepare = fm10k_prep_pkts;
                PMD_INIT_LOG(DEBUG, "Use regular Tx func");
        }
 }
@@ -2762,7 +2782,7 @@ fm10k_set_rx_function(struct rte_eth_dev *dev)
        uint16_t i, rx_using_sse;
        uint16_t rx_ftag_en = 0;
 
-       if (fm10k_check_ftag(dev->pci_dev->devargs))
+       if (fm10k_check_ftag(dev->device->devargs))
                rx_ftag_en = 1;
 
        /* In order to allow Vector Rx there are a few configuration
@@ -2821,6 +2841,8 @@ static int
 eth_fm10k_dev_init(struct rte_eth_dev *dev)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device);
+       struct rte_intr_handle *intr_handle = &pdev->intr_handle;
        int diag, i;
        struct fm10k_macvlan_filter_info *macvlan;
 
@@ -2829,23 +2851,25 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
        dev->dev_ops = &fm10k_eth_dev_ops;
        dev->rx_pkt_burst = &fm10k_recv_pkts;
        dev->tx_pkt_burst = &fm10k_xmit_pkts;
+       dev->tx_pkt_prepare = &fm10k_prep_pkts;
 
        /* only initialize in the primary process */
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return 0;
 
-       rte_eth_copy_pci_info(dev, dev->pci_dev);
+       rte_eth_copy_pci_info(dev, pdev);
+       dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE;
 
        macvlan = FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private);
        memset(macvlan, 0, sizeof(*macvlan));
        /* Vendor and Device ID need to be set before init of shared code */
        memset(hw, 0, sizeof(*hw));
-       hw->device_id = dev->pci_dev->id.device_id;
-       hw->vendor_id = dev->pci_dev->id.vendor_id;
-       hw->subsystem_device_id = dev->pci_dev->id.subsystem_device_id;
-       hw->subsystem_vendor_id = dev->pci_dev->id.subsystem_vendor_id;
+       hw->device_id = pdev->id.device_id;
+       hw->vendor_id = pdev->id.vendor_id;
+       hw->subsystem_device_id = pdev->id.subsystem_device_id;
+       hw->subsystem_vendor_id = pdev->id.subsystem_vendor_id;
        hw->revision_id = 0;
-       hw->hw_addr = (void *)dev->pci_dev->mem_resource[0].addr;
+       hw->hw_addr = (void *)pdev->mem_resource[0].addr;
        if (hw->hw_addr == NULL) {
                PMD_INIT_LOG(ERR, "Bad mem resource."
                        " Try to blacklist unused devices.");
@@ -2915,20 +2939,20 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
        /*PF/VF has different interrupt handling mechanism */
        if (hw->mac.type == fm10k_mac_pf) {
                /* register callback func to eal lib */
-               rte_intr_callback_register(&(dev->pci_dev->intr_handle),
+               rte_intr_callback_register(intr_handle,
                        fm10k_dev_interrupt_handler_pf, (void *)dev);
 
                /* enable MISC interrupt */
                fm10k_dev_enable_intr_pf(dev);
        } else { /* VF */
-               rte_intr_callback_register(&(dev->pci_dev->intr_handle),
+               rte_intr_callback_register(intr_handle,
                        fm10k_dev_interrupt_handler_vf, (void *)dev);
 
                fm10k_dev_enable_intr_vf(dev);
        }
 
        /* Enable intr after callback registered */
-       rte_intr_enable(&(dev->pci_dev->intr_handle));
+       rte_intr_enable(intr_handle);
 
        hw->mac.ops.update_int_moderator(hw);
 
@@ -2998,7 +3022,8 @@ static int
 eth_fm10k_dev_uninit(struct rte_eth_dev *dev)
 {
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-
+       struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device);
+       struct rte_intr_handle *intr_handle = &pdev->intr_handle;
        PMD_INIT_FUNC_TRACE();
 
        /* only uninitialize in the primary process */
@@ -3013,7 +3038,7 @@ eth_fm10k_dev_uninit(struct rte_eth_dev *dev)
        dev->tx_pkt_burst = NULL;
 
        /* disable uio/vfio intr */
-       rte_intr_disable(&(dev->pci_dev->intr_handle));
+       rte_intr_disable(intr_handle);
 
        /*PF/VF has different interrupt handling mechanism */
        if (hw->mac.type == fm10k_mac_pf) {
@@ -3021,13 +3046,13 @@ eth_fm10k_dev_uninit(struct rte_eth_dev *dev)
                fm10k_dev_disable_intr_pf(dev);
 
                /* unregister callback func to eal lib */
-               rte_intr_callback_unregister(&(dev->pci_dev->intr_handle),
+               rte_intr_callback_unregister(intr_handle,
                        fm10k_dev_interrupt_handler_pf, (void *)dev);
        } else {
                /* disable interrupt */
                fm10k_dev_disable_intr_vf(dev);
 
-               rte_intr_callback_unregister(&(dev->pci_dev->intr_handle),
+               rte_intr_callback_unregister(intr_handle,
                        fm10k_dev_interrupt_handler_vf, (void *)dev);
        }
 
@@ -3055,34 +3080,16 @@ static const struct rte_pci_id pci_id_fm10k_map[] = {
 
 static struct eth_driver rte_pmd_fm10k = {
        .pci_drv = {
-               .name = "rte_pmd_fm10k",
                .id_table = pci_id_fm10k_map,
-               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
-                       RTE_PCI_DRV_DETACHABLE,
+               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+               .probe = rte_eth_dev_pci_probe,
+               .remove = rte_eth_dev_pci_remove,
        },
        .eth_dev_init = eth_fm10k_dev_init,
        .eth_dev_uninit = eth_fm10k_dev_uninit,
        .dev_private_size = sizeof(struct fm10k_adapter),
 };
 
-/*
- * Driver initialization routine.
- * Invoked once at EAL init time.
- * Register itself as the [Poll Mode] Driver of PCI FM10K devices.
- */
-static int
-rte_pmd_fm10k_init(__rte_unused const char *name,
-       __rte_unused const char *params)
-{
-       PMD_INIT_FUNC_TRACE();
-       rte_eth_driver_register(&rte_pmd_fm10k);
-       return 0;
-}
-
-static struct rte_driver rte_fm10k_driver = {
-       .type = PMD_PDEV,
-       .init = rte_pmd_fm10k_init,
-};
-
-PMD_REGISTER_DRIVER(rte_fm10k_driver, fm10k);
-DRIVER_REGISTER_PCI_TABLE(fm10k, pci_id_fm10k_map);
+RTE_PMD_REGISTER_PCI(net_fm10k, rte_pmd_fm10k.pci_drv);
+RTE_PMD_REGISTER_PCI_TABLE(net_fm10k, pci_id_fm10k_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_fm10k, "* igb_uio | uio_pci_generic | vfio");
index 5b2d04b..144e5e6 100644 (file)
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -35,6 +35,7 @@
 
 #include <rte_ethdev.h>
 #include <rte_common.h>
+#include <rte_net.h>
 #include "fm10k.h"
 #include "base/fm10k_type.h"
 
@@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd)
 }
 #endif
 
+#define FM10K_TX_OFFLOAD_MASK (  \
+               PKT_TX_VLAN_PKT |        \
+               PKT_TX_IP_CKSUM |        \
+               PKT_TX_L4_MASK |         \
+               PKT_TX_TCP_SEG)
+
+#define FM10K_TX_OFFLOAD_NOTSUP_MASK \
+               (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK)
+
 /* @note: When this function is changed, make corresponding change to
  * fm10k_dev_supported_ptypes_get()
  */
@@ -96,6 +106,20 @@ rx_desc_to_ol_flags(struct rte_mbuf *m, const union fm10k_rx_desc *d)
 
        if (d->w.pkt_info & FM10K_RXD_RSSTYPE_MASK)
                m->ol_flags |= PKT_RX_RSS_HASH;
+
+       if (unlikely((d->d.staterr &
+               (FM10K_RXD_STATUS_IPCS | FM10K_RXD_STATUS_IPE)) ==
+               (FM10K_RXD_STATUS_IPCS | FM10K_RXD_STATUS_IPE)))
+               m->ol_flags |= PKT_RX_IP_CKSUM_BAD;
+       else
+               m->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
+
+       if (unlikely((d->d.staterr &
+               (FM10K_RXD_STATUS_L4CS | FM10K_RXD_STATUS_L4E)) ==
+               (FM10K_RXD_STATUS_L4CS | FM10K_RXD_STATUS_L4E)))
+               m->ol_flags |= PKT_RX_L4_CKSUM_BAD;
+       else
+               m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
 }
 
 uint16_t
@@ -583,3 +607,41 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
        return count;
 }
+
+uint16_t
+fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+               uint16_t nb_pkts)
+{
+       int i, ret;
+       struct rte_mbuf *m;
+
+       for (i = 0; i < nb_pkts; i++) {
+               m = tx_pkts[i];
+
+               if ((m->ol_flags & PKT_TX_TCP_SEG) &&
+                               (m->tso_segsz < FM10K_TSO_MINMSS)) {
+                       rte_errno = -EINVAL;
+                       return i;
+               }
+
+               if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) {
+                       rte_errno = -ENOTSUP;
+                       return i;
+               }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+               ret = rte_validate_tx_offload(m);
+               if (ret != 0) {
+                       rte_errno = ret;
+                       return i;
+               }
+#endif
+               ret = rte_net_intel_cksum_prepare(m);
+               if (ret != 0) {
+                       rte_errno = ret;
+                       return i;
+               }
+       }
+
+       return i;
+}
index 9ea747e..27f3e43 100644 (file)
@@ -67,6 +67,8 @@ fm10k_reset_tx_queue(struct fm10k_tx_queue *txq);
 #define RXEFLAG_SHIFT     (13)
 /* IPE/L4E flag shift */
 #define L3L4EFLAG_SHIFT     (14)
+/* shift right by 1 bit so PKT_RX_L4_CKSUM_GOOD fits into one byte */
+#define CKSUM_SHIFT     (1)
 
 static inline void
 fm10k_desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
@@ -92,11 +94,18 @@ fm10k_desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
                        0x0000, 0x0000, 0x0000, 0x0000,
                        0x0001, 0x0001, 0x0001, 0x0001);
 
+       /* mask the lower byte of ol_flags */
+       const __m128i ol_flags_msk = _mm_set_epi16(
+                       0x0000, 0x0000, 0x0000, 0x0000,
+                       0x00FF, 0x00FF, 0x00FF, 0x00FF);
+
        const __m128i l3l4cksum_flag = _mm_set_epi8(0, 0, 0, 0,
                        0, 0, 0, 0,
                        0, 0, 0, 0,
-                       PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
-                       PKT_RX_IP_CKSUM_BAD, PKT_RX_L4_CKSUM_BAD, 0);
+                       (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD) >> CKSUM_SHIFT,
+                       (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD) >> CKSUM_SHIFT,
+                       (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> CKSUM_SHIFT,
+                       (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> CKSUM_SHIFT);
 
        const __m128i rxe_flag = _mm_set_epi8(0, 0, 0, 0,
                        0, 0, 0, 0,
@@ -139,6 +148,10 @@ fm10k_desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
        /* Process L4/L3 checksum error flags */
        cksumflag = _mm_srli_epi16(cksumflag, L3L4EFLAG_SHIFT);
        cksumflag = _mm_shuffle_epi8(l3l4cksum_flag, cksumflag);
+
+       /* clean the higher byte and shift back the flag bits */
+       cksumflag = _mm_and_si128(cksumflag, ol_flags_msk);
+       cksumflag = _mm_slli_epi16(cksumflag, CKSUM_SHIFT);
        vtag1 = _mm_or_si128(cksumflag, vtag1);
 
        vol.dword = _mm_cvtsi128_si64(vtag1);
@@ -234,11 +247,8 @@ fm10k_rx_vec_condition_check(struct rte_eth_dev *dev)
        if (fconf->mode != RTE_FDIR_MODE_NONE)
                return -1;
 
-       /* - no csum error report support
-        * - no header split support
-        */
-       if (rxmode->hw_ip_checksum == 1 ||
-           rxmode->header_split == 1)
+       /* no header split support */
+       if (rxmode->header_split == 1)
                return -1;
 
        return 0;
@@ -406,7 +416,7 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
         */
        rxdp = rxq->hw_ring + next_dd;
 
-       _mm_prefetch((const void *)rxdp, _MM_HINT_T0);
+       rte_prefetch0(rxdp);
 
        /* See if we need to rearm the RX queue - gives the prefetch a bit
         * of time to act
@@ -468,6 +478,7 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
                /* Read desc statuses backwards to avoid race condition */
                /* A.1 load 4 pkts desc */
                descs0[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
+               rte_compiler_barrier();
 
                /* B.2 copy 2 mbuf point into rx_pkts  */
                _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);
@@ -476,8 +487,10 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
                mbp2 = _mm_loadu_si128((__m128i *)&mbufp[pos+2]);
 
                descs0[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
+               rte_compiler_barrier();
                /* B.1 load 2 mbuf point */
                descs0[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
+               rte_compiler_barrier();
                descs0[0] = _mm_loadu_si128((__m128i *)(rxdp));
 
                /* B.2 copy 2 mbuf point into rx_pkts  */
index 0d3a83f..5bdf3f7 100644 (file)
@@ -1077,11 +1077,11 @@ enum i40e_status_code i40e_clean_arq_element(struct i40e_hw *hw,
        desc = I40E_ADMINQ_DESC(hw->aq.arq, ntc);
        desc_idx = ntc;
 
+       hw->aq.arq_last_status =
+               (enum i40e_admin_queue_err)LE16_TO_CPU(desc->retval);
        flags = LE16_TO_CPU(desc->flags);
        if (flags & I40E_AQ_FLAG_ERR) {
                ret_code = I40E_ERR_ADMIN_QUEUE_ERROR;
-               hw->aq.arq_last_status =
-                       (enum i40e_admin_queue_err)LE16_TO_CPU(desc->retval);
                i40e_debug(hw,
                           I40E_DEBUG_AQ_MESSAGE,
                           "AQRX: Event received with error 0x%X.\n",
index 2b7a760..67cef7c 100644 (file)
@@ -139,12 +139,10 @@ enum i40e_admin_queue_opc {
        i40e_aqc_opc_list_func_capabilities     = 0x000A,
        i40e_aqc_opc_list_dev_capabilities      = 0x000B,
 
-#ifdef X722_SUPPORT
        /* Proxy commands */
        i40e_aqc_opc_set_proxy_config           = 0x0104,
        i40e_aqc_opc_set_ns_proxy_table_entry   = 0x0105,
 
-#endif
        /* LAA */
        i40e_aqc_opc_mac_address_read   = 0x0107,
        i40e_aqc_opc_mac_address_write  = 0x0108,
@@ -152,12 +150,11 @@ enum i40e_admin_queue_opc {
        /* PXE */
        i40e_aqc_opc_clear_pxe_mode     = 0x0110,
 
-#ifdef X722_SUPPORT
        /* WoL commands */
        i40e_aqc_opc_set_wol_filter     = 0x0120,
        i40e_aqc_opc_get_wake_reason    = 0x0121,
+       i40e_aqc_opc_clear_all_wol_filters = 0x025E,
 
-#endif
        /* internal switch commands */
        i40e_aqc_opc_get_switch_config          = 0x0200,
        i40e_aqc_opc_add_statistics             = 0x0201,
@@ -196,6 +193,7 @@ enum i40e_admin_queue_opc {
        i40e_aqc_opc_remove_control_packet_filter       = 0x025B,
        i40e_aqc_opc_add_cloud_filters          = 0x025C,
        i40e_aqc_opc_remove_cloud_filters       = 0x025D,
+       i40e_aqc_opc_clear_wol_switch_filters   = 0x025E,
 
        i40e_aqc_opc_add_mirror_rule    = 0x0260,
        i40e_aqc_opc_delete_mirror_rule = 0x0261,
@@ -223,6 +221,9 @@ enum i40e_admin_queue_opc {
        i40e_aqc_opc_suspend_port_tx                            = 0x041B,
        i40e_aqc_opc_resume_port_tx                             = 0x041C,
        i40e_aqc_opc_configure_partition_bw                     = 0x041D,
+       /* hmc */
+       i40e_aqc_opc_query_hmc_resource_profile = 0x0500,
+       i40e_aqc_opc_set_hmc_resource_profile   = 0x0501,
 
        /* phy commands*/
        i40e_aqc_opc_get_phy_abilities          = 0x0600,
@@ -278,12 +279,10 @@ enum i40e_admin_queue_opc {
        /* Tunnel commands */
        i40e_aqc_opc_add_udp_tunnel     = 0x0B00,
        i40e_aqc_opc_del_udp_tunnel     = 0x0B01,
-#ifdef X722_SUPPORT
        i40e_aqc_opc_set_rss_key        = 0x0B02,
        i40e_aqc_opc_set_rss_lut        = 0x0B03,
        i40e_aqc_opc_get_rss_key        = 0x0B04,
        i40e_aqc_opc_get_rss_lut        = 0x0B05,
-#endif
 
        /* Async Events */
        i40e_aqc_opc_event_lan_overflow         = 0x1001,
@@ -471,13 +470,15 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_cppm_configuration);
 /* Set ARP Proxy command / response (indirect 0x0104) */
 struct i40e_aqc_arp_proxy_data {
        __le16  command_flags;
-#define I40E_AQ_ARP_INIT_IPV4  0x0008
-#define I40E_AQ_ARP_UNSUP_CTL  0x0010
-#define I40E_AQ_ARP_ENA                0x0020
-#define I40E_AQ_ARP_ADD_IPV4   0x0040
-#define I40E_AQ_ARP_DEL_IPV4   0x0080
+#define I40E_AQ_ARP_INIT_IPV4  0x0800
+#define I40E_AQ_ARP_UNSUP_CTL  0x1000
+#define I40E_AQ_ARP_ENA                0x2000
+#define I40E_AQ_ARP_ADD_IPV4   0x4000
+#define I40E_AQ_ARP_DEL_IPV4   0x8000
        __le16  table_id;
-       __le32  pfpm_proxyfc;
+       __le32  enabled_offloads;
+#define I40E_AQ_ARP_DIRECTED_OFFLOAD_ENABLE    0x00000020
+#define I40E_AQ_ARP_OFFLOAD_ENABLE             0x00000800
        __le32  ip_addr;
        u8      mac_addr[6];
        u8      reserved[2];
@@ -492,17 +493,19 @@ struct i40e_aqc_ns_proxy_data {
        __le16  table_idx_ipv6_0;
        __le16  table_idx_ipv6_1;
        __le16  control;
-#define I40E_AQ_NS_PROXY_ADD_0         0x0100
-#define I40E_AQ_NS_PROXY_DEL_0         0x0200
-#define I40E_AQ_NS_PROXY_ADD_1         0x0400
-#define I40E_AQ_NS_PROXY_DEL_1         0x0800
-#define I40E_AQ_NS_PROXY_ADD_IPV6_0    0x1000
-#define I40E_AQ_NS_PROXY_DEL_IPV6_0    0x2000
-#define I40E_AQ_NS_PROXY_ADD_IPV6_1    0x4000
-#define I40E_AQ_NS_PROXY_DEL_IPV6_1    0x8000
-#define I40E_AQ_NS_PROXY_COMMAND_SEQ   0x0001
-#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL 0x0002
-#define I40E_AQ_NS_PROXY_INIT_MAC_TBL  0x0004
+#define I40E_AQ_NS_PROXY_ADD_0         0x0001
+#define I40E_AQ_NS_PROXY_DEL_0         0x0002
+#define I40E_AQ_NS_PROXY_ADD_1         0x0004
+#define I40E_AQ_NS_PROXY_DEL_1         0x0008
+#define I40E_AQ_NS_PROXY_ADD_IPV6_0    0x0010
+#define I40E_AQ_NS_PROXY_DEL_IPV6_0    0x0020
+#define I40E_AQ_NS_PROXY_ADD_IPV6_1    0x0040
+#define I40E_AQ_NS_PROXY_DEL_IPV6_1    0x0080
+#define I40E_AQ_NS_PROXY_COMMAND_SEQ   0x0100
+#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL 0x0200
+#define I40E_AQ_NS_PROXY_INIT_MAC_TBL  0x0400
+#define I40E_AQ_NS_PROXY_OFFLOAD_ENABLE        0x0800
+#define I40E_AQ_NS_PROXY_DIRECTED_OFFLOAD_ENABLE       0x1000
        u8      mac_addr_0[6];
        u8      mac_addr_1[6];
        u8      local_mac_addr[6];
@@ -532,7 +535,8 @@ struct i40e_aqc_mac_address_read {
 #define I40E_AQC_PORT_ADDR_VALID       0x40
 #define I40E_AQC_WOL_ADDR_VALID                0x80
 #define I40E_AQC_MC_MAG_EN_VALID       0x100
-#define I40E_AQC_ADDR_VALID_MASK       0x1F0
+#define I40E_AQC_WOL_PRESERVE_STATUS   0x200
+#define I40E_AQC_ADDR_VALID_MASK       0x3F0
        u8      reserved[6];
        __le32  addr_high;
        __le32  addr_low;
@@ -552,6 +556,8 @@ I40E_CHECK_STRUCT_LEN(24, i40e_aqc_mac_address_read_data);
 /* Manage MAC Address Write Command (0x0108) */
 struct i40e_aqc_mac_address_write {
        __le16  command_flags;
+#define I40E_AQC_MC_MAG_EN             0x0100
+#define I40E_AQC_WOL_PRESERVE_ON_PFR   0x0200
 #define I40E_AQC_WRITE_TYPE_LAA_ONLY   0x0000
 #define I40E_AQC_WRITE_TYPE_LAA_WOL    0x4000
 #define I40E_AQC_WRITE_TYPE_PORT       0x8000
@@ -575,15 +581,24 @@ struct i40e_aqc_clear_pxe {
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_clear_pxe);
 
-#ifdef X722_SUPPORT
 /* Set WoL Filter (0x0120) */
 
 struct i40e_aqc_set_wol_filter {
        __le16 filter_index;
 #define I40E_AQC_MAX_NUM_WOL_FILTERS   8
+#define I40E_AQC_SET_WOL_FILTER_TYPE_MAGIC_SHIFT       15
+#define I40E_AQC_SET_WOL_FILTER_TYPE_MAGIC_MASK        (0x1 << \
+               I40E_AQC_SET_WOL_FILTER_TYPE_MAGIC_SHIFT)
+
+#define I40E_AQC_SET_WOL_FILTER_INDEX_SHIFT            0
+#define I40E_AQC_SET_WOL_FILTER_INDEX_MASK     (0x7 << \
+               I40E_AQC_SET_WOL_FILTER_INDEX_SHIFT)
        __le16 cmd_flags;
 #define I40E_AQC_SET_WOL_FILTER                                0x8000
 #define I40E_AQC_SET_WOL_FILTER_NO_TCO_WOL             0x4000
+#define I40E_AQC_SET_WOL_FILTER_WOL_PRESERVE_ON_PFR    0x2000
+#define I40E_AQC_SET_WOL_FILTER_ACTION_CLEAR           0
+#define I40E_AQC_SET_WOL_FILTER_ACTION_SET             1
        __le16 valid_flags;
 #define I40E_AQC_SET_WOL_FILTER_ACTION_VALID           0x8000
 #define I40E_AQC_SET_WOL_FILTER_NO_TCO_ACTION_VALID    0x4000
@@ -594,24 +609,29 @@ struct i40e_aqc_set_wol_filter {
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_set_wol_filter);
 
+struct i40e_aqc_set_wol_filter_data {
+       u8 filter[128];
+       u8 mask[16];
+};
+
+I40E_CHECK_STRUCT_LEN(0x90, i40e_aqc_set_wol_filter_data);
+
 /* Get Wake Reason (0x0121) */
 
 struct i40e_aqc_get_wake_reason_completion {
        u8 reserved_1[2];
        __le16 wake_reason;
+#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_SHIFT     0
+#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_MASK (0xFF << \
+               I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_SHIFT)
+#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_RESERVED_SHIFT  8
+#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_RESERVED_MASK   (0xFF << \
+               I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_RESERVED_SHIFT)
        u8 reserved_2[12];
 };
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_get_wake_reason_completion);
 
-struct i40e_aqc_set_wol_filter_data {
-       u8 filter[128];
-       u8 mask[16];
-};
-
-I40E_CHECK_STRUCT_LEN(0x90, i40e_aqc_set_wol_filter_data);
-
-#endif /* X722_SUPPORT */
 /* Switch configuration commands (0x02xx) */
 
 /* Used by many indirect commands that only pass an seid and a buffer in the
@@ -694,6 +714,8 @@ struct i40e_aqc_set_port_parameters {
 #define I40E_AQ_SET_P_PARAMS_PAD_SHORT_PACKETS 2 /* must set! */
 #define I40E_AQ_SET_P_PARAMS_DOUBLE_VLAN_ENA   4
        __le16  bad_frame_vsi;
+#define I40E_AQ_SET_P_PARAMS_BFRAME_SEID_SHIFT 0x0
+#define I40E_AQ_SET_P_PARAMS_BFRAME_SEID_MASK  0x3FF
        __le16  default_seid;        /* reserved for command */
        u8      reserved[10];
 };
@@ -745,6 +767,7 @@ I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_resource_alloc_element_resp);
 /* Set Switch Configuration (direct 0x0205) */
 struct i40e_aqc_set_switch_config {
        __le16  flags;
+/* flags used for both fields below */
 #define I40E_AQ_SET_SWITCH_CFG_PROMISC         0x0001
 #define I40E_AQ_SET_SWITCH_CFG_L2_FILTER       0x0002
        __le16  valid_flags;
@@ -913,16 +936,12 @@ struct i40e_aqc_vsi_properties_data {
                                         I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT)
        /* queueing option section */
        u8      queueing_opt_flags;
-#ifdef X722_SUPPORT
 #define I40E_AQ_VSI_QUE_OPT_MULTICAST_UDP_ENA  0x04
 #define I40E_AQ_VSI_QUE_OPT_UNICAST_UDP_ENA    0x08
-#endif
 #define I40E_AQ_VSI_QUE_OPT_TCP_ENA    0x10
 #define I40E_AQ_VSI_QUE_OPT_FCOE_ENA   0x20
-#ifdef X722_SUPPORT
 #define I40E_AQ_VSI_QUE_OPT_RSS_LUT_PF 0x00
 #define I40E_AQ_VSI_QUE_OPT_RSS_LUT_VSI        0x40
-#endif
        u8      queueing_opt_reserved[3];
        /* scheduler section */
        u8      up_enable_bits;
@@ -1644,6 +1663,24 @@ struct i40e_aqc_configure_partition_bw_data {
 
 I40E_CHECK_STRUCT_LEN(0x22, i40e_aqc_configure_partition_bw_data);
 
+/* Get and set the active HMC resource profile and status.
+ * (direct 0x0500) and (direct 0x0501)
+ */
+struct i40e_aq_get_set_hmc_resource_profile {
+       u8      pm_profile;
+       u8      pe_vf_enabled;
+       u8      reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aq_get_set_hmc_resource_profile);
+
+enum i40e_aq_hmc_profile {
+       /* I40E_HMC_PROFILE_NO_CHANGE   = 0, reserved */
+       I40E_HMC_PROFILE_DEFAULT        = 1,
+       I40E_HMC_PROFILE_FAVOR_VF       = 2,
+       I40E_HMC_PROFILE_EQUAL          = 3,
+};
+
 /* Get PHY Abilities (indirect 0x0600) uses the generic indirect struct */
 
 /* set in param0 for get phy abilities to report qualified modules */
@@ -1691,6 +1728,7 @@ enum i40e_aq_phy_type {
 #define I40E_LINK_SPEED_10GB_SHIFT     0x3
 #define I40E_LINK_SPEED_40GB_SHIFT     0x4
 #define I40E_LINK_SPEED_20GB_SHIFT     0x5
+#define I40E_LINK_SPEED_25GB_SHIFT     0x6
 
 enum i40e_aq_link_speed {
        I40E_LINK_SPEED_UNKNOWN = 0,
@@ -1698,7 +1736,8 @@ enum i40e_aq_link_speed {
        I40E_LINK_SPEED_1GB     = (1 << I40E_LINK_SPEED_1000MB_SHIFT),
        I40E_LINK_SPEED_10GB    = (1 << I40E_LINK_SPEED_10GB_SHIFT),
        I40E_LINK_SPEED_40GB    = (1 << I40E_LINK_SPEED_40GB_SHIFT),
-       I40E_LINK_SPEED_20GB    = (1 << I40E_LINK_SPEED_20GB_SHIFT)
+       I40E_LINK_SPEED_20GB    = (1 << I40E_LINK_SPEED_20GB_SHIFT),
+       I40E_LINK_SPEED_25GB    = (1 << I40E_LINK_SPEED_25GB_SHIFT),
 };
 
 struct i40e_aqc_module_desc {
@@ -1721,6 +1760,8 @@ struct i40e_aq_get_phy_abilities_resp {
 #define I40E_AQ_PHY_LINK_ENABLED       0x08
 #define I40E_AQ_PHY_AN_ENABLED         0x10
 #define I40E_AQ_PHY_FLAG_MODULE_QUAL   0x20
+#define I40E_AQ_PHY_FEC_ABILITY_KR     0x40
+#define I40E_AQ_PHY_FEC_ABILITY_RS     0x80
        __le16  eee_capability;
 #define I40E_AQ_EEE_100BASE_TX         0x0002
 #define I40E_AQ_EEE_1000BASE_T         0x0004
@@ -1731,7 +1772,22 @@ struct i40e_aq_get_phy_abilities_resp {
        __le32  eeer_val;
        u8      d3_lpan;
 #define I40E_AQ_SET_PHY_D3_LPAN_ENA    0x01
-       u8      reserved[3];
+       u8      phy_type_ext;
+#define I40E_AQ_PHY_TYPE_EXT_25G_KR    0x01
+#define I40E_AQ_PHY_TYPE_EXT_25G_CR    0x02
+#define I40E_AQ_PHY_TYPE_EXT_25G_SR    0x04
+#define I40E_AQ_PHY_TYPE_EXT_25G_LR    0x08
+       u8      fec_cfg_curr_mod_ext_info;
+#define I40E_AQ_ENABLE_FEC_KR          0x01
+#define I40E_AQ_ENABLE_FEC_RS          0x02
+#define I40E_AQ_REQUEST_FEC_KR         0x04
+#define I40E_AQ_REQUEST_FEC_RS         0x08
+#define I40E_AQ_ENABLE_FEC_AUTO                0x10
+#define I40E_AQ_FEC
+#define I40E_AQ_MODULE_TYPE_EXT_MASK   0xE0
+#define I40E_AQ_MODULE_TYPE_EXT_SHIFT  5
+
+       u8      ext_comp_code;
        u8      phy_id[4];
        u8      module_type[3];
        u8      qualified_module_count;
@@ -1753,7 +1809,16 @@ struct i40e_aq_set_phy_config { /* same bits as above in all */
        __le16  eee_capability;
        __le32  eeer;
        u8      low_power_ctrl;
-       u8      reserved[3];
+       u8      phy_type_ext;
+       u8      fec_config;
+#define I40E_AQ_SET_FEC_ABILITY_KR     BIT(0)
+#define I40E_AQ_SET_FEC_ABILITY_RS     BIT(1)
+#define I40E_AQ_SET_FEC_REQUEST_KR     BIT(2)
+#define I40E_AQ_SET_FEC_REQUEST_RS     BIT(3)
+#define I40E_AQ_SET_FEC_AUTO           BIT(4)
+#define I40E_AQ_PHY_FEC_CONFIG_SHIFT   0x0
+#define I40E_AQ_PHY_FEC_CONFIG_MASK    (0x1F << I40E_AQ_PHY_FEC_CONFIG_SHIFT)
+       u8      reserved;
 };
 
 I40E_CHECK_CMD_LENGTH(i40e_aq_set_phy_config);
@@ -1833,16 +1898,26 @@ struct i40e_aqc_get_link_status {
 #define I40E_AQ_LINK_TX_DRAINED                0x01
 #define I40E_AQ_LINK_TX_FLUSHED                0x03
 #define I40E_AQ_LINK_FORCED_40G                0x10
+/* 25G Error Codes */
+#define I40E_AQ_25G_NO_ERR             0X00
+#define I40E_AQ_25G_NOT_PRESENT                0X01
+#define I40E_AQ_25G_NVM_CRC_ERR                0X02
+#define I40E_AQ_25G_SBUS_UCODE_ERR     0X03
+#define I40E_AQ_25G_SERDES_UCODE_ERR   0X04
+#define I40E_AQ_25G_NIMB_UCODE_ERR     0X05
        u8      loopback; /* use defines from i40e_aqc_set_lb_mode */
        __le16  max_frame_size;
        u8      config;
+#define I40E_AQ_CONFIG_FEC_KR_ENA      0x01
+#define I40E_AQ_CONFIG_FEC_RS_ENA      0x02
 #define I40E_AQ_CONFIG_CRC_ENA         0x04
 #define I40E_AQ_CONFIG_PACING_MASK     0x78
-       u8      external_power_ability;
+       u8      power_desc;
 #define I40E_AQ_LINK_POWER_CLASS_1     0x00
 #define I40E_AQ_LINK_POWER_CLASS_2     0x01
 #define I40E_AQ_LINK_POWER_CLASS_3     0x02
 #define I40E_AQ_LINK_POWER_CLASS_4     0x03
+#define I40E_AQ_PWR_CLASS_MASK         0x03
        u8      reserved[4];
 };
 
@@ -2340,7 +2415,6 @@ struct i40e_aqc_del_udp_tunnel_completion {
 };
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_del_udp_tunnel_completion);
-#ifdef X722_SUPPORT
 
 struct i40e_aqc_get_set_rss_key {
 #define I40E_AQC_SET_RSS_KEY_VSI_VALID         (0x1 << 15)
@@ -2381,7 +2455,6 @@ struct  i40e_aqc_get_set_rss_lut {
 };
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_get_set_rss_lut);
-#endif
 
 /* tunnel key structure 0x0B10 */
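
Since each value in enum i40e_aq_link_speed occupies its own bit (1 << shift), firmware can advertise several speeds OR-ed into one mask, and the new I40E_LINK_SPEED_25GB bit composes the same way. The helper below only illustrates decoding a single flag for display; neither the function nor the Mbps table exists in the driver, and it assumes the i40e base header i40e_type.h.

/* Illustrative helper (not part of the patch): map one i40e_aq_link_speed
 * flag to a speed in Mbps, mirroring the shift definitions above.
 */
#include "i40e_type.h"

static unsigned int aq_link_speed_to_mbps(enum i40e_aq_link_speed speed)
{
	switch (speed) {
	case I40E_LINK_SPEED_100MB:	return 100;
	case I40E_LINK_SPEED_1GB:	return 1000;
	case I40E_LINK_SPEED_10GB:	return 10000;
	case I40E_LINK_SPEED_20GB:	return 20000;
	case I40E_LINK_SPEED_25GB:	return 25000;
	case I40E_LINK_SPEED_40GB:	return 40000;
	default:			return 0;	/* I40E_LINK_SPEED_UNKNOWN */
	}
}
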
 
index 98ed4b6..b8d8165 100644
@@ -71,7 +71,6 @@ STATIC enum i40e_status_code i40e_set_mac_type(struct i40e_hw *hw)
                case I40E_DEV_ID_25G_SFP28:
                        hw->mac.type = I40E_MAC_XL710;
                        break;
-#ifdef X722_SUPPORT
 #ifdef X722_A0_SUPPORT
                case I40E_DEV_ID_X722_A0:
 #endif
@@ -81,21 +80,16 @@ STATIC enum i40e_status_code i40e_set_mac_type(struct i40e_hw *hw)
                case I40E_DEV_ID_1G_BASE_T_X722:
                case I40E_DEV_ID_10G_BASE_T_X722:
                case I40E_DEV_ID_SFP_I_X722:
-               case I40E_DEV_ID_QSFP_I_X722:
                        hw->mac.type = I40E_MAC_X722;
                        break;
-#endif
-#ifdef X722_SUPPORT
 #if defined(INTEGRATED_VF) || defined(VF_DRIVER)
                case I40E_DEV_ID_X722_VF:
-               case I40E_DEV_ID_X722_VF_HV:
 #ifdef X722_A0_SUPPORT
                case I40E_DEV_ID_X722_A0_VF:
 #endif
                        hw->mac.type = I40E_MAC_X722_VF;
                        break;
 #endif /* INTEGRATED_VF || VF_DRIVER */
-#endif /* X722_SUPPORT */
 #if defined(INTEGRATED_VF) || defined(VF_DRIVER)
                case I40E_DEV_ID_VF:
                case I40E_DEV_ID_VF_HV:
@@ -115,7 +109,6 @@ STATIC enum i40e_status_code i40e_set_mac_type(struct i40e_hw *hw)
        return status;
 }
 
-#ifndef I40E_NDIS_SUPPORT
 /**
  * i40e_aq_str - convert AQ err code to a string
  * @hw: pointer to the HW structure
@@ -322,7 +315,6 @@ const char *i40e_stat_str(struct i40e_hw *hw, enum i40e_status_code stat_err)
        return hw->err_str;
 }
 
-#endif /* I40E_NDIS_SUPPORT */
 /**
  * i40e_debug_aq
  * @hw: debug mask related to admin queue
@@ -383,8 +375,7 @@ void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc,
                                d_buf[j] = buf[i];
                        i40e_debug(hw, mask,
                                   "\t0x%04X  %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X\n",
-                                  i_sav, d_buf[0], d_buf[1],
-                                  d_buf[2], d_buf[3],
+                                  i_sav, d_buf[0], d_buf[1], d_buf[2], d_buf[3],
                                   d_buf[4], d_buf[5], d_buf[6], d_buf[7],
                                   d_buf[8], d_buf[9], d_buf[10], d_buf[11],
                                   d_buf[12], d_buf[13], d_buf[14], d_buf[15]);
@@ -449,7 +440,6 @@ enum i40e_status_code i40e_aq_queue_shutdown(struct i40e_hw *hw,
 
        return status;
 }
-#ifdef X722_SUPPORT
 
 /**
  * i40e_aq_get_set_rss_lut
@@ -608,7 +598,6 @@ enum i40e_status_code i40e_aq_set_rss_key(struct i40e_hw *hw,
 {
        return i40e_aq_get_set_rss_key(hw, vsi_id, key, true);
 }
-#endif /* X722_SUPPORT */
 
 /* The i40e_ptype_lookup table is used to convert from the 8-bit ptype in the
  * hardware to a bit-field that can be used by SW to more easily determine the
@@ -773,7 +762,7 @@ struct i40e_rx_ptype_decoded i40e_ptype_lookup[] = {
        /* Non Tunneled IPv6 */
        I40E_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3),
        I40E_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3),
-       I40E_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP,  PAY3),
+       I40E_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP,  PAY4),
        I40E_PTT_UNUSED_ENTRY(91),
        I40E_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP,  PAY4),
        I40E_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4),
@@ -1024,9 +1013,7 @@ enum i40e_status_code i40e_init_shared_code(struct i40e_hw *hw)
 
        switch (hw->mac.type) {
        case I40E_MAC_XL710:
-#ifdef X722_SUPPORT
        case I40E_MAC_X722:
-#endif
                break;
        default:
                return I40E_ERR_DEVICE_NOT_SUPPORTED;
@@ -1046,11 +1033,9 @@ enum i40e_status_code i40e_init_shared_code(struct i40e_hw *hw)
        else
                hw->pf_id = (u8)(func_rid & 0x7);
 
-#ifdef X722_SUPPORT
        if (hw->mac.type == I40E_MAC_X722)
                hw->flags |= I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE;
 
-#endif
        status = i40e_init_nvm(hw);
        return status;
 }
@@ -1128,7 +1113,8 @@ enum i40e_status_code i40e_get_mac_addr(struct i40e_hw *hw, u8 *mac_addr)
        status = i40e_aq_mac_address_read(hw, &flags, &addrs, NULL);
 
        if (flags & I40E_AQC_LAN_ADDR_VALID)
-               memcpy(mac_addr, &addrs.pf_lan_mac, sizeof(addrs.pf_lan_mac));
+               i40e_memcpy(mac_addr, &addrs.pf_lan_mac, sizeof(addrs.pf_lan_mac),
+                       I40E_NONDMA_TO_NONDMA);
 
        return status;
 }
@@ -1151,7 +1137,8 @@ enum i40e_status_code i40e_get_port_mac_addr(struct i40e_hw *hw, u8 *mac_addr)
                return status;
 
        if (flags & I40E_AQC_PORT_ADDR_VALID)
-               memcpy(mac_addr, &addrs.port_mac, sizeof(addrs.port_mac));
+               i40e_memcpy(mac_addr, &addrs.port_mac, sizeof(addrs.port_mac),
+                       I40E_NONDMA_TO_NONDMA);
        else
                status = I40E_ERR_INVALID_MAC_ADDR;
 
@@ -1190,6 +1177,33 @@ void i40e_pre_tx_queue_cfg(struct i40e_hw *hw, u32 queue, bool enable)
        wr32(hw, I40E_GLLAN_TXPRE_QDIS(reg_block), reg_val);
 }
 
+/**
+ * i40e_get_san_mac_addr - get SAN MAC address
+ * @hw: pointer to the HW structure
+ * @mac_addr: pointer to SAN MAC address
+ *
+ * Reads the adapter's SAN MAC address from NVM
+ **/
+enum i40e_status_code i40e_get_san_mac_addr(struct i40e_hw *hw,
+                                           u8 *mac_addr)
+{
+       struct i40e_aqc_mac_address_read_data addrs;
+       enum i40e_status_code status;
+       u16 flags = 0;
+
+       status = i40e_aq_mac_address_read(hw, &flags, &addrs, NULL);
+       if (status)
+               return status;
+
+       if (flags & I40E_AQC_SAN_ADDR_VALID)
+               i40e_memcpy(mac_addr, &addrs.pf_san_mac, sizeof(addrs.pf_san_mac),
+                       I40E_NONDMA_TO_NONDMA);
+       else
+               status = I40E_ERR_INVALID_MAC_ADDR;
+
+       return status;
+}
+
 /**
  *  i40e_read_pba_string - Reads part number string from EEPROM
  *  @hw: pointer to hardware structure
@@ -1264,6 +1278,8 @@ STATIC enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw)
        case I40E_PHY_TYPE_1000BASE_LX:
        case I40E_PHY_TYPE_40GBASE_SR4:
        case I40E_PHY_TYPE_40GBASE_LR4:
+       case I40E_PHY_TYPE_25GBASE_LR:
+       case I40E_PHY_TYPE_25GBASE_SR:
                media = I40E_MEDIA_TYPE_FIBER;
                break;
        case I40E_PHY_TYPE_100BASE_TX:
@@ -1278,6 +1294,7 @@ STATIC enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw)
        case I40E_PHY_TYPE_10GBASE_SFPP_CU:
        case I40E_PHY_TYPE_40GBASE_AOC:
        case I40E_PHY_TYPE_10GBASE_AOC:
+       case I40E_PHY_TYPE_25GBASE_CR:
                media = I40E_MEDIA_TYPE_DA;
                break;
        case I40E_PHY_TYPE_1000BASE_KX:
@@ -1285,6 +1302,7 @@ STATIC enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw)
        case I40E_PHY_TYPE_10GBASE_KR:
        case I40E_PHY_TYPE_40GBASE_KR4:
        case I40E_PHY_TYPE_20GBASE_KR2:
+       case I40E_PHY_TYPE_25GBASE_KR:
                media = I40E_MEDIA_TYPE_BACKPLANE;
                break;
        case I40E_PHY_TYPE_SGMII:
@@ -1670,8 +1688,10 @@ enum i40e_status_code i40e_aq_get_phy_capabilities(struct i40e_hw *hw,
        if (hw->aq.asq_last_status == I40E_AQ_RC_EIO)
                status = I40E_ERR_UNKNOWN_PHY;
 
-       if (report_init)
+       if (report_init) {
                hw->phy.phy_types = LE32_TO_CPU(abilities->phy_type);
+               hw->phy.phy_types |= ((u64)abilities->phy_type_ext << 32);
+       }
 
        return status;
 }
@@ -1763,10 +1783,13 @@ enum i40e_status_code i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures,
                        config.abilities |= I40E_AQ_PHY_ENABLE_ATOMIC_LINK;
                /* Copy over all the old settings */
                config.phy_type = abilities.phy_type;
+               config.phy_type_ext = abilities.phy_type_ext;
                config.link_speed = abilities.link_speed;
                config.eee_capability = abilities.eee_capability;
                config.eeer = abilities.eeer_val;
                config.low_power_ctrl = abilities.d3_lpan;
+               config.fec_config = abilities.fec_cfg_curr_mod_ext_info &
+                                   I40E_AQ_PHY_FEC_CONFIG_MASK;
                status = i40e_aq_set_phy_config(hw, &config, NULL);
 
                if (status)
@@ -1926,6 +1949,8 @@ enum i40e_status_code i40e_aq_get_link_info(struct i40e_hw *hw,
        hw_link_info->link_speed = (enum i40e_aq_link_speed)resp->link_speed;
        hw_link_info->link_info = resp->link_info;
        hw_link_info->an_info = resp->an_info;
+       hw_link_info->fec_info = resp->config & (I40E_AQ_CONFIG_FEC_KR_ENA |
+                                                I40E_AQ_CONFIG_FEC_RS_ENA);
        hw_link_info->ext_info = resp->ext_info;
        hw_link_info->loopback = resp->loopback;
        hw_link_info->max_frame_size = LE16_TO_CPU(resp->max_frame_size);
@@ -1948,12 +1973,13 @@ enum i40e_status_code i40e_aq_get_link_info(struct i40e_hw *hw,
        else
                hw_link_info->crc_enable = false;
 
-       if (resp->command_flags & CPU_TO_LE16(I40E_AQ_LSE_ENABLE))
+       if (resp->command_flags & CPU_TO_LE16(I40E_AQ_LSE_IS_ENABLED))
                hw_link_info->lse_enable = true;
        else
                hw_link_info->lse_enable = false;
 
-       if ((hw->aq.fw_maj_ver < 4 || (hw->aq.fw_maj_ver == 4 &&
+       if ((hw->mac.type == I40E_MAC_XL710) &&
+           (hw->aq.fw_maj_ver < 4 || (hw->aq.fw_maj_ver == 4 &&
             hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE)
                hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU;
 
@@ -2214,6 +2240,34 @@ enum i40e_status_code i40e_aq_set_default_vsi(struct i40e_hw *hw,
        return status;
 }
 
+/**
+ * i40e_aq_clear_default_vsi
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_clear_default_vsi(struct i40e_hw *hw,
+                               u16 seid,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+               (struct i40e_aqc_set_vsi_promiscuous_modes *)
+               &desc.params.raw;
+       enum i40e_status_code status;
+
+       i40e_fill_default_direct_cmd_desc(&desc,
+                                       i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+       cmd->promiscuous_flags = CPU_TO_LE16(0);
+       cmd->valid_flags = CPU_TO_LE16(I40E_AQC_SET_VSI_DEFAULT);
+       cmd->seid = CPU_TO_LE16(seid);
+
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+       return status;
+}
+
 /**
  * i40e_aq_set_vsi_unicast_promiscuous
  * @hw: pointer to the hw struct
@@ -2289,6 +2343,43 @@ enum i40e_status_code i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw,
        return status;
 }
 
+/**
+* i40e_aq_set_vsi_full_promiscuous
+* @hw: pointer to the hw struct
+* @seid: VSI number
+* @set: set promiscuous enable/disable
+* @cmd_details: pointer to command details structure or NULL
+**/
+enum i40e_status_code i40e_aq_set_vsi_full_promiscuous(struct i40e_hw *hw,
+                               u16 seid, bool set,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+               (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+       enum i40e_status_code status;
+       u16 flags = 0;
+
+       i40e_fill_default_direct_cmd_desc(&desc,
+               i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+       if (set)
+               flags = I40E_AQC_SET_VSI_PROMISC_UNICAST   |
+                       I40E_AQC_SET_VSI_PROMISC_MULTICAST |
+                       I40E_AQC_SET_VSI_PROMISC_BROADCAST;
+
+       cmd->promiscuous_flags = CPU_TO_LE16(flags);
+
+       cmd->valid_flags = CPU_TO_LE16(I40E_AQC_SET_VSI_PROMISC_UNICAST   |
+                                      I40E_AQC_SET_VSI_PROMISC_MULTICAST |
+                                      I40E_AQC_SET_VSI_PROMISC_BROADCAST);
+
+       cmd->seid = CPU_TO_LE16(seid);
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+       return status;
+}
+
 /**
  * i40e_aq_set_vsi_mc_promisc_on_vlan
  * @hw: pointer to the hw struct
@@ -2357,6 +2448,40 @@ enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw,
        return status;
 }
 
+/**
+ * i40e_aq_set_vsi_bc_promisc_on_vlan
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @enable: set broadcast promiscuous enable/disable for a given VLAN
+ * @vid: The VLAN tag filter - capture any broadcast packet with this VLAN tag
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_set_vsi_bc_promisc_on_vlan(struct i40e_hw *hw,
+                               u16 seid, bool enable, u16 vid,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+               (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+       enum i40e_status_code status;
+       u16 flags = 0;
+
+       i40e_fill_default_direct_cmd_desc(&desc,
+                                       i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+       if (enable)
+               flags |= I40E_AQC_SET_VSI_PROMISC_BROADCAST;
+
+       cmd->promiscuous_flags = CPU_TO_LE16(flags);
+       cmd->valid_flags = CPU_TO_LE16(I40E_AQC_SET_VSI_PROMISC_BROADCAST);
+       cmd->seid = CPU_TO_LE16(seid);
+       cmd->vlan_tag = CPU_TO_LE16(vid | I40E_AQC_SET_VSI_VLAN_VALID);
+
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+       return status;
+}
+
 /**
  * i40e_aq_set_vsi_broadcast
  * @hw: pointer to the hw struct
@@ -2691,14 +2816,17 @@ enum i40e_status_code i40e_update_link_info(struct i40e_hw *hw)
        if (status)
                return status;
 
-       if (hw->phy.link_info.link_info & I40E_AQ_MEDIA_AVAILABLE) {
+       /* extra checking needed so the link info reported to the user stays timely */
+       if ((hw->phy.link_info.link_info & I40E_AQ_MEDIA_AVAILABLE) &&
+           ((hw->phy.link_info.link_info & I40E_AQ_LINK_UP) ||
+            !(hw->phy.link_info_old.link_info & I40E_AQ_LINK_UP))) {
                status = i40e_aq_get_phy_capabilities(hw, false, false,
                                                      &abilities, NULL);
                if (status)
                        return status;
 
-               memcpy(hw->phy.link_info.module_type, &abilities.module_type,
-                       sizeof(hw->phy.link_info.module_type));
+               i40e_memcpy(hw->phy.link_info.module_type, &abilities.module_type,
+                       sizeof(hw->phy.link_info.module_type), I40E_NONDMA_TO_NONDMA);
        }
        return status;
 }
@@ -3549,6 +3677,14 @@ STATIC void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
                        break;
                case I40E_AQ_CAP_ID_MNG_MODE:
                        p->management_mode = number;
+                       if (major_rev > 1) {
+                               p->mng_protocols_over_mctp = logical_id;
+                               i40e_debug(hw, I40E_DEBUG_INIT,
+                                          "HW Capability: Protocols over MCTP = %d\n",
+                                          p->mng_protocols_over_mctp);
+                       } else {
+                               p->mng_protocols_over_mctp = 0;
+                       }
                        i40e_debug(hw, I40E_DEBUG_INIT,
                                   "HW Capability: Management Mode = %d\n",
                                   p->management_mode);
@@ -3768,7 +3904,6 @@ STATIC void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
                        if (number & I40E_NVM_MGMT_UPDATE_DISABLED)
                                p->update_disabled = true;
                        break;
-#ifdef X722_SUPPORT
                case I40E_AQ_CAP_ID_WOL_AND_PROXY:
                        hw->num_wol_proxy_filters = (u16)number;
                        hw->wol_proxy_vsi_seid = (u16)logical_id;
@@ -3778,12 +3913,10 @@ STATIC void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
                        else
                                p->acpi_prog_method = I40E_ACPI_PROGRAMMING_METHOD_HW_FVL;
                        p->proxy_support = (phys_id & I40E_PROXY_SUPPORT_MASK) ? 1 : 0;
-                       p->proxy_support = p->proxy_support;
                        i40e_debug(hw, I40E_DEBUG_INIT,
                                   "HW Capability: WOL proxy filters = %d\n",
                                   hw->num_wol_proxy_filters);
                        break;
-#endif
                default:
                        break;
                }
@@ -3792,16 +3925,8 @@ STATIC void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
        if (p->fcoe)
                i40e_debug(hw, I40E_DEBUG_ALL, "device is FCoE capable\n");
 
-#ifdef I40E_FCOE_ENA
-       /* Software override ensuring FCoE is disabled if npar or mfp
-        * mode because it is not supported in these modes.
-        */
-       if (p->npar_enable || p->flex10_enable)
-               p->fcoe = false;
-#else
        /* Always disable FCoE if compiled without the I40E_FCOE_ENA flag */
        p->fcoe = false;
-#endif
 
        /* count the enabled ports (aka the "not disabled" ports) */
        hw->num_ports = 0;
@@ -3828,8 +3953,10 @@ STATIC void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
        /* partition id is 1-based, and functions are evenly spread
         * across the ports as partitions
         */
-       hw->partition_id = (hw->pf_id / hw->num_ports) + 1;
-       hw->num_partitions = num_functions / hw->num_ports;
+       if (hw->num_ports != 0) {
+               hw->partition_id = (hw->pf_id / hw->num_ports) + 1;
+               hw->num_partitions = num_functions / hw->num_ports;
+       }
 
        /* additional HW specific goodies that might
         * someday be HW version specific
@@ -4314,11 +4441,15 @@ enum i40e_status_code i40e_aq_start_stop_dcbx(struct i40e_hw *hw,
 /**
  * i40e_aq_add_udp_tunnel
  * @hw: pointer to the hw struct
- * @udp_port: the UDP port to add
+ * @udp_port: the UDP port to add in Host byte order
  * @header_len: length of the tunneling header length in DWords
  * @protocol_index: protocol index type
  * @filter_index: pointer to filter index
  * @cmd_details: pointer to command details structure or NULL
+ *
+ * Note: Firmware expects the udp_port value to be in Little Endian format,
+ * and this function will call CPU_TO_LE16 to convert from Host byte order to
+ * Little Endian order.
  **/
 enum i40e_status_code i40e_aq_add_udp_tunnel(struct i40e_hw *hw,
                                u16 udp_port, u8 protocol_index,
@@ -5452,12 +5583,12 @@ STATIC void i40e_fix_up_geneve_vni(
                u16 tnl_type;
                u32 ti;
 
-               tnl_type = (le16_to_cpu(f[i].flags) &
+               tnl_type = (LE16_TO_CPU(f[i].flags) &
                           I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK) >>
                           I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT;
                if (tnl_type == I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE) {
-                       ti = le32_to_cpu(f[i].tenant_id);
-                       f[i].tenant_id = cpu_to_le32(ti << 8);
+                       ti = LE32_TO_CPU(f[i].tenant_id);
+                       f[i].tenant_id = CPU_TO_LE32(ti << 8);
                }
        }
 }
@@ -5961,9 +6092,6 @@ enum i40e_status_code i40e_aq_configure_partition_bw(struct i40e_hw *hw,
        desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF);
        desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_RD);
 
-       if (bwd_size > I40E_AQ_LARGE_BUF)
-               desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
-
        desc.datalen = CPU_TO_LE16(bwd_size);
 
        status = i40e_asq_send_command(hw, &desc, bw_data, bwd_size, cmd_details);
@@ -5972,7 +6100,92 @@ enum i40e_status_code i40e_aq_configure_partition_bw(struct i40e_hw *hw,
 }
 
 /**
- * i40e_read_phy_register
+ * i40e_read_phy_register_clause22
+ * @hw: pointer to the HW structure
+ * @reg: register address in the page
+ * @phy_addr: PHY address on MDIO interface
+ * @value: PHY register value
+ *
+ * Reads specified PHY register value
+ **/
+enum i40e_status_code i40e_read_phy_register_clause22(struct i40e_hw *hw,
+                                       u16 reg, u8 phy_addr, u16 *value)
+{
+       enum i40e_status_code status = I40E_ERR_TIMEOUT;
+       u8 port_num = (u8)hw->func_caps.mdio_port_num;
+       u32 command = 0;
+       u16 retry = 1000;
+
+       command = (reg << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
+                 (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) |
+                 (I40E_MDIO_CLAUSE22_OPCODE_READ_MASK) |
+                 (I40E_MDIO_CLAUSE22_STCODE_MASK) |
+                 (I40E_GLGEN_MSCA_MDICMD_MASK);
+       wr32(hw, I40E_GLGEN_MSCA(port_num), command);
+       do {
+               command = rd32(hw, I40E_GLGEN_MSCA(port_num));
+               if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) {
+                       status = I40E_SUCCESS;
+                       break;
+               }
+               i40e_usec_delay(10);
+               retry--;
+       } while (retry);
+
+       if (status) {
+               i40e_debug(hw, I40E_DEBUG_PHY,
+                          "PHY: Can't write command to external PHY.\n");
+       } else {
+               command = rd32(hw, I40E_GLGEN_MSRWD(port_num));
+               *value = (command & I40E_GLGEN_MSRWD_MDIRDDATA_MASK) >>
+                        I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT;
+       }
+
+       return status;
+}
+
+/**
+ * i40e_write_phy_register_clause22
+ * @hw: pointer to the HW structure
+ * @reg: register address in the page
+ * @phy_addr: PHY address on MDIO interface
+ * @value: PHY register value
+ *
+ * Writes specified PHY register value
+ **/
+enum i40e_status_code i40e_write_phy_register_clause22(struct i40e_hw *hw,
+                                       u16 reg, u8 phy_addr, u16 value)
+{
+       enum i40e_status_code status = I40E_ERR_TIMEOUT;
+       u8 port_num = (u8)hw->func_caps.mdio_port_num;
+       u32 command  = 0;
+       u16 retry = 1000;
+
+       command = value << I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT;
+       wr32(hw, I40E_GLGEN_MSRWD(port_num), command);
+
+       command = (reg << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
+                 (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) |
+                 (I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK) |
+                 (I40E_MDIO_CLAUSE22_STCODE_MASK) |
+                 (I40E_GLGEN_MSCA_MDICMD_MASK);
+
+       wr32(hw, I40E_GLGEN_MSCA(port_num), command);
+       do {
+               command = rd32(hw, I40E_GLGEN_MSCA(port_num));
+               if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) {
+                       status = I40E_SUCCESS;
+                       break;
+               }
+               i40e_usec_delay(10);
+               retry--;
+       } while (retry);
+
+       return status;
+}
+
+/**
+ * i40e_read_phy_register_clause45
  * @hw: pointer to the HW structure
  * @page: registers page number
  * @reg: register address in the page
@@ -5981,9 +6194,8 @@ enum i40e_status_code i40e_aq_configure_partition_bw(struct i40e_hw *hw,
  *
  * Reads specified PHY register value
  **/
-enum i40e_status_code i40e_read_phy_register(struct i40e_hw *hw,
-                                            u8 page, u16 reg, u8 phy_addr,
-                                            u16 *value)
+enum i40e_status_code i40e_read_phy_register_clause45(struct i40e_hw *hw,
+                               u8 page, u16 reg, u8 phy_addr, u16 *value)
 {
        enum i40e_status_code status = I40E_ERR_TIMEOUT;
        u32 command  = 0;
@@ -5993,8 +6205,8 @@ enum i40e_status_code i40e_read_phy_register(struct i40e_hw *hw,
        command = (reg << I40E_GLGEN_MSCA_MDIADD_SHIFT) |
                  (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
                  (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) |
-                 (I40E_MDIO_OPCODE_ADDRESS) |
-                 (I40E_MDIO_STCODE) |
+                 (I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK) |
+                 (I40E_MDIO_CLAUSE45_STCODE_MASK) |
                  (I40E_GLGEN_MSCA_MDICMD_MASK) |
                  (I40E_GLGEN_MSCA_MDIINPROGEN_MASK);
        wr32(hw, I40E_GLGEN_MSCA(port_num), command);
@@ -6016,8 +6228,8 @@ enum i40e_status_code i40e_read_phy_register(struct i40e_hw *hw,
 
        command = (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
                  (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) |
-                 (I40E_MDIO_OPCODE_READ) |
-                 (I40E_MDIO_STCODE) |
+                 (I40E_MDIO_CLAUSE45_OPCODE_READ_MASK) |
+                 (I40E_MDIO_CLAUSE45_STCODE_MASK) |
                  (I40E_GLGEN_MSCA_MDICMD_MASK) |
                  (I40E_GLGEN_MSCA_MDIINPROGEN_MASK);
        status = I40E_ERR_TIMEOUT;
@@ -6047,7 +6259,7 @@ phy_read_end:
 }
 
 /**
- * i40e_write_phy_register
+ * i40e_write_phy_register_clause45
  * @hw: pointer to the HW structure
  * @page: registers page number
  * @reg: register address in the page
@@ -6056,9 +6268,8 @@ phy_read_end:
  *
  * Writes value to specified PHY register
  **/
-enum i40e_status_code i40e_write_phy_register(struct i40e_hw *hw,
-                                             u8 page, u16 reg, u8 phy_addr,
-                                             u16 value)
+enum i40e_status_code i40e_write_phy_register_clause45(struct i40e_hw *hw,
+                               u8 page, u16 reg, u8 phy_addr, u16 value)
 {
        enum i40e_status_code status = I40E_ERR_TIMEOUT;
        u32 command  = 0;
@@ -6068,8 +6279,8 @@ enum i40e_status_code i40e_write_phy_register(struct i40e_hw *hw,
        command = (reg << I40E_GLGEN_MSCA_MDIADD_SHIFT) |
                  (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
                  (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) |
-                 (I40E_MDIO_OPCODE_ADDRESS) |
-                 (I40E_MDIO_STCODE) |
+                 (I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK) |
+                 (I40E_MDIO_CLAUSE45_STCODE_MASK) |
                  (I40E_GLGEN_MSCA_MDICMD_MASK) |
                  (I40E_GLGEN_MSCA_MDIINPROGEN_MASK);
        wr32(hw, I40E_GLGEN_MSCA(port_num), command);
@@ -6093,8 +6304,8 @@ enum i40e_status_code i40e_write_phy_register(struct i40e_hw *hw,
 
        command = (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
                  (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) |
-                 (I40E_MDIO_OPCODE_WRITE) |
-                 (I40E_MDIO_STCODE) |
+                 (I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK) |
+                 (I40E_MDIO_CLAUSE45_STCODE_MASK) |
                  (I40E_GLGEN_MSCA_MDICMD_MASK) |
                  (I40E_GLGEN_MSCA_MDIINPROGEN_MASK);
        status = I40E_ERR_TIMEOUT;
@@ -6114,6 +6325,78 @@ phy_write_end:
        return status;
 }
 
+/**
+ * i40e_write_phy_register
+ * @hw: pointer to the HW structure
+ * @page: registers page number
+ * @reg: register address in the page
+ * @phy_addr: PHY address on MDIO interface
+ * @value: PHY register value
+ *
+ * Writes value to specified PHY register
+ **/
+enum i40e_status_code i40e_write_phy_register(struct i40e_hw *hw,
+                               u8 page, u16 reg, u8 phy_addr, u16 value)
+{
+       enum i40e_status_code status;
+
+       switch (hw->device_id) {
+       case I40E_DEV_ID_1G_BASE_T_X722:
+               status = i40e_write_phy_register_clause22(hw,
+                       reg, phy_addr, value);
+               break;
+       case I40E_DEV_ID_10G_BASE_T:
+       case I40E_DEV_ID_10G_BASE_T4:
+       case I40E_DEV_ID_10G_BASE_T_X722:
+       case I40E_DEV_ID_25G_B:
+       case I40E_DEV_ID_25G_SFP28:
+               status = i40e_write_phy_register_clause45(hw,
+                       page, reg, phy_addr, value);
+               break;
+       default:
+               status = I40E_ERR_UNKNOWN_PHY;
+               break;
+       }
+
+       return status;
+}
+
+/**
+ * i40e_read_phy_register
+ * @hw: pointer to the HW structure
+ * @page: registers page number
+ * @reg: register address in the page
+ * @phy_addr: PHY address on MDIO interface
+ * @value: PHY register value
+ *
+ * Reads specified PHY register value
+ **/
+enum i40e_status_code i40e_read_phy_register(struct i40e_hw *hw,
+                               u8 page, u16 reg, u8 phy_addr, u16 *value)
+{
+       enum i40e_status_code status;
+
+       switch (hw->device_id) {
+       case I40E_DEV_ID_1G_BASE_T_X722:
+               status = i40e_read_phy_register_clause22(hw, reg, phy_addr,
+                                                        value);
+               break;
+       case I40E_DEV_ID_10G_BASE_T:
+       case I40E_DEV_ID_10G_BASE_T4:
+       case I40E_DEV_ID_10G_BASE_T_X722:
+       case I40E_DEV_ID_25G_B:
+       case I40E_DEV_ID_25G_SFP28:
+               status = i40e_read_phy_register_clause45(hw, page, reg,
+                                                        phy_addr, value);
+               break;
+       default:
+               status = I40E_ERR_UNKNOWN_PHY;
+               break;
+       }
+
+       return status;
+}
+
 /**
  * i40e_get_phy_address
  * @hw: pointer to the HW structure
@@ -6156,14 +6439,16 @@ enum i40e_status_code i40e_blink_phy_link_led(struct i40e_hw *hw,
 
        for (gpio_led_port = 0; gpio_led_port < 3; gpio_led_port++,
             led_addr++) {
-               status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE,
-                                               led_addr, phy_addr, &led_reg);
+               status = i40e_read_phy_register_clause45(hw,
+                                                        I40E_PHY_COM_REG_PAGE,
+                                                        led_addr, phy_addr,
+                                                        &led_reg);
                if (status)
                        goto phy_blinking_end;
                led_ctl = led_reg;
                if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) {
                        led_reg = 0;
-                       status = i40e_write_phy_register(hw,
+                       status = i40e_write_phy_register_clause45(hw,
                                                         I40E_PHY_COM_REG_PAGE,
                                                         led_addr, phy_addr,
                                                         led_reg);
@@ -6175,20 +6460,18 @@ enum i40e_status_code i40e_blink_phy_link_led(struct i40e_hw *hw,
 
        if (time > 0 && interval > 0) {
                for (i = 0; i < time * 1000; i += interval) {
-                       status = i40e_read_phy_register(hw,
-                                                       I40E_PHY_COM_REG_PAGE,
-                                                       led_addr, phy_addr,
-                                                       &led_reg);
+                       status = i40e_read_phy_register_clause45(hw,
+                                               I40E_PHY_COM_REG_PAGE,
+                                               led_addr, phy_addr, &led_reg);
                        if (status)
                                goto restore_config;
                        if (led_reg & I40E_PHY_LED_MANUAL_ON)
                                led_reg = 0;
                        else
                                led_reg = I40E_PHY_LED_MANUAL_ON;
-                       status = i40e_write_phy_register(hw,
-                                                        I40E_PHY_COM_REG_PAGE,
-                                                        led_addr, phy_addr,
-                                                        led_reg);
+                       status = i40e_write_phy_register_clause45(hw,
+                                               I40E_PHY_COM_REG_PAGE,
+                                               led_addr, phy_addr, led_reg);
                        if (status)
                                goto restore_config;
                        i40e_msec_delay(interval);
@@ -6196,8 +6479,9 @@ enum i40e_status_code i40e_blink_phy_link_led(struct i40e_hw *hw,
        }
 
 restore_config:
-       status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, led_addr,
-                                        phy_addr, led_ctl);
+       status = i40e_write_phy_register_clause45(hw,
+                                                 I40E_PHY_COM_REG_PAGE,
+                                                 led_addr, phy_addr, led_ctl);
 
 phy_blinking_end:
        return status;
@@ -6228,8 +6512,10 @@ enum i40e_status_code i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr,
 
        for (gpio_led_port = 0; gpio_led_port < 3; gpio_led_port++,
             temp_addr++) {
-               status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE,
-                                               temp_addr, phy_addr, &reg_val);
+               status = i40e_read_phy_register_clause45(hw,
+                                                        I40E_PHY_COM_REG_PAGE,
+                                                        temp_addr, phy_addr,
+                                                        &reg_val);
                if (status)
                        return status;
                *val = reg_val;
@@ -6262,41 +6548,42 @@ enum i40e_status_code i40e_led_set_phy(struct i40e_hw *hw, bool on,
        i = rd32(hw, I40E_PFGEN_PORTNUM);
        port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK);
        phy_addr = i40e_get_phy_address(hw, port_num);
-
-       status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, led_addr,
-                                       phy_addr, &led_reg);
+       status = i40e_read_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE,
+                                                led_addr, phy_addr, &led_reg);
        if (status)
                return status;
        led_ctl = led_reg;
        if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) {
                led_reg = 0;
-               status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE,
-                                                led_addr, phy_addr, led_reg);
+               status = i40e_write_phy_register_clause45(hw,
+                                                         I40E_PHY_COM_REG_PAGE,
+                                                         led_addr, phy_addr,
+                                                         led_reg);
                if (status)
                        return status;
        }
-       status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE,
-                                       led_addr, phy_addr, &led_reg);
+       status = i40e_read_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE,
+                                                led_addr, phy_addr, &led_reg);
        if (status)
                goto restore_config;
        if (on)
                led_reg = I40E_PHY_LED_MANUAL_ON;
        else
                led_reg = 0;
-       status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE,
-                                        led_addr, phy_addr, led_reg);
+       status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE,
+                                                 led_addr, phy_addr, led_reg);
        if (status)
                goto restore_config;
        if (mode & I40E_PHY_LED_MODE_ORIG) {
                led_ctl = (mode & I40E_PHY_LED_MODE_MASK);
-               status = i40e_write_phy_register(hw,
+               status = i40e_write_phy_register_clause45(hw,
                                                 I40E_PHY_COM_REG_PAGE,
                                                 led_addr, phy_addr, led_ctl);
        }
        return status;
 restore_config:
-       status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, led_addr,
-                                        phy_addr, led_ctl);
+       status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE,
+                                                 led_addr, phy_addr, led_ctl);
        return status;
 }
 #endif /* PF_DRIVER */
@@ -6522,7 +6809,6 @@ enum i40e_status_code i40e_vf_reset(struct i40e_hw *hw)
                                      I40E_SUCCESS, NULL, 0, NULL);
 }
 #endif /* VF_DRIVER */
-#ifdef X722_SUPPORT
 
 /**
  * i40e_aq_set_arp_proxy_config
@@ -6545,10 +6831,13 @@ enum i40e_status_code i40e_aq_set_arp_proxy_config(struct i40e_hw *hw,
 
        i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_set_proxy_config);
 
+       desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF);
+       desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_RD);
        desc.params.external.addr_high =
                                  CPU_TO_LE32(I40E_HI_DWORD((u64)proxy_config));
        desc.params.external.addr_low =
                                  CPU_TO_LE32(I40E_LO_DWORD((u64)proxy_config));
+       desc.datalen = CPU_TO_LE16(sizeof(struct i40e_aqc_arp_proxy_data));
 
        status = i40e_asq_send_command(hw, &desc, proxy_config,
                                       sizeof(struct i40e_aqc_arp_proxy_data),
@@ -6579,10 +6868,13 @@ enum i40e_status_code i40e_aq_set_ns_proxy_table_entry(struct i40e_hw *hw,
        i40e_fill_default_direct_cmd_desc(&desc,
                                i40e_aqc_opc_set_ns_proxy_table_entry);
 
+       desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF);
+       desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_RD);
        desc.params.external.addr_high =
                CPU_TO_LE32(I40E_HI_DWORD((u64)ns_proxy_table_entry));
        desc.params.external.addr_low =
                CPU_TO_LE32(I40E_LO_DWORD((u64)ns_proxy_table_entry));
+       desc.datalen = CPU_TO_LE16(sizeof(struct i40e_aqc_ns_proxy_data));
 
        status = i40e_asq_send_command(hw, &desc, ns_proxy_table_entry,
                                       sizeof(struct i40e_aqc_ns_proxy_data),
@@ -6629,9 +6921,11 @@ enum i40e_status_code i40e_aq_set_clear_wol_filter(struct i40e_hw *hw,
        if (set_filter) {
                if (!filter)
                        return  I40E_ERR_PARAM;
+
                cmd_flags |= I40E_AQC_SET_WOL_FILTER;
-               buff_len = sizeof(*filter);
+               cmd_flags |= I40E_AQC_SET_WOL_FILTER_WOL_PRESERVE_ON_PFR;
        }
+
        if (no_wol_tco)
                cmd_flags |= I40E_AQC_SET_WOL_FILTER_NO_TCO_WOL;
        cmd->cmd_flags = CPU_TO_LE16(cmd_flags);
@@ -6642,6 +6936,12 @@ enum i40e_status_code i40e_aq_set_clear_wol_filter(struct i40e_hw *hw,
                valid_flags |= I40E_AQC_SET_WOL_FILTER_NO_TCO_ACTION_VALID;
        cmd->valid_flags = CPU_TO_LE16(valid_flags);
 
+       buff_len = sizeof(*filter);
+       desc.datalen = CPU_TO_LE16(buff_len);
+
+       desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF);
+       desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_RD);
+
        cmd->address_high = CPU_TO_LE32(I40E_HI_DWORD((u64)filter));
        cmd->address_low = CPU_TO_LE32(I40E_LO_DWORD((u64)filter));
 
@@ -6678,4 +6978,23 @@ enum i40e_status_code i40e_aq_get_wake_event_reason(struct i40e_hw *hw,
        return status;
 }
 
-#endif /* X722_SUPPORT */
+/**
+* i40e_aq_clear_all_wol_filters
+* @hw: pointer to the hw struct
+* @cmd_details: pointer to command details structure or NULL
+*
+* Clear all Wake on LAN (WoL) filters programmed on this PF
+**/
+enum i40e_status_code i40e_aq_clear_all_wol_filters(struct i40e_hw *hw,
+       struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       enum i40e_status_code status;
+
+       i40e_fill_default_direct_cmd_desc(&desc,
+                                         i40e_aqc_opc_clear_all_wol_filters);
+
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+       return status;
+}
\ No newline at end of file
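
After the split above, i40e_read_phy_register() and i40e_write_phy_register() choose between MDIO clause 22 and clause 45 based on hw->device_id, so callers such as the LED helpers no longer hard-code the access method. Below is a hedged caller-side sketch, not part of the patch; it assumes the i40e base headers and that the page/register passed in are valid for the attached external PHY.

/* Illustrative only: read one register of the external PHY through the
 * clause-dispatching wrapper added above.
 */
#include "i40e_prototype.h"

static enum i40e_status_code
read_external_phy_reg(struct i40e_hw *hw, u8 page, u16 reg, u16 *value)
{
	u32 portnum = rd32(hw, I40E_PFGEN_PORTNUM);
	u8 port_num = (u8)(portnum & I40E_PFGEN_PORTNUM_PORT_NUM_MASK);
	u8 phy_addr = i40e_get_phy_address(hw, port_num);

	/* Clause 22 vs. clause 45 is selected from hw->device_id, so the
	 * same call works for 1G X722 and 10G/25G BASE-T devices. */
	return i40e_read_phy_register(hw, page, reg, phy_addr, value);
}
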
index ed73e1d..4546689 100644
@@ -55,7 +55,6 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_DEV_ID_VF                 0x154C
 #define I40E_DEV_ID_VF_HV              0x1571
 #endif /* VF_DRIVER */
-#ifdef X722_SUPPORT
 #ifdef X722_A0_SUPPORT
 #define I40E_DEV_ID_X722_A0            0x374C
 #if defined(INTEGRATED_VF) || defined(VF_DRIVER)
@@ -68,12 +67,9 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_DEV_ID_1G_BASE_T_X722     0x37D1
 #define I40E_DEV_ID_10G_BASE_T_X722    0x37D2
 #define I40E_DEV_ID_SFP_I_X722         0x37D3
-#define I40E_DEV_ID_QSFP_I_X722                0x37D4
 #if defined(INTEGRATED_VF) || defined(VF_DRIVER) || defined(I40E_NDIS_SUPPORT)
 #define I40E_DEV_ID_X722_VF            0x37CD
-#define I40E_DEV_ID_X722_VF_HV         0x37D9
 #endif /* VF_DRIVER */
-#endif /* X722_SUPPORT */
 
 #define i40e_is_40G_device(d)          ((d) == I40E_DEV_ID_QSFP_A  || \
                                         (d) == I40E_DEV_ID_QSFP_B  || \
index 2260648..f03f381 100644
@@ -1239,11 +1239,6 @@ enum i40e_status_code i40e_hmc_get_object_va(struct i40e_hw *hw,
        u64 obj_offset_in_fpm;
        u32 sd_idx, sd_lmt;
 
-       if (NULL == hmc_info) {
-               ret_code = I40E_ERR_BAD_PTR;
-               DEBUGOUT("i40e_hmc_get_object_va: bad hmc_info ptr\n");
-               goto exit;
-       }
        if (NULL == hmc_info->hmc_obj) {
                ret_code = I40E_ERR_BAD_PTR;
                DEBUGOUT("i40e_hmc_get_object_va: bad hmc_info->hmc_obj ptr\n");
index 4fa1220..e896502 100644
@@ -219,19 +219,15 @@ enum i40e_status_code i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
 {
        enum i40e_status_code ret_code = I40E_SUCCESS;
 
-#ifdef X722_SUPPORT
-       if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) {
-               ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
-               if (!ret_code) {
+       ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+       if (!ret_code) {
+               if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) {
                        ret_code = i40e_read_nvm_word_aq(hw, offset, data);
-                       i40e_release_nvm(hw);
+               } else {
+                       ret_code = i40e_read_nvm_word_srctl(hw, offset, data);
                }
-       } else {
-               ret_code = i40e_read_nvm_word_srctl(hw, offset, data);
+               i40e_release_nvm(hw);
        }
-#else
-       ret_code = i40e_read_nvm_word_srctl(hw, offset, data);
-#endif
        return ret_code;
 }
 
@@ -249,14 +245,10 @@ enum i40e_status_code __i40e_read_nvm_word(struct i40e_hw *hw,
 {
        enum i40e_status_code ret_code = I40E_SUCCESS;
 
-#ifdef X722_SUPPORT
        if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE)
                ret_code = i40e_read_nvm_word_aq(hw, offset, data);
        else
                ret_code = i40e_read_nvm_word_srctl(hw, offset, data);
-#else
-       ret_code = i40e_read_nvm_word_srctl(hw, offset, data);
-#endif
        return ret_code;
 }
 
@@ -348,14 +340,10 @@ enum i40e_status_code __i40e_read_nvm_buffer(struct i40e_hw *hw,
 {
        enum i40e_status_code ret_code = I40E_SUCCESS;
 
-#ifdef X722_SUPPORT
        if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE)
                ret_code = i40e_read_nvm_buffer_aq(hw, offset, words, data);
        else
                ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data);
-#else
-       ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data);
-#endif
        return ret_code;
 }
 
@@ -375,7 +363,6 @@ enum i40e_status_code i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
 {
        enum i40e_status_code ret_code = I40E_SUCCESS;
 
-#ifdef X722_SUPPORT
        if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) {
                ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
                if (!ret_code) {
@@ -386,9 +373,6 @@ enum i40e_status_code i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
        } else {
                ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data);
        }
-#else
-       ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data);
-#endif
        return ret_code;
 }
 
@@ -901,9 +885,20 @@ enum i40e_status_code i40e_nvmupd_command(struct i40e_hw *hw,
                        *((u16 *)&bytes[2]) = hw->nvm_wait_opcode;
                }
 
+               /* Clear error status on read */
+               if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR)
+                       hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
+
                return I40E_SUCCESS;
        }
 
+       /* Clear the error status even if it was not read, and log it */
+       if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR) {
+               i40e_debug(hw, I40E_DEBUG_NVM,
+                          "Clearing I40E_NVMUPD_STATE_ERROR state without reading\n");
+               hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
+       }
+
        switch (hw->nvmupd_state) {
        case I40E_NVMUPD_STATE_INIT:
                status = i40e_nvmupd_state_init(hw, cmd, bytes, perrno);
@@ -1253,6 +1248,7 @@ retry:
 void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode)
 {
        if (opcode == hw->nvm_wait_opcode) {
+
                i40e_debug(hw, I40E_DEBUG_NVM,
                           "NVMUPD: clearing wait on opcode 0x%04x\n", opcode);
                if (hw->nvm_release_on_done) {
@@ -1261,6 +1257,11 @@ void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode)
                }
                hw->nvm_wait_opcode = 0;
 
+               if (hw->aq.arq_last_status) {
+                       hw->nvmupd_state = I40E_NVMUPD_STATE_ERROR;
+                       return;
+               }
+
                switch (hw->nvmupd_state) {
                case I40E_NVMUPD_STATE_INIT_WAIT:
                        hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
@@ -1423,7 +1424,8 @@ STATIC enum i40e_status_code i40e_nvmupd_exec_aq(struct i40e_hw *hw,
 
                if (hw->nvm_buff.va) {
                        buff = hw->nvm_buff.va;
-                       memcpy(buff, &bytes[aq_desc_len], aq_data_len);
+                       i40e_memcpy(buff, &bytes[aq_desc_len], aq_data_len,
+                               I40E_NONDMA_TO_NONDMA);
                }
        }
 
@@ -1496,7 +1498,7 @@ STATIC enum i40e_status_code i40e_nvmupd_get_aq_result(struct i40e_hw *hw,
                           __func__, cmd->offset, cmd->offset + len);
 
                buff = ((u8 *)&hw->nvm_wb_desc) + cmd->offset;
-               memcpy(bytes, buff, len);
+               i40e_memcpy(bytes, buff, len, I40E_NONDMA_TO_NONDMA);
 
                bytes += len;
                remainder -= len;
@@ -1510,7 +1512,7 @@ STATIC enum i40e_status_code i40e_nvmupd_get_aq_result(struct i40e_hw *hw,
 
                i40e_debug(hw, I40E_DEBUG_NVM, "%s: databuf bytes %d to %d\n",
                           __func__, start_byte, start_byte + remainder);
-               memcpy(bytes, buff, remainder);
+               i40e_memcpy(bytes, buff, remainder, I40E_NONDMA_TO_NONDMA);
        }
 
        return I40E_SUCCESS;
index 38e7ba5..c57ecde 100644
@@ -44,6 +44,7 @@
 #include <rte_cycles.h>
 #include <rte_spinlock.h>
 #include <rte_log.h>
+#include <rte_io.h>
 
 #include "../i40e_logs.h"
 
@@ -153,15 +154,18 @@ do {                                                            \
  * I40E_PRTQF_FD_MSK
  */
 
-#define I40E_PCI_REG(reg)         (*((volatile uint32_t *)(reg)))
+#define I40E_PCI_REG(reg)              rte_read32(reg)
 #define I40E_PCI_REG_ADDR(a, reg) \
        ((volatile uint32_t *)((char *)(a)->hw_addr + (reg)))
 static inline uint32_t i40e_read_addr(volatile void *addr)
 {
        return rte_le_to_cpu_32(I40E_PCI_REG(addr));
 }
-#define I40E_PCI_REG_WRITE(reg, value) \
-       do { I40E_PCI_REG((reg)) = rte_cpu_to_le_32(value); } while (0)
+
+#define I40E_PCI_REG_WRITE(reg, value)         \
+       rte_write32((rte_cpu_to_le_32(value)), reg)
+#define I40E_PCI_REG_WRITE_RELAXED(reg, value) \
+       rte_write32_relaxed((rte_cpu_to_le_32(value)), reg)
 
 #define I40E_WRITE_FLUSH(a) I40E_READ_REG(a, I40E_GLGEN_STAT)
 #define I40EVF_WRITE_FLUSH(a) I40E_READ_REG(a, I40E_VFGEN_RSTAT)
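One possible use of the new relaxed variant (an illustration only, not part of the patch; the assumed semantics are that rte_write32() from <rte_io.h> issues an I/O barrier before the store while rte_write32_relaxed() does not, so ordering can be deferred to an explicit flush):

static inline void
example_doorbell_write(struct i40e_hw *hw, uint32_t reg, uint32_t val)
{
        /* no barrier here; the register read-back below orders the write */
        I40E_PCI_REG_WRITE_RELAXED(I40E_PCI_REG_ADDR(hw, reg), val);
        I40E_WRITE_FLUSH(hw);
}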
index 03dda93..109d3c5 100644
@@ -78,7 +78,6 @@ void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask,
 void i40e_idle_aq(struct i40e_hw *hw);
 bool i40e_check_asq_alive(struct i40e_hw *hw);
 enum i40e_status_code i40e_aq_queue_shutdown(struct i40e_hw *hw, bool unloading);
-#ifdef X722_SUPPORT
 
 enum i40e_status_code i40e_aq_get_rss_lut(struct i40e_hw *hw, u16 seid,
                                          bool pf_lut, u8 *lut, u16 lut_size);
@@ -90,11 +89,8 @@ enum i40e_status_code i40e_aq_get_rss_key(struct i40e_hw *hw,
 enum i40e_status_code i40e_aq_set_rss_key(struct i40e_hw *hw,
                                     u16 seid,
                                     struct i40e_aqc_get_set_rss_key_data *key);
-#endif
-#ifndef I40E_NDIS_SUPPORT
 const char *i40e_aq_str(struct i40e_hw *hw, enum i40e_admin_queue_err aq_err);
 const char *i40e_stat_str(struct i40e_hw *hw, enum i40e_status_code stat_err);
-#endif /* I40E_NDIS_SUPPORT */
 
 #ifdef PF_DRIVER
 
@@ -124,6 +120,8 @@ enum i40e_status_code i40e_aq_set_phy_debug(struct i40e_hw *hw, u8 cmd_flags,
                                struct i40e_asq_cmd_details *cmd_details);
 enum i40e_status_code i40e_aq_set_default_vsi(struct i40e_hw *hw, u16 vsi_id,
                                struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_clear_default_vsi(struct i40e_hw *hw, u16 vsi_id,
+                               struct i40e_asq_cmd_details *cmd_details);
 enum i40e_status_code i40e_aq_get_phy_capabilities(struct i40e_hw *hw,
                        bool qualified_modules, bool report_init,
                        struct i40e_aq_get_phy_abilities_resp *abilities,
@@ -170,12 +168,18 @@ enum i40e_status_code i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw,
                bool rx_only_promisc);
 enum i40e_status_code i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw,
                u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_set_vsi_full_promiscuous(struct i40e_hw *hw,
+                               u16 seid, bool set,
+                               struct i40e_asq_cmd_details *cmd_details);
 enum i40e_status_code i40e_aq_set_vsi_mc_promisc_on_vlan(struct i40e_hw *hw,
                                u16 seid, bool enable, u16 vid,
                                struct i40e_asq_cmd_details *cmd_details);
 enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw,
                                u16 seid, bool enable, u16 vid,
                                struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_set_vsi_bc_promisc_on_vlan(struct i40e_hw *hw,
+                               u16 seid, bool enable, u16 vid,
+                               struct i40e_asq_cmd_details *cmd_details);
 enum i40e_status_code i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw,
                                u16 seid, bool enable,
                                struct i40e_asq_cmd_details *cmd_details);
@@ -438,6 +442,7 @@ enum i40e_status_code i40e_get_port_mac_addr(struct i40e_hw *hw, u8 *mac_addr);
 enum i40e_status_code i40e_read_pba_string(struct i40e_hw *hw, u8 *pba_num,
                                            u32 pba_num_size);
 void i40e_pre_tx_queue_cfg(struct i40e_hw *hw, u32 queue, bool enable);
+enum i40e_status_code i40e_get_san_mac_addr(struct i40e_hw *hw, u8 *mac_addr);
 enum i40e_aq_link_speed i40e_get_link_speed(struct i40e_hw *hw);
 /* prototype for functions used for NVM access */
 enum i40e_status_code i40e_init_nvm(struct i40e_hw *hw);
@@ -518,7 +523,6 @@ enum i40e_status_code i40e_aq_rx_ctl_write_register(struct i40e_hw *hw,
                                u32 reg_addr, u32 reg_val,
                                struct i40e_asq_cmd_details *cmd_details);
 void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val);
-#ifdef X722_SUPPORT
 enum i40e_status_code i40e_aq_set_arp_proxy_config(struct i40e_hw *hw,
                        struct i40e_aqc_arp_proxy_data *proxy_config,
                        struct i40e_asq_cmd_details *cmd_details);
@@ -534,11 +538,20 @@ enum i40e_status_code i40e_aq_set_clear_wol_filter(struct i40e_hw *hw,
 enum i40e_status_code i40e_aq_get_wake_event_reason(struct i40e_hw *hw,
                        u16 *wake_reason,
                        struct i40e_asq_cmd_details *cmd_details);
-#endif
-enum i40e_status_code i40e_read_phy_register(struct i40e_hw *hw, u8 page,
-                                            u16 reg, u8 phy_addr, u16 *value);
-enum i40e_status_code i40e_write_phy_register(struct i40e_hw *hw, u8 page,
-                                             u16 reg, u8 phy_addr, u16 value);
+enum i40e_status_code i40e_aq_clear_all_wol_filters(struct i40e_hw *hw,
+                       struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_read_phy_register_clause22(struct i40e_hw *hw,
+                                       u16 reg, u8 phy_addr, u16 *value);
+enum i40e_status_code i40e_write_phy_register_clause22(struct i40e_hw *hw,
+                                       u16 reg, u8 phy_addr, u16 value);
+enum i40e_status_code i40e_read_phy_register_clause45(struct i40e_hw *hw,
+                               u8 page, u16 reg, u8 phy_addr, u16 *value);
+enum i40e_status_code i40e_write_phy_register_clause45(struct i40e_hw *hw,
+                               u8 page, u16 reg, u8 phy_addr, u16 value);
+enum i40e_status_code i40e_read_phy_register(struct i40e_hw *hw,
+                               u8 page, u16 reg, u8 phy_addr, u16 *value);
+enum i40e_status_code i40e_write_phy_register(struct i40e_hw *hw,
+                               u8 page, u16 reg, u8 phy_addr, u16 value);
 u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num);
 enum i40e_status_code i40e_blink_phy_link_led(struct i40e_hw *hw,
                                              u32 time, u32 interval);
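A call-level sketch of the clause-22/clause-45 split declared above (hypothetical wrapper based only on the prototypes; clause-22 PHYs are addressed by register and PHY address alone, clause-45 additionally selects an MMD page):

static enum i40e_status_code
example_read_phy(struct i40e_hw *hw, u8 page, u16 reg, u8 phy_addr,
                 u16 *value, bool clause45)
{
        if (clause45)
                return i40e_read_phy_register_clause45(hw, page, reg,
                                                       phy_addr, value);
        return i40e_read_phy_register_clause22(hw, reg, phy_addr, value);
}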
index fd0a723..3a305b6 100644
@@ -3401,7 +3401,6 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_VFQF_HREGION_OVERRIDE_ENA_7_MASK  I40E_MASK(0x1, I40E_VFQF_HREGION_OVERRIDE_ENA_7_SHIFT)
 #define I40E_VFQF_HREGION_REGION_7_SHIFT       29
 #define I40E_VFQF_HREGION_REGION_7_MASK        I40E_MASK(0x7, I40E_VFQF_HREGION_REGION_7_SHIFT)
-#ifdef X722_SUPPORT
 
 #ifdef PF_DRIVER
 #define I40E_MNGSB_FDCRC               0x000B7050 /* Reset: POR */
@@ -5366,5 +5365,4 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT 20
 #define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_MASK  I40E_MASK(0xFFF, I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT)
 
-#endif /* X722_SUPPORT */
 #endif /* _I40E_REGISTER_H_ */
index 5349419..590d97c 100644
@@ -157,13 +157,22 @@ enum i40e_debug_mask {
 #define I40E_PCI_LINK_SPEED_5000       0x2
 #define I40E_PCI_LINK_SPEED_8000       0x3
 
-#define I40E_MDIO_STCODE               0
-#define I40E_MDIO_OPCODE_ADDRESS       0
-#define I40E_MDIO_OPCODE_WRITE         I40E_MASK(1, \
+#define I40E_MDIO_CLAUSE22_STCODE_MASK I40E_MASK(1, \
+                                                 I40E_GLGEN_MSCA_STCODE_SHIFT)
+#define I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK   I40E_MASK(1, \
                                                  I40E_GLGEN_MSCA_OPCODE_SHIFT)
-#define I40E_MDIO_OPCODE_READ_INC_ADDR I40E_MASK(2, \
+#define I40E_MDIO_CLAUSE22_OPCODE_READ_MASK    I40E_MASK(2, \
                                                  I40E_GLGEN_MSCA_OPCODE_SHIFT)
-#define I40E_MDIO_OPCODE_READ          I40E_MASK(3, \
+
+#define I40E_MDIO_CLAUSE45_STCODE_MASK I40E_MASK(0, \
+                                                 I40E_GLGEN_MSCA_STCODE_SHIFT)
+#define I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK I40E_MASK(0, \
+                                                 I40E_GLGEN_MSCA_OPCODE_SHIFT)
+#define I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK   I40E_MASK(1, \
+                                                 I40E_GLGEN_MSCA_OPCODE_SHIFT)
+#define I40E_MDIO_CLAUSE45_OPCODE_READ_INC_ADDR_MASK   I40E_MASK(2, \
+                                                 I40E_GLGEN_MSCA_OPCODE_SHIFT)
+#define I40E_MDIO_CLAUSE45_OPCODE_READ_MASK    I40E_MASK(3, \
                                                  I40E_GLGEN_MSCA_OPCODE_SHIFT)
 
 #define I40E_PHY_COM_REG_PAGE                  0x1E
@@ -187,9 +196,7 @@ enum i40e_memcpy_type {
        I40E_DMA_TO_NONDMA
 };
 
-#ifdef X722_SUPPORT
 #define I40E_FW_API_VERSION_MINOR_X722 0x0005
-#endif
 #define I40E_FW_API_VERSION_MINOR_X710 0x0005
 
 
@@ -203,13 +210,10 @@ enum i40e_memcpy_type {
  */
 enum i40e_mac_type {
        I40E_MAC_UNKNOWN = 0,
-       I40E_MAC_X710,
        I40E_MAC_XL710,
        I40E_MAC_VF,
-#ifdef X722_SUPPORT
        I40E_MAC_X722,
        I40E_MAC_X722_VF,
-#endif
        I40E_MAC_GENERIC,
 };
 
@@ -264,6 +268,7 @@ struct i40e_link_status {
        enum i40e_aq_link_speed link_speed;
        u8 link_info;
        u8 an_info;
+       u8 fec_info;
        u8 ext_info;
        u8 loopback;
        /* is Link Status Event notification to SW enabled */
@@ -292,61 +297,73 @@ struct i40e_link_status {
 #define I40E_MODULE_TYPE_1000BASE_T    0x08
 };
 
-enum i40e_aq_capabilities_phy_type {
-       I40E_CAP_PHY_TYPE_SGMII                 = BIT(I40E_PHY_TYPE_SGMII),
-       I40E_CAP_PHY_TYPE_1000BASE_KX           = BIT(I40E_PHY_TYPE_1000BASE_KX),
-       I40E_CAP_PHY_TYPE_10GBASE_KX4           = BIT(I40E_PHY_TYPE_10GBASE_KX4),
-       I40E_CAP_PHY_TYPE_10GBASE_KR            = BIT(I40E_PHY_TYPE_10GBASE_KR),
-       I40E_CAP_PHY_TYPE_40GBASE_KR4           = BIT(I40E_PHY_TYPE_40GBASE_KR4),
-       I40E_CAP_PHY_TYPE_XAUI                  = BIT(I40E_PHY_TYPE_XAUI),
-       I40E_CAP_PHY_TYPE_XFI                   = BIT(I40E_PHY_TYPE_XFI),
-       I40E_CAP_PHY_TYPE_SFI                   = BIT(I40E_PHY_TYPE_SFI),
-       I40E_CAP_PHY_TYPE_XLAUI                 = BIT(I40E_PHY_TYPE_XLAUI),
-       I40E_CAP_PHY_TYPE_XLPPI                 = BIT(I40E_PHY_TYPE_XLPPI),
-       I40E_CAP_PHY_TYPE_40GBASE_CR4_CU        = BIT(I40E_PHY_TYPE_40GBASE_CR4_CU),
-       I40E_CAP_PHY_TYPE_10GBASE_CR1_CU        = BIT(I40E_PHY_TYPE_10GBASE_CR1_CU),
-       I40E_CAP_PHY_TYPE_10GBASE_AOC           = BIT(I40E_PHY_TYPE_10GBASE_AOC),
-       I40E_CAP_PHY_TYPE_40GBASE_AOC           = BIT(I40E_PHY_TYPE_40GBASE_AOC),
-       I40E_CAP_PHY_TYPE_100BASE_TX            = BIT(I40E_PHY_TYPE_100BASE_TX),
-       I40E_CAP_PHY_TYPE_1000BASE_T            = BIT(I40E_PHY_TYPE_1000BASE_T),
-       I40E_CAP_PHY_TYPE_10GBASE_T             = BIT(I40E_PHY_TYPE_10GBASE_T),
-       I40E_CAP_PHY_TYPE_10GBASE_SR            = BIT(I40E_PHY_TYPE_10GBASE_SR),
-       I40E_CAP_PHY_TYPE_10GBASE_LR            = BIT(I40E_PHY_TYPE_10GBASE_LR),
-       I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU       = BIT(I40E_PHY_TYPE_10GBASE_SFPP_CU),
-       I40E_CAP_PHY_TYPE_10GBASE_CR1           = BIT(I40E_PHY_TYPE_10GBASE_CR1),
-       I40E_CAP_PHY_TYPE_40GBASE_CR4           = BIT(I40E_PHY_TYPE_40GBASE_CR4),
-       I40E_CAP_PHY_TYPE_40GBASE_SR4           = BIT(I40E_PHY_TYPE_40GBASE_SR4),
-       I40E_CAP_PHY_TYPE_40GBASE_LR4           = BIT(I40E_PHY_TYPE_40GBASE_LR4),
-       I40E_CAP_PHY_TYPE_1000BASE_SX           = BIT(I40E_PHY_TYPE_1000BASE_SX),
-       I40E_CAP_PHY_TYPE_1000BASE_LX           = BIT(I40E_PHY_TYPE_1000BASE_LX),
-       I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL    = BIT(I40E_PHY_TYPE_1000BASE_T_OPTICAL),
-       I40E_CAP_PHY_TYPE_20GBASE_KR2           = BIT(I40E_PHY_TYPE_20GBASE_KR2)
-};
-
 struct i40e_phy_info {
        struct i40e_link_status link_info;
        struct i40e_link_status link_info_old;
        bool get_link_info;
        enum i40e_media_type media_type;
        /* all the phy types the NVM is capable of */
-       u32 phy_types;
-};
-
+       u64 phy_types;
+};
+
+#define I40E_CAP_PHY_TYPE_SGMII BIT_ULL(I40E_PHY_TYPE_SGMII)
+#define I40E_CAP_PHY_TYPE_1000BASE_KX BIT_ULL(I40E_PHY_TYPE_1000BASE_KX)
+#define I40E_CAP_PHY_TYPE_10GBASE_KX4 BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4)
+#define I40E_CAP_PHY_TYPE_10GBASE_KR BIT_ULL(I40E_PHY_TYPE_10GBASE_KR)
+#define I40E_CAP_PHY_TYPE_40GBASE_KR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4)
+#define I40E_CAP_PHY_TYPE_XAUI BIT_ULL(I40E_PHY_TYPE_XAUI)
+#define I40E_CAP_PHY_TYPE_XFI BIT_ULL(I40E_PHY_TYPE_XFI)
+#define I40E_CAP_PHY_TYPE_SFI BIT_ULL(I40E_PHY_TYPE_SFI)
+#define I40E_CAP_PHY_TYPE_XLAUI BIT_ULL(I40E_PHY_TYPE_XLAUI)
+#define I40E_CAP_PHY_TYPE_XLPPI BIT_ULL(I40E_PHY_TYPE_XLPPI)
+#define I40E_CAP_PHY_TYPE_40GBASE_CR4_CU BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU)
+#define I40E_CAP_PHY_TYPE_10GBASE_CR1_CU BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU)
+#define I40E_CAP_PHY_TYPE_10GBASE_AOC BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC)
+#define I40E_CAP_PHY_TYPE_40GBASE_AOC BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC)
+#define I40E_CAP_PHY_TYPE_100BASE_TX BIT_ULL(I40E_PHY_TYPE_100BASE_TX)
+#define I40E_CAP_PHY_TYPE_1000BASE_T BIT_ULL(I40E_PHY_TYPE_1000BASE_T)
+#define I40E_CAP_PHY_TYPE_10GBASE_T BIT_ULL(I40E_PHY_TYPE_10GBASE_T)
+#define I40E_CAP_PHY_TYPE_10GBASE_SR BIT_ULL(I40E_PHY_TYPE_10GBASE_SR)
+#define I40E_CAP_PHY_TYPE_10GBASE_LR BIT_ULL(I40E_PHY_TYPE_10GBASE_LR)
+#define I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU)
+#define I40E_CAP_PHY_TYPE_10GBASE_CR1 BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1)
+#define I40E_CAP_PHY_TYPE_40GBASE_CR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4)
+#define I40E_CAP_PHY_TYPE_40GBASE_SR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4)
+#define I40E_CAP_PHY_TYPE_40GBASE_LR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4)
+#define I40E_CAP_PHY_TYPE_1000BASE_SX BIT_ULL(I40E_PHY_TYPE_1000BASE_SX)
+#define I40E_CAP_PHY_TYPE_1000BASE_LX BIT_ULL(I40E_PHY_TYPE_1000BASE_LX)
+#define I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL)
+#define I40E_CAP_PHY_TYPE_20GBASE_KR2 BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2)
+/*
+ * The macro I40E_PHY_TYPE_OFFSET below implements a bit shift for some
+ * PHY types. There is an unused bit (31) in the I40E_CAP_PHY_TYPE_* bit
+ * fields but no corresponding gap in the i40e_aq_phy_type enumeration. So,
+ * a shift is needed to adjust for this with values larger than 31. The
+ * only affected values are I40E_PHY_TYPE_25GBASE_*.
+ */
+#define I40E_PHY_TYPE_OFFSET 1
+#define I40E_CAP_PHY_TYPE_25GBASE_KR BIT_ULL(I40E_PHY_TYPE_25GBASE_KR + \
+                                            I40E_PHY_TYPE_OFFSET)
+#define I40E_CAP_PHY_TYPE_25GBASE_CR BIT_ULL(I40E_PHY_TYPE_25GBASE_CR + \
+                                            I40E_PHY_TYPE_OFFSET)
+#define I40E_CAP_PHY_TYPE_25GBASE_SR BIT_ULL(I40E_PHY_TYPE_25GBASE_SR + \
+                                            I40E_PHY_TYPE_OFFSET)
+#define I40E_CAP_PHY_TYPE_25GBASE_LR BIT_ULL(I40E_PHY_TYPE_25GBASE_LR + \
+                                            I40E_PHY_TYPE_OFFSET)
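A worked instance of the offset (assuming I40E_PHY_TYPE_25GBASE_KR is 0x1F in the matching i40e_adminq_cmd.h, which is not shown here, and BIT_ULL(n) is 1ULL << n):

        /* phy type 0x1F (31) + offset 1 -> capability bit 32;
         * bit 31 of the 64-bit phy_types word stays unused, as the
         * comment above describes */
        u64 cap = BIT_ULL(I40E_PHY_TYPE_25GBASE_KR + I40E_PHY_TYPE_OFFSET);
        /* cap == 1ULL << 32 */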
 #define I40E_HW_CAP_MAX_GPIO                   30
 #define I40E_HW_CAP_MDIO_PORT_MODE_MDIO                0
 #define I40E_HW_CAP_MDIO_PORT_MODE_I2C         1
 
-#ifdef X722_SUPPORT
 enum i40e_acpi_programming_method {
        I40E_ACPI_PROGRAMMING_METHOD_HW_FVL = 0,
        I40E_ACPI_PROGRAMMING_METHOD_AQC_FPK = 1
 };
 
-#define I40E_WOL_SUPPORT_MASK                  1
-#define I40E_ACPI_PROGRAMMING_METHOD_MASK      (1 << 1)
-#define I40E_PROXY_SUPPORT_MASK                        (1 << 2)
+#define I40E_WOL_SUPPORT_MASK                  0x1
+#define I40E_ACPI_PROGRAMMING_METHOD_MASK      0x2
+#define I40E_PROXY_SUPPORT_MASK                        0x4
 
-#endif
 /* Capabilities of a PF or a VF or the whole device */
 struct i40e_hw_capabilities {
        u32  switch_mode;
@@ -355,6 +372,10 @@ struct i40e_hw_capabilities {
 #define I40E_NVM_IMAGE_TYPE_UDP_CLOUD  0x3
 
        u32  management_mode;
+       u32  mng_protocols_over_mctp;
+#define I40E_MNG_PROTOCOL_PLDM         0x2
+#define I40E_MNG_PROTOCOL_OEM_COMMANDS 0x4
+#define I40E_MNG_PROTOCOL_NCSI         0x8
        u32  npar_enable;
        u32  os2bmc;
        u32  valid_functions;
@@ -410,11 +431,9 @@ struct i40e_hw_capabilities {
        u32 enabled_tcmap;
        u32 maxtc;
        u64 wr_csr_prot;
-#ifdef X722_SUPPORT
        bool apm_wol_support;
        enum i40e_acpi_programming_method acpi_prog_method;
        bool proxy_support;
-#endif
 };
 
 struct i40e_mac_info {
@@ -472,6 +491,7 @@ enum i40e_nvmupd_state {
        I40E_NVMUPD_STATE_WRITING,
        I40E_NVMUPD_STATE_INIT_WAIT,
        I40E_NVMUPD_STATE_WRITE_WAIT,
+       I40E_NVMUPD_STATE_ERROR
 };
 
 /* nvm_access definition and its masks/shifts need to be accessible to
@@ -550,6 +570,7 @@ struct i40e_bus_info {
        u16 func;
        u16 device;
        u16 lan_id;
+       u16 bus_id;
 };
 
 /* Flow control (FC) parameters */
@@ -674,30 +695,22 @@ struct i40e_hw {
        struct i40e_dcbx_config remote_dcbx_config; /* Peer Cfg */
        struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */
 
-#ifdef X722_SUPPORT
        /* WoL and proxy support */
        u16 num_wol_proxy_filters;
        u16 wol_proxy_vsi_seid;
 
-#endif
 #define I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE BIT_ULL(0)
        u64 flags;
 
        /* debug mask */
        u32 debug_mask;
-#ifndef I40E_NDIS_SUPPORT
        char err_str[16];
-#endif /* I40E_NDIS_SUPPORT */
 };
 
 STATIC INLINE bool i40e_is_vf(struct i40e_hw *hw)
 {
-#ifdef X722_SUPPORT
        return (hw->mac.type == I40E_MAC_VF ||
                hw->mac.type == I40E_MAC_X722_VF);
-#else
-       return hw->mac.type == I40E_MAC_VF;
-#endif
 }
 
 struct i40e_driver_version {
@@ -801,11 +814,7 @@ enum i40e_rx_desc_status_bits {
        I40E_RX_DESC_STATUS_CRCP_SHIFT          = 4,
        I40E_RX_DESC_STATUS_TSYNINDX_SHIFT      = 5, /* 2 BITS */
        I40E_RX_DESC_STATUS_TSYNVALID_SHIFT     = 7,
-#ifdef X722_SUPPORT
        I40E_RX_DESC_STATUS_EXT_UDP_0_SHIFT     = 8,
-#else
-       I40E_RX_DESC_STATUS_RESERVED1_SHIFT     = 8,
-#endif
 
        I40E_RX_DESC_STATUS_UMBCAST_SHIFT       = 9, /* 2 BITS */
        I40E_RX_DESC_STATUS_FLM_SHIFT           = 11,
@@ -813,11 +822,7 @@ enum i40e_rx_desc_status_bits {
        I40E_RX_DESC_STATUS_LPBK_SHIFT          = 14,
        I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT     = 15,
        I40E_RX_DESC_STATUS_RESERVED2_SHIFT     = 16, /* 2 BITS */
-#ifdef X722_SUPPORT
        I40E_RX_DESC_STATUS_INT_UDP_0_SHIFT     = 18,
-#else
-       I40E_RX_DESC_STATUS_UDP_0_SHIFT         = 18,
-#endif
        I40E_RX_DESC_STATUS_LAST /* this entry must be last!!! */
 };
 
@@ -1195,10 +1200,8 @@ enum i40e_tx_ctx_desc_eipt_offload {
 #define I40E_TXD_CTX_QW0_DECTTL_MASK   (0xFULL << \
                                         I40E_TXD_CTX_QW0_DECTTL_SHIFT)
 
-#ifdef X722_SUPPORT
 #define I40E_TXD_CTX_QW0_L4T_CS_SHIFT  23
 #define I40E_TXD_CTX_QW0_L4T_CS_MASK   BIT_ULL(I40E_TXD_CTX_QW0_L4T_CS_SHIFT)
-#endif
 struct i40e_nop_desc {
        __le64 rsvd;
        __le64 dtype_cmd;
@@ -1235,38 +1238,24 @@ struct i40e_filter_program_desc {
 
 /* Packet Classifier Types for filters */
 enum i40e_filter_pctype {
-#ifdef X722_SUPPORT
        /* Note: Values 0-28 are reserved for future use.
         * Value 29, 30, 32 are not supported on XL710 and X710.
         */
        I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP        = 29,
        I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP      = 30,
-#else
-       /* Note: Values 0-30 are reserved for future use */
-#endif
        I40E_FILTER_PCTYPE_NONF_IPV4_UDP                = 31,
-#ifdef X722_SUPPORT
        I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK     = 32,
-#else
-       /* Note: Value 32 is reserved for future use */
-#endif
        I40E_FILTER_PCTYPE_NONF_IPV4_TCP                = 33,
        I40E_FILTER_PCTYPE_NONF_IPV4_SCTP               = 34,
        I40E_FILTER_PCTYPE_NONF_IPV4_OTHER              = 35,
        I40E_FILTER_PCTYPE_FRAG_IPV4                    = 36,
-#ifdef X722_SUPPORT
        /* Note: Values 37-38 are reserved for future use.
         * Value 39, 40, 42 are not supported on XL710 and X710.
         */
        I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP        = 39,
        I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP      = 40,
-#else
-       /* Note: Values 37-40 are reserved for future use */
-#endif
        I40E_FILTER_PCTYPE_NONF_IPV6_UDP                = 41,
-#ifdef X722_SUPPORT
        I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK     = 42,
-#endif
        I40E_FILTER_PCTYPE_NONF_IPV6_TCP                = 43,
        I40E_FILTER_PCTYPE_NONF_IPV6_SCTP               = 44,
        I40E_FILTER_PCTYPE_NONF_IPV6_OTHER              = 45,
@@ -1321,12 +1310,10 @@ enum i40e_filter_program_desc_pcmd {
                                                 I40E_TXD_FLTR_QW1_CMD_SHIFT)
 #define I40E_TXD_FLTR_QW1_FD_STATUS_MASK (0x3ULL << \
                                          I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT)
-#ifdef X722_SUPPORT
 
 #define I40E_TXD_FLTR_QW1_ATR_SHIFT    (0xEULL + \
                                         I40E_TXD_FLTR_QW1_CMD_SHIFT)
 #define I40E_TXD_FLTR_QW1_ATR_MASK     BIT_ULL(I40E_TXD_FLTR_QW1_ATR_SHIFT)
-#endif
 
 #define I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT 20
 #define I40E_TXD_FLTR_QW1_CNTINDEX_MASK        (0x1FFUL << \
@@ -1388,6 +1375,23 @@ struct i40e_veb_tc_stats {
        u64 tc_tx_bytes[I40E_MAX_TRAFFIC_CLASS];
 };
 
+/* Statistics collected per function for FCoE */
+struct i40e_fcoe_stats {
+       u64 rx_fcoe_packets;            /* fcoeprc */
+       u64 rx_fcoe_dwords;             /* fcoedwrc */
+       u64 rx_fcoe_dropped;            /* fcoerpdc */
+       u64 tx_fcoe_packets;            /* fcoeptc */
+       u64 tx_fcoe_dwords;             /* fcoedwtc */
+       u64 fcoe_bad_fccrc;             /* fcoecrc */
+       u64 fcoe_last_error;            /* fcoelast */
+       u64 fcoe_ddp_count;             /* fcoeddpc */
+};
+
+/* offset to per function FCoE statistics block */
+#define I40E_FCOE_VF_STAT_OFFSET       0
+#define I40E_FCOE_PF_STAT_OFFSET       128
+#define I40E_FCOE_STAT_MAX             (I40E_FCOE_PF_STAT_OFFSET + I40E_MAX_PF)
+
 /* Statistics collected by the MAC */
 struct i40e_hw_port_stats {
        /* eth stats collected by the port */
@@ -1481,6 +1485,7 @@ struct i40e_hw_port_stats {
 #define I40E_SR_EMPR_REGS_AUTO_LOAD_PTR                0x3A
 #define I40E_SR_GLOBR_REGS_AUTO_LOAD_PTR       0x3B
 #define I40E_SR_CORER_REGS_AUTO_LOAD_PTR       0x3C
+#define I40E_SR_PHY_ACTIVITY_LIST_PTR          0x3D
 #define I40E_SR_PCIE_ALT_AUTO_LOAD_PTR         0x3E
 #define I40E_SR_SW_CHECKSUM_WORD               0x3F
 #define I40E_SR_1ST_FREE_PROVISION_AREA_PTR    0x40
@@ -1509,6 +1514,208 @@ struct i40e_hw_port_stats {
 
 #define I40E_SRRD_SRCTL_ATTEMPTS       100000
 
+/* FCoE Tx context descriptor - Use the i40e_tx_context_desc struct */
+
+enum i40E_fcoe_tx_ctx_desc_cmd_bits {
+       I40E_FCOE_TX_CTX_DESC_OPCODE_SINGLE_SEND        = 0x00, /* 4 BITS */
+       I40E_FCOE_TX_CTX_DESC_OPCODE_TSO_FC_CLASS2      = 0x01, /* 4 BITS */
+       I40E_FCOE_TX_CTX_DESC_OPCODE_TSO_FC_CLASS3      = 0x05, /* 4 BITS */
+       I40E_FCOE_TX_CTX_DESC_OPCODE_ETSO_FC_CLASS2     = 0x02, /* 4 BITS */
+       I40E_FCOE_TX_CTX_DESC_OPCODE_ETSO_FC_CLASS3     = 0x06, /* 4 BITS */
+       I40E_FCOE_TX_CTX_DESC_OPCODE_DWO_FC_CLASS2      = 0x03, /* 4 BITS */
+       I40E_FCOE_TX_CTX_DESC_OPCODE_DWO_FC_CLASS3      = 0x07, /* 4 BITS */
+       I40E_FCOE_TX_CTX_DESC_OPCODE_DDP_CTX_INVL       = 0x08, /* 4 BITS */
+       I40E_FCOE_TX_CTX_DESC_OPCODE_DWO_CTX_INVL       = 0x09, /* 4 BITS */
+       I40E_FCOE_TX_CTX_DESC_RELOFF                    = 0x10,
+       I40E_FCOE_TX_CTX_DESC_CLRSEQ                    = 0x20,
+       I40E_FCOE_TX_CTX_DESC_DIFENA                    = 0x40,
+       I40E_FCOE_TX_CTX_DESC_IL2TAG2                   = 0x80
+};
+
+/* FCoE DIF/DIX Context descriptor */
+struct i40e_fcoe_difdix_context_desc {
+       __le64 flags_buff0_buff1_ref;
+       __le64 difapp_msk_bias;
+};
+
+#define I40E_FCOE_DIFDIX_CTX_QW0_FLAGS_SHIFT   0
+#define I40E_FCOE_DIFDIX_CTX_QW0_FLAGS_MASK    (0xFFFULL << \
+                                       I40E_FCOE_DIFDIX_CTX_QW0_FLAGS_SHIFT)
+
+enum i40e_fcoe_difdix_ctx_desc_flags_bits {
+       /* 2 BITS */
+       I40E_FCOE_DIFDIX_CTX_DESC_RSVD                          = 0x0000,
+       /* 1 BIT  */
+       I40E_FCOE_DIFDIX_CTX_DESC_APPTYPE_TAGCHK                = 0x0000,
+       /* 1 BIT  */
+       I40E_FCOE_DIFDIX_CTX_DESC_APPTYPE_TAGNOTCHK             = 0x0004,
+       /* 2 BITS */
+       I40E_FCOE_DIFDIX_CTX_DESC_GTYPE_OPAQUE                  = 0x0000,
+       /* 2 BITS */
+       I40E_FCOE_DIFDIX_CTX_DESC_GTYPE_CHKINTEGRITY            = 0x0008,
+       /* 2 BITS */
+       I40E_FCOE_DIFDIX_CTX_DESC_GTYPE_CHKINTEGRITY_APPTAG     = 0x0010,
+       /* 2 BITS */
+       I40E_FCOE_DIFDIX_CTX_DESC_GTYPE_CHKINTEGRITY_APPREFTAG  = 0x0018,
+       /* 2 BITS */
+       I40E_FCOE_DIFDIX_CTX_DESC_REFTYPE_CNST                  = 0x0000,
+       /* 2 BITS */
+       I40E_FCOE_DIFDIX_CTX_DESC_REFTYPE_INC1BLK               = 0x0020,
+       /* 2 BITS */
+       I40E_FCOE_DIFDIX_CTX_DESC_REFTYPE_APPTAG                = 0x0040,
+       /* 2 BITS */
+       I40E_FCOE_DIFDIX_CTX_DESC_REFTYPE_RSVD                  = 0x0060,
+       /* 1 BIT  */
+       I40E_FCOE_DIFDIX_CTX_DESC_DIXMODE_XSUM                  = 0x0000,
+       /* 1 BIT  */
+       I40E_FCOE_DIFDIX_CTX_DESC_DIXMODE_CRC                   = 0x0080,
+       /* 2 BITS */
+       I40E_FCOE_DIFDIX_CTX_DESC_DIFHOST_UNTAG                 = 0x0000,
+       /* 2 BITS */
+       I40E_FCOE_DIFDIX_CTX_DESC_DIFHOST_BUF                   = 0x0100,
+       /* 2 BITS */
+       I40E_FCOE_DIFDIX_CTX_DESC_DIFHOST_RSVD                  = 0x0200,
+       /* 2 BITS */
+       I40E_FCOE_DIFDIX_CTX_DESC_DIFHOST_EMBDTAGS              = 0x0300,
+       /* 1 BIT  */
+       I40E_FCOE_DIFDIX_CTX_DESC_DIFLAN_UNTAG                  = 0x0000,
+       /* 1 BIT  */
+       I40E_FCOE_DIFDIX_CTX_DESC_DIFLAN_TAG                    = 0x0400,
+       /* 1 BIT */
+       I40E_FCOE_DIFDIX_CTX_DESC_DIFBLK_512B                   = 0x0000,
+       /* 1 BIT */
+       I40E_FCOE_DIFDIX_CTX_DESC_DIFBLK_4K                     = 0x0800
+};
+
+#define I40E_FCOE_DIFDIX_CTX_QW0_BUFF0_SHIFT   12
+#define I40E_FCOE_DIFDIX_CTX_QW0_BUFF0_MASK    (0x3FFULL << \
+                                       I40E_FCOE_DIFDIX_CTX_QW0_BUFF0_SHIFT)
+
+#define I40E_FCOE_DIFDIX_CTX_QW0_BUFF1_SHIFT   22
+#define I40E_FCOE_DIFDIX_CTX_QW0_BUFF1_MASK    (0x3FFULL << \
+                                       I40E_FCOE_DIFDIX_CTX_QW0_BUFF1_SHIFT)
+
+#define I40E_FCOE_DIFDIX_CTX_QW0_REF_SHIFT     32
+#define I40E_FCOE_DIFDIX_CTX_QW0_REF_MASK      (0xFFFFFFFFULL << \
+                                       I40E_FCOE_DIFDIX_CTX_QW0_REF_SHIFT)
+
+#define I40E_FCOE_DIFDIX_CTX_QW1_APP_SHIFT     0
+#define I40E_FCOE_DIFDIX_CTX_QW1_APP_MASK      (0xFFFFULL << \
+                                       I40E_FCOE_DIFDIX_CTX_QW1_APP_SHIFT)
+
+#define I40E_FCOE_DIFDIX_CTX_QW1_APP_MSK_SHIFT 16
+#define I40E_FCOE_DIFDIX_CTX_QW1_APP_MSK_MASK  (0xFFFFULL << \
+                                       I40E_FCOE_DIFDIX_CTX_QW1_APP_MSK_SHIFT)
+
+#define I40E_FCOE_DIFDIX_CTX_QW1_REF_BIAS_SHIFT        32
+#define I40E_FCOE_DIFDIX_CTX_QW0_REF_BIAS_MASK (0xFFFFFFFFULL << \
+                                       I40E_FCOE_DIFDIX_CTX_QW1_REF_BIAS_SHIFT)
+
+/* FCoE DIF/DIX Buffers descriptor */
+struct i40e_fcoe_difdix_buffers_desc {
+       __le64 buff_addr0;
+       __le64 buff_addr1;
+};
+
+/* FCoE DDP Context descriptor */
+struct i40e_fcoe_ddp_context_desc {
+       __le64 rsvd;
+       __le64 type_cmd_foff_lsize;
+};
+
+#define I40E_FCOE_DDP_CTX_QW1_DTYPE_SHIFT      0
+#define I40E_FCOE_DDP_CTX_QW1_DTYPE_MASK       (0xFULL << \
+                                       I40E_FCOE_DDP_CTX_QW1_DTYPE_SHIFT)
+
+#define I40E_FCOE_DDP_CTX_QW1_CMD_SHIFT        4
+#define I40E_FCOE_DDP_CTX_QW1_CMD_MASK (0xFULL << \
+                                        I40E_FCOE_DDP_CTX_QW1_CMD_SHIFT)
+
+enum i40e_fcoe_ddp_ctx_desc_cmd_bits {
+       I40E_FCOE_DDP_CTX_DESC_BSIZE_512B       = 0x00, /* 2 BITS */
+       I40E_FCOE_DDP_CTX_DESC_BSIZE_4K         = 0x01, /* 2 BITS */
+       I40E_FCOE_DDP_CTX_DESC_BSIZE_8K         = 0x02, /* 2 BITS */
+       I40E_FCOE_DDP_CTX_DESC_BSIZE_16K        = 0x03, /* 2 BITS */
+       I40E_FCOE_DDP_CTX_DESC_DIFENA           = 0x04, /* 1 BIT  */
+       I40E_FCOE_DDP_CTX_DESC_LASTSEQH         = 0x08, /* 1 BIT  */
+};
+
+#define I40E_FCOE_DDP_CTX_QW1_FOFF_SHIFT       16
+#define I40E_FCOE_DDP_CTX_QW1_FOFF_MASK        (0x3FFFULL << \
+                                        I40E_FCOE_DDP_CTX_QW1_FOFF_SHIFT)
+
+#define I40E_FCOE_DDP_CTX_QW1_LSIZE_SHIFT      32
+#define I40E_FCOE_DDP_CTX_QW1_LSIZE_MASK       (0x3FFFULL << \
+                                       I40E_FCOE_DDP_CTX_QW1_LSIZE_SHIFT)
+
+/* FCoE DDP/DWO Queue Context descriptor */
+struct i40e_fcoe_queue_context_desc {
+       __le64 dmaindx_fbase;           /* 0:11 DMAINDX, 12:63 FBASE */
+       __le64 flen_tph;                /* 0:12 FLEN, 13:15 TPH */
+};
+
+#define I40E_FCOE_QUEUE_CTX_QW0_DMAINDX_SHIFT  0
+#define I40E_FCOE_QUEUE_CTX_QW0_DMAINDX_MASK   (0xFFFULL << \
+                                       I40E_FCOE_QUEUE_CTX_QW0_DMAINDX_SHIFT)
+
+#define I40E_FCOE_QUEUE_CTX_QW0_FBASE_SHIFT    12
+#define I40E_FCOE_QUEUE_CTX_QW0_FBASE_MASK     (0xFFFFFFFFFFFFFULL << \
+                                       I40E_FCOE_QUEUE_CTX_QW0_FBASE_SHIFT)
+
+#define I40E_FCOE_QUEUE_CTX_QW1_FLEN_SHIFT     0
+#define I40E_FCOE_QUEUE_CTX_QW1_FLEN_MASK      (0x1FFFULL << \
+                                       I40E_FCOE_QUEUE_CTX_QW1_FLEN_SHIFT)
+
+#define I40E_FCOE_QUEUE_CTX_QW1_TPH_SHIFT      13
+#define I40E_FCOE_QUEUE_CTX_QW1_TPH_MASK       (0x7ULL << \
+                                       I40E_FCOE_QUEUE_CTX_QW1_FLEN_SHIFT)
+
+enum i40e_fcoe_queue_ctx_desc_tph_bits {
+       I40E_FCOE_QUEUE_CTX_DESC_TPHRDESC       = 0x1,
+       I40E_FCOE_QUEUE_CTX_DESC_TPHDATA        = 0x2
+};
+
+#define I40E_FCOE_QUEUE_CTX_QW1_RECIPE_SHIFT   30
+#define I40E_FCOE_QUEUE_CTX_QW1_RECIPE_MASK    (0x3ULL << \
+                                       I40E_FCOE_QUEUE_CTX_QW1_RECIPE_SHIFT)
+
+/* FCoE DDP/DWO Filter Context descriptor */
+struct i40e_fcoe_filter_context_desc {
+       __le32 param;
+       __le16 seqn;
+
+       /* 48:51(0:3) RSVD, 52:63(4:15) DMAINDX */
+       __le16 rsvd_dmaindx;
+
+       /* 0:7 FLAGS, 8:52 RSVD, 53:63 LANQ */
+       __le64 flags_rsvd_lanq;
+};
+
+#define I40E_FCOE_FILTER_CTX_QW0_DMAINDX_SHIFT 4
+#define I40E_FCOE_FILTER_CTX_QW0_DMAINDX_MASK  (0xFFF << \
+                                       I40E_FCOE_FILTER_CTX_QW0_DMAINDX_SHIFT)
+
+enum i40e_fcoe_filter_ctx_desc_flags_bits {
+       I40E_FCOE_FILTER_CTX_DESC_CTYP_DDP      = 0x00,
+       I40E_FCOE_FILTER_CTX_DESC_CTYP_DWO      = 0x01,
+       I40E_FCOE_FILTER_CTX_DESC_ENODE_INIT    = 0x00,
+       I40E_FCOE_FILTER_CTX_DESC_ENODE_RSP     = 0x02,
+       I40E_FCOE_FILTER_CTX_DESC_FC_CLASS2     = 0x00,
+       I40E_FCOE_FILTER_CTX_DESC_FC_CLASS3     = 0x04
+};
+
+#define I40E_FCOE_FILTER_CTX_QW1_FLAGS_SHIFT   0
+#define I40E_FCOE_FILTER_CTX_QW1_FLAGS_MASK    (0xFFULL << \
+                                       I40E_FCOE_FILTER_CTX_QW1_FLAGS_SHIFT)
+
+#define I40E_FCOE_FILTER_CTX_QW1_PCTYPE_SHIFT     8
+#define I40E_FCOE_FILTER_CTX_QW1_PCTYPE_MASK      (0x3FULL << \
+                       I40E_FCOE_FILTER_CTX_QW1_PCTYPE_SHIFT)
+
+#define I40E_FCOE_FILTER_CTX_QW1_LANQINDX_SHIFT     53
+#define I40E_FCOE_FILTER_CTX_QW1_LANQINDX_MASK      (0x7FFULL << \
+                       I40E_FCOE_FILTER_CTX_QW1_LANQINDX_SHIFT)
+
 enum i40e_switch_element_types {
        I40E_SWITCH_ELEMENT_TYPE_MAC    = 1,
        I40E_SWITCH_ELEMENT_TYPE_PF     = 2,
index fd51ec3..8fba608 100644
@@ -170,6 +170,11 @@ struct i40e_virtchnl_vsi_resource {
 #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING    0x00020000
 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000
 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF                0X00080000
+#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM    0X00100000
+
+#define I40E_VF_BASE_MODE_OFFLOADS (I40E_VIRTCHNL_VF_OFFLOAD_L2 | \
+                                   I40E_VIRTCHNL_VF_OFFLOAD_VLAN | \
+                                   I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF)
 
 struct i40e_virtchnl_vf_resource {
        u16 num_vsis;
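A short sketch of how a VF driver might test the new aggregate after GET_VF_RESOURCES (illustrative only; the vf_offload_flags field name is assumed from the rest of this struct, which is not shown in the hunk):

static bool
vf_supports_base_mode(const struct i40e_virtchnl_vf_resource *res)
{
        return (res->vf_offload_flags & I40E_VF_BASE_MODE_OFFLOADS) ==
               I40E_VF_BASE_MODE_OFFLOADS;
}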
index ca1a480..4492bcc 100644
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -51,6 +51,7 @@
 #include <rte_dev.h>
 #include <rte_eth_ctrl.h>
 #include <rte_tailq.h>
+#include <rte_hash_crc.h>
 
 #include "i40e_logs.h"
 #include "base/i40e_prototype.h"
@@ -62,6 +63,7 @@
 #include "i40e_rxtx.h"
 #include "i40e_pf.h"
 #include "i40e_regs.h"
+#include "rte_pmd_i40e.h"
 
 #define ETH_I40E_FLOATING_VEB_ARG      "enable_floating_veb"
 #define ETH_I40E_FLOATING_VEB_LIST_ARG "floating_veb_list"
                I40E_PFINT_ICR0_ENA_GRST_MASK | \
                I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK | \
                I40E_PFINT_ICR0_ENA_STORM_DETECT_MASK | \
-               I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_MASK | \
                I40E_PFINT_ICR0_ENA_HMC_ERR_MASK | \
                I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK | \
                I40E_PFINT_ICR0_ENA_VFLR_MASK | \
 #define I40E_DEFAULT_DCB_APP_NUM    1
 #define I40E_DEFAULT_DCB_APP_PRIO   3
 
-#define I40E_INSET_NONE            0x00000000000000000ULL
-
-/* bit0 ~ bit 7 */
-#define I40E_INSET_DMAC            0x0000000000000001ULL
-#define I40E_INSET_SMAC            0x0000000000000002ULL
-#define I40E_INSET_VLAN_OUTER      0x0000000000000004ULL
-#define I40E_INSET_VLAN_INNER      0x0000000000000008ULL
-#define I40E_INSET_VLAN_TUNNEL     0x0000000000000010ULL
-
-/* bit 8 ~ bit 15 */
-#define I40E_INSET_IPV4_SRC        0x0000000000000100ULL
-#define I40E_INSET_IPV4_DST        0x0000000000000200ULL
-#define I40E_INSET_IPV6_SRC        0x0000000000000400ULL
-#define I40E_INSET_IPV6_DST        0x0000000000000800ULL
-#define I40E_INSET_SRC_PORT        0x0000000000001000ULL
-#define I40E_INSET_DST_PORT        0x0000000000002000ULL
-#define I40E_INSET_SCTP_VT         0x0000000000004000ULL
-
-/* bit 16 ~ bit 31 */
-#define I40E_INSET_IPV4_TOS        0x0000000000010000ULL
-#define I40E_INSET_IPV4_PROTO      0x0000000000020000ULL
-#define I40E_INSET_IPV4_TTL        0x0000000000040000ULL
-#define I40E_INSET_IPV6_TC         0x0000000000080000ULL
-#define I40E_INSET_IPV6_FLOW       0x0000000000100000ULL
-#define I40E_INSET_IPV6_NEXT_HDR   0x0000000000200000ULL
-#define I40E_INSET_IPV6_HOP_LIMIT  0x0000000000400000ULL
-#define I40E_INSET_TCP_FLAGS       0x0000000000800000ULL
-
-/* bit 32 ~ bit 47, tunnel fields */
-#define I40E_INSET_TUNNEL_IPV4_DST       0x0000000100000000ULL
-#define I40E_INSET_TUNNEL_IPV6_DST       0x0000000200000000ULL
-#define I40E_INSET_TUNNEL_DMAC           0x0000000400000000ULL
-#define I40E_INSET_TUNNEL_SRC_PORT       0x0000000800000000ULL
-#define I40E_INSET_TUNNEL_DST_PORT       0x0000001000000000ULL
-#define I40E_INSET_TUNNEL_ID             0x0000002000000000ULL
-
-/* bit 48 ~ bit 55 */
-#define I40E_INSET_LAST_ETHER_TYPE 0x0001000000000000ULL
-
-/* bit 56 ~ bit 63, Flex Payload */
-#define I40E_INSET_FLEX_PAYLOAD_W1 0x0100000000000000ULL
-#define I40E_INSET_FLEX_PAYLOAD_W2 0x0200000000000000ULL
-#define I40E_INSET_FLEX_PAYLOAD_W3 0x0400000000000000ULL
-#define I40E_INSET_FLEX_PAYLOAD_W4 0x0800000000000000ULL
-#define I40E_INSET_FLEX_PAYLOAD_W5 0x1000000000000000ULL
-#define I40E_INSET_FLEX_PAYLOAD_W6 0x2000000000000000ULL
-#define I40E_INSET_FLEX_PAYLOAD_W7 0x4000000000000000ULL
-#define I40E_INSET_FLEX_PAYLOAD_W8 0x8000000000000000ULL
-#define I40E_INSET_FLEX_PAYLOAD \
-       (I40E_INSET_FLEX_PAYLOAD_W1 | I40E_INSET_FLEX_PAYLOAD_W2 | \
-       I40E_INSET_FLEX_PAYLOAD_W3 | I40E_INSET_FLEX_PAYLOAD_W4 | \
-       I40E_INSET_FLEX_PAYLOAD_W5 | I40E_INSET_FLEX_PAYLOAD_W6 | \
-       I40E_INSET_FLEX_PAYLOAD_W7 | I40E_INSET_FLEX_PAYLOAD_W8)
-
 /**
  * Below are values for writing un-exposed registers suggested
  * by silicon experts
 /* Source MAC address */
 #define I40E_REG_INSET_L2_SMAC                   0x1C00000000000000ULL
 /* Outer (S-Tag) VLAN tag in the outer L2 header */
-#define I40E_REG_INSET_L2_OUTER_VLAN             0x0200000000000000ULL
+#define I40E_REG_INSET_L2_OUTER_VLAN             0x0000000004000000ULL
 /* Inner (C-Tag) or single VLAN tag in the outer L2 header */
 #define I40E_REG_INSET_L2_INNER_VLAN             0x0080000000000000ULL
 /* Single VLAN tag in the inner L2 header */
 #define I40E_REG_INSET_L3_SRC_IP4                0x0001800000000000ULL
 /* Destination IPv4 address */
 #define I40E_REG_INSET_L3_DST_IP4                0x0000001800000000ULL
+/* Source IPv4 address for X722 */
+#define I40E_X722_REG_INSET_L3_SRC_IP4           0x0006000000000000ULL
+/* Destination IPv4 address for X722 */
+#define I40E_X722_REG_INSET_L3_DST_IP4           0x0000060000000000ULL
+/* IPv4 Protocol for X722 */
+#define I40E_X722_REG_INSET_L3_IP4_PROTO         0x0010000000000000ULL
+/* IPv4 Time to Live for X722 */
+#define I40E_X722_REG_INSET_L3_IP4_TTL           0x0010000000000000ULL
 /* IPv4 Type of Service (TOS) */
 #define I40E_REG_INSET_L3_IP4_TOS                0x0040000000000000ULL
 /* IPv4 Protocol */
 #define I40E_INSET_IPV6_HOP_LIMIT_MASK  0x000CFF00UL
 #define I40E_INSET_IPV6_NEXT_HDR_MASK   0x000C00FFUL
 
-#define I40E_GL_SWT_L2TAGCTRL(_i)             (0x001C0A70 + ((_i) * 4))
-#define I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_SHIFT 16
-#define I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_MASK  \
-       I40E_MASK(0xFFFF, I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_SHIFT)
-
 /* PCI offset for querying capability */
 #define PCI_DEV_CAP_REG            0xA4
 /* PCI offset for enabling/disabling Extended Tag */
@@ -317,6 +267,8 @@ static int i40e_dev_queue_stats_mapping_set(struct rte_eth_dev *dev,
                                            uint16_t queue_id,
                                            uint8_t stat_idx,
                                            uint8_t is_rx);
+static int i40e_fw_version_get(struct rte_eth_dev *dev,
+                               char *fw_version, size_t fw_size);
 static void i40e_dev_info_get(struct rte_eth_dev *dev,
                              struct rte_eth_dev_info *dev_info);
 static int i40e_vlan_filter_set(struct rte_eth_dev *dev,
@@ -366,8 +318,8 @@ static void i40e_stat_update_48(struct i40e_hw *hw,
                               uint64_t *offset,
                               uint64_t *stat);
 static void i40e_pf_config_irq0(struct i40e_hw *hw, bool no_queue);
-static void i40e_dev_interrupt_handler(
-               __rte_unused struct rte_intr_handle *handle, void *param);
+static void i40e_dev_interrupt_handler(struct rte_intr_handle *handle,
+                                      void *param);
 static int i40e_res_pool_init(struct i40e_res_pool_info *pool,
                                uint32_t base, uint32_t num);
 static void i40e_res_pool_destroy(struct i40e_res_pool_info *pool);
@@ -399,9 +351,6 @@ static int i40e_dev_udp_tunnel_port_add(struct rte_eth_dev *dev,
 static int i40e_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
                                        struct rte_eth_udp_tunnel *udp_tunnel);
 static void i40e_filter_input_set_init(struct i40e_pf *pf);
-static int i40e_ethertype_filter_set(struct i40e_pf *pf,
-                       struct rte_eth_ethertype_filter *filter,
-                       bool add);
 static int i40e_ethertype_filter_handle(struct rte_eth_dev *dev,
                                enum rte_filter_op filter_op,
                                void *arg);
@@ -411,6 +360,7 @@ static int i40e_dev_filter_ctrl(struct rte_eth_dev *dev,
                                void *arg);
 static int i40e_dev_get_dcb_info(struct rte_eth_dev *dev,
                                  struct rte_eth_dcb_info *dcb_info);
+static int i40e_dev_sync_phy_type(struct i40e_hw *hw);
 static void i40e_configure_registers(struct i40e_hw *hw);
 static void i40e_hw_init(struct rte_eth_dev *dev);
 static int i40e_config_qinq(struct i40e_hw *hw, struct i40e_vsi *vsi);
@@ -453,6 +403,22 @@ static void i40e_set_default_mac_addr(struct rte_eth_dev *dev,
 
 static int i40e_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
 
+static int i40e_ethertype_filter_convert(
+       const struct rte_eth_ethertype_filter *input,
+       struct i40e_ethertype_filter *filter);
+static int i40e_sw_ethertype_filter_insert(struct i40e_pf *pf,
+                                  struct i40e_ethertype_filter *filter);
+
+static int i40e_tunnel_filter_convert(
+       struct i40e_aqc_add_remove_cloud_filters_element_data *cld_filter,
+       struct i40e_tunnel_filter *tunnel_filter);
+static int i40e_sw_tunnel_filter_insert(struct i40e_pf *pf,
+                               struct i40e_tunnel_filter *tunnel_filter);
+
+static void i40e_ethertype_filter_restore(struct i40e_pf *pf);
+static void i40e_tunnel_filter_restore(struct i40e_pf *pf);
+static void i40e_filter_restore(struct i40e_pf *pf);
+
 static const struct rte_pci_id pci_id_i40e_map[] = {
        { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_XL710) },
        { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QEMU) },
@@ -474,7 +440,6 @@ static const struct rte_pci_id pci_id_i40e_map[] = {
        { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_1G_BASE_T_X722) },
        { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T_X722) },
        { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_I_X722) },
-       { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_I_X722) },
        { .vendor_id = 0, /* sentinel */ },
 };
 
@@ -496,6 +461,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops = {
        .stats_reset                  = i40e_dev_stats_reset,
        .xstats_reset                 = i40e_dev_stats_reset,
        .queue_stats_mapping_set      = i40e_dev_queue_stats_mapping_set,
+       .fw_version_get               = i40e_fw_version_get,
        .dev_infos_get                = i40e_dev_info_get,
        .dev_supported_ptypes_get     = i40e_dev_supported_ptypes_get,
        .vlan_filter_set              = i40e_vlan_filter_set,
@@ -663,10 +629,10 @@ static const struct rte_i40e_xstats_name_off rte_i40e_txq_prio_strings[] = {
 
 static struct eth_driver rte_i40e_pmd = {
        .pci_drv = {
-               .name = "rte_i40e_pmd",
                .id_table = pci_id_i40e_map,
-               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
-                       RTE_PCI_DRV_DETACHABLE,
+               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+               .probe = rte_eth_dev_pci_probe,
+               .remove = rte_eth_dev_pci_remove,
        },
        .eth_dev_init = eth_i40e_dev_init,
        .eth_dev_uninit = eth_i40e_dev_uninit,
@@ -701,33 +667,10 @@ rte_i40e_dev_atomic_write_link_status(struct rte_eth_dev *dev,
        return 0;
 }
 
-/*
- * Driver initialization routine.
- * Invoked once at EAL init time.
- * Register itself as the [Poll Mode] Driver of PCI IXGBE devices.
- */
-static int
-rte_i40e_pmd_init(const char *name __rte_unused,
-                 const char *params __rte_unused)
-{
-       PMD_INIT_FUNC_TRACE();
-       rte_eth_driver_register(&rte_i40e_pmd);
-
-       return 0;
-}
-
-static struct rte_driver rte_i40e_driver = {
-       .type = PMD_PDEV,
-       .init = rte_i40e_pmd_init,
-};
-
-PMD_REGISTER_DRIVER(rte_i40e_driver, i40e);
-DRIVER_REGISTER_PCI_TABLE(i40e, pci_id_i40e_map);
+RTE_PMD_REGISTER_PCI(net_i40e, rte_i40e_pmd.pci_drv);
+RTE_PMD_REGISTER_PCI_TABLE(net_i40e, pci_id_i40e_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_i40e, "* igb_uio | uio_pci_generic | vfio");
 
-/*
- * Initialize registers for flexible payload, which should be set by NVM.
- * This should be removed from code once it is fixed in NVM.
- */
 #ifndef I40E_GLQF_ORT
 #define I40E_GLQF_ORT(_i)    (0x00268900 + ((_i) * 4))
 #endif
@@ -735,8 +678,12 @@ DRIVER_REGISTER_PCI_TABLE(i40e, pci_id_i40e_map);
 #define I40E_GLQF_PIT(_i)    (0x00268C80 + ((_i) * 4))
 #endif
 
-static inline void i40e_flex_payload_reg_init(struct i40e_hw *hw)
+static inline void i40e_GLQF_reg_init(struct i40e_hw *hw)
 {
+       /*
+        * Initialize registers for flexible payload, which should be set by NVM.
+        * This should be removed from code once it is fixed in NVM.
+        */
        I40E_WRITE_REG(hw, I40E_GLQF_ORT(18), 0x00000030);
        I40E_WRITE_REG(hw, I40E_GLQF_ORT(19), 0x00000030);
        I40E_WRITE_REG(hw, I40E_GLQF_ORT(26), 0x0000002B);
@@ -747,17 +694,16 @@ static inline void i40e_flex_payload_reg_init(struct i40e_hw *hw)
        I40E_WRITE_REG(hw, I40E_GLQF_ORT(20), 0x00000031);
        I40E_WRITE_REG(hw, I40E_GLQF_ORT(23), 0x00000031);
        I40E_WRITE_REG(hw, I40E_GLQF_ORT(63), 0x0000002D);
-
-       /* GLQF_PIT Registers */
        I40E_WRITE_REG(hw, I40E_GLQF_PIT(16), 0x00007480);
        I40E_WRITE_REG(hw, I40E_GLQF_PIT(17), 0x00007440);
+
+       /* Initialize registers for parsing packet type of QinQ */
+       I40E_WRITE_REG(hw, I40E_GLQF_ORT(40), 0x00000029);
+       I40E_WRITE_REG(hw, I40E_GLQF_PIT(9), 0x00009420);
 }
 
 #define I40E_FLOW_CONTROL_ETHERTYPE  0x8808
 
-#define TREX_PATCH
-#define TREX_PATCH_LOW_LATENCY
-
 /*
  * Add a ethertype filter to drop all flow control frames transmitted
  * from VSIs.
@@ -776,8 +722,8 @@ i40e_add_tx_flow_control_drop_filter(struct i40e_pf *pf)
                                pf->main_vsi_seid, 0,
                                TRUE, NULL, NULL);
        if (ret)
-               PMD_INIT_LOG(ERR, "Failed to add filter to drop flow control "
-                                 " frames from VSIs.");
+               PMD_INIT_LOG(ERR,
+                       "Failed to add filter to drop flow control frames from VSIs.");
 }
 
 static int
@@ -920,25 +866,159 @@ is_floating_veb_supported(struct rte_devargs *devargs)
 static void
 config_floating_veb(struct rte_eth_dev *dev)
 {
-       struct rte_pci_device *pci_dev = dev->pci_dev;
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
        struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
        memset(pf->floating_veb_list, 0, sizeof(pf->floating_veb_list));
 
        if (hw->aq.fw_maj_ver >= FLOATING_VEB_SUPPORTED_FW_MAJ) {
-               pf->floating_veb = is_floating_veb_supported(pci_dev->devargs);
-               config_vf_floating_veb(pci_dev->devargs, pf->floating_veb,
+               pf->floating_veb =
+                       is_floating_veb_supported(pci_dev->device.devargs);
+               config_vf_floating_veb(pci_dev->device.devargs,
+                                      pf->floating_veb,
                                       pf->floating_veb_list);
        } else {
                pf->floating_veb = false;
        }
 }
 
+#define I40E_L2_TAGS_S_TAG_SHIFT 1
+#define I40E_L2_TAGS_S_TAG_MASK I40E_MASK(0x1, I40E_L2_TAGS_S_TAG_SHIFT)
+
+static int
+i40e_init_ethtype_filter_list(struct rte_eth_dev *dev)
+{
+       struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+       struct i40e_ethertype_rule *ethertype_rule = &pf->ethertype;
+       char ethertype_hash_name[RTE_HASH_NAMESIZE];
+       int ret;
+
+       struct rte_hash_parameters ethertype_hash_params = {
+               .name = ethertype_hash_name,
+               .entries = I40E_MAX_ETHERTYPE_FILTER_NUM,
+               .key_len = sizeof(struct i40e_ethertype_filter_input),
+               .hash_func = rte_hash_crc,
+       };
+
+       /* Initialize ethertype filter rule list and hash */
+       TAILQ_INIT(&ethertype_rule->ethertype_list);
+       snprintf(ethertype_hash_name, RTE_HASH_NAMESIZE,
+                "ethertype_%s", dev->data->name);
+       ethertype_rule->hash_table = rte_hash_create(&ethertype_hash_params);
+       if (!ethertype_rule->hash_table) {
+               PMD_INIT_LOG(ERR, "Failed to create ethertype hash table!");
+               return -EINVAL;
+       }
+       ethertype_rule->hash_map = rte_zmalloc("i40e_ethertype_hash_map",
+                                      sizeof(struct i40e_ethertype_filter *) *
+                                      I40E_MAX_ETHERTYPE_FILTER_NUM,
+                                      0);
+       if (!ethertype_rule->hash_map) {
+               PMD_INIT_LOG(ERR,
+                            "Failed to allocate memory for ethertype hash map!");
+               ret = -ENOMEM;
+               goto err_ethertype_hash_map_alloc;
+       }
+
+       return 0;
+
+err_ethertype_hash_map_alloc:
+       rte_hash_free(ethertype_rule->hash_table);
+
+       return ret;
+}
+
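A hypothetical helper (not part of this patch) showing how the table and map created above are meant to work together: rte_hash_add_key() returns a slot index, and hash_map stores the filter object at that index so a later lookup can recover it in O(1):

static int
example_ethertype_insert(struct i40e_ethertype_rule *rule,
                         const struct i40e_ethertype_filter_input *input,
                         struct i40e_ethertype_filter *filter)
{
        int pos = rte_hash_add_key(rule->hash_table, input);

        if (pos < 0)
                return pos;             /* hash table full or error */
        rule->hash_map[pos] = filter;   /* same index found on lookup */
        return 0;
}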
+static int
+i40e_init_tunnel_filter_list(struct rte_eth_dev *dev)
+{
+       struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+       struct i40e_tunnel_rule *tunnel_rule = &pf->tunnel;
+       char tunnel_hash_name[RTE_HASH_NAMESIZE];
+       int ret;
+
+       struct rte_hash_parameters tunnel_hash_params = {
+               .name = tunnel_hash_name,
+               .entries = I40E_MAX_TUNNEL_FILTER_NUM,
+               .key_len = sizeof(struct i40e_tunnel_filter_input),
+               .hash_func = rte_hash_crc,
+       };
+
+       /* Initialize tunnel filter rule list and hash */
+       TAILQ_INIT(&tunnel_rule->tunnel_list);
+       snprintf(tunnel_hash_name, RTE_HASH_NAMESIZE,
+                "tunnel_%s", dev->data->name);
+       tunnel_rule->hash_table = rte_hash_create(&tunnel_hash_params);
+       if (!tunnel_rule->hash_table) {
+               PMD_INIT_LOG(ERR, "Failed to create tunnel hash table!");
+               return -EINVAL;
+       }
+       tunnel_rule->hash_map = rte_zmalloc("i40e_tunnel_hash_map",
+                                   sizeof(struct i40e_tunnel_filter *) *
+                                   I40E_MAX_TUNNEL_FILTER_NUM,
+                                   0);
+       if (!tunnel_rule->hash_map) {
+               PMD_INIT_LOG(ERR,
+                            "Failed to allocate memory for tunnel hash map!");
+               ret = -ENOMEM;
+               goto err_tunnel_hash_map_alloc;
+       }
+
+       return 0;
+
+err_tunnel_hash_map_alloc:
+       rte_hash_free(tunnel_rule->hash_table);
+
+       return ret;
+}
+
+static int
+i40e_init_fdir_filter_list(struct rte_eth_dev *dev)
+{
+       struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+       struct i40e_fdir_info *fdir_info = &pf->fdir;
+       char fdir_hash_name[RTE_HASH_NAMESIZE];
+       int ret;
+
+       struct rte_hash_parameters fdir_hash_params = {
+               .name = fdir_hash_name,
+               .entries = I40E_MAX_FDIR_FILTER_NUM,
+               .key_len = sizeof(struct rte_eth_fdir_input),
+               .hash_func = rte_hash_crc,
+       };
+
+       /* Initialize flow director filter rule list and hash */
+       TAILQ_INIT(&fdir_info->fdir_list);
+       snprintf(fdir_hash_name, RTE_HASH_NAMESIZE,
+                "fdir_%s", dev->data->name);
+       fdir_info->hash_table = rte_hash_create(&fdir_hash_params);
+       if (!fdir_info->hash_table) {
+               PMD_INIT_LOG(ERR, "Failed to create fdir hash table!");
+               return -EINVAL;
+       }
+       fdir_info->hash_map = rte_zmalloc("i40e_fdir_hash_map",
+                                         sizeof(struct i40e_fdir_filter *) *
+                                         I40E_MAX_FDIR_FILTER_NUM,
+                                         0);
+       if (!fdir_info->hash_map) {
+               PMD_INIT_LOG(ERR,
+                            "Failed to allocate memory for fdir hash map!");
+               ret = -ENOMEM;
+               goto err_fdir_hash_map_alloc;
+       }
+       return 0;
+
+err_fdir_hash_map_alloc:
+       rte_hash_free(fdir_info->hash_table);
+
+       return ret;
+}
+
 static int
 eth_i40e_dev_init(struct rte_eth_dev *dev)
 {
        struct rte_pci_device *pci_dev;
+       struct rte_intr_handle *intr_handle;
        struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct i40e_vsi *vsi;
@@ -951,6 +1031,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
        dev->dev_ops = &i40e_eth_dev_ops;
        dev->rx_pkt_burst = i40e_recv_pkts;
        dev->tx_pkt_burst = i40e_xmit_pkts;
+       dev->tx_pkt_prepare = i40e_prep_pkts;
 
        /* for secondary processes, we don't initialise any further as primary
         * has already done this work. Only check we don't need a different
@@ -960,9 +1041,11 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
                i40e_set_tx_function(dev);
                return 0;
        }
-       pci_dev = dev->pci_dev;
+       pci_dev = I40E_DEV_TO_PCI(dev);
+       intr_handle = &pci_dev->intr_handle;
 
        rte_eth_copy_pci_info(dev, pci_dev);
+       dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE;
 
        pf->adapter = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        pf->adapter->eth_dev = dev;
@@ -971,8 +1054,8 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
        hw->back = I40E_PF_TO_ADAPTER(pf);
        hw->hw_addr = (uint8_t *)(pci_dev->mem_resource[0].addr);
        if (!hw->hw_addr) {
-               PMD_INIT_LOG(ERR, "Hardware is not available, "
-                            "as address is NULL");
+               PMD_INIT_LOG(ERR,
+                       "Hardware is not available, as address is NULL");
                return -ENODEV;
        }
 
@@ -1005,11 +1088,12 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
        }
 
        /*
-        * To work around the NVM issue,initialize registers
-        * for flexible payload by software.
-        * It should be removed once issues are fixed in NVM.
+        * To work around the NVM issue, initialize registers
+        * for flexible payload and packet type of QinQ by
+        * software. It should be removed once issues are fixed
+        * in NVM.
         */
-       i40e_flex_payload_reg_init(hw);
+       i40e_GLQF_reg_init(hw);
 
        /* Initialize the input set for filters (hash and fd) to default value */
        i40e_filter_input_set_init(pf);
@@ -1032,7 +1116,11 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
        config_floating_veb(dev);
        /* Clear PXE mode */
        i40e_clear_pxe_mode(hw);
-
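+       /* Query firmware for the PHY types this port supports; they are
+        * used later for the link speed capability checks.
+        */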
+       ret = i40e_dev_sync_phy_type(hw);
+       if (ret) {
+               PMD_INIT_LOG(ERR, "Failed to sync phy type: %d", ret);
+               goto err_sync_phy_type;
+       }
        /*
         * On X710, performance number is far from the expectation on recent
         * firmware versions. The fix for this issue may not be integrated in
@@ -1103,8 +1191,8 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
        /* Set the global registers with default ether type value */
        ret = i40e_vlan_tpid_set(dev, ETH_VLAN_TYPE_OUTER, ETHER_TYPE_VLAN);
        if (ret != I40E_SUCCESS) {
-               PMD_INIT_LOG(ERR, "Failed to set the default outer "
-                            "VLAN ether type");
+               PMD_INIT_LOG(ERR,
+                       "Failed to set the default outer VLAN ether type");
                goto err_setup_pf_switch;
        }
 
@@ -1123,6 +1211,15 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
        /* Disable double vlan by default */
        i40e_vsi_config_double_vlan(vsi, FALSE);
 
+       /* Disable S-TAG identification when floating_veb is disabled */
+       if (!pf->floating_veb) {
+               ret = I40E_READ_REG(hw, I40E_PRT_L2TAGSEN);
+               if (ret & I40E_L2_TAGS_S_TAG_MASK) {
+                       ret &= ~I40E_L2_TAGS_S_TAG_MASK;
+                       I40E_WRITE_REG(hw, I40E_PRT_L2TAGSEN, ret);
+               }
+       }
+
        if (!vsi->max_macaddrs)
                len = ETHER_ADDR_LEN;
        else
@@ -1131,8 +1228,8 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
        /* Should be after VSI initialized */
        dev->data->mac_addrs = rte_zmalloc("i40e", len, 0);
        if (!dev->data->mac_addrs) {
-               PMD_INIT_LOG(ERR, "Failed to allocated memory "
-                                       "for storing mac address");
+               PMD_INIT_LOG(ERR,
+                       "Failed to allocate memory for storing mac address");
                goto err_mac_alloc;
        }
        ether_addr_copy((struct ether_addr *)hw->mac.perm_addr,
@@ -1142,15 +1239,15 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
        i40e_pf_host_init(dev);
 
        /* register callback func to eal lib */
-       rte_intr_callback_register(&(pci_dev->intr_handle),
-               i40e_dev_interrupt_handler, (void *)dev);
+       rte_intr_callback_register(intr_handle,
+                                  i40e_dev_interrupt_handler, dev);
 
        /* configure and enable device interrupt */
        i40e_pf_config_irq0(hw, TRUE);
        i40e_pf_enable_irq0(hw);
 
        /* enable uio intr after callback register */
-       rte_intr_enable(&(pci_dev->intr_handle));
+       rte_intr_enable(intr_handle);
        /*
         * Add an ethertype filter to drop all flow control frames transmitted
         * from VSIs. By doing so, we stop VF from sending out PAUSE or PFC
@@ -1173,8 +1270,26 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
                pf->flags &= ~I40E_FLAG_DCB;
        }
 
+       ret = i40e_init_ethtype_filter_list(dev);
+       if (ret < 0)
+               goto err_init_ethtype_filter_list;
+       ret = i40e_init_tunnel_filter_list(dev);
+       if (ret < 0)
+               goto err_init_tunnel_filter_list;
+       ret = i40e_init_fdir_filter_list(dev);
+       if (ret < 0)
+               goto err_init_fdir_filter_list;
+
        return 0;
 
+err_init_fdir_filter_list:
+       rte_free(pf->tunnel.hash_table);
+       rte_free(pf->tunnel.hash_map);
+err_init_tunnel_filter_list:
+       rte_free(pf->ethertype.hash_table);
+       rte_free(pf->ethertype.hash_map);
+err_init_ethtype_filter_list:
+       rte_free(dev->data->mac_addrs);
 err_mac_alloc:
        i40e_vsi_release(pf->main_vsi);
 err_setup_pf_switch:
@@ -1188,17 +1303,79 @@ err_msix_pool_init:
 err_qp_pool_init:
 err_parameter_init:
 err_get_capabilities:
+err_sync_phy_type:
        (void)i40e_shutdown_adminq(hw);
 
        return ret;
 }
 
+static void
+i40e_rm_ethtype_filter_list(struct i40e_pf *pf)
+{
+       struct i40e_ethertype_filter *p_ethertype;
+       struct i40e_ethertype_rule *ethertype_rule;
+
+       ethertype_rule = &pf->ethertype;
+       /* Remove all ethertype filter rules and hash */
+       if (ethertype_rule->hash_map)
+               rte_free(ethertype_rule->hash_map);
+       if (ethertype_rule->hash_table)
+               rte_hash_free(ethertype_rule->hash_table);
+
+       while ((p_ethertype = TAILQ_FIRST(&ethertype_rule->ethertype_list))) {
+               TAILQ_REMOVE(&ethertype_rule->ethertype_list,
+                            p_ethertype, rules);
+               rte_free(p_ethertype);
+       }
+}
+
+static void
+i40e_rm_tunnel_filter_list(struct i40e_pf *pf)
+{
+       struct i40e_tunnel_filter *p_tunnel;
+       struct i40e_tunnel_rule *tunnel_rule;
+
+       tunnel_rule = &pf->tunnel;
+       /* Remove all tunnel filter rules and hash */
+       if (tunnel_rule->hash_map)
+               rte_free(tunnel_rule->hash_map);
+       if (tunnel_rule->hash_table)
+               rte_hash_free(tunnel_rule->hash_table);
+
+       while ((p_tunnel = TAILQ_FIRST(&tunnel_rule->tunnel_list))) {
+               TAILQ_REMOVE(&tunnel_rule->tunnel_list, p_tunnel, rules);
+               rte_free(p_tunnel);
+       }
+}
+
+static void
+i40e_rm_fdir_filter_list(struct i40e_pf *pf)
+{
+       struct i40e_fdir_filter *p_fdir;
+       struct i40e_fdir_info *fdir_info;
+
+       fdir_info = &pf->fdir;
+       /* Remove all flow director rules and hash */
+       if (fdir_info->hash_map)
+               rte_free(fdir_info->hash_map);
+       if (fdir_info->hash_table)
+               rte_hash_free(fdir_info->hash_table);
+
+       while ((p_fdir = TAILQ_FIRST(&fdir_info->fdir_list))) {
+               TAILQ_REMOVE(&fdir_info->fdir_list, p_fdir, rules);
+               rte_free(p_fdir);
+       }
+}
+
 static int
 eth_i40e_dev_uninit(struct rte_eth_dev *dev)
 {
+       struct i40e_pf *pf;
        struct rte_pci_device *pci_dev;
+       struct rte_intr_handle *intr_handle;
        struct i40e_hw *hw;
        struct i40e_filter_control_settings settings;
+       struct rte_flow *p_flow;
        int ret;
        uint8_t aq_fail = 0;
 
@@ -1207,8 +1384,10 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev)
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return 0;
 
+       pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
        hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       pci_dev = dev->pci_dev;
+       pci_dev = I40E_DEV_TO_PCI(dev);
+       intr_handle = &pci_dev->intr_handle;
 
        if (hw->adapter_stopped == 0)
                i40e_dev_close(dev);
@@ -1217,11 +1396,6 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev)
        dev->rx_pkt_burst = NULL;
        dev->tx_pkt_burst = NULL;
 
-       /* Disable LLDP */
-       ret = i40e_aq_stop_lldp(hw, true, NULL);
-       if (ret != I40E_SUCCESS) /* Its failure can be ignored */
-               PMD_INIT_LOG(INFO, "Failed to stop lldp");
-
        /* Clear PXE mode */
        i40e_clear_pxe_mode(hw);
 
@@ -1243,11 +1417,21 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev)
        dev->data->mac_addrs = NULL;
 
        /* disable uio intr before callback unregister */
-       rte_intr_disable(&(pci_dev->intr_handle));
+       rte_intr_disable(intr_handle);
 
        /* register callback func to eal lib */
-       rte_intr_callback_unregister(&(pci_dev->intr_handle),
-               i40e_dev_interrupt_handler, (void *)dev);
+       rte_intr_callback_unregister(intr_handle,
+                                    i40e_dev_interrupt_handler, dev);
+
+       i40e_rm_ethtype_filter_list(pf);
+       i40e_rm_tunnel_filter_list(pf);
+       i40e_rm_fdir_filter_list(pf);
+
+       /* Remove all flows */
+       while ((p_flow = TAILQ_FIRST(&pf->flow_list))) {
+               TAILQ_REMOVE(&pf->flow_list, p_flow, node);
+               rte_free(p_flow);
+       }
 
        return 0;
 }
@@ -1313,6 +1497,8 @@ i40e_dev_configure(struct rte_eth_dev *dev)
                }
        }
 
+       TAILQ_INIT(&pf->flow_list);
+
        return 0;
 
 err_dcb:
@@ -1333,7 +1519,8 @@ void
 i40e_vsi_queues_unbind_intr(struct i40e_vsi *vsi)
 {
        struct rte_eth_dev *dev = vsi->adapter->eth_dev;
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
        uint16_t msix_vect = vsi->msix_intr;
        uint16_t i;
@@ -1446,7 +1633,8 @@ void
 i40e_vsi_queues_bind_intr(struct i40e_vsi *vsi)
 {
        struct rte_eth_dev *dev = vsi->adapter->eth_dev;
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
        uint16_t msix_vect = vsi->msix_intr;
        uint16_t nb_msix = RTE_MIN(vsi->nb_msix, intr_handle->nb_efd);
@@ -1517,7 +1705,8 @@ static void
 i40e_vsi_enable_queues_intr(struct i40e_vsi *vsi)
 {
        struct rte_eth_dev *dev = vsi->adapter->eth_dev;
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
        uint16_t interval = i40e_calc_itr_interval(\
                RTE_LIBRTE_I40E_ITR_INTERVAL);
@@ -1548,7 +1737,8 @@ static void
 i40e_vsi_disable_queues_intr(struct i40e_vsi *vsi)
 {
        struct rte_eth_dev *dev = vsi->adapter->eth_dev;
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
        uint16_t msix_intr, i;
 
@@ -1571,6 +1761,8 @@ i40e_parse_link_speeds(uint16_t link_speeds)
 
        if (link_speeds & ETH_LINK_SPEED_40G)
                link_speed |= I40E_LINK_SPEED_40GB;
+       if (link_speeds & ETH_LINK_SPEED_25G)
+               link_speed |= I40E_LINK_SPEED_25GB;
        if (link_speeds & ETH_LINK_SPEED_20G)
                link_speed |= I40E_LINK_SPEED_20GB;
        if (link_speeds & ETH_LINK_SPEED_10G)
@@ -1596,6 +1788,7 @@ i40e_phy_conf_link(struct i40e_hw *hw,
                        I40E_AQ_PHY_FLAG_PAUSE_RX |
                        I40E_AQ_PHY_FLAG_LOW_POWER;
        const uint8_t advt = I40E_LINK_SPEED_40GB |
+                       I40E_LINK_SPEED_25GB |
                        I40E_LINK_SPEED_10GB |
                        I40E_LINK_SPEED_1GB |
                        I40E_LINK_SPEED_100MB;
@@ -1623,6 +1816,8 @@ i40e_phy_conf_link(struct i40e_hw *hw,
 
        /* use get_phy_abilities_resp value for the rest */
        phy_conf.phy_type = phy_ab.phy_type;
+       phy_conf.phy_type_ext = phy_ab.phy_type_ext;
+       phy_conf.fec_config = phy_ab.fec_cfg_curr_mod_ext_info;
        phy_conf.eee_capability = phy_ab.eee_capability;
        phy_conf.eeer = phy_ab.eeer_val;
        phy_conf.low_power_ctrl = phy_ab.d3_lpan;
@@ -1654,7 +1849,7 @@ i40e_apply_link_speed(struct rte_eth_dev *dev)
        abilities |= I40E_AQ_PHY_LINK_ENABLED;
 
        /* Skip changing speed on 40G interfaces, FW does not support */
-       if (i40e_is_40G_device(hw->device_id)) {
+       if (I40E_PHY_TYPE_SUPPORT_40G(hw->phy.phy_types)) {
                speed =  I40E_LINK_SPEED_UNKNOWN;
                abilities |= I40E_AQ_PHY_AN_ENABLED;
        }
@@ -1669,7 +1864,8 @@ i40e_dev_start(struct rte_eth_dev *dev)
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct i40e_vsi *main_vsi = pf->main_vsi;
        int ret, i;
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        uint32_t intr_vector = 0;
 
        hw->adapter_stopped = 0;
@@ -1686,8 +1882,9 @@ i40e_dev_start(struct rte_eth_dev *dev)
             !RTE_ETH_DEV_SRIOV(dev).active) &&
            dev->data->dev_conf.intr_conf.rxq != 0) {
                intr_vector = dev->data->nb_rx_queues;
-               if (rte_intr_efd_enable(intr_handle, intr_vector))
-                       return -1;
+               ret = rte_intr_efd_enable(intr_handle, intr_vector);
+               if (ret)
+                       return ret;
        }
 
        if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) {
@@ -1696,8 +1893,9 @@ i40e_dev_start(struct rte_eth_dev *dev)
                                    dev->data->nb_rx_queues * sizeof(int),
                                    0);
                if (!intr_handle->intr_vec) {
-                       PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues"
-                                    " intr_vec\n", dev->data->nb_rx_queues);
+                       PMD_INIT_LOG(ERR,
+                               "Failed to allocate %d rx_queues intr_vec",
+                               dev->data->nb_rx_queues);
                        return -ENOMEM;
                }
        }
@@ -1750,7 +1948,8 @@ i40e_dev_start(struct rte_eth_dev *dev)
        /* Apply link configure */
        if (dev->data->dev_conf.link_speeds & ~(ETH_LINK_SPEED_100M |
                                ETH_LINK_SPEED_1G | ETH_LINK_SPEED_10G |
-                               ETH_LINK_SPEED_20G | ETH_LINK_SPEED_40G)) {
+                               ETH_LINK_SPEED_20G | ETH_LINK_SPEED_25G |
+                               ETH_LINK_SPEED_40G)) {
                PMD_DRV_LOG(ERR, "Invalid link setting");
                goto err_up;
        }
@@ -1769,13 +1968,25 @@ i40e_dev_start(struct rte_eth_dev *dev)
                i40e_pf_enable_irq0(hw);
 
                if (dev->data->dev_conf.intr_conf.lsc != 0)
-                       PMD_INIT_LOG(INFO, "lsc won't enable because of"
-                                    " no intr multiplex\n");
+                       PMD_INIT_LOG(INFO,
+                               "lsc won't be enabled because of no intr multiplex");
+       } else if (dev->data->dev_conf.intr_conf.lsc != 0) {
+               ret = i40e_aq_set_phy_int_mask(hw,
+                                              ~(I40E_AQ_EVENT_LINK_UPDOWN |
+                                              I40E_AQ_EVENT_MODULE_QUAL_FAIL |
+                                              I40E_AQ_EVENT_MEDIA_NA), NULL);
+               if (ret != I40E_SUCCESS)
+                       PMD_DRV_LOG(WARNING, "Failed to set phy mask");
+
+               /* Call get_link_info aq command to enable LSE */
+               i40e_dev_link_update(dev, 0);
        }
 
        /* enable uio intr after callback register */
        rte_intr_enable(intr_handle);
 
+       i40e_filter_restore(pf);
+
        return I40E_SUCCESS;
 
 err_up:
@@ -1791,7 +2002,8 @@ i40e_dev_stop(struct rte_eth_dev *dev)
        struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
        struct i40e_vsi *main_vsi = pf->main_vsi;
        struct i40e_mirror_rule *p_mirror;
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        int i;
 
        /* Disable all queues */
@@ -1842,6 +2054,8 @@ i40e_dev_close(struct rte_eth_dev *dev)
 {
        struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        uint32_t reg;
        int i;
 
@@ -1853,23 +2067,22 @@ i40e_dev_close(struct rte_eth_dev *dev)
 
        /* Disable interrupt */
        i40e_pf_disable_irq0(hw);
-       rte_intr_disable(&(dev->pci_dev->intr_handle));
+       rte_intr_disable(intr_handle);
 
        /* shutdown and destroy the HMC */
        i40e_shutdown_lan_hmc(hw);
 
-       /* release all the existing VSIs and VEBs */
-       i40e_fdir_teardown(pf);
-       i40e_vsi_release(pf->main_vsi);
-
        for (i = 0; i < pf->nb_cfg_vmdq_vsi; i++) {
                i40e_vsi_release(pf->vmdq[i].vsi);
                pf->vmdq[i].vsi = NULL;
        }
-
        rte_free(pf->vmdq);
        pf->vmdq = NULL;
 
+       /* release all the existing VSIs and VEBs */
+       i40e_fdir_teardown(pf);
+       i40e_vsi_release(pf->main_vsi);
+
        /* shutdown the adminq */
        i40e_aq_queue_shutdown(hw, true);
        i40e_shutdown_adminq(hw);
@@ -1970,9 +2183,10 @@ static int
 i40e_dev_set_link_down(struct rte_eth_dev *dev)
 {
        uint8_t speed = I40E_LINK_SPEED_UNKNOWN;
-       uint8_t abilities = I40E_AQ_PHY_ENABLE_ATOMIC_LINK;
+       uint8_t abilities = 0;
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
+       abilities = I40E_AQ_PHY_ENABLE_ATOMIC_LINK;
        return i40e_phy_conf_link(hw, abilities, speed);
 }
 
@@ -1987,6 +2201,7 @@ i40e_dev_link_update(struct rte_eth_dev *dev,
        struct rte_eth_link link, old;
        int status;
        unsigned rep_cnt = MAX_REPEAT_TIME;
+       bool enable_lse = dev->data->dev_conf.intr_conf.lsc ? true : false;
 
        memset(&link, 0, sizeof(link));
        memset(&old, 0, sizeof(old));
@@ -1995,7 +2210,8 @@ i40e_dev_link_update(struct rte_eth_dev *dev,
 
        do {
                /* Get link status information from hardware */
-               status = i40e_aq_get_link_info(hw, false, &link_status, NULL);
+               status = i40e_aq_get_link_info(hw, enable_lse,
+                                               &link_status, NULL);
                if (status != I40E_SUCCESS) {
                        link.link_speed = ETH_SPEED_NUM_100M;
                        link.link_duplex = ETH_LINK_FULL_DUPLEX;
@@ -2030,6 +2246,9 @@ i40e_dev_link_update(struct rte_eth_dev *dev,
        case I40E_LINK_SPEED_20GB:
                link.link_speed = ETH_SPEED_NUM_20G;
                break;
+       case I40E_LINK_SPEED_25GB:
+               link.link_speed = ETH_SPEED_NUM_25G;
+               break;
        case I40E_LINK_SPEED_40GB:
                link.link_speed = ETH_SPEED_NUM_40G;
                break;
@@ -2296,11 +2515,9 @@ i40e_read_stats_registers(struct i40e_pf *pf, struct i40e_hw *hw)
                            I40E_GLPRT_PTC9522L(hw->port),
                            pf->offset_loaded, &os->tx_size_big,
                            &ns->tx_size_big);
-#ifndef TREX_PATCH
        i40e_stat_update_32(hw, I40E_GLQF_PCNT(pf->fdir.match_counter_index),
                           pf->offset_loaded,
                           &os->fd_sb_match, &ns->fd_sb_match);
-#endif
        /* GLPRT_MSPDC not supported */
        /* GLPRT_XEC not supported */
 
@@ -2310,46 +2527,6 @@ i40e_read_stats_registers(struct i40e_pf *pf, struct i40e_hw *hw)
                i40e_update_vsi_stats(pf->main_vsi);
 }
 
-//TREX_PATCH
-// fill stats array with fdir rules match count statistics
-// Notice that we read statistics from start to start + len, but we fill the stats are
-//  starting from 0 with len values
-void
-i40e_trex_fdir_stats_get(struct rte_eth_dev *dev, uint32_t *stats, uint32_t start, uint32_t len)
-{
-    int i;
-    struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-
-    for (i = 0; i < len; i++) {
-        stats[i] = I40E_READ_REG(hw, I40E_GLQF_PCNT(i + start));
-    }
-}
-
-// TREX_PATCH
-void
-i40e_trex_fdir_stats_reset(struct rte_eth_dev *dev, uint32_t *stats, uint32_t start, uint32_t len)
-{
-    int i;
-    struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-
-    for (i = 0; i < len; i++) {
-        if (stats) {
-            stats[i] = I40E_READ_REG(hw, I40E_GLQF_PCNT(i + start));
-        }
-        I40E_WRITE_REG(hw, I40E_GLQF_PCNT(i + start), 0xffffffff);
-    }
-}
-
-// TREX_PATCH
-int
-i40e_trex_get_fw_ver(struct rte_eth_dev *dev, uint32_t *nvm_ver)
-{
-    struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-
-    *nvm_ver = hw->nvm.version;
-    return 0;
-}
-
 /* Get all statistics of a port */
 static void
 i40e_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
@@ -2366,17 +2543,10 @@ i40e_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
                        pf->main_vsi->eth_stats.rx_multicast +
                        pf->main_vsi->eth_stats.rx_broadcast -
                        pf->main_vsi->eth_stats.rx_discards;
-#ifndef TREX_PATCH
        stats->opackets = pf->main_vsi->eth_stats.tx_unicast +
                        pf->main_vsi->eth_stats.tx_multicast +
                        pf->main_vsi->eth_stats.tx_broadcast;
        stats->ibytes   = ns->eth.rx_bytes;
-#else
-    /* Hanoch: move to global transmit and not pf->vsi and we have two high and low priorty */
-    stats->opackets = ns->eth.tx_unicast +ns->eth.tx_multicast +ns->eth.tx_broadcast;
-       stats->ibytes   = pf->main_vsi->eth_stats.rx_bytes;
-#endif
-
        stats->obytes   = ns->eth.tx_bytes;
        stats->oerrors  = ns->eth.tx_errors +
                        pf->main_vsi->eth_stats.tx_errors;
@@ -2557,6 +2727,7 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
        for (i = 0; i < I40E_NB_ETH_XSTATS; i++) {
                xstats[count].value = *(uint64_t *)(((char *)&hw_stats->eth) +
                        rte_i40e_stats_strings[i].offset);
+               xstats[count].id = count;
                count++;
        }
 
@@ -2564,6 +2735,7 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
        for (i = 0; i < I40E_NB_HW_PORT_XSTATS; i++) {
                xstats[count].value = *(uint64_t *)(((char *)hw_stats) +
                        rte_i40e_hw_port_strings[i].offset);
+               xstats[count].id = count;
                count++;
        }
 
@@ -2573,6 +2745,7 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
                                *(uint64_t *)(((char *)hw_stats) +
                                rte_i40e_rxq_prio_strings[i].offset +
                                (sizeof(uint64_t) * prio));
+                       xstats[count].id = count;
                        count++;
                }
        }
@@ -2583,6 +2756,7 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
                                *(uint64_t *)(((char *)hw_stats) +
                                rte_i40e_txq_prio_strings[i].offset +
                                (sizeof(uint64_t) * prio));
+                       xstats[count].id = count;
                        count++;
                }
        }
@@ -2601,19 +2775,49 @@ i40e_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *dev,
        return -ENOSYS;
 }
 
+static int
+i40e_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size)
+{
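+       /* Report "<NVM version> <eetrack> <OEM ver.build.patch>"; return 0,
+        * or the required buffer length (including '\0') when fw_size is
+        * too small.
+        */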
+       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       u32 full_ver;
+       u8 ver, patch;
+       u16 build;
+       int ret;
+
+       full_ver = hw->nvm.oem_ver;
+       ver = (u8)(full_ver >> 24);
+       build = (u16)((full_ver >> 8) & 0xffff);
+       patch = (u8)(full_ver & 0xff);
+
+       ret = snprintf(fw_version, fw_size,
+                "%d.%d%d 0x%08x %d.%d.%d",
+                ((hw->nvm.version >> 12) & 0xf),
+                ((hw->nvm.version >> 4) & 0xff),
+                (hw->nvm.version & 0xf), hw->nvm.eetrack,
+                ver, build, patch);
+
+       ret += 1; /* add the size of '\0' */
+       if (fw_size < (u32)ret)
+               return ret;
+       else
+               return 0;
+}
+
 static void
 i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
        struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct i40e_vsi *vsi = pf->main_vsi;
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
 
+       dev_info->pci_dev = pci_dev;
        dev_info->max_rx_queues = vsi->nb_qps;
        dev_info->max_tx_queues = vsi->nb_qps;
        dev_info->min_rx_bufsize = I40E_BUF_SIZE_MIN;
        dev_info->max_rx_pktlen = I40E_FRAME_SIZE_MAX;
        dev_info->max_mac_addrs = vsi->max_macaddrs;
-       dev_info->max_vfs = dev->pci_dev->max_vfs;
+       dev_info->max_vfs = pci_dev->max_vfs;
        dev_info->rx_offload_capa =
                DEV_RX_OFFLOAD_VLAN_STRIP |
                DEV_RX_OFFLOAD_QINQ_STRIP |
@@ -2628,7 +2832,11 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                DEV_TX_OFFLOAD_TCP_CKSUM |
                DEV_TX_OFFLOAD_SCTP_CKSUM |
                DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
-               DEV_TX_OFFLOAD_TCP_TSO;
+               DEV_TX_OFFLOAD_TCP_TSO |
+               DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
+               DEV_TX_OFFLOAD_GRE_TNL_TSO |
+               DEV_TX_OFFLOAD_IPIP_TNL_TSO |
+               DEV_TX_OFFLOAD_GENEVE_TNL_TSO;
        dev_info->hash_key_size = (I40E_PFQF_HKEY_MAX_INDEX + 1) *
                                                sizeof(uint32_t);
        dev_info->reta_size = pf->hash_lut_size;
@@ -2666,6 +2874,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                .nb_max = I40E_MAX_RING_DESC,
                .nb_min = I40E_MIN_RING_DESC,
                .nb_align = I40E_ALIGN_RING_DESC,
+               .nb_seg_max = I40E_TX_MAX_SEG,
+               .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG,
        };
 
        if (pf->flags & I40E_FLAG_VMDQ) {
@@ -2678,9 +2888,12 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                dev_info->max_tx_queues += dev_info->vmdq_queue_num;
        }
 
-       if (i40e_is_40G_device(hw->device_id))
+       if (I40E_PHY_TYPE_SUPPORT_40G(hw->phy.phy_types))
                /* For XL710 */
                dev_info->speed_capa = ETH_LINK_SPEED_40G;
+       else if (I40E_PHY_TYPE_SUPPORT_25G(hw->phy.phy_types))
+               /* For XXV710 */
+               dev_info->speed_capa = ETH_LINK_SPEED_25G;
        else
                /* For X710 */
                dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_10G;
@@ -2723,7 +2936,7 @@ i40e_vlan_tpid_set(struct rte_eth_dev *dev,
                else {
                        ret = -EINVAL;
                        PMD_DRV_LOG(ERR,
-                               "Unsupported vlan type in single vlan.\n");
+                               "Unsupported vlan type in single vlan.");
                        return ret;
                }
                break;
@@ -2735,13 +2948,15 @@ i40e_vlan_tpid_set(struct rte_eth_dev *dev,
        ret = i40e_aq_debug_read_register(hw, I40E_GL_SWT_L2TAGCTRL(reg_id),
                                          &reg_r, NULL);
        if (ret != I40E_SUCCESS) {
-               PMD_DRV_LOG(ERR, "Fail to debug read from "
-                           "I40E_GL_SWT_L2TAGCTRL[%d]", reg_id);
+               PMD_DRV_LOG(ERR,
+                          "Failed to debug read from I40E_GL_SWT_L2TAGCTRL[%d]",
+                          reg_id);
                ret = -EIO;
                return ret;
        }
-       PMD_DRV_LOG(DEBUG, "Debug read from I40E_GL_SWT_L2TAGCTRL[%d]: "
-                   "0x%08"PRIx64"", reg_id, reg_r);
+       PMD_DRV_LOG(DEBUG,
+               "Debug read from I40E_GL_SWT_L2TAGCTRL[%d]: 0x%08"PRIx64,
+               reg_id, reg_r);
 
        reg_w = reg_r & (~(I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_MASK));
        reg_w |= ((uint64_t)tpid << I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_SHIFT);
@@ -2755,12 +2970,14 @@ i40e_vlan_tpid_set(struct rte_eth_dev *dev,
                                           reg_w, NULL);
        if (ret != I40E_SUCCESS) {
                ret = -EIO;
-               PMD_DRV_LOG(ERR, "Fail to debug write to "
-                           "I40E_GL_SWT_L2TAGCTRL[%d]", reg_id);
+               PMD_DRV_LOG(ERR,
+                       "Failed to debug write to I40E_GL_SWT_L2TAGCTRL[%d]",
+                       reg_id);
                return ret;
        }
-       PMD_DRV_LOG(DEBUG, "Debug write 0x%08"PRIx64" to "
-                   "I40E_GL_SWT_L2TAGCTRL[%d]", reg_w, reg_id);
+       PMD_DRV_LOG(DEBUG,
+               "Debug write 0x%08"PRIx64" to I40E_GL_SWT_L2TAGCTRL[%d]",
+               reg_w, reg_id);
 
        return ret;
 }
@@ -2904,8 +3121,9 @@ i40e_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
        max_high_water = I40E_RXPBSIZE >> I40E_KILOSHIFT;
        if ((fc_conf->high_water > max_high_water) ||
                        (fc_conf->high_water < fc_conf->low_water)) {
-               PMD_INIT_LOG(ERR, "Invalid high/low water setup value in KB, "
-                       "High_water must <= %d.", max_high_water);
+               PMD_INIT_LOG(ERR,
+                       "Invalid high/low water setup value in KB, High_water must be <= %d.",
+                       max_high_water);
                return -EINVAL;
        }
 
@@ -2926,7 +3144,7 @@ i40e_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
        if (err < 0)
                return -ENOSYS;
 
-       if (i40e_is_40G_device(hw->device_id)) {
+       if (I40E_PHY_TYPE_SUPPORT_40G(hw->phy.phy_types)) {
                /* Configure flow control refresh threshold,
                 * the value for stat_tx_pause_refresh_timer[8]
                 * is used for global pause operation.
@@ -3077,8 +3295,8 @@ i40e_macaddr_remove(struct rte_eth_dev *dev, uint32_t index)
                                /* No VMDQ pool enabled or configured */
                                if (!(pf->flags & I40E_FLAG_VMDQ) ||
                                        (i > pf->nb_cfg_vmdq_vsi)) {
-                                       PMD_DRV_LOG(ERR, "No VMDQ pool enabled"
-                                                       "/configured");
+                                       PMD_DRV_LOG(ERR,
+                                               "No VMDQ pool enabled/configured");
                                        return;
                                }
                                vsi = pf->vmdq[i - 1].vsi;
@@ -3279,9 +3497,9 @@ i40e_dev_rss_reta_update(struct rte_eth_dev *dev,
 
        if (reta_size != lut_size ||
                reta_size > ETH_RSS_RETA_SIZE_512) {
-               PMD_DRV_LOG(ERR, "The size of hash lookup table configured "
-                       "(%d) doesn't match the number hardware can supported "
-                                       "(%d)\n", reta_size, lut_size);
+               PMD_DRV_LOG(ERR,
+                       "The size of hash lookup table configured (%d) doesn't match the number hardware can support (%d)",
+                       reta_size, lut_size);
                return -EINVAL;
        }
 
@@ -3320,9 +3538,9 @@ i40e_dev_rss_reta_query(struct rte_eth_dev *dev,
 
        if (reta_size != lut_size ||
                reta_size > ETH_RSS_RETA_SIZE_512) {
-               PMD_DRV_LOG(ERR, "The size of hash lookup table configured "
-                       "(%d) doesn't match the number hardware can supported "
-                                       "(%d)\n", reta_size, lut_size);
+               PMD_DRV_LOG(ERR,
+                       "The size of hash lookup table configured (%d) doesn't match the number hardware can support (%d)",
+                       reta_size, lut_size);
                return -EINVAL;
        }
 
@@ -3377,8 +3595,9 @@ i40e_allocate_dma_mem_d(__attribute__((unused)) struct i40e_hw *hw,
        mem->va = mz->addr;
        mem->pa = rte_mem_phy2mch(mz->memseg_id, mz->phys_addr);
        mem->zone = (const void *)mz;
-       PMD_DRV_LOG(DEBUG, "memzone %s allocated with physical address: "
-                   "%"PRIu64, mz->name, mem->pa);
+       PMD_DRV_LOG(DEBUG,
+               "memzone %s allocated with physical address: %"PRIu64,
+               mz->name, mem->pa);
 
        return I40E_SUCCESS;
 }
@@ -3395,9 +3614,9 @@ i40e_free_dma_mem_d(__attribute__((unused)) struct i40e_hw *hw,
        if (!mem)
                return I40E_ERR_PARAM;
 
-       PMD_DRV_LOG(DEBUG, "memzone %s to be freed with physical address: "
-                   "%"PRIu64, ((const struct rte_memzone *)mem->zone)->name,
-                   mem->pa);
+       PMD_DRV_LOG(DEBUG,
+               "memzone %s to be freed with physical address: %"PRIu64,
+               ((const struct rte_memzone *)mem->zone)->name, mem->pa);
        rte_memzone_free((const struct rte_memzone *)mem->zone);
        mem->zone = NULL;
        mem->va = NULL;
@@ -3508,9 +3727,10 @@ i40e_pf_parameter_init(struct rte_eth_dev *dev)
 {
        struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
        struct i40e_hw *hw = I40E_PF_TO_HW(pf);
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
        uint16_t qp_count = 0, vsi_count = 0;
 
-       if (dev->pci_dev->max_vfs && !hw->func_caps.sr_iov_1_1) {
+       if (pci_dev->max_vfs && !hw->func_caps.sr_iov_1_1) {
                PMD_INIT_LOG(ERR, "HW configuration doesn't support SRIOV");
                return -EINVAL;
        }
@@ -3551,13 +3771,13 @@ i40e_pf_parameter_init(struct rte_eth_dev *dev)
 
        /* VF queue/VSI allocation */
        pf->vf_qp_offset = pf->lan_qp_offset + pf->lan_nb_qps;
-       if (hw->func_caps.sr_iov_1_1 && dev->pci_dev->max_vfs) {
+       if (hw->func_caps.sr_iov_1_1 && pci_dev->max_vfs) {
                pf->flags |= I40E_FLAG_SRIOV;
                pf->vf_nb_qps = RTE_LIBRTE_I40E_QUEUE_NUM_PER_VF;
-               pf->vf_num = dev->pci_dev->max_vfs;
-               PMD_DRV_LOG(DEBUG, "%u VF VSIs, %u queues per VF VSI, "
-                           "in total %u queues", pf->vf_num, pf->vf_nb_qps,
-                           pf->vf_nb_qps * pf->vf_num);
+               pf->vf_num = pci_dev->max_vfs;
+               PMD_DRV_LOG(DEBUG,
+                       "%u VF VSIs, %u queues per VF VSI, in total %u queues",
+                       pf->vf_num, pf->vf_nb_qps, pf->vf_nb_qps * pf->vf_num);
        } else {
                pf->vf_nb_qps = 0;
                pf->vf_num = 0;
@@ -3585,14 +3805,13 @@ i40e_pf_parameter_init(struct rte_eth_dev *dev)
                        if (pf->max_nb_vmdq_vsi) {
                                pf->flags |= I40E_FLAG_VMDQ;
                                pf->vmdq_nb_qps = pf->vmdq_nb_qp_max;
-                               PMD_DRV_LOG(DEBUG, "%u VMDQ VSIs, %u queues "
-                                           "per VMDQ VSI, in total %u queues",
-                                           pf->max_nb_vmdq_vsi,
-                                           pf->vmdq_nb_qps, pf->vmdq_nb_qps *
-                                           pf->max_nb_vmdq_vsi);
+                               PMD_DRV_LOG(DEBUG,
+                                       "%u VMDQ VSIs, %u queues per VMDQ VSI, in total %u queues",
+                                       pf->max_nb_vmdq_vsi, pf->vmdq_nb_qps,
+                                       pf->vmdq_nb_qps * pf->max_nb_vmdq_vsi);
                        } else {
-                               PMD_DRV_LOG(INFO, "No enough queues left for "
-                                           "VMDq");
+                               PMD_DRV_LOG(INFO,
+                                       "Not enough queues left for VMDq");
                        }
                } else {
                        PMD_DRV_LOG(INFO, "No queue or VSI left for VMDq");
@@ -3605,15 +3824,15 @@ i40e_pf_parameter_init(struct rte_eth_dev *dev)
                pf->flags |= I40E_FLAG_DCB;
 
        if (qp_count > hw->func_caps.num_tx_qp) {
-               PMD_DRV_LOG(ERR, "Failed to allocate %u queues, which exceeds "
-                           "the hardware maximum %u", qp_count,
-                           hw->func_caps.num_tx_qp);
+               PMD_DRV_LOG(ERR,
+                       "Failed to allocate %u queues, which exceeds the hardware maximum %u",
+                       qp_count, hw->func_caps.num_tx_qp);
                return -EINVAL;
        }
        if (vsi_count > hw->func_caps.num_vsis) {
-               PMD_DRV_LOG(ERR, "Failed to allocate %u VSIs, which exceeds "
-                           "the hardware maximum %u", vsi_count,
-                           hw->func_caps.num_vsis);
+               PMD_DRV_LOG(ERR,
+                       "Failed to allocate %u VSIs, which exceeds the hardware maximum %u",
+                       vsi_count, hw->func_caps.num_vsis);
                return -EINVAL;
        }
 
@@ -3859,8 +4078,8 @@ i40e_res_pool_alloc(struct i40e_res_pool_info *pool,
                 */
                entry = rte_zmalloc("res_pool", sizeof(*entry), 0);
                if (entry == NULL) {
-                       PMD_DRV_LOG(ERR, "Failed to allocate memory for "
-                                   "resource pool");
+                       PMD_DRV_LOG(ERR,
+                               "Failed to allocate memory for resource pool");
                        return -ENOMEM;
                }
                entry->base = valid_entry->base;
@@ -3900,9 +4119,9 @@ validate_tcmap_parameter(struct i40e_vsi *vsi, uint8_t enabled_tcmap)
        }
 
        if (!bitmap_is_subset(hw->func_caps.enabled_tcmap, enabled_tcmap)) {
-               PMD_DRV_LOG(ERR, "Enabled TC map 0x%x not applicable to "
-                           "HW support 0x%x", hw->func_caps.enabled_tcmap,
-                           enabled_tcmap);
+               PMD_DRV_LOG(ERR,
+                       "Enabled TC map 0x%x not applicable to HW support 0x%x",
+                       hw->func_caps.enabled_tcmap, enabled_tcmap);
                return I40E_NOT_SUPPORTED;
        }
        return I40E_SUCCESS;
@@ -4108,18 +4327,10 @@ i40e_veb_setup(struct i40e_pf *pf, struct i40e_vsi *vsi)
        /* create floating veb if vsi is NULL */
        if (vsi != NULL) {
                ret = i40e_aq_add_veb(hw, veb->uplink_seid, vsi->seid,
-#ifdef TREX_PATCH_LOW_LATENCY
-                      vsi->enabled_tc, false,
-#else
-                      I40E_DEFAULT_TCMAP, false,
-#endif
+                                     I40E_DEFAULT_TCMAP, false,
                                      &veb->seid, false, NULL);
        } else {
-#ifdef TREX_PATCH_LOW_LATENCY
-               ret = i40e_aq_add_veb(hw, 0, 0, vsi->enabled_tc,
-#else
                ret = i40e_aq_add_veb(hw, 0, 0, I40E_DEFAULT_TCMAP,
-#endif
                                      true, &veb->seid, false, NULL);
        }
 
@@ -4133,7 +4344,7 @@ i40e_veb_setup(struct i40e_pf *pf, struct i40e_vsi *vsi)
        ret = i40e_aq_get_veb_parameters(hw, veb->seid, NULL, NULL,
                                &veb->stats_idx, NULL, NULL, NULL);
        if (ret != I40E_SUCCESS) {
-               PMD_DRV_LOG(ERR, "Get veb statics index failed, aq_err: %d",
+               PMD_DRV_LOG(ERR, "Get veb statistics index failed, aq_err: %d",
                            hw->aq.asq_last_status);
                goto fail;
        }
@@ -4157,11 +4368,16 @@ i40e_vsi_release(struct i40e_vsi *vsi)
        void *temp;
        int ret;
        struct i40e_mac_filter *f;
-       uint16_t user_param = vsi->user_param;
+       uint16_t user_param;
 
        if (!vsi)
                return I40E_SUCCESS;
 
+       if (!vsi->adapter)
+               return -EFAULT;
+
+       user_param = vsi->user_param;
+
        pf = I40E_VSI_TO_PF(vsi);
        hw = I40E_VSI_TO_HW(vsi);
 
@@ -4250,8 +4466,8 @@ i40e_update_default_filter_setting(struct i40e_vsi *vsi)
                struct i40e_mac_filter *f;
                struct ether_addr *mac;
 
-               PMD_DRV_LOG(WARNING, "Cannot remove the default "
-                           "macvlan filter");
+               PMD_DRV_LOG(WARNING,
+                       "Cannot remove the default macvlan filter");
                /* It needs to add the permanent mac into mac list */
                f = rte_zmalloc("macv_filter", sizeof(*f), 0);
                if (f == NULL) {
@@ -4273,57 +4489,6 @@ i40e_update_default_filter_setting(struct i40e_vsi *vsi)
        return i40e_vsi_add_mac(vsi, &filter);
 }
 
-#ifdef TREX_PATCH_LOW_LATENCY
-static int
-i40e_vsi_update_tc_max_bw(struct i40e_vsi *vsi, u16 credit){
-    struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
-    int ret;
-
-    if (!vsi->seid) {
-        PMD_DRV_LOG(ERR, "seid not valid");
-        return -EINVAL;
-    }
-
-    ret = i40e_aq_config_vsi_bw_limit(hw, vsi->seid, credit,0, NULL);
-    if (ret != I40E_SUCCESS) {
-        PMD_DRV_LOG(ERR, "Failed to configure TC BW");
-        return ret;
-    }
-    return (0);
-}
-
-static int
-i40e_vsi_update_tc_bandwidth_ex(struct i40e_vsi *vsi)
-{
-       struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
-       int i, ret;
-    struct i40e_aqc_configure_vsi_ets_sla_bw_data tc_bw_data;
-    struct i40e_aqc_configure_vsi_tc_bw_data * res_buffer;
-
-       if (!vsi->seid) {
-               PMD_DRV_LOG(ERR, "seid not valid");
-               return -EINVAL;
-       }
-
-       memset(&tc_bw_data, 0, sizeof(tc_bw_data));
-       tc_bw_data.tc_valid_bits = 3;
-
-    /* enable TC 0,1 */
-       ret = i40e_aq_config_vsi_ets_sla_bw_limit(hw, vsi->seid, &tc_bw_data, NULL);
-       if (ret != I40E_SUCCESS) {
-               PMD_DRV_LOG(ERR, "Failed to configure TC BW");
-               return ret;
-       }
-    
-    vsi->enabled_tc=3;
-    res_buffer = ( struct i40e_aqc_configure_vsi_tc_bw_data *)&tc_bw_data;
-    (void)rte_memcpy(vsi->info.qs_handle, res_buffer->qs_handles,
-                                       sizeof(vsi->info.qs_handle));
-
-       return I40E_SUCCESS;
-}
-#endif
-
 /*
  * i40e_vsi_get_bw_config - Query VSI BW Information
  * @vsi: the VSI to be queried
@@ -4352,8 +4517,9 @@ i40e_vsi_get_bw_config(struct i40e_vsi *vsi)
        ret = i40e_aq_query_vsi_ets_sla_config(hw, vsi->seid,
                                        &ets_sla_config, NULL);
        if (ret != I40E_SUCCESS) {
-               PMD_DRV_LOG(ERR, "VSI failed to get TC bandwdith "
-                           "configuration %u", hw->aq.asq_last_status);
+               PMD_DRV_LOG(ERR,
+                       "VSI failed to get TC bandwidth configuration %u",
+                       hw->aq.asq_last_status);
                return ret;
        }
 
@@ -4399,8 +4565,7 @@ i40e_enable_pf_lb(struct i40e_pf *pf)
 
        /* Use the FW API if FW >= v5.0 */
        if (hw->aq.fw_maj_ver < 5) {
-        //TREX_PATCH - changed from ERR to INFO. Most of our customers do not have latest FW
-               PMD_INIT_LOG(INFO, "FW < v5.0, cannot enable loopback");
+               PMD_INIT_LOG(ERR, "FW < v5.0, cannot enable loopback");
                return;
        }
 
@@ -4421,7 +4586,7 @@ i40e_enable_pf_lb(struct i40e_pf *pf)
 
        ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
        if (ret)
-               PMD_DRV_LOG(ERR, "update vsi switch failed, aq_err=%d\n",
+               PMD_DRV_LOG(ERR, "update vsi switch failed, aq_err=%d",
                            hw->aq.asq_last_status);
 }
 
@@ -4442,14 +4607,14 @@ i40e_vsi_setup(struct i40e_pf *pf,
 
        if (type != I40E_VSI_MAIN && type != I40E_VSI_SRIOV &&
            uplink_vsi == NULL) {
-               PMD_DRV_LOG(ERR, "VSI setup failed, "
-                           "VSI link shouldn't be NULL");
+               PMD_DRV_LOG(ERR,
+                       "VSI setup failed, VSI link shouldn't be NULL");
                return NULL;
        }
 
        if (type == I40E_VSI_MAIN && uplink_vsi != NULL) {
-               PMD_DRV_LOG(ERR, "VSI setup failed, MAIN VSI "
-                           "uplink VSI should be NULL");
+               PMD_DRV_LOG(ERR,
+                       "VSI setup failed, MAIN VSI uplink VSI should be NULL");
                return NULL;
        }
 
@@ -4493,6 +4658,7 @@ i40e_vsi_setup(struct i40e_pf *pf,
        vsi->max_macaddrs = I40E_NUM_MACADDR_MAX;
        vsi->parent_vsi = uplink_vsi ? uplink_vsi : pf->main_vsi;
        vsi->user_param = user_param;
+       vsi->vlan_anti_spoof_on = 0;
        /* Allocate queues */
        switch (vsi->type) {
        case I40E_VSI_MAIN  :
@@ -4600,8 +4766,8 @@ i40e_vsi_setup(struct i40e_pf *pf,
                ret = i40e_vsi_config_tc_queue_mapping(vsi, &ctxt.info,
                                                I40E_DEFAULT_TCMAP);
                if (ret != I40E_SUCCESS) {
-                       PMD_DRV_LOG(ERR, "Failed to configure "
-                                   "TC queue mapping");
+                       PMD_DRV_LOG(ERR,
+                               "Failed to configure TC queue mapping");
                        goto fail_msix_alloc;
                }
                ctxt.seid = vsi->seid;
@@ -4671,8 +4837,8 @@ i40e_vsi_setup(struct i40e_pf *pf,
                ret = i40e_vsi_config_tc_queue_mapping(vsi, &ctxt.info,
                                                I40E_DEFAULT_TCMAP);
                if (ret != I40E_SUCCESS) {
-                       PMD_DRV_LOG(ERR, "Failed to configure "
-                                   "TC queue mapping");
+                       PMD_DRV_LOG(ERR,
+                               "Failed to configure TC queue mapping");
                        goto fail_msix_alloc;
                }
                ctxt.info.up_enable_bits = I40E_DEFAULT_TCMAP;
@@ -4714,8 +4880,8 @@ i40e_vsi_setup(struct i40e_pf *pf,
                ret = i40e_vsi_config_tc_queue_mapping(vsi, &ctxt.info,
                                                I40E_DEFAULT_TCMAP);
                if (ret != I40E_SUCCESS) {
-                       PMD_DRV_LOG(ERR, "Failed to configure "
-                                       "TC queue mapping");
+                       PMD_DRV_LOG(ERR,
+                               "Failed to configure TC queue mapping");
                        goto fail_msix_alloc;
                }
                ctxt.info.up_enable_bits = I40E_DEFAULT_TCMAP;
@@ -4732,8 +4898,8 @@ i40e_vsi_setup(struct i40e_pf *pf,
                ret = i40e_vsi_config_tc_queue_mapping(vsi, &ctxt.info,
                                                I40E_DEFAULT_TCMAP);
                if (ret != I40E_SUCCESS) {
-                       PMD_DRV_LOG(ERR, "Failed to configure "
-                                       "TC queue mapping.");
+                       PMD_DRV_LOG(ERR,
+                               "Failed to configure TC queue mapping.");
                        goto fail_msix_alloc;
                }
                ctxt.info.up_enable_bits = I40E_DEFAULT_TCMAP;
@@ -4996,8 +5162,9 @@ i40e_pf_setup(struct i40e_pf *pf)
                /* make queue allocated first, let FDIR use queue pair 0*/
                ret = i40e_res_pool_alloc(&pf->qp_pool, I40E_DEFAULT_QP_NUM_FDIR);
                if (ret != I40E_FDIR_QUEUE_ID) {
-                       PMD_DRV_LOG(ERR, "queue allocation fails for FDIR :"
-                                   " ret =%d", ret);
+                       PMD_DRV_LOG(ERR,
+                               "queue allocation failed for FDIR: ret = %d",
+                               ret);
                        pf->flags &= ~I40E_FLAG_FDIR;
                }
        }
@@ -5016,12 +5183,12 @@ i40e_pf_setup(struct i40e_pf *pf)
        else if (hw->func_caps.rss_table_size == ETH_RSS_RETA_SIZE_512)
                settings.hash_lut_size = I40E_HASH_LUT_SIZE_512;
        else {
-               PMD_DRV_LOG(ERR, "Hash lookup table size (%u) not supported\n",
-                                               hw->func_caps.rss_table_size);
+               PMD_DRV_LOG(ERR, "Hash lookup table size (%u) not supported",
+                       hw->func_caps.rss_table_size);
                return I40E_ERR_PARAM;
        }
-       PMD_DRV_LOG(INFO, "Hardware capability of hash lookup table "
-                       "size: %u\n", hw->func_caps.rss_table_size);
+       PMD_DRV_LOG(INFO, "Hardware capability of hash lookup table size: %u",
+               hw->func_caps.rss_table_size);
        pf->hash_lut_size = hw->func_caps.rss_table_size;
 
        /* Enable ethtype and macvlan filters */
@@ -5271,8 +5438,8 @@ i40e_dev_rx_init(struct i40e_pf *pf)
 
                ret = i40e_rx_queue_init(rxq);
                if (ret != I40E_SUCCESS) {
-                       PMD_DRV_LOG(ERR, "Failed to do RX queue "
-                                   "initialization");
+                       PMD_DRV_LOG(ERR,
+                               "Failed to do RX queue initialization");
                        break;
                }
        }
@@ -5518,6 +5685,24 @@ i40e_dev_handle_vfr_event(struct rte_eth_dev *dev)
        }
 }
 
+static void
+i40e_notify_all_vfs_link_status(struct rte_eth_dev *dev)
+{
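+       /* Send the current PF link status and speed to every VF
+        * through the PF-to-VF virtual channel.
+        */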
+       struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+       struct i40e_virtchnl_pf_event event;
+       int i;
+
+       event.event = I40E_VIRTCHNL_EVENT_LINK_CHANGE;
+       event.event_data.link_event.link_status =
+               dev->data->dev_link.link_status;
+       event.event_data.link_event.link_speed =
+               (enum i40e_aq_link_speed)dev->data->dev_link.link_speed;
+
+       for (i = 0; i < pf->vf_num; i++)
+               i40e_pf_host_send_msg_to_vf(&pf->vfs[i], I40E_VIRTCHNL_OP_EVENT,
+                               I40E_SUCCESS, (uint8_t *)&event, sizeof(event));
+}
+
 static void
 i40e_dev_handle_aq_msg(struct rte_eth_dev *dev)
 {
@@ -5538,8 +5723,9 @@ i40e_dev_handle_aq_msg(struct rte_eth_dev *dev)
                ret = i40e_clean_arq_element(hw, &info, &pending);
 
                if (ret != I40E_SUCCESS) {
-                       PMD_DRV_LOG(INFO, "Failed to read msg from AdminQ, "
-                                   "aq_err: %u", hw->aq.asq_last_status);
+                       PMD_DRV_LOG(INFO,
+                               "Failed to read msg from AdminQ, aq_err: %u",
+                               hw->aq.asq_last_status);
                        break;
                }
                opcode = rte_le_to_cpu_16(info.desc.opcode);
@@ -5554,6 +5740,14 @@ i40e_dev_handle_aq_msg(struct rte_eth_dev *dev)
                                        info.msg_buf,
                                        info.msg_len);
                        break;
+               case i40e_aqc_opc_get_link_status:
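+                       /* Link status changed: refresh the cached link,
+                        * notify the VFs and the LSC callback.
+                        */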
+                       ret = i40e_dev_link_update(dev, 0);
+                       if (!ret) {
+                               i40e_notify_all_vfs_link_status(dev);
+                               _rte_eth_dev_callback_process(dev,
+                                       RTE_ETH_EVENT_INTR_LSC, NULL);
+                       }
+                       break;
                default:
                        PMD_DRV_LOG(ERR, "Request %u is not supported yet",
                                    opcode);
@@ -5563,81 +5757,30 @@ i40e_dev_handle_aq_msg(struct rte_eth_dev *dev)
        rte_free(info.msg_buf);
 }
 
-/*
- * Interrupt handler is registered as the alarm callback for handling LSC
- * interrupt in a definite of time, in order to wait the NIC into a stable
- * state. Currently it waits 1 sec in i40e for the link up interrupt, and
- * no need for link down interrupt.
+/**
+ * Interrupt handler triggered by NIC for handling
+ * specific interrupt.
+ *
+ * @param handle
+ *  Pointer to interrupt handle.
+ * @param param
+ *  The address of parameter (struct rte_eth_dev *) registered before.
+ *
+ * @return
+ *  void
  */
 static void
-i40e_dev_interrupt_delayed_handler(void *param)
+i40e_dev_interrupt_handler(struct rte_intr_handle *intr_handle,
+                          void *param)
 {
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t icr0;
 
-       /* read interrupt causes again */
-       icr0 = I40E_READ_REG(hw, I40E_PFINT_ICR0);
-
-#ifdef RTE_LIBRTE_I40E_DEBUG_DRIVER
-       if (icr0 & I40E_PFINT_ICR0_ECC_ERR_MASK)
-               PMD_DRV_LOG(ERR, "ICR0: unrecoverable ECC error\n");
-       if (icr0 & I40E_PFINT_ICR0_MAL_DETECT_MASK)
-               PMD_DRV_LOG(ERR, "ICR0: malicious programming detected\n");
-       if (icr0 & I40E_PFINT_ICR0_GRST_MASK)
-               PMD_DRV_LOG(INFO, "ICR0: global reset requested\n");
-       if (icr0 & I40E_PFINT_ICR0_PCI_EXCEPTION_MASK)
-               PMD_DRV_LOG(INFO, "ICR0: PCI exception\n activated\n");
-       if (icr0 & I40E_PFINT_ICR0_STORM_DETECT_MASK)
-               PMD_DRV_LOG(INFO, "ICR0: a change in the storm control "
-                                                               "state\n");
-       if (icr0 & I40E_PFINT_ICR0_HMC_ERR_MASK)
-               PMD_DRV_LOG(ERR, "ICR0: HMC error\n");
-       if (icr0 & I40E_PFINT_ICR0_PE_CRITERR_MASK)
-               PMD_DRV_LOG(ERR, "ICR0: protocol engine critical error\n");
-#endif /* RTE_LIBRTE_I40E_DEBUG_DRIVER */
-
-       if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) {
-               PMD_DRV_LOG(INFO, "INT:VF reset detected\n");
-               i40e_dev_handle_vfr_event(dev);
-       }
-       if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) {
-               PMD_DRV_LOG(INFO, "INT:ADMINQ event\n");
-               i40e_dev_handle_aq_msg(dev);
-       }
-
-       /* handle the link up interrupt in an alarm callback */
-       i40e_dev_link_update(dev, 0);
-       _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC);
-
-       i40e_pf_enable_irq0(hw);
-       rte_intr_enable(&(dev->pci_dev->intr_handle));
-}
-
-/**
- * Interrupt handler triggered by NIC  for handling
- * specific interrupt.
- *
- * @param handle
- *  Pointer to interrupt handle.
- * @param param
- *  The address of parameter (struct rte_eth_dev *) regsitered before.
- *
- * @return
- *  void
- */
-static void
-i40e_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
-                          void *param)
-{
-       struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
-       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       uint32_t icr0;
-
-       /* Disable interrupt */
-       i40e_pf_disable_irq0(hw);
-
-       /* read out interrupt causes */
+       /* Disable interrupt */
+       i40e_pf_disable_irq0(hw);
+
+       /* read out interrupt causes */
        icr0 = I40E_READ_REG(hw, I40E_PFINT_ICR0);
 
        /* No interrupt event indicated */
@@ -5671,34 +5814,10 @@ i40e_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
                i40e_dev_handle_aq_msg(dev);
        }
 
-       /* Link Status Change interrupt */
-       if (icr0 & I40E_PFINT_ICR0_LINK_STAT_CHANGE_MASK) {
-#define I40E_US_PER_SECOND 1000000
-               struct rte_eth_link link;
-
-               PMD_DRV_LOG(INFO, "ICR0: link status changed\n");
-               memset(&link, 0, sizeof(link));
-               rte_i40e_dev_atomic_read_link_status(dev, &link);
-               i40e_dev_link_update(dev, 0);
-
-               /*
-                * For link up interrupt, it needs to wait 1 second to let the
-                * hardware be a stable state. Otherwise several consecutive
-                * interrupts can be observed.
-                * For link down interrupt, no need to wait.
-                */
-               if (!link.link_status && rte_eal_alarm_set(I40E_US_PER_SECOND,
-                       i40e_dev_interrupt_delayed_handler, (void *)dev) >= 0)
-                       return;
-               else
-                       _rte_eth_dev_callback_process(dev,
-                               RTE_ETH_EVENT_INTR_LSC);
-       }
-
 done:
        /* Enable interrupt */
        i40e_pf_enable_irq0(hw);
-       rte_intr_enable(&(dev->pci_dev->intr_handle));
+       rte_intr_enable(intr_handle);
 }
 
 static int
@@ -5751,7 +5870,7 @@ i40e_add_macvlan_filters(struct i40e_vsi *vsi,
                                flags = I40E_AQC_MACVLAN_ADD_HASH_MATCH;
                                break;
                        default:
-                               PMD_DRV_LOG(ERR, "Invalid MAC match type\n");
+                               PMD_DRV_LOG(ERR, "Invalid MAC match type");
                                ret = I40E_ERR_PARAM;
                                goto DONE;
                        }
@@ -5826,7 +5945,7 @@ i40e_remove_macvlan_filters(struct i40e_vsi *vsi,
                                flags = I40E_AQC_MACVLAN_DEL_HASH_MATCH;
                                break;
                        default:
-                               PMD_DRV_LOG(ERR, "Invalid MAC filter type\n");
+                               PMD_DRV_LOG(ERR, "Invalid MAC filter type");
                                ret = I40E_ERR_PARAM;
                                goto DONE;
                        }
@@ -5881,14 +6000,11 @@ i40e_find_vlan_filter(struct i40e_vsi *vsi,
 }
 
 static void
-i40e_set_vlan_filter(struct i40e_vsi *vsi,
-                        uint16_t vlan_id, bool on)
+i40e_store_vlan_filter(struct i40e_vsi *vsi,
+                      uint16_t vlan_id, bool on)
 {
        uint32_t vid_idx, vid_bit;
 
-       if (vlan_id > ETH_VLAN_ID_MAX)
-               return;
-
        vid_idx = I40E_VFTA_IDX(vlan_id);
        vid_bit = I40E_VFTA_BIT(vlan_id);
 
@@ -5898,6 +6014,38 @@ i40e_set_vlan_filter(struct i40e_vsi *vsi,
                vsi->vfta[vid_idx] &= ~vid_bit;
 }
 
+static void
+i40e_set_vlan_filter(struct i40e_vsi *vsi,
+                    uint16_t vlan_id, bool on)
+{
+       struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
+       struct i40e_aqc_add_remove_vlan_element_data vlan_data = {0};
+       int ret;
+
+       if (vlan_id > ETH_VLAN_ID_MAX)
+               return;
+
+       i40e_store_vlan_filter(vsi, vlan_id, on);
+
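+       /* Only program the HW VLAN filter when anti-spoofing is enabled and vlan_id is non-zero */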
+       if (!vsi->vlan_anti_spoof_on || !vlan_id)
+               return;
+
+       vlan_data.vlan_tag = rte_cpu_to_le_16(vlan_id);
+
+       if (on) {
+               ret = i40e_aq_add_vlan(hw, vsi->seid,
+                                      &vlan_data, 1, NULL);
+               if (ret != I40E_SUCCESS)
+                       PMD_DRV_LOG(ERR, "Failed to add vlan filter");
+       } else {
+               ret = i40e_aq_remove_vlan(hw, vsi->seid,
+                                         &vlan_data, 1, NULL);
+               if (ret != I40E_SUCCESS)
+                       PMD_DRV_LOG(ERR,
+                                   "Failed to remove vlan filter");
+       }
+}
+
 /**
  * Find all vlan options for specific mac addr,
  * return with actual vlan found.
@@ -5923,8 +6071,8 @@ i40e_find_all_vlan_for_mac(struct i40e_vsi *vsi,
                        for (k = 0; k < I40E_UINT32_BIT_SIZE; k++) {
                                if (vsi->vfta[j] & (1 << k)) {
                                        if (i > num - 1) {
-                                               PMD_DRV_LOG(ERR, "vlan number "
-                                                           "not match");
+                                               PMD_DRV_LOG(ERR,
+                                                       "vlan number doesn't match");
                                                return I40E_ERR_PARAM;
                                        }
                                        (void)rte_memcpy(&mv_f[i].macaddr,
@@ -5969,7 +6117,7 @@ i40e_find_all_mac_for_vlan(struct i40e_vsi *vsi,
 static int
 i40e_vsi_remove_all_macvlan_filter(struct i40e_vsi *vsi)
 {
-       int i, num;
+       int i, j, num;
        struct i40e_mac_filter *f;
        struct i40e_macvlan_filter *mv_f;
        int ret = I40E_SUCCESS;
@@ -5994,6 +6142,7 @@ i40e_vsi_remove_all_macvlan_filter(struct i40e_vsi *vsi)
                TAILQ_FOREACH(f, &vsi->mac_list, next) {
                        (void)rte_memcpy(&mv_f[i].macaddr,
                                &f->mac_info.mac_addr, ETH_ADDR_LEN);
+                       mv_f[i].filter_type = f->mac_info.filter_type;
                        mv_f[i].vlan_id = 0;
                        i++;
                }
@@ -6003,6 +6152,8 @@ i40e_vsi_remove_all_macvlan_filter(struct i40e_vsi *vsi)
                                        vsi->vlan_num, &f->mac_info.mac_addr);
                        if (ret != I40E_SUCCESS)
                                goto DONE;
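+                       /* Copy the MAC filter type into each VLAN entry filled in above */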
+                       for (j = i; j < i + vsi->vlan_num; j++)
+                               mv_f[j].filter_type = f->mac_info.filter_type;
                        i += vsi->vlan_num;
                }
        }
@@ -6214,7 +6365,7 @@ i40e_vsi_delete_mac(struct i40e_vsi *vsi, struct ether_addr *addr)
        if (filter_type == RTE_MACVLAN_PERFECT_MATCH ||
                filter_type == RTE_MACVLAN_HASH_MATCH) {
                if (vlan_num == 0) {
-                       PMD_DRV_LOG(ERR, "VLAN number shouldn't be 0\n");
+                       PMD_DRV_LOG(ERR, "VLAN number shouldn't be 0");
                        return I40E_ERR_PARAM;
                }
        } else if (filter_type == RTE_MAC_PERFECT_MATCH ||
@@ -6256,7 +6407,7 @@ DONE:
 
 /* Configure hash enable flags for RSS */
 uint64_t
-i40e_config_hena(uint64_t flags)
+i40e_config_hena(uint64_t flags, enum i40e_mac_type type)
 {
        uint64_t hena = 0;
 
@@ -6265,20 +6416,42 @@ i40e_config_hena(uint64_t flags)
 
        if (flags & ETH_RSS_FRAG_IPV4)
                hena |= 1ULL << I40E_FILTER_PCTYPE_FRAG_IPV4;
-       if (flags & ETH_RSS_NONFRAG_IPV4_TCP)
-               hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
-       if (flags & ETH_RSS_NONFRAG_IPV4_UDP)
-               hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
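+       /* The X722 splits TCP and UDP traffic into extra PCTYPEs, so enable those bits as well */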
+       if (flags & ETH_RSS_NONFRAG_IPV4_TCP) {
+               if (type == I40E_MAC_X722) {
+                       hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) |
+                        (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK);
+               } else
+                       hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+       }
+       if (flags & ETH_RSS_NONFRAG_IPV4_UDP) {
+               if (type == I40E_MAC_X722) {
+                       hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) |
+                        (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) |
+                        (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP);
+               } else
+                       hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
+       }
        if (flags & ETH_RSS_NONFRAG_IPV4_SCTP)
                hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_SCTP;
        if (flags & ETH_RSS_NONFRAG_IPV4_OTHER)
                hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
        if (flags & ETH_RSS_FRAG_IPV6)
                hena |= 1ULL << I40E_FILTER_PCTYPE_FRAG_IPV6;
-       if (flags & ETH_RSS_NONFRAG_IPV6_TCP)
-               hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP;
-       if (flags & ETH_RSS_NONFRAG_IPV6_UDP)
-               hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP;
+       if (flags & ETH_RSS_NONFRAG_IPV6_TCP) {
+               if (type == I40E_MAC_X722) {
+                       hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP) |
+                        (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK);
+               } else
+                       hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP;
+       }
+       if (flags & ETH_RSS_NONFRAG_IPV6_UDP) {
+               if (type == I40E_MAC_X722) {
+                       hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) |
+                        (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) |
+                        (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP);
+               } else
+                       hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP;
+       }
        if (flags & ETH_RSS_NONFRAG_IPV6_SCTP)
                hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_SCTP;
        if (flags & ETH_RSS_NONFRAG_IPV6_OTHER)
@@ -6301,8 +6474,14 @@ i40e_parse_hena(uint64_t flags)
                rss_hf |= ETH_RSS_FRAG_IPV4;
        if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP))
                rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
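+       /* X722-only PCTYPEs report the same RTE flow types as their base counterparts */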
+       if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK))
+               rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
        if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP))
                rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
+       if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP))
+               rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
+       if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP))
+               rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
        if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_SCTP))
                rss_hf |= ETH_RSS_NONFRAG_IPV4_SCTP;
        if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER))
@@ -6311,8 +6490,14 @@ i40e_parse_hena(uint64_t flags)
                rss_hf |= ETH_RSS_FRAG_IPV6;
        if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP))
                rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
+       if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK))
+               rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
        if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP))
                rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
+       if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP))
+               rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
+       if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP))
+               rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
        if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_SCTP))
                rss_hf |= ETH_RSS_NONFRAG_IPV6_SCTP;
        if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER))
@@ -6332,7 +6517,10 @@ i40e_pf_disable_rss(struct i40e_pf *pf)
 
        hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0));
        hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1))) << 32;
-       hena &= ~I40E_RSS_HENA_ALL;
+       if (hw->mac.type == I40E_MAC_X722)
+               hena &= ~I40E_RSS_HENA_ALL_X722;
+       else
+               hena &= ~I40E_RSS_HENA_ALL;
        i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (uint32_t)hena);
        i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (uint32_t)(hena >> 32));
        I40E_WRITE_FLUSH(hw);
@@ -6360,8 +6548,7 @@ i40e_set_rss_key(struct i40e_vsi *vsi, uint8_t *key, uint8_t key_len)
 
                ret = i40e_aq_set_rss_key(hw, vsi->vsi_id, key_dw);
                if (ret)
-                       PMD_INIT_LOG(ERR, "Failed to configure RSS key "
-                                    "via AQ");
+                       PMD_INIT_LOG(ERR, "Failed to configure RSS key via AQ");
        } else {
                uint32_t *hash_key = (uint32_t *)key;
                uint16_t i;
@@ -6419,8 +6606,11 @@ i40e_hw_rss_hash_set(struct i40e_pf *pf, struct rte_eth_rss_conf *rss_conf)
        rss_hf = rss_conf->rss_hf;
        hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0));
        hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1))) << 32;
-       hena &= ~I40E_RSS_HENA_ALL;
-       hena |= i40e_config_hena(rss_hf);
+       if (hw->mac.type == I40E_MAC_X722)
+               hena &= ~I40E_RSS_HENA_ALL_X722;
+       else
+               hena &= ~I40E_RSS_HENA_ALL;
+       hena |= i40e_config_hena(rss_hf, hw->mac.type);
        i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (uint32_t)hena);
        i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (uint32_t)(hena >> 32));
        I40E_WRITE_FLUSH(hw);
@@ -6439,7 +6629,9 @@ i40e_dev_rss_hash_update(struct rte_eth_dev *dev,
 
        hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0));
        hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1))) << 32;
-       if (!(hena & I40E_RSS_HENA_ALL)) { /* RSS disabled */
+       if (!(hena & ((hw->mac.type == I40E_MAC_X722)
+                ? I40E_RSS_HENA_ALL_X722
+                : I40E_RSS_HENA_ALL))) { /* RSS disabled */
                if (rss_hf != 0) /* Enable RSS */
                        return -EINVAL;
                return 0; /* Nothing to do */
@@ -6502,7 +6694,86 @@ i40e_dev_get_filter_type(uint16_t filter_type, uint16_t *flag)
        return 0;
 }
 
+/* Convert tunnel filter structure */
+static int
+i40e_tunnel_filter_convert(struct i40e_aqc_add_remove_cloud_filters_element_data
+                          *cld_filter,
+                          struct i40e_tunnel_filter *tunnel_filter)
+{
+       ether_addr_copy((struct ether_addr *)&cld_filter->outer_mac,
+                       (struct ether_addr *)&tunnel_filter->input.outer_mac);
+       ether_addr_copy((struct ether_addr *)&cld_filter->inner_mac,
+                       (struct ether_addr *)&tunnel_filter->input.inner_mac);
+       tunnel_filter->input.inner_vlan = cld_filter->inner_vlan;
+       tunnel_filter->input.flags = cld_filter->flags;
+       tunnel_filter->input.tenant_id = cld_filter->tenant_id;
+       tunnel_filter->queue = cld_filter->queue_number;
+
+       return 0;
+}
+
+/* Check whether the tunnel filter already exists */
+struct i40e_tunnel_filter *
+i40e_sw_tunnel_filter_lookup(struct i40e_tunnel_rule *tunnel_rule,
+                            const struct i40e_tunnel_filter_input *input)
+{
+       int ret;
+
+       ret = rte_hash_lookup(tunnel_rule->hash_table, (const void *)input);
+       if (ret < 0)
+               return NULL;
+
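+       /* rte_hash_lookup() returns the key index, which indexes the SW filter array */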
+       return tunnel_rule->hash_map[ret];
+}
+
+/* Add a tunnel filter into the SW list */
 static int
+i40e_sw_tunnel_filter_insert(struct i40e_pf *pf,
+                            struct i40e_tunnel_filter *tunnel_filter)
+{
+       struct i40e_tunnel_rule *rule = &pf->tunnel;
+       int ret;
+
+       ret = rte_hash_add_key(rule->hash_table, &tunnel_filter->input);
+       if (ret < 0) {
+               PMD_DRV_LOG(ERR,
+                           "Failed to insert tunnel filter to hash table %d!",
+                           ret);
+               return ret;
+       }
+       rule->hash_map[ret] = tunnel_filter;
+
+       TAILQ_INSERT_TAIL(&rule->tunnel_list, tunnel_filter, rules);
+
+       return 0;
+}
+
+/* Delete a tunnel filter from the SW list */
+int
+i40e_sw_tunnel_filter_del(struct i40e_pf *pf,
+                         struct i40e_tunnel_filter_input *input)
+{
+       struct i40e_tunnel_rule *rule = &pf->tunnel;
+       struct i40e_tunnel_filter *tunnel_filter;
+       int ret;
+
+       ret = rte_hash_del_key(rule->hash_table, input);
+       if (ret < 0) {
+               PMD_DRV_LOG(ERR,
+                           "Failed to delete tunnel filter to hash table %d!",
+                           ret);
+               return ret;
+       }
+       tunnel_filter = rule->hash_map[ret];
+       rule->hash_map[ret] = NULL;
+
+       TAILQ_REMOVE(&rule->tunnel_list, tunnel_filter, rules);
+       rte_free(tunnel_filter);
+
+       return 0;
+}
+
+int
 i40e_dev_tunnel_filter_set(struct i40e_pf *pf,
                        struct rte_eth_tunnel_filter_conf *tunnel_filter,
                        uint8_t add)
@@ -6517,6 +6788,9 @@ i40e_dev_tunnel_filter_set(struct i40e_pf *pf,
        struct i40e_vsi *vsi = pf->main_vsi;
        struct i40e_aqc_add_remove_cloud_filters_element_data  *cld_filter;
        struct i40e_aqc_add_remove_cloud_filters_element_data  *pfilter;
+       struct i40e_tunnel_rule *tunnel_rule = &pf->tunnel;
+       struct i40e_tunnel_filter *tunnel, *node;
+       struct i40e_tunnel_filter check_filter; /* Check if filter exists */
 
        cld_filter = rte_zmalloc("tunnel_filter",
                sizeof(struct i40e_aqc_add_remove_cloud_filters_element_data),
@@ -6579,11 +6853,38 @@ i40e_dev_tunnel_filter_set(struct i40e_pf *pf,
        pfilter->tenant_id = rte_cpu_to_le_32(tunnel_filter->tenant_id);
        pfilter->queue_number = rte_cpu_to_le_16(tunnel_filter->queue_id);
 
-       if (add)
+       /* Check whether the filter already exists in the SW list */
+       memset(&check_filter, 0, sizeof(check_filter));
+       i40e_tunnel_filter_convert(cld_filter, &check_filter);
+       node = i40e_sw_tunnel_filter_lookup(tunnel_rule, &check_filter.input);
+       if (add && node) {
+               PMD_DRV_LOG(ERR, "Conflict with existing tunnel rules!");
+               return -EINVAL;
+       }
+
+       if (!add && !node) {
+               PMD_DRV_LOG(ERR, "There's no corresponding tunnel filter!");
+               return -EINVAL;
+       }
+
+       if (add) {
                ret = i40e_aq_add_cloud_filters(hw, vsi->seid, cld_filter, 1);
-       else
+               if (ret < 0) {
+                       PMD_DRV_LOG(ERR, "Failed to add a tunnel filter.");
+                       return ret;
+               }
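+               /* Keep a SW copy of the rule so later adds can detect duplicates */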
+               tunnel = rte_zmalloc("tunnel_filter", sizeof(*tunnel), 0);
+               rte_memcpy(tunnel, &check_filter, sizeof(check_filter));
+               ret = i40e_sw_tunnel_filter_insert(pf, tunnel);
+       } else {
                ret = i40e_aq_remove_cloud_filters(hw, vsi->seid,
-                                               cld_filter, 1);
+                                                  cld_filter, 1);
+               if (ret < 0) {
+                       PMD_DRV_LOG(ERR, "Failed to delete a tunnel filter.");
+                       return ret;
+               }
+               ret = i40e_sw_tunnel_filter_del(pf, &node->input);
+       }
 
        rte_free(cld_filter);
        return ret;
@@ -6620,8 +6921,9 @@ i40e_add_vxlan_port(struct i40e_pf *pf, uint16_t port)
        /* Now check if there is space to add the new port */
        idx = i40e_get_vxlan_port_idx(pf, 0);
        if (idx < 0) {
-               PMD_DRV_LOG(ERR, "Maximum number of UDP ports reached,"
-                       "not adding port %d", port);
+               PMD_DRV_LOG(ERR,
+                       "Maximum number of UDP ports reached, not adding port %d",
+                       port);
                return -ENOSPC;
        }
 
@@ -6860,7 +7162,7 @@ i40e_dev_set_gre_key_len(struct i40e_hw *hw, uint8_t len)
        int ret = -EINVAL;
 
        val = I40E_READ_REG(hw, I40E_GL_PRS_FVBM(2));
-       PMD_DRV_LOG(DEBUG, "Read original GL_PRS_FVBM with 0x%08x\n", val);
+       PMD_DRV_LOG(DEBUG, "Read original GL_PRS_FVBM with 0x%08x", val);
 
        if (len == 3) {
                reg = val | I40E_GL_PRS_FVBM_MSK_ENA;
@@ -6879,7 +7181,7 @@ i40e_dev_set_gre_key_len(struct i40e_hw *hw, uint8_t len)
        } else {
                ret = 0;
        }
-       PMD_DRV_LOG(DEBUG, "Read modified GL_PRS_FVBM with 0x%08x\n",
+       PMD_DRV_LOG(DEBUG, "Read modified GL_PRS_FVBM with 0x%08x",
                    I40E_READ_REG(hw, I40E_GL_PRS_FVBM(2)));
 
        return ret;
@@ -6992,15 +7294,15 @@ i40e_set_symmetric_hash_enable_per_port(struct i40e_hw *hw, uint8_t enable)
 
        if (enable > 0) {
                if (reg & I40E_PRTQF_CTL_0_HSYM_ENA_MASK) {
-                       PMD_DRV_LOG(INFO, "Symmetric hash has already "
-                                                       "been enabled");
+                       PMD_DRV_LOG(INFO,
+                               "Symmetric hash has already been enabled");
                        return;
                }
                reg |= I40E_PRTQF_CTL_0_HSYM_ENA_MASK;
        } else {
                if (!(reg & I40E_PRTQF_CTL_0_HSYM_ENA_MASK)) {
-                       PMD_DRV_LOG(INFO, "Symmetric hash has already "
-                                                       "been disabled");
+                       PMD_DRV_LOG(INFO,
+                               "Symmetric hash has already been disabled");
                        return;
                }
                reg &= ~I40E_PRTQF_CTL_0_HSYM_ENA_MASK;
@@ -7124,16 +7426,16 @@ i40e_set_hash_filter_global_config(struct i40e_hw *hw,
        if (g_cfg->hash_func == RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
                /* Toeplitz */
                if (reg & I40E_GLQF_CTL_HTOEP_MASK) {
-                       PMD_DRV_LOG(DEBUG, "Hash function already set to "
-                                                               "Toeplitz");
+                       PMD_DRV_LOG(DEBUG,
+                               "Hash function already set to Toeplitz");
                        goto out;
                }
                reg |= I40E_GLQF_CTL_HTOEP_MASK;
        } else if (g_cfg->hash_func == RTE_ETH_HASH_FUNCTION_SIMPLE_XOR) {
                /* Simple XOR */
                if (!(reg & I40E_GLQF_CTL_HTOEP_MASK)) {
-                       PMD_DRV_LOG(DEBUG, "Hash function already set to "
-                                                       "Simple XOR");
+                       PMD_DRV_LOG(DEBUG,
+                               "Hash function already set to Simple XOR");
                        goto out;
                }
                reg &= ~I40E_GLQF_CTL_HTOEP_MASK;
@@ -7176,6 +7478,24 @@ i40e_get_valid_input_set(enum i40e_filter_pctype pctype,
                        I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST |
                        I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT |
                        I40E_INSET_FLEX_PAYLOAD,
+               [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP] =
+                       I40E_INSET_DMAC | I40E_INSET_SMAC |
+                       I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
+                       I40E_INSET_VLAN_TUNNEL | I40E_INSET_IPV4_TOS |
+                       I40E_INSET_IPV4_PROTO | I40E_INSET_IPV4_TTL |
+                       I40E_INSET_TUNNEL_DMAC | I40E_INSET_TUNNEL_ID |
+                       I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST |
+                       I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT |
+                       I40E_INSET_FLEX_PAYLOAD,
+               [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP] =
+                       I40E_INSET_DMAC | I40E_INSET_SMAC |
+                       I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
+                       I40E_INSET_VLAN_TUNNEL | I40E_INSET_IPV4_TOS |
+                       I40E_INSET_IPV4_PROTO | I40E_INSET_IPV4_TTL |
+                       I40E_INSET_TUNNEL_DMAC | I40E_INSET_TUNNEL_ID |
+                       I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST |
+                       I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT |
+                       I40E_INSET_FLEX_PAYLOAD,
                [I40E_FILTER_PCTYPE_NONF_IPV4_TCP] =
                        I40E_INSET_DMAC | I40E_INSET_SMAC |
                        I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
@@ -7185,6 +7505,15 @@ i40e_get_valid_input_set(enum i40e_filter_pctype pctype,
                        I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST |
                        I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT |
                        I40E_INSET_TCP_FLAGS | I40E_INSET_FLEX_PAYLOAD,
+               [I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK] =
+                       I40E_INSET_DMAC | I40E_INSET_SMAC |
+                       I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
+                       I40E_INSET_VLAN_TUNNEL | I40E_INSET_IPV4_TOS |
+                       I40E_INSET_IPV4_PROTO | I40E_INSET_IPV4_TTL |
+                       I40E_INSET_TUNNEL_DMAC | I40E_INSET_TUNNEL_ID |
+                       I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST |
+                       I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT |
+                       I40E_INSET_TCP_FLAGS | I40E_INSET_FLEX_PAYLOAD,
                [I40E_FILTER_PCTYPE_NONF_IPV4_SCTP] =
                        I40E_INSET_DMAC | I40E_INSET_SMAC |
                        I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
@@ -7218,6 +7547,24 @@ i40e_get_valid_input_set(enum i40e_filter_pctype pctype,
                        I40E_INSET_IPV6_HOP_LIMIT | I40E_INSET_IPV6_SRC |
                        I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT |
                        I40E_INSET_DST_PORT | I40E_INSET_FLEX_PAYLOAD,
+               [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP] =
+                       I40E_INSET_DMAC | I40E_INSET_SMAC |
+                       I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
+                       I40E_INSET_VLAN_TUNNEL | I40E_INSET_IPV6_TC |
+                       I40E_INSET_IPV6_FLOW | I40E_INSET_IPV6_NEXT_HDR |
+                       I40E_INSET_IPV6_HOP_LIMIT | I40E_INSET_IPV6_SRC |
+                       I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT |
+                       I40E_INSET_DST_PORT | I40E_INSET_TCP_FLAGS |
+                       I40E_INSET_FLEX_PAYLOAD,
+               [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP] =
+                       I40E_INSET_DMAC | I40E_INSET_SMAC |
+                       I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
+                       I40E_INSET_VLAN_TUNNEL | I40E_INSET_IPV6_TC |
+                       I40E_INSET_IPV6_FLOW | I40E_INSET_IPV6_NEXT_HDR |
+                       I40E_INSET_IPV6_HOP_LIMIT | I40E_INSET_IPV6_SRC |
+                       I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT |
+                       I40E_INSET_DST_PORT | I40E_INSET_TCP_FLAGS |
+                       I40E_INSET_FLEX_PAYLOAD,
                [I40E_FILTER_PCTYPE_NONF_IPV6_TCP] =
                        I40E_INSET_DMAC | I40E_INSET_SMAC |
                        I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
@@ -7227,6 +7574,15 @@ i40e_get_valid_input_set(enum i40e_filter_pctype pctype,
                        I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT |
                        I40E_INSET_DST_PORT | I40E_INSET_TCP_FLAGS |
                        I40E_INSET_FLEX_PAYLOAD,
+               [I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK] =
+                       I40E_INSET_DMAC | I40E_INSET_SMAC |
+                       I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
+                       I40E_INSET_VLAN_TUNNEL | I40E_INSET_IPV6_TC |
+                       I40E_INSET_IPV6_FLOW | I40E_INSET_IPV6_NEXT_HDR |
+                       I40E_INSET_IPV6_HOP_LIMIT | I40E_INSET_IPV6_SRC |
+                       I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT |
+                       I40E_INSET_DST_PORT | I40E_INSET_TCP_FLAGS |
+                       I40E_INSET_FLEX_PAYLOAD,
                [I40E_FILTER_PCTYPE_NONF_IPV6_SCTP] =
                        I40E_INSET_DMAC | I40E_INSET_SMAC |
                        I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
@@ -7266,11 +7622,26 @@ i40e_get_valid_input_set(enum i40e_filter_pctype pctype,
                I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST |
                I40E_INSET_IPV4_TOS | I40E_INSET_IPV4_TTL |
                I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
+               [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP] =
+               I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
+               I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST |
+               I40E_INSET_IPV4_TOS | I40E_INSET_IPV4_TTL |
+               I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
+               [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP] =
+               I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
+               I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST |
+               I40E_INSET_IPV4_TOS | I40E_INSET_IPV4_TTL |
+               I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
                [I40E_FILTER_PCTYPE_NONF_IPV4_TCP] =
                I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
                I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST |
                I40E_INSET_IPV4_TOS | I40E_INSET_IPV4_TTL |
                I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
+               [I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK] =
+               I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
+               I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST |
+               I40E_INSET_IPV4_TOS | I40E_INSET_IPV4_TTL |
+               I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
                [I40E_FILTER_PCTYPE_NONF_IPV4_SCTP] =
                I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
                I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST |
@@ -7292,11 +7663,26 @@ i40e_get_valid_input_set(enum i40e_filter_pctype pctype,
                I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST |
                I40E_INSET_IPV6_TC | I40E_INSET_IPV6_HOP_LIMIT |
                I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
+               [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP] =
+               I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
+               I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST |
+               I40E_INSET_IPV6_TC | I40E_INSET_IPV6_HOP_LIMIT |
+               I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
+               [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP] =
+               I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
+               I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST |
+               I40E_INSET_IPV6_TC | I40E_INSET_IPV6_HOP_LIMIT |
+               I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
                [I40E_FILTER_PCTYPE_NONF_IPV6_TCP] =
                I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
                I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST |
                I40E_INSET_IPV6_TC | I40E_INSET_IPV6_HOP_LIMIT |
                I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
+               [I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK] =
+               I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
+               I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST |
+               I40E_INSET_IPV6_TC | I40E_INSET_IPV6_HOP_LIMIT |
+               I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
                [I40E_FILTER_PCTYPE_NONF_IPV6_SCTP] =
                I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER |
                I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST |
@@ -7340,7 +7726,7 @@ i40e_validate_input_set(enum i40e_filter_pctype pctype,
 }
 
 /* default input set fields combination per pctype */
-static uint64_t
+uint64_t
 i40e_get_default_input_set(uint16_t pctype)
 {
        static const uint64_t default_inset_table[] = {
@@ -7349,9 +7735,18 @@ i40e_get_default_input_set(uint16_t pctype)
                [I40E_FILTER_PCTYPE_NONF_IPV4_UDP] =
                        I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST |
                        I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
+               [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP] =
+                       I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST |
+                       I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
+               [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP] =
+                       I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST |
+                       I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
                [I40E_FILTER_PCTYPE_NONF_IPV4_TCP] =
                        I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST |
                        I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
+               [I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK] =
+                       I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST |
+                       I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
                [I40E_FILTER_PCTYPE_NONF_IPV4_SCTP] =
                        I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST |
                        I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT |
@@ -7363,9 +7758,18 @@ i40e_get_default_input_set(uint16_t pctype)
                [I40E_FILTER_PCTYPE_NONF_IPV6_UDP] =
                        I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST |
                        I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
+               [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP] =
+                       I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST |
+                       I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
+               [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP] =
+                       I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST |
+                       I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
                [I40E_FILTER_PCTYPE_NONF_IPV6_TCP] =
                        I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST |
                        I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
+               [I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK] =
+                       I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST |
+                       I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT,
                [I40E_FILTER_PCTYPE_NONF_IPV6_SCTP] =
                        I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST |
                        I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT |
@@ -7484,25 +7888,23 @@ i40e_parse_input_set(uint64_t *inset,
  * and vice versa
  */
 static uint64_t
-i40e_translate_input_set_reg(uint64_t input)
+i40e_translate_input_set_reg(enum i40e_mac_type type, uint64_t input)
 {
        uint64_t val = 0;
        uint16_t i;
 
-       static const struct {
+       struct inset_map {
                uint64_t inset;
                uint64_t inset_reg;
-       } inset_map[] = {
+       };
+
+       static const struct inset_map inset_map_common[] = {
                {I40E_INSET_DMAC, I40E_REG_INSET_L2_DMAC},
                {I40E_INSET_SMAC, I40E_REG_INSET_L2_SMAC},
                {I40E_INSET_VLAN_OUTER, I40E_REG_INSET_L2_OUTER_VLAN},
                {I40E_INSET_VLAN_INNER, I40E_REG_INSET_L2_INNER_VLAN},
                {I40E_INSET_LAST_ETHER_TYPE, I40E_REG_INSET_LAST_ETHER_TYPE},
-               {I40E_INSET_IPV4_SRC, I40E_REG_INSET_L3_SRC_IP4},
-               {I40E_INSET_IPV4_DST, I40E_REG_INSET_L3_DST_IP4},
                {I40E_INSET_IPV4_TOS, I40E_REG_INSET_L3_IP4_TOS},
-               {I40E_INSET_IPV4_PROTO, I40E_REG_INSET_L3_IP4_PROTO},
-               {I40E_INSET_IPV4_TTL, I40E_REG_INSET_L3_IP4_TTL},
                {I40E_INSET_IPV6_SRC, I40E_REG_INSET_L3_SRC_IP6},
                {I40E_INSET_IPV6_DST, I40E_REG_INSET_L3_DST_IP6},
                {I40E_INSET_IPV6_TC, I40E_REG_INSET_L3_IP6_TC},
@@ -7531,13 +7933,40 @@ i40e_translate_input_set_reg(uint64_t input)
                {I40E_INSET_FLEX_PAYLOAD_W8, I40E_REG_INSET_FLEX_PAYLOAD_WORD8},
        };
 
+       /* Some registers are mapped differently on the X722 */
+       static const struct inset_map inset_map_diff_x722[] = {
+               {I40E_INSET_IPV4_SRC, I40E_X722_REG_INSET_L3_SRC_IP4},
+               {I40E_INSET_IPV4_DST, I40E_X722_REG_INSET_L3_DST_IP4},
+               {I40E_INSET_IPV4_PROTO, I40E_X722_REG_INSET_L3_IP4_PROTO},
+               {I40E_INSET_IPV4_TTL, I40E_X722_REG_INSET_L3_IP4_TTL},
+       };
+
+       static const struct inset_map inset_map_diff_not_x722[] = {
+               {I40E_INSET_IPV4_SRC, I40E_REG_INSET_L3_SRC_IP4},
+               {I40E_INSET_IPV4_DST, I40E_REG_INSET_L3_DST_IP4},
+               {I40E_INSET_IPV4_PROTO, I40E_REG_INSET_L3_IP4_PROTO},
+               {I40E_INSET_IPV4_TTL, I40E_REG_INSET_L3_IP4_TTL},
+       };
+
        if (input == 0)
                return val;
 
        /* Translate input set to register aware inset */
-       for (i = 0; i < RTE_DIM(inset_map); i++) {
-               if (input & inset_map[i].inset)
-                       val |= inset_map[i].inset_reg;
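+       /* Apply the MAC-type specific IPv4 mappings first, then the common mappings */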
+       if (type == I40E_MAC_X722) {
+               for (i = 0; i < RTE_DIM(inset_map_diff_x722); i++) {
+                       if (input & inset_map_diff_x722[i].inset)
+                               val |= inset_map_diff_x722[i].inset_reg;
+               }
+       } else {
+               for (i = 0; i < RTE_DIM(inset_map_diff_not_x722); i++) {
+                       if (input & inset_map_diff_not_x722[i].inset)
+                               val |= inset_map_diff_not_x722[i].inset_reg;
+               }
+       }
+
+       for (i = 0; i < RTE_DIM(inset_map_common); i++) {
+               if (input & inset_map_common[i].inset)
+                       val |= inset_map_common[i].inset_reg;
        }
 
        return val;
@@ -7596,10 +8025,10 @@ i40e_check_write_reg(struct i40e_hw *hw, uint32_t addr, uint32_t val)
 {
        uint32_t reg = i40e_read_rx_ctl(hw, addr);
 
-       PMD_DRV_LOG(DEBUG, "[0x%08x] original: 0x%08x\n", addr, reg);
+       PMD_DRV_LOG(DEBUG, "[0x%08x] original: 0x%08x", addr, reg);
        if (reg != val)
                i40e_write_rx_ctl(hw, addr, val);
-       PMD_DRV_LOG(DEBUG, "[0x%08x] after: 0x%08x\n", addr,
+       PMD_DRV_LOG(DEBUG, "[0x%08x] after: 0x%08x", addr,
                    (uint32_t)i40e_read_rx_ctl(hw, addr));
 }
 
@@ -7614,15 +8043,22 @@ i40e_filter_input_set_init(struct i40e_pf *pf)
 
        for (pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
             pctype <= I40E_FILTER_PCTYPE_L2_PAYLOAD; pctype++) {
-               if (!I40E_VALID_PCTYPE(pctype))
-                       continue;
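+               /* The X722 supports extra PCTYPEs, so it needs its own validity check */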
+               if (hw->mac.type == I40E_MAC_X722) {
+                       if (!I40E_VALID_PCTYPE_X722(pctype))
+                               continue;
+               } else {
+                       if (!I40E_VALID_PCTYPE(pctype))
+                               continue;
+               }
+
                input_set = i40e_get_default_input_set(pctype);
 
                num = i40e_generate_inset_mask_reg(input_set, mask_reg,
                                                   I40E_INSET_MASK_NUM_REG);
                if (num < 0)
                        return;
-               inset_reg = i40e_translate_input_set_reg(input_set);
+               inset_reg = i40e_translate_input_set_reg(hw->mac.type,
+                                       input_set);
 
                i40e_check_write_reg(hw, I40E_PRTQF_FD_INSET(pctype, 0),
                                      (uint32_t)(inset_reg & UINT32_MAX));
@@ -7680,7 +8116,15 @@ i40e_hash_filter_inset_select(struct i40e_hw *hw,
                PMD_DRV_LOG(ERR, "invalid flow_type input.");
                return -EINVAL;
        }
-       pctype = i40e_flowtype_to_pctype(conf->flow_type);
+
+       if (hw->mac.type == I40E_MAC_X722) {
+               /* Get the translated pctype from the X722 FD pctype mapping register */
+               pctype = (enum i40e_filter_pctype)i40e_read_rx_ctl(hw,
+                       I40E_GLQF_FD_PCTYPES((int)i40e_flowtype_to_pctype(
+                       conf->flow_type)));
+       } else
+               pctype = i40e_flowtype_to_pctype(conf->flow_type);
+
        ret = i40e_parse_input_set(&input_set, pctype, conf->field,
                                   conf->inset_size);
        if (ret) {
@@ -7704,7 +8148,7 @@ i40e_hash_filter_inset_select(struct i40e_hw *hw,
        if (num < 0)
                return -EINVAL;
 
-       inset_reg |= i40e_translate_input_set_reg(input_set);
+       inset_reg |= i40e_translate_input_set_reg(hw->mac.type, input_set);
 
        i40e_check_write_reg(hw, I40E_GLQF_HASH_INSET(0, pctype),
                              (uint32_t)(inset_reg & UINT32_MAX));
@@ -7749,7 +8193,9 @@ i40e_fdir_filter_inset_select(struct i40e_pf *pf,
                PMD_DRV_LOG(ERR, "invalid flow_type input.");
                return -EINVAL;
        }
+
        pctype = i40e_flowtype_to_pctype(conf->flow_type);
+
        ret = i40e_parse_input_set(&input_set, pctype, conf->field,
                                   conf->inset_size);
        if (ret) {
@@ -7780,7 +8226,7 @@ i40e_fdir_filter_inset_select(struct i40e_pf *pf,
        if (num < 0)
                return -EINVAL;
 
-       inset_reg |= i40e_translate_input_set_reg(input_set);
+       inset_reg |= i40e_translate_input_set_reg(hw->mac.type, input_set);
 
        i40e_check_write_reg(hw, I40E_PRTQF_FD_INSET(pctype, 0),
                              (uint32_t)(inset_reg & UINT32_MAX));
@@ -7893,16 +8339,95 @@ i40e_hash_filter_ctrl(struct rte_eth_dev *dev,
        return ret;
 }
 
+/* Convert ethertype filter structure */
+static int
+i40e_ethertype_filter_convert(const struct rte_eth_ethertype_filter *input,
+                             struct i40e_ethertype_filter *filter)
+{
+       rte_memcpy(&filter->input.mac_addr, &input->mac_addr, ETHER_ADDR_LEN);
+       filter->input.ether_type = input->ether_type;
+       filter->flags = input->flags;
+       filter->queue = input->queue;
+
+       return 0;
+}
+
+/* Check whether the ethertype filter already exists */
+struct i40e_ethertype_filter *
+i40e_sw_ethertype_filter_lookup(struct i40e_ethertype_rule *ethertype_rule,
+                               const struct i40e_ethertype_filter_input *input)
+{
+       int ret;
+
+       ret = rte_hash_lookup(ethertype_rule->hash_table, (const void *)input);
+       if (ret < 0)
+               return NULL;
+
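+       /* rte_hash_lookup() returns the key index on success */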
+       return ethertype_rule->hash_map[ret];
+}
+
+/* Add an ethertype filter to the SW list */
+static int
+i40e_sw_ethertype_filter_insert(struct i40e_pf *pf,
+                               struct i40e_ethertype_filter *filter)
+{
+       struct i40e_ethertype_rule *rule = &pf->ethertype;
+       int ret;
+
+       ret = rte_hash_add_key(rule->hash_table, &filter->input);
+       if (ret < 0) {
+               PMD_DRV_LOG(ERR,
+                           "Failed to insert ethertype filter"
+                           " to hash table %d!",
+                           ret);
+               return ret;
+       }
+       rule->hash_map[ret] = filter;
+
+       TAILQ_INSERT_TAIL(&rule->ethertype_list, filter, rules);
+
+       return 0;
+}
+
+/* Delete an ethertype filter from the SW list */
+int
+i40e_sw_ethertype_filter_del(struct i40e_pf *pf,
+                            struct i40e_ethertype_filter_input *input)
+{
+       struct i40e_ethertype_rule *rule = &pf->ethertype;
+       struct i40e_ethertype_filter *filter;
+       int ret;
+
+       ret = rte_hash_del_key(rule->hash_table, input);
+       if (ret < 0) {
+               PMD_DRV_LOG(ERR,
+                           "Failed to delete ethertype filter"
+                           " to hash table %d!",
+                           ret);
+               return ret;
+       }
+       filter = rule->hash_map[ret];
+       rule->hash_map[ret] = NULL;
+
+       TAILQ_REMOVE(&rule->ethertype_list, filter, rules);
+       rte_free(filter);
+
+       return 0;
+}
+
 /*
  * Configure ethertype filter, which can director packet by filtering
  * with mac address and ether_type or only ether_type
  */
-static int
+int
 i40e_ethertype_filter_set(struct i40e_pf *pf,
                        struct rte_eth_ethertype_filter *filter,
                        bool add)
 {
        struct i40e_hw *hw = I40E_PF_TO_HW(pf);
+       struct i40e_ethertype_rule *ethertype_rule = &pf->ethertype;
+       struct i40e_ethertype_filter *ethertype_filter, *node;
+       struct i40e_ethertype_filter check_filter;
        struct i40e_control_filter_stats stats;
        uint16_t flags = 0;
        int ret;
@@ -7913,13 +8438,29 @@ i40e_ethertype_filter_set(struct i40e_pf *pf,
        }
        if (filter->ether_type == ETHER_TYPE_IPv4 ||
                filter->ether_type == ETHER_TYPE_IPv6) {
-               PMD_DRV_LOG(ERR, "unsupported ether_type(0x%04x) in"
-                       " control packet filter.", filter->ether_type);
+               PMD_DRV_LOG(ERR,
+                       "unsupported ether_type(0x%04x) in control packet filter.",
+                       filter->ether_type);
                return -EINVAL;
        }
        if (filter->ether_type == ETHER_TYPE_VLAN)
-               PMD_DRV_LOG(WARNING, "filter vlan ether_type in first tag is"
-                       " not supported.");
+               PMD_DRV_LOG(WARNING,
+                       "filter vlan ether_type in first tag is not supported.");
+
+       /* Check whether the filter already exists in the SW list */
+       memset(&check_filter, 0, sizeof(check_filter));
+       i40e_ethertype_filter_convert(filter, &check_filter);
+       node = i40e_sw_ethertype_filter_lookup(ethertype_rule,
+                                              &check_filter.input);
+       if (add && node) {
+               PMD_DRV_LOG(ERR, "Conflict with existing ethertype rules!");
+               return -EINVAL;
+       }
+
+       if (!add && !node) {
+               PMD_DRV_LOG(ERR, "There's no corresponding ethertype filter!");
+               return -EINVAL;
+       }
 
        if (!(filter->flags & RTE_ETHTYPE_FLAGS_MAC))
                flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC;
@@ -7934,14 +8475,25 @@ i40e_ethertype_filter_set(struct i40e_pf *pf,
                        pf->main_vsi->seid,
                        filter->queue, add, &stats, NULL);
 
-       PMD_DRV_LOG(INFO, "add/rem control packet filter, return %d,"
-                        " mac_etype_used = %u, etype_used = %u,"
-                        " mac_etype_free = %u, etype_free = %u\n",
-                        ret, stats.mac_etype_used, stats.etype_used,
-                        stats.mac_etype_free, stats.etype_free);
+       PMD_DRV_LOG(INFO,
+               "add/rem control packet filter, return %d, mac_etype_used = %u, etype_used = %u, mac_etype_free = %u, etype_free = %u",
+               ret, stats.mac_etype_used, stats.etype_used,
+               stats.mac_etype_free, stats.etype_free);
        if (ret < 0)
                return -ENOSYS;
-       return 0;
+
+       /* Add or delete a filter in SW list */
+       if (add) {
+               ethertype_filter = rte_zmalloc("ethertype_filter",
+                                      sizeof(*ethertype_filter), 0);
+               rte_memcpy(ethertype_filter, &check_filter,
+                          sizeof(check_filter));
+               ret = i40e_sw_ethertype_filter_insert(pf, ethertype_filter);
+       } else {
+               ret = i40e_sw_ethertype_filter_del(pf, &node->input);
+       }
+
+       return ret;
 }
 
 /*
@@ -7976,7 +8528,7 @@ i40e_ethertype_filter_handle(struct rte_eth_dev *dev,
                        FALSE);
                break;
        default:
-               PMD_DRV_LOG(ERR, "unsupported operation %u\n", filter_op);
+               PMD_DRV_LOG(ERR, "unsupported operation %u", filter_op);
                ret = -ENOSYS;
                break;
        }
@@ -8014,6 +8566,11 @@ i40e_dev_filter_ctrl(struct rte_eth_dev *dev,
        case RTE_ETH_FILTER_FDIR:
                ret = i40e_fdir_ctrl_func(dev, filter_op, arg);
                break;
+       case RTE_ETH_FILTER_GENERIC:
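+               /* Generic (rte_flow) filters: hand back the i40e flow ops table on GET */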
+               if (filter_op != RTE_ETH_FILTER_GET)
+                       return -EINVAL;
+               *(const void **)arg = &i40e_flow_ops;
+               break;
        default:
                PMD_DRV_LOG(WARNING, "Filter type (%d) not supported",
                                                        filter_type);
@@ -8031,10 +8588,11 @@ i40e_dev_filter_ctrl(struct rte_eth_dev *dev,
 static void
 i40e_enable_extended_tag(struct rte_eth_dev *dev)
 {
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
        uint32_t buf = 0;
        int ret;
 
-       ret = rte_eal_pci_read_config(dev->pci_dev, &buf, sizeof(buf),
+       ret = rte_eal_pci_read_config(pci_dev, &buf, sizeof(buf),
                                      PCI_DEV_CAP_REG);
        if (ret < 0) {
                PMD_DRV_LOG(ERR, "Failed to read PCI offset 0x%x",
@@ -8047,7 +8605,7 @@ i40e_enable_extended_tag(struct rte_eth_dev *dev)
        }
 
        buf = 0;
-       ret = rte_eal_pci_read_config(dev->pci_dev, &buf, sizeof(buf),
+       ret = rte_eal_pci_read_config(pci_dev, &buf, sizeof(buf),
                                      PCI_DEV_CTRL_REG);
        if (ret < 0) {
                PMD_DRV_LOG(ERR, "Failed to read PCI offset 0x%x",
@@ -8059,7 +8617,7 @@ i40e_enable_extended_tag(struct rte_eth_dev *dev)
                return;
        }
        buf |= PCI_DEV_CTRL_EXT_TAG_MASK;
-       ret = rte_eal_pci_write_config(dev->pci_dev, &buf, sizeof(buf),
+       ret = rte_eal_pci_write_config(pci_dev, &buf, sizeof(buf),
                                       PCI_DEV_CTRL_REG);
        if (ret < 0) {
                PMD_DRV_LOG(ERR, "Failed to write PCI offset 0x%x",
@@ -8122,8 +8680,14 @@ i40e_pctype_to_flowtype(enum i40e_filter_pctype pctype)
                [I40E_FILTER_PCTYPE_FRAG_IPV4] = RTE_ETH_FLOW_FRAG_IPV4,
                [I40E_FILTER_PCTYPE_NONF_IPV4_UDP] =
                        RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
+               [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP] =
+                       RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
+               [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP] =
+                       RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
                [I40E_FILTER_PCTYPE_NONF_IPV4_TCP] =
                        RTE_ETH_FLOW_NONFRAG_IPV4_TCP,
+               [I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK] =
+                       RTE_ETH_FLOW_NONFRAG_IPV4_TCP,
                [I40E_FILTER_PCTYPE_NONF_IPV4_SCTP] =
                        RTE_ETH_FLOW_NONFRAG_IPV4_SCTP,
                [I40E_FILTER_PCTYPE_NONF_IPV4_OTHER] =
@@ -8131,8 +8695,14 @@ i40e_pctype_to_flowtype(enum i40e_filter_pctype pctype)
                [I40E_FILTER_PCTYPE_FRAG_IPV6] = RTE_ETH_FLOW_FRAG_IPV6,
                [I40E_FILTER_PCTYPE_NONF_IPV6_UDP] =
                        RTE_ETH_FLOW_NONFRAG_IPV6_UDP,
+               [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP] =
+                       RTE_ETH_FLOW_NONFRAG_IPV6_UDP,
+               [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP] =
+                       RTE_ETH_FLOW_NONFRAG_IPV6_UDP,
                [I40E_FILTER_PCTYPE_NONF_IPV6_TCP] =
                        RTE_ETH_FLOW_NONFRAG_IPV6_TCP,
+               [I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK] =
+                       RTE_ETH_FLOW_NONFRAG_IPV6_TCP,
                [I40E_FILTER_PCTYPE_NONF_IPV6_SCTP] =
                        RTE_ETH_FLOW_NONFRAG_IPV6_SCTP,
                [I40E_FILTER_PCTYPE_NONF_IPV6_OTHER] =
@@ -8168,6 +8738,23 @@ i40e_pctype_to_flowtype(enum i40e_filter_pctype pctype)
 #define I40E_GL_SWR_PM_UP_THR_SF_VALUE   0x06060606
 #define I40E_GL_SWR_PM_UP_THR            0x269FBC
 
+static int
+i40e_dev_sync_phy_type(struct i40e_hw *hw)
+{
+       enum i40e_status_code status;
+       struct i40e_aq_get_phy_abilities_resp phy_ab;
+       int ret = -ENOTSUP;
+
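+       /* Query the PHY abilities over the AdminQ; report -ENOTSUP if the query fails */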
+       status = i40e_aq_get_phy_capabilities(hw, false, true, &phy_ab,
+                                             NULL);
+
+       if (status)
+               return ret;
+
+       return 0;
+}
+
 static void
 i40e_configure_registers(struct i40e_hw *hw)
 {
@@ -8185,7 +8772,8 @@ i40e_configure_registers(struct i40e_hw *hw)
 
        for (i = 0; i < RTE_DIM(reg_table); i++) {
                if (reg_table[i].addr == I40E_GL_SWR_PM_UP_THR) {
-                       if (i40e_is_40G_device(hw->device_id)) /* For XL710 */
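+                       /* Detect 40G/25G parts from the reported PHY types instead of the device ID */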
+                       if (I40E_PHY_TYPE_SUPPORT_40G(hw->phy.phy_types) || /* For XL710 */
+                           I40E_PHY_TYPE_SUPPORT_25G(hw->phy.phy_types)) /* For XXV710 */
                                reg_table[i].val =
                                        I40E_GL_SWR_PM_UP_THR_SF_VALUE;
                        else /* For X710 */
@@ -8208,9 +8796,9 @@ i40e_configure_registers(struct i40e_hw *hw)
                ret = i40e_aq_debug_write_register(hw, reg_table[i].addr,
                                                reg_table[i].val, NULL);
                if (ret < 0) {
-                       PMD_DRV_LOG(ERR, "Failed to write 0x%"PRIx64" to the "
-                               "address of 0x%"PRIx32, reg_table[i].val,
-                                                       reg_table[i].addr);
+                       PMD_DRV_LOG(ERR,
+                               "Failed to write 0x%"PRIx64" to the address of 0x%"PRIx32,
+                               reg_table[i].val, reg_table[i].addr);
                        break;
                }
                PMD_DRV_LOG(DEBUG, "Write 0x%"PRIx64" to the address of "
@@ -8255,8 +8843,9 @@ i40e_config_qinq(struct i40e_hw *hw, struct i40e_vsi *vsi)
                                                   I40E_VSI_L2TAGSTXVALID(
                                                   vsi->vsi_id), reg, NULL);
                if (ret < 0) {
-                       PMD_DRV_LOG(ERR, "Failed to update "
-                               "VSI_L2TAGSTXVALID[%d]", vsi->vsi_id);
+                       PMD_DRV_LOG(ERR,
+                               "Failed to update VSI_L2TAGSTXVALID[%d]",
+                               vsi->vsi_id);
                        return I40E_ERR_CONFIG;
                }
        }
@@ -8307,11 +8896,10 @@ i40e_aq_add_mirror_rule(struct i40e_hw *hw,
 
        rte_memcpy(&desc.params.raw, &cmd, sizeof(cmd));
        status = i40e_asq_send_command(hw, &desc, entries, buff_len, NULL);
-       PMD_DRV_LOG(INFO, "i40e_aq_add_mirror_rule, aq_status %d,"
-                        "rule_id = %u"
-                        " mirror_rules_used = %u, mirror_rules_free = %u,",
-                        hw->aq.asq_last_status, resp->rule_id,
-                        resp->mirror_rules_used, resp->mirror_rules_free);
+       PMD_DRV_LOG(INFO,
+               "i40e_aq_add_mirror_rule, aq_status %d, rule_id = %u mirror_rules_used = %u, mirror_rules_free = %u,",
+               hw->aq.asq_last_status, resp->rule_id,
+               resp->mirror_rules_used, resp->mirror_rules_free);
        *rule_id = rte_le_to_cpu_16(resp->rule_id);
 
        return status;
@@ -8389,8 +8977,8 @@ i40e_mirror_rule_set(struct rte_eth_dev *dev,
        PMD_DRV_LOG(DEBUG, "i40e_mirror_rule_set: sw_id = %d.", sw_id);
 
        if (pf->main_vsi->veb == NULL || pf->vfs == NULL) {
-               PMD_DRV_LOG(ERR, "mirror rule can not be configured"
-                       " without veb or vfs.");
+               PMD_DRV_LOG(ERR,
+                       "mirror rule can not be configured without veb or vfs.");
                return -ENOSYS;
        }
        if (pf->nb_mirror_rule > I40E_MAX_MIRROR_RULES) {
@@ -8422,9 +9010,9 @@ i40e_mirror_rule_set(struct rte_eth_dev *dev,
                                        mirr_rule->entries,
                                        mirr_rule->num_entries, mirr_rule->id);
                        if (ret < 0) {
-                               PMD_DRV_LOG(ERR, "failed to remove mirror rule:"
-                                                  " ret = %d, aq_err = %d.",
-                                                  ret, hw->aq.asq_last_status);
+                               PMD_DRV_LOG(ERR,
+                                       "failed to remove mirror rule: ret = %d, aq_err = %d.",
+                                       ret, hw->aq.asq_last_status);
                                return -ENOSYS;
                        }
                        TAILQ_REMOVE(&pf->mirror_list, mirr_rule, rules);
@@ -8513,9 +9101,9 @@ i40e_mirror_rule_set(struct rte_eth_dev *dev,
                                      mirr_rule->rule_type, mirr_rule->entries,
                                      j, &rule_id);
        if (ret < 0) {
-               PMD_DRV_LOG(ERR, "failed to add mirror rule:"
-                                  " ret = %d, aq_err = %d.",
-                                  ret, hw->aq.asq_last_status);
+               PMD_DRV_LOG(ERR,
+                       "failed to add mirror rule: ret = %d, aq_err = %d.",
+                       ret, hw->aq.asq_last_status);
                rte_free(mirr_rule);
                return -ENOSYS;
        }
@@ -8567,9 +9155,9 @@ i40e_mirror_rule_reset(struct rte_eth_dev *dev, uint8_t sw_id)
                                mirr_rule->entries,
                                mirr_rule->num_entries, mirr_rule->id);
                if (ret < 0) {
-                       PMD_DRV_LOG(ERR, "failed to remove mirror rule:"
-                                          " status = %d, aq_err = %d.",
-                                          ret, hw->aq.asq_last_status);
+                       PMD_DRV_LOG(ERR,
+                               "failed to remove mirror rule: status = %d, aq_err = %d.",
+                               ret, hw->aq.asq_last_status);
                        return -ENOSYS;
                }
                TAILQ_REMOVE(&pf->mirror_list, mirr_rule, rules);
@@ -9001,9 +9589,9 @@ i40e_config_switch_comp_tc(struct i40e_veb *veb, uint8_t tc_map)
        ret = i40e_aq_config_switch_comp_bw_config(hw, veb->seid,
                                                   &veb_bw, NULL);
        if (ret) {
-               PMD_INIT_LOG(ERR, "AQ command Config switch_comp BW allocation"
-                                 " per TC failed = %d",
-                                 hw->aq.asq_last_status);
+               PMD_INIT_LOG(ERR,
+                       "AQ command Config switch_comp BW allocation per TC failed = %d",
+                       hw->aq.asq_last_status);
                return ret;
        }
 
@@ -9011,16 +9599,18 @@ i40e_config_switch_comp_tc(struct i40e_veb *veb, uint8_t tc_map)
        ret = i40e_aq_query_switch_comp_ets_config(hw, veb->seid,
                                                   &ets_query, NULL);
        if (ret != I40E_SUCCESS) {
-               PMD_DRV_LOG(ERR, "Failed to get switch_comp ETS"
-                                " configuration %u", hw->aq.asq_last_status);
+               PMD_DRV_LOG(ERR,
+                       "Failed to get switch_comp ETS configuration %u",
+                       hw->aq.asq_last_status);
                return ret;
        }
        memset(&bw_query, 0, sizeof(bw_query));
        ret = i40e_aq_query_switch_comp_bw_config(hw, veb->seid,
                                                  &bw_query, NULL);
        if (ret != I40E_SUCCESS) {
-               PMD_DRV_LOG(ERR, "Failed to get switch_comp bandwidth"
-                                " configuration %u", hw->aq.asq_last_status);
+               PMD_DRV_LOG(ERR,
+                       "Failed to get switch_comp bandwidth configuration %u",
+                       hw->aq.asq_last_status);
                return ret;
        }
 
@@ -9085,8 +9675,8 @@ i40e_vsi_config_tc(struct i40e_vsi *vsi, uint8_t tc_map)
        }
        ret = i40e_aq_config_vsi_tc_bw(hw, vsi->seid, &bw_data, NULL);
        if (ret) {
-               PMD_INIT_LOG(ERR, "AQ command Config VSI BW allocation"
-                       " per TC failed = %d",
+               PMD_INIT_LOG(ERR,
+                       "AQ command Config VSI BW allocation per TC failed = %d",
                        hw->aq.asq_last_status);
                goto out;
        }
@@ -9107,9 +9697,8 @@ i40e_vsi_config_tc(struct i40e_vsi *vsi, uint8_t tc_map)
        /* Update the VSI after updating the VSI queue-mapping information */
        ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
        if (ret) {
-               PMD_INIT_LOG(ERR, "Failed to configure "
-                           "TC queue mapping = %d",
-                           hw->aq.asq_last_status);
+               PMD_INIT_LOG(ERR, "Failed to configure TC queue mapping = %d",
+                       hw->aq.asq_last_status);
                goto out;
        }
        /* update the local VSI info with updated queue map */
@@ -9161,8 +9750,8 @@ i40e_dcb_hw_configure(struct i40e_pf *pf,
        /* Use the FW API if FW > v4.4*/
        if (!(((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver >= 4)) ||
              (hw->aq.fw_maj_ver >= 5))) {
-               PMD_INIT_LOG(ERR, "FW < v4.4, can not use FW LLDP API"
-                                 " to configure DCB");
+               PMD_INIT_LOG(ERR,
+                       "FW < v4.4, can not use FW LLDP API to configure DCB");
                return I40E_ERR_FIRMWARE_API_VERSION;
        }
 
@@ -9177,8 +9766,7 @@ i40e_dcb_hw_configure(struct i40e_pf *pf,
        old_cfg->etsrec = old_cfg->etscfg;
        ret = i40e_set_dcb_config(hw);
        if (ret) {
-               PMD_INIT_LOG(ERR,
-                        "Set DCB Config failed, err %s aq_err %s\n",
+               PMD_INIT_LOG(ERR, "Set DCB Config failed, err %s aq_err %s",
                         i40e_stat_str(hw, ret),
                         i40e_aq_str(hw, hw->aq.asq_last_status));
                return ret;
@@ -9210,7 +9798,7 @@ i40e_dcb_hw_configure(struct i40e_pf *pf,
                ret = i40e_config_switch_comp_tc(main_vsi->veb, tc_map);
                if (ret)
                        PMD_INIT_LOG(WARNING,
-                                "Failed configuring TC for VEB seid=%d\n",
+                                "Failed configuring TC for VEB seid=%d",
                                 main_vsi->veb->seid);
        }
        /* Update each VSI */
@@ -9228,8 +9816,8 @@ i40e_dcb_hw_configure(struct i40e_pf *pf,
                                                         I40E_DEFAULT_TCMAP);
                        if (ret)
                                PMD_INIT_LOG(WARNING,
-                                        "Failed configuring TC for VSI seid=%d\n",
-                                        vsi_list->vsi->seid);
+                                       "Failed configuring TC for VSI seid=%d",
+                                       vsi_list->vsi->seid);
                        /* continue */
                }
        }
@@ -9243,7 +9831,6 @@ i40e_dcb_hw_configure(struct i40e_pf *pf,
  *
  * Returns 0 on success, negative value on failure
  */
-//TREX_PATCH - changed all ERR to INFO in below func
 static int
 i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb)
 {
@@ -9252,7 +9839,7 @@ i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb)
        int ret = 0;
 
        if ((pf->flags & I40E_FLAG_DCB) == 0) {
-               PMD_INIT_LOG(INFO, "HW doesn't support DCB");
+               PMD_INIT_LOG(ERR, "HW doesn't support DCB");
                return -ENOTSUP;
        }
 
@@ -9261,29 +9848,21 @@ i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb)
         * LLDP MIB change event.
         */
        if (sw_dcb == TRUE) {
-               ret = i40e_aq_stop_lldp(hw, TRUE, NULL);
-               if (ret != I40E_SUCCESS)
-                       PMD_INIT_LOG(DEBUG, "Failed to stop lldp");
-
                ret = i40e_init_dcb(hw);
-               /* if sw_dcb, lldp agent is stopped, the return from
+               /* If lldp agent is stopped, the return value from
                 * i40e_init_dcb we expect is failure with I40E_AQ_RC_EPERM
-                * adminq status.
+                * adminq status. Otherwise, it should return success.
                 */
-               if (ret != I40E_SUCCESS &&
-                   hw->aq.asq_last_status == I40E_AQ_RC_EPERM) {
+               if ((ret == I40E_SUCCESS) || (ret != I40E_SUCCESS &&
+                   hw->aq.asq_last_status == I40E_AQ_RC_EPERM)) {
                        memset(&hw->local_dcbx_config, 0,
                                sizeof(struct i40e_dcbx_config));
                        /* set dcb default configuration */
                        hw->local_dcbx_config.etscfg.willing = 0;
                        hw->local_dcbx_config.etscfg.maxtcs = 0;
                        hw->local_dcbx_config.etscfg.tcbwtable[0] = 100;
-                       hw->local_dcbx_config.etscfg.tsatable[0] = I40E_IEEE_TSA_ETS;
-#ifdef TREX_PATCH_LOW_LATENCY
-            hw->local_dcbx_config.etscfg.tcbwtable[1] = 0;
-            hw->local_dcbx_config.etscfg.tsatable[1] = I40E_IEEE_TSA_STRICT;
-            hw->local_dcbx_config.etscfg.prioritytable[1] = 1;
-#endif
+                       hw->local_dcbx_config.etscfg.tsatable[0] =
+                                               I40E_IEEE_TSA_ETS;
                        hw->local_dcbx_config.etsrec =
                                hw->local_dcbx_config.etscfg;
                        hw->local_dcbx_config.pfc.willing = 0;
@@ -9298,22 +9877,15 @@ i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb)
                                                I40E_APP_PROTOID_FCOE;
                        ret = i40e_set_dcb_config(hw);
                        if (ret) {
-                               PMD_INIT_LOG(INFO, "default dcb config fails."
-                                       " err = %d, aq_err = %d.", ret,
-                                         hw->aq.asq_last_status);
+                               PMD_INIT_LOG(ERR,
+                                       "default dcb config fails. err = %d, aq_err = %d.",
+                                       ret, hw->aq.asq_last_status);
                                return -ENOSYS;
                        }
-#ifdef TREX_PATCH_LOW_LATENCY
-            if (i40e_vsi_update_tc_bandwidth_ex(pf->main_vsi) !=
-                I40E_SUCCESS) {
-                PMD_DRV_LOG(ERR, "Failed to update TC bandwidth");
-                return -ENOSYS;
-            }
-#endif
                } else {
-                       PMD_INIT_LOG(INFO, "DCBX configuration failed, err = %d,"
-                                         " aq_err = %d.", ret,
-                                         hw->aq.asq_last_status);
+                       PMD_INIT_LOG(ERR,
+                               "DCB initialization in FW fails, err = %d, aq_err = %d.",
+                               ret, hw->aq.asq_last_status);
                        return -ENOTSUP;
                }
        } else {
@@ -9324,14 +9896,14 @@ i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb)
                ret = i40e_init_dcb(hw);
                if (!ret) {
                        if (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED) {
-                               PMD_INIT_LOG(INFO, "HW doesn't support"
-                                                 " DCBX offload.");
+                               PMD_INIT_LOG(ERR,
+                                       "HW doesn't support DCBX offload.");
                                return -ENOTSUP;
                        }
                } else {
-                       PMD_INIT_LOG(INFO, "DCBX configuration failed, err = %d,"
-                                         " aq_err = %d.", ret,
-                                         hw->aq.asq_last_status);
+                       PMD_INIT_LOG(ERR,
+                               "DCBX configuration failed, err = %d, aq_err = %d.",
+                               ret, hw->aq.asq_last_status);
                        return -ENOTSUP;
                }
        }
@@ -9440,7 +10012,8 @@ i40e_dev_get_dcb_info(struct rte_eth_dev *dev,
 static int
 i40e_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
 {
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint16_t interval =
                i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL);
@@ -9465,7 +10038,7 @@ i40e_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
                                I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT));
 
        I40E_WRITE_FLUSH(hw);
-       rte_intr_enable(&dev->pci_dev->intr_handle);
+       rte_intr_enable(&pci_dev->intr_handle);
 
        return 0;
 }
@@ -9473,7 +10046,8 @@ i40e_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
 static int
 i40e_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
 {
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint16_t msix_intr;
 
@@ -9605,8 +10179,7 @@ i40e_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 
        /* mtu setting is forbidden if port is start */
        if (dev_data->dev_started) {
-               PMD_DRV_LOG(ERR,
-                           "port %d must be stopped before configuration\n",
+               PMD_DRV_LOG(ERR, "port %d must be stopped before configuration",
                            dev_data->port_id);
                return -EBUSY;
        }
@@ -9620,3 +10193,997 @@ i40e_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 
        return ret;
 }
+
+/* Restore ethertype filter */
+static void
+i40e_ethertype_filter_restore(struct i40e_pf *pf)
+{
+       struct i40e_hw *hw = I40E_PF_TO_HW(pf);
+       struct i40e_ethertype_filter_list
+               *ethertype_list = &pf->ethertype.ethertype_list;
+       struct i40e_ethertype_filter *f;
+       struct i40e_control_filter_stats stats;
+       uint16_t flags;
+
+       TAILQ_FOREACH(f, ethertype_list, rules) {
+               flags = 0;
+               if (!(f->flags & RTE_ETHTYPE_FLAGS_MAC))
+                       flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC;
+               if (f->flags & RTE_ETHTYPE_FLAGS_DROP)
+                       flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_DROP;
+               flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TO_QUEUE;
+
+               memset(&stats, 0, sizeof(stats));
+               i40e_aq_add_rem_control_packet_filter(hw,
+                                           f->input.mac_addr.addr_bytes,
+                                           f->input.ether_type,
+                                           flags, pf->main_vsi->seid,
+                                           f->queue, 1, &stats, NULL);
+       }
+       PMD_DRV_LOG(INFO, "Ethertype filter:"
+                   " mac_etype_used = %u, etype_used = %u,"
+                   " mac_etype_free = %u, etype_free = %u",
+                   stats.mac_etype_used, stats.etype_used,
+                   stats.mac_etype_free, stats.etype_free);
+}
+
+/* Restore tunnel filter */
+static void
+i40e_tunnel_filter_restore(struct i40e_pf *pf)
+{
+       struct i40e_hw *hw = I40E_PF_TO_HW(pf);
+       struct i40e_vsi *vsi = pf->main_vsi;
+       struct i40e_tunnel_filter_list
+               *tunnel_list = &pf->tunnel.tunnel_list;
+       struct i40e_tunnel_filter *f;
+       struct i40e_aqc_add_remove_cloud_filters_element_data cld_filter;
+
+       TAILQ_FOREACH(f, tunnel_list, rules) {
+               memset(&cld_filter, 0, sizeof(cld_filter));
+               rte_memcpy(&cld_filter, &f->input, sizeof(f->input));
+               cld_filter.queue_number = f->queue;
+               i40e_aq_add_cloud_filters(hw, vsi->seid, &cld_filter, 1);
+       }
+}
+
+static void
+i40e_filter_restore(struct i40e_pf *pf)
+{
+       i40e_ethertype_filter_restore(pf);
+       i40e_tunnel_filter_restore(pf);
+       i40e_fdir_filter_restore(pf);
+}
+
+static int
+is_i40e_pmd(const char *driver_name)
+{
+       if (!strstr(driver_name, "i40e"))
+               return -ENOTSUP;
+
+       if (strstr(driver_name, "i40e_vf"))
+               return -ENOTSUP;
+
+       return 0;
+}
+
+int
+rte_pmd_i40e_ping_vfs(uint8_t port, uint16_t vf)
+{
+       struct rte_eth_dev *dev;
+       struct i40e_pf *pf;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+
+       if (is_i40e_pmd(dev->data->drv_name))
+               return -ENOTSUP;
+
+       pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+
+       if (vf >= pf->vf_num || !pf->vfs) {
+               PMD_DRV_LOG(ERR, "Invalid argument.");
+               return -EINVAL;
+       }
+
+       i40e_notify_vf_link_status(dev, &pf->vfs[vf]);
+
+       return 0;
+}
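Illustrative usage sketch (not part of this commit): driving the new rte_pmd_i40e_* host-side controls added here from an application; the header name and the port/VF ids are assumptions.

/* Sketch only: assumes the prototypes are exported via rte_pmd_i40e.h. */
#include <stdio.h>
#include <rte_ethdev.h>
#include <rte_pmd_i40e.h>   /* assumed header location */

static void
example_vf_admin(uint8_t port, uint16_t vf)
{
        /* Re-notify the VF of the current link status. */
        if (rte_pmd_i40e_ping_vfs(port, vf) != 0)
                printf("ping_vfs(%u, %u) failed\n", port, vf);

        /* Drop TX frames whose source MAC differs from the VF's assigned MAC. */
        if (rte_pmd_i40e_set_vf_mac_anti_spoof(port, vf, 1) != 0)
                printf("mac anti-spoof enable failed\n");
}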
+
+int
+rte_pmd_i40e_set_vf_mac_anti_spoof(uint8_t port, uint16_t vf_id, uint8_t on)
+{
+       struct rte_eth_dev *dev;
+       struct i40e_pf *pf;
+       struct i40e_vsi *vsi;
+       struct i40e_hw *hw;
+       struct i40e_vsi_context ctxt;
+       int ret;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+
+       if (is_i40e_pmd(dev->data->drv_name))
+               return -ENOTSUP;
+
+       pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+
+       if (vf_id >= pf->vf_num || !pf->vfs) {
+               PMD_DRV_LOG(ERR, "Invalid argument.");
+               return -EINVAL;
+       }
+
+       vsi = pf->vfs[vf_id].vsi;
+       if (!vsi) {
+               PMD_DRV_LOG(ERR, "Invalid VSI.");
+               return -EINVAL;
+       }
+
+       /* Check if it has been already on or off */
+       if (vsi->info.valid_sections &
+               rte_cpu_to_le_16(I40E_AQ_VSI_PROP_SECURITY_VALID)) {
+               if (on) {
+                       if ((vsi->info.sec_flags &
+                            I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK) ==
+                           I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK)
+                               return 0; /* already on */
+               } else {
+                       if ((vsi->info.sec_flags &
+                            I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK) == 0)
+                               return 0; /* already off */
+               }
+       }
+
+       vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SECURITY_VALID);
+       if (on)
+               vsi->info.sec_flags |= I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK;
+       else
+               vsi->info.sec_flags &= ~I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK;
+
+       memset(&ctxt, 0, sizeof(ctxt));
+       (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
+       ctxt.seid = vsi->seid;
+
+       hw = I40E_VSI_TO_HW(vsi);
+       ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+       if (ret != I40E_SUCCESS) {
+               ret = -ENOTSUP;
+               PMD_DRV_LOG(ERR, "Failed to update VSI params");
+       }
+
+       return ret;
+}
+
+static int
+i40e_add_rm_all_vlan_filter(struct i40e_vsi *vsi, uint8_t add)
+{
+       uint32_t j, k;
+       uint16_t vlan_id;
+       struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
+       struct i40e_aqc_add_remove_vlan_element_data vlan_data = {0};
+       int ret;
+
+       for (j = 0; j < I40E_VFTA_SIZE; j++) {
+               if (!vsi->vfta[j])
+                       continue;
+
+               for (k = 0; k < I40E_UINT32_BIT_SIZE; k++) {
+                       if (!(vsi->vfta[j] & (1 << k)))
+                               continue;
+
+                       vlan_id = j * I40E_UINT32_BIT_SIZE + k;
+                       if (!vlan_id)
+                               continue;
+
+                       vlan_data.vlan_tag = rte_cpu_to_le_16(vlan_id);
+                       if (add)
+                               ret = i40e_aq_add_vlan(hw, vsi->seid,
+                                                      &vlan_data, 1, NULL);
+                       else
+                               ret = i40e_aq_remove_vlan(hw, vsi->seid,
+                                                         &vlan_data, 1, NULL);
+                       if (ret != I40E_SUCCESS) {
+                               PMD_DRV_LOG(ERR,
+                                           "Failed to add/rm vlan filter");
+                               return ret;
+                       }
+               }
+       }
+
+       return I40E_SUCCESS;
+}
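For reference (not from the patch): the loop above assumes the per-VSI vfta[] is a plain bitmap where VLAN id v is stored as bit (v % 32) of 32-bit word (v / 32). A minimal sketch of that layout:

/* Sketch of the VFTA indexing walked by i40e_add_rm_all_vlan_filter(). */
#include <stdint.h>

static inline void
example_vfta_set(uint32_t *vfta, uint16_t vlan_id)
{
        vfta[vlan_id / 32] |= 1u << (vlan_id % 32);     /* mark VLAN present */
}

static inline int
example_vfta_test(const uint32_t *vfta, uint16_t vlan_id)
{
        return !!(vfta[vlan_id / 32] & (1u << (vlan_id % 32)));
}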
+
+int
+rte_pmd_i40e_set_vf_vlan_anti_spoof(uint8_t port, uint16_t vf_id, uint8_t on)
+{
+       struct rte_eth_dev *dev;
+       struct i40e_pf *pf;
+       struct i40e_vsi *vsi;
+       struct i40e_hw *hw;
+       struct i40e_vsi_context ctxt;
+       int ret;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+
+       if (is_i40e_pmd(dev->data->drv_name))
+               return -ENOTSUP;
+
+       pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+
+       if (vf_id >= pf->vf_num || !pf->vfs) {
+               PMD_DRV_LOG(ERR, "Invalid argument.");
+               return -EINVAL;
+       }
+
+       vsi = pf->vfs[vf_id].vsi;
+       if (!vsi) {
+               PMD_DRV_LOG(ERR, "Invalid VSI.");
+               return -EINVAL;
+       }
+
+       /* Check if it has been already on or off */
+       if (vsi->vlan_anti_spoof_on == on)
+               return 0; /* already on or off */
+
+       vsi->vlan_anti_spoof_on = on;
+       ret = i40e_add_rm_all_vlan_filter(vsi, on);
+       if (ret) {
+               PMD_DRV_LOG(ERR, "Failed to remove VLAN filters.");
+               return -ENOTSUP;
+       }
+
+       vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SECURITY_VALID);
+       if (on)
+               vsi->info.sec_flags |= I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK;
+       else
+               vsi->info.sec_flags &= ~I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK;
+
+       memset(&ctxt, 0, sizeof(ctxt));
+       (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
+       ctxt.seid = vsi->seid;
+
+       hw = I40E_VSI_TO_HW(vsi);
+       ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+       if (ret != I40E_SUCCESS) {
+               ret = -ENOTSUP;
+               PMD_DRV_LOG(ERR, "Failed to update VSI params");
+       }
+
+       return ret;
+}
+
+static int
+i40e_vsi_rm_mac_filter(struct i40e_vsi *vsi)
+{
+       struct i40e_mac_filter *f;
+       struct i40e_macvlan_filter *mv_f;
+       int i, vlan_num;
+       enum rte_mac_filter_type filter_type;
+       int ret = I40E_SUCCESS;
+       void *temp;
+
+       /* remove all the MACs */
+       TAILQ_FOREACH_SAFE(f, &vsi->mac_list, next, temp) {
+               vlan_num = vsi->vlan_num;
+               filter_type = f->mac_info.filter_type;
+               if (filter_type == RTE_MACVLAN_PERFECT_MATCH ||
+                   filter_type == RTE_MACVLAN_HASH_MATCH) {
+                       if (vlan_num == 0) {
+                               PMD_DRV_LOG(ERR, "VLAN number shouldn't be 0");
+                               return I40E_ERR_PARAM;
+                       }
+               } else if (filter_type == RTE_MAC_PERFECT_MATCH ||
+                          filter_type == RTE_MAC_HASH_MATCH)
+                       vlan_num = 1;
+
+               mv_f = rte_zmalloc("macvlan_data", vlan_num * sizeof(*mv_f), 0);
+               if (!mv_f) {
+                       PMD_DRV_LOG(ERR, "failed to allocate memory");
+                       return I40E_ERR_NO_MEMORY;
+               }
+
+               for (i = 0; i < vlan_num; i++) {
+                       mv_f[i].filter_type = filter_type;
+                       (void)rte_memcpy(&mv_f[i].macaddr,
+                                        &f->mac_info.mac_addr,
+                                        ETH_ADDR_LEN);
+               }
+               if (filter_type == RTE_MACVLAN_PERFECT_MATCH ||
+                   filter_type == RTE_MACVLAN_HASH_MATCH) {
+                       ret = i40e_find_all_vlan_for_mac(vsi, mv_f, vlan_num,
+                                                        &f->mac_info.mac_addr);
+                       if (ret != I40E_SUCCESS) {
+                               rte_free(mv_f);
+                               return ret;
+                       }
+               }
+
+               ret = i40e_remove_macvlan_filters(vsi, mv_f, vlan_num);
+               if (ret != I40E_SUCCESS) {
+                       rte_free(mv_f);
+                       return ret;
+               }
+
+               rte_free(mv_f);
+               ret = I40E_SUCCESS;
+       }
+
+       return ret;
+}
+
+static int
+i40e_vsi_restore_mac_filter(struct i40e_vsi *vsi)
+{
+       struct i40e_mac_filter *f;
+       struct i40e_macvlan_filter *mv_f;
+       int i, vlan_num = 0;
+       int ret = I40E_SUCCESS;
+       void *temp;
+
+       /* restore all the MACs */
+       TAILQ_FOREACH_SAFE(f, &vsi->mac_list, next, temp) {
+               if ((f->mac_info.filter_type == RTE_MACVLAN_PERFECT_MATCH) ||
+                   (f->mac_info.filter_type == RTE_MACVLAN_HASH_MATCH)) {
+                       /**
+                        * If vlan_num is 0, that's the first time to add mac,
+                        * set mask for vlan_id 0.
+                        */
+                       if (vsi->vlan_num == 0) {
+                               i40e_set_vlan_filter(vsi, 0, 1);
+                               vsi->vlan_num = 1;
+                       }
+                       vlan_num = vsi->vlan_num;
+               } else if ((f->mac_info.filter_type == RTE_MAC_PERFECT_MATCH) ||
+                          (f->mac_info.filter_type == RTE_MAC_HASH_MATCH))
+                       vlan_num = 1;
+
+               mv_f = rte_zmalloc("macvlan_data", vlan_num * sizeof(*mv_f), 0);
+               if (!mv_f) {
+                       PMD_DRV_LOG(ERR, "failed to allocate memory");
+                       return I40E_ERR_NO_MEMORY;
+               }
+
+               for (i = 0; i < vlan_num; i++) {
+                       mv_f[i].filter_type = f->mac_info.filter_type;
+                       (void)rte_memcpy(&mv_f[i].macaddr,
+                                        &f->mac_info.mac_addr,
+                                        ETH_ADDR_LEN);
+               }
+
+               if (f->mac_info.filter_type == RTE_MACVLAN_PERFECT_MATCH ||
+                   f->mac_info.filter_type == RTE_MACVLAN_HASH_MATCH) {
+                       ret = i40e_find_all_vlan_for_mac(vsi, mv_f, vlan_num,
+                                                        &f->mac_info.mac_addr);
+                       if (ret != I40E_SUCCESS) {
+                               rte_free(mv_f);
+                               return ret;
+                       }
+               }
+
+               ret = i40e_add_macvlan_filters(vsi, mv_f, vlan_num);
+               if (ret != I40E_SUCCESS) {
+                       rte_free(mv_f);
+                       return ret;
+               }
+
+               rte_free(mv_f);
+               ret = I40E_SUCCESS;
+       }
+
+       return ret;
+}
+
+static int
+i40e_vsi_set_tx_loopback(struct i40e_vsi *vsi, uint8_t on)
+{
+       struct i40e_vsi_context ctxt;
+       struct i40e_hw *hw;
+       int ret;
+
+       if (!vsi)
+               return -EINVAL;
+
+       hw = I40E_VSI_TO_HW(vsi);
+
+       /* Use the FW API if FW >= v5.0 */
+       if (hw->aq.fw_maj_ver < 5) {
+               PMD_INIT_LOG(ERR, "FW < v5.0, cannot enable loopback");
+               return -ENOTSUP;
+       }
+
+       /* Check if it has been already on or off */
+       if (vsi->info.valid_sections &
+               rte_cpu_to_le_16(I40E_AQ_VSI_PROP_SWITCH_VALID)) {
+               if (on) {
+                       if ((vsi->info.switch_id &
+                            I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB) ==
+                           I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB)
+                               return 0; /* already on */
+               } else {
+                       if ((vsi->info.switch_id &
+                            I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB) == 0)
+                               return 0; /* already off */
+               }
+       }
+
+       /* remove all the MAC and VLAN first */
+       ret = i40e_vsi_rm_mac_filter(vsi);
+       if (ret) {
+               PMD_INIT_LOG(ERR, "Failed to remove MAC filters.");
+               return ret;
+       }
+       if (vsi->vlan_anti_spoof_on) {
+               ret = i40e_add_rm_all_vlan_filter(vsi, 0);
+               if (ret) {
+                       PMD_INIT_LOG(ERR, "Failed to remove VLAN filters.");
+                       return ret;
+               }
+       }
+
+       vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+       if (on)
+               vsi->info.switch_id |= I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB;
+       else
+               vsi->info.switch_id &= ~I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB;
+
+       memset(&ctxt, 0, sizeof(ctxt));
+       (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
+       ctxt.seid = vsi->seid;
+
+       ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+       if (ret != I40E_SUCCESS) {
+               PMD_DRV_LOG(ERR, "Failed to update VSI params");
+               return ret;
+       }
+
+       /* add all the MAC and VLAN back */
+       ret = i40e_vsi_restore_mac_filter(vsi);
+       if (ret)
+               return ret;
+       if (vsi->vlan_anti_spoof_on) {
+               ret = i40e_add_rm_all_vlan_filter(vsi, 1);
+               if (ret)
+                       return ret;
+       }
+
+       return ret;
+}
+
+int
+rte_pmd_i40e_set_tx_loopback(uint8_t port, uint8_t on)
+{
+       struct rte_eth_dev *dev;
+       struct i40e_pf *pf;
+       struct i40e_pf_vf *vf;
+       struct i40e_vsi *vsi;
+       uint16_t vf_id;
+       int ret;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+
+       if (is_i40e_pmd(dev->data->drv_name))
+               return -ENOTSUP;
+
+       pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+
+       /* setup PF TX loopback */
+       vsi = pf->main_vsi;
+       ret = i40e_vsi_set_tx_loopback(vsi, on);
+       if (ret)
+               return -ENOTSUP;
+
+       /* setup TX loopback for all the VFs */
+       if (!pf->vfs) {
+               /* if no VF, do nothing. */
+               return 0;
+       }
+
+       for (vf_id = 0; vf_id < pf->vf_num; vf_id++) {
+               vf = &pf->vfs[vf_id];
+               vsi = vf->vsi;
+
+               ret = i40e_vsi_set_tx_loopback(vsi, on);
+               if (ret)
+                       return -ENOTSUP;
+       }
+
+       return ret;
+}
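A hypothetical caller-side sketch for the loopback knob above; the port id is a placeholder and the header is assumed as before.

/* Sketch only: let VF<->VF / VF<->PF traffic be switched locally on the NIC. */
#include <stdio.h>
#include <rte_pmd_i40e.h>   /* assumed header location */

static int
example_enable_local_switching(uint8_t port)
{
        int ret = rte_pmd_i40e_set_tx_loopback(port, 1);

        if (ret)
                printf("tx loopback not enabled on port %u: %d\n", port, ret);
        return ret;
}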
+
+int
+rte_pmd_i40e_set_vf_unicast_promisc(uint8_t port, uint16_t vf_id, uint8_t on)
+{
+       struct rte_eth_dev *dev;
+       struct i40e_pf *pf;
+       struct i40e_vsi *vsi;
+       struct i40e_hw *hw;
+       int ret;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+
+       if (is_i40e_pmd(dev->data->drv_name))
+               return -ENOTSUP;
+
+       pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+
+       if (vf_id >= pf->vf_num || !pf->vfs) {
+               PMD_DRV_LOG(ERR, "Invalid argument.");
+               return -EINVAL;
+       }
+
+       vsi = pf->vfs[vf_id].vsi;
+       if (!vsi) {
+               PMD_DRV_LOG(ERR, "Invalid VSI.");
+               return -EINVAL;
+       }
+
+       hw = I40E_VSI_TO_HW(vsi);
+
+       ret = i40e_aq_set_vsi_unicast_promiscuous(hw, vsi->seid,
+                                                 on, NULL, true);
+       if (ret != I40E_SUCCESS) {
+               ret = -ENOTSUP;
+               PMD_DRV_LOG(ERR, "Failed to set unicast promiscuous mode");
+       }
+
+       return ret;
+}
+
+int
+rte_pmd_i40e_set_vf_multicast_promisc(uint8_t port, uint16_t vf_id, uint8_t on)
+{
+       struct rte_eth_dev *dev;
+       struct i40e_pf *pf;
+       struct i40e_vsi *vsi;
+       struct i40e_hw *hw;
+       int ret;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+
+       if (is_i40e_pmd(dev->data->drv_name))
+               return -ENOTSUP;
+
+       pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+
+       if (vf_id >= pf->vf_num || !pf->vfs) {
+               PMD_DRV_LOG(ERR, "Invalid argument.");
+               return -EINVAL;
+       }
+
+       vsi = pf->vfs[vf_id].vsi;
+       if (!vsi) {
+               PMD_DRV_LOG(ERR, "Invalid VSI.");
+               return -EINVAL;
+       }
+
+       hw = I40E_VSI_TO_HW(vsi);
+
+       ret = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid,
+                                                   on, NULL);
+       if (ret != I40E_SUCCESS) {
+               ret = -ENOTSUP;
+               PMD_DRV_LOG(ERR, "Failed to set multicast promiscuous mode");
+       }
+
+       return ret;
+}
+
+int
+rte_pmd_i40e_set_vf_mac_addr(uint8_t port, uint16_t vf_id,
+                            struct ether_addr *mac_addr)
+{
+       struct i40e_mac_filter *f;
+       struct rte_eth_dev *dev;
+       struct i40e_pf_vf *vf;
+       struct i40e_vsi *vsi;
+       struct i40e_pf *pf;
+       void *temp;
+
+       if (i40e_validate_mac_addr((u8 *)mac_addr) != I40E_SUCCESS)
+               return -EINVAL;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+
+       if (is_i40e_pmd(dev->data->drv_name))
+               return -ENOTSUP;
+
+       pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+
+       if (vf_id >= pf->vf_num || !pf->vfs)
+               return -EINVAL;
+
+       vf = &pf->vfs[vf_id];
+       vsi = vf->vsi;
+       if (!vsi) {
+               PMD_DRV_LOG(ERR, "Invalid VSI.");
+               return -EINVAL;
+       }
+
+       ether_addr_copy(mac_addr, &vf->mac_addr);
+
+       /* Remove all existing mac */
+       TAILQ_FOREACH_SAFE(f, &vsi->mac_list, next, temp)
+               i40e_vsi_delete_mac(vsi, &f->mac_info.mac_addr);
+
+       return 0;
+}
+
+/* Set vlan strip on/off for specific VF from host */
+int
+rte_pmd_i40e_set_vf_vlan_stripq(uint8_t port, uint16_t vf_id, uint8_t on)
+{
+       struct rte_eth_dev *dev;
+       struct i40e_pf *pf;
+       struct i40e_vsi *vsi;
+       int ret;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+
+       if (is_i40e_pmd(dev->data->drv_name))
+               return -ENOTSUP;
+
+       pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+
+       if (vf_id >= pf->vf_num || !pf->vfs) {
+               PMD_DRV_LOG(ERR, "Invalid argument.");
+               return -EINVAL;
+       }
+
+       vsi = pf->vfs[vf_id].vsi;
+
+       if (!vsi)
+               return -EINVAL;
+
+       ret = i40e_vsi_config_vlan_stripping(vsi, !!on);
+       if (ret != I40E_SUCCESS) {
+               ret = -ENOTSUP;
+               PMD_DRV_LOG(ERR, "Failed to set VLAN stripping!");
+       }
+
+       return ret;
+}
+
+int rte_pmd_i40e_set_vf_vlan_insert(uint8_t port, uint16_t vf_id,
+                                   uint16_t vlan_id)
+{
+       struct rte_eth_dev *dev;
+       struct i40e_pf *pf;
+       struct i40e_hw *hw;
+       struct i40e_vsi *vsi;
+       struct i40e_vsi_context ctxt;
+       int ret;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       if (vlan_id > ETHER_MAX_VLAN_ID) {
+               PMD_DRV_LOG(ERR, "Invalid VLAN ID.");
+               return -EINVAL;
+       }
+
+       dev = &rte_eth_devices[port];
+
+       if (is_i40e_pmd(dev->data->drv_name))
+               return -ENOTSUP;
+
+       pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+       hw = I40E_PF_TO_HW(pf);
+
+       /**
+        * return -ENODEV if SRIOV not enabled, VF number not configured
+        * or no queue assigned.
+        */
+       if (!hw->func_caps.sr_iov_1_1 || pf->vf_num == 0 ||
+           pf->vf_nb_qps == 0)
+               return -ENODEV;
+
+       if (vf_id >= pf->vf_num || !pf->vfs) {
+               PMD_DRV_LOG(ERR, "Invalid VF ID.");
+               return -EINVAL;
+       }
+
+       vsi = pf->vfs[vf_id].vsi;
+       if (!vsi) {
+               PMD_DRV_LOG(ERR, "Invalid VSI.");
+               return -EINVAL;
+       }
+
+       vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
+       vsi->info.pvid = vlan_id;
+       if (vlan_id > 0)
+               vsi->info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_INSERT_PVID;
+       else
+               vsi->info.port_vlan_flags &= ~I40E_AQ_VSI_PVLAN_INSERT_PVID;
+
+       memset(&ctxt, 0, sizeof(ctxt));
+       (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
+       ctxt.seid = vsi->seid;
+
+       hw = I40E_VSI_TO_HW(vsi);
+       ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+       if (ret != I40E_SUCCESS) {
+               ret = -ENOTSUP;
+               PMD_DRV_LOG(ERR, "Failed to update VSI params");
+       }
+
+       return ret;
+}
+
+int rte_pmd_i40e_set_vf_broadcast(uint8_t port, uint16_t vf_id,
+                                 uint8_t on)
+{
+       struct rte_eth_dev *dev;
+       struct i40e_pf *pf;
+       struct i40e_vsi *vsi;
+       struct i40e_hw *hw;
+       int ret;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       if (on > 1) {
+               PMD_DRV_LOG(ERR, "on should be 0 or 1.");
+               return -EINVAL;
+       }
+
+       dev = &rte_eth_devices[port];
+
+       if (is_i40e_pmd(dev->data->drv_name))
+               return -ENOTSUP;
+
+       pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+       hw = I40E_PF_TO_HW(pf);
+
+       if (vf_id >= pf->vf_num || !pf->vfs) {
+               PMD_DRV_LOG(ERR, "Invalid VF ID.");
+               return -EINVAL;
+       }
+
+       /**
+        * return -ENODEV if SRIOV not enabled, VF number not configured
+        * or no queue assigned.
+        */
+       if (!hw->func_caps.sr_iov_1_1 || pf->vf_num == 0 ||
+           pf->vf_nb_qps == 0) {
+               PMD_DRV_LOG(ERR, "SRIOV is not enabled or no queue.");
+               return -ENODEV;
+       }
+
+       vsi = pf->vfs[vf_id].vsi;
+       if (!vsi) {
+               PMD_DRV_LOG(ERR, "Invalid VSI.");
+               return -EINVAL;
+       }
+
+       hw = I40E_VSI_TO_HW(vsi);
+
+       ret = i40e_aq_set_vsi_broadcast(hw, vsi->seid, on, NULL);
+       if (ret != I40E_SUCCESS) {
+               ret = -ENOTSUP;
+               PMD_DRV_LOG(ERR, "Failed to set VSI broadcast");
+       }
+
+       return ret;
+}
+
+int rte_pmd_i40e_set_vf_vlan_tag(uint8_t port, uint16_t vf_id, uint8_t on)
+{
+       struct rte_eth_dev *dev;
+       struct i40e_pf *pf;
+       struct i40e_hw *hw;
+       struct i40e_vsi *vsi;
+       struct i40e_vsi_context ctxt;
+       int ret;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       if (on > 1) {
+               PMD_DRV_LOG(ERR, "on should be 0 or 1.");
+               return -EINVAL;
+       }
+
+       dev = &rte_eth_devices[port];
+
+       if (is_i40e_pmd(dev->data->drv_name))
+               return -ENOTSUP;
+
+       pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+       hw = I40E_PF_TO_HW(pf);
+
+       /**
+        * return -ENODEV if SRIOV not enabled, VF number not configured
+        * or no queue assigned.
+        */
+       if (!hw->func_caps.sr_iov_1_1 || pf->vf_num == 0 ||
+           pf->vf_nb_qps == 0) {
+               PMD_DRV_LOG(ERR, "SRIOV is not enabled or no queue.");
+               return -ENODEV;
+       }
+
+       if (vf_id >= pf->vf_num || !pf->vfs) {
+               PMD_DRV_LOG(ERR, "Invalid VF ID.");
+               return -EINVAL;
+       }
+
+       vsi = pf->vfs[vf_id].vsi;
+       if (!vsi) {
+               PMD_DRV_LOG(ERR, "Invalid VSI.");
+               return -EINVAL;
+       }
+
+       vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
+       if (on) {
+               vsi->info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_TAGGED;
+               vsi->info.port_vlan_flags &= ~I40E_AQ_VSI_PVLAN_MODE_UNTAGGED;
+       } else {
+               vsi->info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_UNTAGGED;
+               vsi->info.port_vlan_flags &= ~I40E_AQ_VSI_PVLAN_MODE_TAGGED;
+       }
+
+       memset(&ctxt, 0, sizeof(ctxt));
+       (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
+       ctxt.seid = vsi->seid;
+
+       hw = I40E_VSI_TO_HW(vsi);
+       ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+       if (ret != I40E_SUCCESS) {
+               ret = -ENOTSUP;
+               PMD_DRV_LOG(ERR, "Failed to update VSI params");
+       }
+
+       return ret;
+}
+
+int rte_pmd_i40e_set_vf_vlan_filter(uint8_t port, uint16_t vlan_id,
+                                   uint64_t vf_mask, uint8_t on)
+{
+       struct rte_eth_dev *dev;
+       struct i40e_pf *pf;
+       struct i40e_hw *hw;
+       uint16_t vf_idx;
+       int ret = I40E_SUCCESS;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+
+       if (is_i40e_pmd(dev->data->drv_name))
+               return -ENOTSUP;
+
+       if (vlan_id > ETHER_MAX_VLAN_ID) {
+               PMD_DRV_LOG(ERR, "Invalid VLAN ID.");
+               return -EINVAL;
+       }
+
+       if (vf_mask == 0) {
+               PMD_DRV_LOG(ERR, "No VF.");
+               return -EINVAL;
+       }
+
+       if (on > 1) {
+               PMD_DRV_LOG(ERR, "on should be 0 or 1.");
+               return -EINVAL;
+       }
+
+       pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+       hw = I40E_PF_TO_HW(pf);
+
+       /**
+        * return -ENODEV if SRIOV not enabled, VF number not configured
+        * or no queue assigned.
+        */
+       if (!hw->func_caps.sr_iov_1_1 || pf->vf_num == 0 ||
+           pf->vf_nb_qps == 0) {
+               PMD_DRV_LOG(ERR, "SRIOV is not enabled or no queue.");
+               return -ENODEV;
+       }
+
+       for (vf_idx = 0; vf_idx < 64 && ret == I40E_SUCCESS; vf_idx++) {
+               if (vf_mask & ((uint64_t)(1ULL << vf_idx))) {
+                       if (on)
+                               ret = i40e_vsi_add_vlan(pf->vfs[vf_idx].vsi,
+                                                       vlan_id);
+                       else
+                               ret = i40e_vsi_delete_vlan(pf->vfs[vf_idx].vsi,
+                                                          vlan_id);
+               }
+       }
+
+       if (ret != I40E_SUCCESS) {
+               ret = -ENOTSUP;
+               PMD_DRV_LOG(ERR, "Failed to set VF VLAN filter, on = %d", on);
+       }
+
+       return ret;
+}
+
+int
+rte_pmd_i40e_get_vf_stats(uint8_t port,
+                         uint16_t vf_id,
+                         struct rte_eth_stats *stats)
+{
+       struct rte_eth_dev *dev;
+       struct i40e_pf *pf;
+       struct i40e_vsi *vsi;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+
+       if (is_i40e_pmd(dev->data->drv_name))
+               return -ENOTSUP;
+
+       pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+
+       if (vf_id >= pf->vf_num || !pf->vfs) {
+               PMD_DRV_LOG(ERR, "Invalid VF ID.");
+               return -EINVAL;
+       }
+
+       vsi = pf->vfs[vf_id].vsi;
+       if (!vsi) {
+               PMD_DRV_LOG(ERR, "Invalid VSI.");
+               return -EINVAL;
+       }
+
+       i40e_update_vsi_stats(vsi);
+
+       stats->ipackets = vsi->eth_stats.rx_unicast +
+                       vsi->eth_stats.rx_multicast +
+                       vsi->eth_stats.rx_broadcast;
+       stats->opackets = vsi->eth_stats.tx_unicast +
+                       vsi->eth_stats.tx_multicast +
+                       vsi->eth_stats.tx_broadcast;
+       stats->ibytes   = vsi->eth_stats.rx_bytes;
+       stats->obytes   = vsi->eth_stats.tx_bytes;
+       stats->ierrors  = vsi->eth_stats.rx_discards;
+       stats->oerrors  = vsi->eth_stats.tx_errors + vsi->eth_stats.tx_discards;
+
+       return 0;
+}
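Illustrative only: reading the per-VF counters exposed by the new API from the host application.

#include <stdio.h>
#include <inttypes.h>
#include <rte_ethdev.h>
#include <rte_pmd_i40e.h>   /* assumed header location */

static void
example_dump_vf_stats(uint8_t port, uint16_t vf)
{
        struct rte_eth_stats st;

        if (rte_pmd_i40e_get_vf_stats(port, vf, &st) != 0)
                return;
        printf("vf %u: rx %" PRIu64 " pkts / %" PRIu64 " bytes, tx %" PRIu64 " pkts\n",
               vf, st.ipackets, st.ibytes, st.opackets);
}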
+
+int
+rte_pmd_i40e_reset_vf_stats(uint8_t port,
+                           uint16_t vf_id)
+{
+       struct rte_eth_dev *dev;
+       struct i40e_pf *pf;
+       struct i40e_vsi *vsi;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+
+       if (is_i40e_pmd(dev->data->drv_name))
+               return -ENOTSUP;
+
+       pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+
+       if (vf_id >= pf->vf_num || !pf->vfs) {
+               PMD_DRV_LOG(ERR, "Invalid VF ID.");
+               return -EINVAL;
+       }
+
+       vsi = pf->vfs[vf_id].vsi;
+       if (!vsi) {
+               PMD_DRV_LOG(ERR, "Invalid VSI.");
+               return -EINVAL;
+       }
+
+       vsi->offset_loaded = false;
+       i40e_update_vsi_stats(vsi);
+
+       return 0;
+}
index 92c8fad..9e2f7a2 100644 (file)
@@ -37,6 +37,8 @@
 #include <rte_eth_ctrl.h>
 #include <rte_time.h>
 #include <rte_kvargs.h>
+#include <rte_hash.h>
+#include <rte_flow_driver.h>
 
 #define I40E_VLAN_TAG_SIZE        4
 
@@ -126,6 +128,7 @@ enum i40e_flxpld_layer_idx {
 #define I40E_FLAG_FDIR                  (1ULL << 6)
 #define I40E_FLAG_VXLAN                 (1ULL << 7)
 #define I40E_FLAG_RSS_AQ_CAPABLE        (1ULL << 8)
+#define I40E_FLAG_VF_MAC_BY_PF          (1ULL << 9)
 #define I40E_FLAG_ALL (I40E_FLAG_RSS | \
                       I40E_FLAG_DCB | \
                       I40E_FLAG_VMDQ | \
@@ -134,7 +137,8 @@ enum i40e_flxpld_layer_idx {
                       I40E_FLAG_HEADER_SPLIT_ENABLED | \
                       I40E_FLAG_FDIR | \
                       I40E_FLAG_VXLAN | \
-                      I40E_FLAG_RSS_AQ_CAPABLE)
+                      I40E_FLAG_RSS_AQ_CAPABLE | \
+                      I40E_FLAG_VF_MAC_BY_PF)
 
 #define I40E_RSS_OFFLOAD_ALL ( \
        ETH_RSS_FRAG_IPV4 | \
@@ -149,6 +153,16 @@ enum i40e_flxpld_layer_idx {
        ETH_RSS_NONFRAG_IPV6_OTHER | \
        ETH_RSS_L2_PAYLOAD)
 
+/* All bits of RSS hash enable for X722 */
+#define I40E_RSS_HENA_ALL_X722 ( \
+       (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | \
+       (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) | \
+       (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK) | \
+       (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \
+       (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP) | \
+       (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK) | \
+       I40E_RSS_HENA_ALL)
+
 /* All bits of RSS hash enable */
 #define I40E_RSS_HENA_ALL ( \
        (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | \
@@ -178,6 +192,65 @@ enum i40e_flxpld_layer_idx {
 #define FLOATING_VEB_SUPPORTED_FW_MAJ 5
 #define FLOATING_VEB_SUPPORTED_FW_MIN 0
 
+#define I40E_GL_SWT_L2TAGCTRL(_i)             (0x001C0A70 + ((_i) * 4))
+#define I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_SHIFT 16
+#define I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_MASK  \
+       I40E_MASK(0xFFFF, I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_SHIFT)
+
+#define I40E_INSET_NONE            0x0000000000000000ULL
+
+/* bit0 ~ bit 7 */
+#define I40E_INSET_DMAC            0x0000000000000001ULL
+#define I40E_INSET_SMAC            0x0000000000000002ULL
+#define I40E_INSET_VLAN_OUTER      0x0000000000000004ULL
+#define I40E_INSET_VLAN_INNER      0x0000000000000008ULL
+#define I40E_INSET_VLAN_TUNNEL     0x0000000000000010ULL
+
+/* bit 8 ~ bit 15 */
+#define I40E_INSET_IPV4_SRC        0x0000000000000100ULL
+#define I40E_INSET_IPV4_DST        0x0000000000000200ULL
+#define I40E_INSET_IPV6_SRC        0x0000000000000400ULL
+#define I40E_INSET_IPV6_DST        0x0000000000000800ULL
+#define I40E_INSET_SRC_PORT        0x0000000000001000ULL
+#define I40E_INSET_DST_PORT        0x0000000000002000ULL
+#define I40E_INSET_SCTP_VT         0x0000000000004000ULL
+
+/* bit 16 ~ bit 31 */
+#define I40E_INSET_IPV4_TOS        0x0000000000010000ULL
+#define I40E_INSET_IPV4_PROTO      0x0000000000020000ULL
+#define I40E_INSET_IPV4_TTL        0x0000000000040000ULL
+#define I40E_INSET_IPV6_TC         0x0000000000080000ULL
+#define I40E_INSET_IPV6_FLOW       0x0000000000100000ULL
+#define I40E_INSET_IPV6_NEXT_HDR   0x0000000000200000ULL
+#define I40E_INSET_IPV6_HOP_LIMIT  0x0000000000400000ULL
+#define I40E_INSET_TCP_FLAGS       0x0000000000800000ULL
+
+/* bit 32 ~ bit 47, tunnel fields */
+#define I40E_INSET_TUNNEL_IPV4_DST       0x0000000100000000ULL
+#define I40E_INSET_TUNNEL_IPV6_DST       0x0000000200000000ULL
+#define I40E_INSET_TUNNEL_DMAC           0x0000000400000000ULL
+#define I40E_INSET_TUNNEL_SRC_PORT       0x0000000800000000ULL
+#define I40E_INSET_TUNNEL_DST_PORT       0x0000001000000000ULL
+#define I40E_INSET_TUNNEL_ID             0x0000002000000000ULL
+
+/* bit 48 ~ bit 55 */
+#define I40E_INSET_LAST_ETHER_TYPE 0x0001000000000000ULL
+
+/* bit 56 ~ bit 63, Flex Payload */
+#define I40E_INSET_FLEX_PAYLOAD_W1 0x0100000000000000ULL
+#define I40E_INSET_FLEX_PAYLOAD_W2 0x0200000000000000ULL
+#define I40E_INSET_FLEX_PAYLOAD_W3 0x0400000000000000ULL
+#define I40E_INSET_FLEX_PAYLOAD_W4 0x0800000000000000ULL
+#define I40E_INSET_FLEX_PAYLOAD_W5 0x1000000000000000ULL
+#define I40E_INSET_FLEX_PAYLOAD_W6 0x2000000000000000ULL
+#define I40E_INSET_FLEX_PAYLOAD_W7 0x4000000000000000ULL
+#define I40E_INSET_FLEX_PAYLOAD_W8 0x8000000000000000ULL
+#define I40E_INSET_FLEX_PAYLOAD \
+       (I40E_INSET_FLEX_PAYLOAD_W1 | I40E_INSET_FLEX_PAYLOAD_W2 | \
+       I40E_INSET_FLEX_PAYLOAD_W3 | I40E_INSET_FLEX_PAYLOAD_W4 | \
+       I40E_INSET_FLEX_PAYLOAD_W5 | I40E_INSET_FLEX_PAYLOAD_W6 | \
+       I40E_INSET_FLEX_PAYLOAD_W7 | I40E_INSET_FLEX_PAYLOAD_W8)
+
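Quick illustration (not part of the header): input-set selections are built by OR-ing these bits, for example an IPv4 address pair plus L4 ports.

/* Example only: IPv4 source/destination address plus L4 source/destination port. */
static const uint64_t example_ipv4_l4_inset =
        I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST |
        I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT;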
 struct i40e_adapter;
 
 /**
@@ -290,6 +363,7 @@ struct i40e_vsi {
        uint16_t msix_intr; /* The MSIX interrupt binds to VSI */
        uint16_t nb_msix;   /* The max number of msix vector */
        uint8_t enabled_tc; /* The traffic class enabled */
+       uint8_t vlan_anti_spoof_on; /* The VLAN anti-spoofing enabled */
        struct i40e_bw_info bw_info; /* VSI bandwidth information */
 };
 
@@ -366,6 +440,14 @@ struct i40e_fdir_flex_mask {
 };
 
 #define I40E_FILTER_PCTYPE_MAX 64
+#define I40E_MAX_FDIR_FILTER_NUM (1024 * 8)
+
+struct i40e_fdir_filter {
+       TAILQ_ENTRY(i40e_fdir_filter) rules;
+       struct rte_eth_fdir_filter fdir;
+};
+
+TAILQ_HEAD(i40e_fdir_filter_list, i40e_fdir_filter);
 /*
  *  A structure used to define fields of a FDIR related info.
  */
@@ -384,6 +466,60 @@ struct i40e_fdir_info {
         */
        struct i40e_fdir_flex_pit flex_set[I40E_MAX_FLXPLD_LAYER * I40E_MAX_FLXPLD_FIED];
        struct i40e_fdir_flex_mask flex_mask[I40E_FILTER_PCTYPE_MAX];
+
+       struct i40e_fdir_filter_list fdir_list;
+       struct i40e_fdir_filter **hash_map;
+       struct rte_hash *hash_table;
+};
+
+/* Ethertype filter number HW supports */
+#define I40E_MAX_ETHERTYPE_FILTER_NUM 768
+
+/* Ethertype filter struct */
+struct i40e_ethertype_filter_input {
+       struct ether_addr mac_addr;   /* Mac address to match */
+       uint16_t ether_type;          /* Ether type to match */
+};
+
+struct i40e_ethertype_filter {
+       TAILQ_ENTRY(i40e_ethertype_filter) rules;
+       struct i40e_ethertype_filter_input input;
+       uint16_t flags;              /* Flags from RTE_ETHTYPE_FLAGS_* */
+       uint16_t queue;              /* Queue assigned to when match */
+};
+
+TAILQ_HEAD(i40e_ethertype_filter_list, i40e_ethertype_filter);
+
+struct i40e_ethertype_rule {
+       struct i40e_ethertype_filter_list ethertype_list;
+       struct i40e_ethertype_filter  **hash_map;
+       struct rte_hash *hash_table;
+};
+
+/* Tunnel filter number HW supports */
+#define I40E_MAX_TUNNEL_FILTER_NUM 400
+
+/* Tunnel filter struct */
+struct i40e_tunnel_filter_input {
+       uint8_t outer_mac[6];    /* Outer mac address to match */
+       uint8_t inner_mac[6];    /* Inner mac address to match */
+       uint16_t inner_vlan;     /* Inner vlan address to match */
+       uint16_t flags;          /* Filter type flag */
+       uint32_t tenant_id;      /* Tenant id to match */
+};
+
+struct i40e_tunnel_filter {
+       TAILQ_ENTRY(i40e_tunnel_filter) rules;
+       struct i40e_tunnel_filter_input input;
+       uint16_t queue; /* Queue assigned to when match */
+};
+
+TAILQ_HEAD(i40e_tunnel_filter_list, i40e_tunnel_filter);
+
+struct i40e_tunnel_rule {
+       struct i40e_tunnel_filter_list tunnel_list;
+       struct i40e_tunnel_filter  **hash_map;
+       struct rte_hash *hash_table;
 };
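The *_rule containers above pair an rte_hash keyed on the *_filter_input struct with a TAILQ used for iteration and restore. A rough sketch of the insert path, with a hypothetical helper name:

/* Sketch only: shadow a new ethertype filter in software state. */
static int
example_sw_ethertype_insert(struct i40e_ethertype_rule *rule,
                            struct i40e_ethertype_filter *filter)
{
        int pos = rte_hash_add_key(rule->hash_table, &filter->input);

        if (pos < 0)
                return pos;                       /* table full or error */
        rule->hash_map[pos] = filter;             /* O(1) lookup by input key */
        TAILQ_INSERT_TAIL(&rule->ethertype_list, filter, rules);
        return 0;
}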
 
 #define I40E_MIRROR_MAX_ENTRIES_PER_RULE   64
@@ -407,6 +543,17 @@ struct i40e_mirror_rule {
 
 TAILQ_HEAD(i40e_mirror_rule_list, i40e_mirror_rule);
 
+/*
+ * Struct to store flow created.
+ */
+struct rte_flow {
+       TAILQ_ENTRY(rte_flow) node;
+       enum rte_filter_type filter_type;
+       void *rule;
+};
+
+TAILQ_HEAD(i40e_flow_list, rte_flow);
+
 /*
  * Structure to store private data specific for PF instance.
  */
@@ -456,12 +603,15 @@ struct i40e_pf {
        struct i40e_vmdq_info *vmdq;
 
        struct i40e_fdir_info fdir; /* flow director info */
+       struct i40e_ethertype_rule ethertype; /* Ethertype filter rule */
+       struct i40e_tunnel_rule tunnel; /* Tunnel filter rule */
        struct i40e_fc_conf fc_conf; /* Flow control conf */
        struct i40e_mirror_rule_list mirror_list;
        uint16_t nb_mirror_rule;   /* The number of mirror rules */
        bool floating_veb; /* The flag to use the floating VEB */
        /* The floating enable flag for the specific VF */
        bool floating_veb_list[I40E_MAX_VF];
+       struct i40e_flow_list flow_list;
 };
 
 enum pending_msg {
@@ -517,7 +667,7 @@ struct i40e_vf {
        enum i40e_aq_link_speed link_speed;
        bool vf_reset;
        volatile uint32_t pend_cmd; /* pending command not finished yet */
-       uint32_t cmd_retval; /* return value of the cmd response from PF */
+       int32_t cmd_retval; /* return value of the cmd response from PF */
        u16 pend_msg; /* flags indicates events from pf not handled yet */
        uint8_t *aq_resp; /* buffer to store the adminq response from PF */
 
@@ -554,6 +704,25 @@ struct i40e_adapter {
        struct rte_timecounter tx_tstamp_tc;
 };
 
+extern const struct rte_flow_ops i40e_flow_ops;
+
+union i40e_filter_t {
+       struct rte_eth_ethertype_filter ethertype_filter;
+       struct rte_eth_fdir_filter fdir_filter;
+       struct rte_eth_tunnel_filter_conf tunnel_filter;
+};
+
+typedef int (*parse_filter_t)(struct rte_eth_dev *dev,
+                             const struct rte_flow_attr *attr,
+                             const struct rte_flow_item pattern[],
+                             const struct rte_flow_action actions[],
+                             struct rte_flow_error *error,
+                             union i40e_filter_t *filter);
+struct i40e_valid_pattern {
+       enum rte_flow_item_type *items;
+       parse_filter_t parse_filter;
+};
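For context, each i40e_valid_pattern entry is meant to map an RTE_FLOW_ITEM_TYPE_END-terminated item sequence to one parse_filter_t callback; a hypothetical entry (with a stub parser, not a real symbol) might look like:

static enum rte_flow_item_type example_pattern_ethertype[] = {
        RTE_FLOW_ITEM_TYPE_ETH,
        RTE_FLOW_ITEM_TYPE_END,
};

/* Stub standing in for a real parser implemented in i40e_flow.c. */
static int
example_parse_ethertype(struct rte_eth_dev *dev __rte_unused,
                        const struct rte_flow_attr *attr __rte_unused,
                        const struct rte_flow_item pattern[] __rte_unused,
                        const struct rte_flow_action actions[] __rte_unused,
                        struct rte_flow_error *error __rte_unused,
                        union i40e_filter_t *filter __rte_unused)
{
        return 0; /* a real parser would fill filter->ethertype_filter */
}

static const struct i40e_valid_pattern example_patterns[] = {
        { example_pattern_ethertype, example_parse_ethertype },
};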
+
 int i40e_dev_switch_queues(struct i40e_pf *pf, bool on);
 int i40e_vsi_release(struct i40e_vsi *vsi);
 struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf,
@@ -577,7 +746,7 @@ int i40e_vsi_vlan_pvid_set(struct i40e_vsi *vsi,
                           struct i40e_vsi_vlan_pvid_info *info);
 int i40e_vsi_config_vlan_stripping(struct i40e_vsi *vsi, bool on);
 int i40e_vsi_config_vlan_filter(struct i40e_vsi *vsi, bool on);
-uint64_t i40e_config_hena(uint64_t flags);
+uint64_t i40e_config_hena(uint64_t flags, enum i40e_mac_type type);
 uint64_t i40e_parse_hena(uint64_t flags);
 enum i40e_status_code i40e_fdir_setup_tx_resources(struct i40e_pf *pf);
 enum i40e_status_code i40e_fdir_setup_rx_resources(struct i40e_pf *pf);
@@ -595,15 +764,44 @@ int i40e_fdir_ctrl_func(struct rte_eth_dev *dev,
 int i40e_select_filter_input_set(struct i40e_hw *hw,
                                 struct rte_eth_input_set_conf *conf,
                                 enum rte_filter_type filter);
+void i40e_fdir_filter_restore(struct i40e_pf *pf);
 int i40e_hash_filter_inset_select(struct i40e_hw *hw,
                             struct rte_eth_input_set_conf *conf);
 int i40e_fdir_filter_inset_select(struct i40e_pf *pf,
                             struct rte_eth_input_set_conf *conf);
-
+int i40e_pf_host_send_msg_to_vf(struct i40e_pf_vf *vf, uint32_t opcode,
+                               uint32_t retval, uint8_t *msg,
+                               uint16_t msglen);
 void i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
        struct rte_eth_rxq_info *qinfo);
 void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
        struct rte_eth_txq_info *qinfo);
+struct i40e_ethertype_filter *
+i40e_sw_ethertype_filter_lookup(struct i40e_ethertype_rule *ethertype_rule,
+                       const struct i40e_ethertype_filter_input *input);
+int i40e_sw_ethertype_filter_del(struct i40e_pf *pf,
+                                struct i40e_ethertype_filter_input *input);
+int i40e_sw_fdir_filter_del(struct i40e_pf *pf,
+                           struct rte_eth_fdir_input *input);
+struct i40e_tunnel_filter *
+i40e_sw_tunnel_filter_lookup(struct i40e_tunnel_rule *tunnel_rule,
+                            const struct i40e_tunnel_filter_input *input);
+int i40e_sw_tunnel_filter_del(struct i40e_pf *pf,
+                             struct i40e_tunnel_filter_input *input);
+uint64_t i40e_get_default_input_set(uint16_t pctype);
+int i40e_ethertype_filter_set(struct i40e_pf *pf,
+                             struct rte_eth_ethertype_filter *filter,
+                             bool add);
+int i40e_add_del_fdir_filter(struct rte_eth_dev *dev,
+                            const struct rte_eth_fdir_filter *filter,
+                            bool add);
+int i40e_dev_tunnel_filter_set(struct i40e_pf *pf,
+                              struct rte_eth_tunnel_filter_conf *tunnel_filter,
+                              uint8_t add);
+int i40e_fdir_flush(struct rte_eth_dev *dev);
+
+#define I40E_DEV_TO_PCI(eth_dev) \
+       RTE_DEV_TO_PCI((eth_dev)->device)
 
 /* I40E_DEV_PRIVATE_TO */
 #define I40E_DEV_PRIVATE_TO_PF(adapter) \
@@ -699,6 +897,25 @@ i40e_calc_itr_interval(int16_t interval)
        (flow_type) == RTE_ETH_FLOW_NONFRAG_IPV6_OTHER || \
        (flow_type) == RTE_ETH_FLOW_L2_PAYLOAD)
 
+#define I40E_VALID_PCTYPE_X722(pctype) \
+       ((pctype) == I40E_FILTER_PCTYPE_FRAG_IPV4 || \
+       (pctype) == I40E_FILTER_PCTYPE_NONF_IPV4_TCP || \
+       (pctype) == I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK || \
+       (pctype) == I40E_FILTER_PCTYPE_NONF_IPV4_UDP || \
+       (pctype) == I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP || \
+       (pctype) == I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP || \
+       (pctype) == I40E_FILTER_PCTYPE_NONF_IPV4_SCTP || \
+       (pctype) == I40E_FILTER_PCTYPE_NONF_IPV4_OTHER || \
+       (pctype) == I40E_FILTER_PCTYPE_FRAG_IPV6 || \
+       (pctype) == I40E_FILTER_PCTYPE_NONF_IPV6_UDP || \
+       (pctype) == I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP || \
+       (pctype) == I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP || \
+       (pctype) == I40E_FILTER_PCTYPE_NONF_IPV6_TCP || \
+       (pctype) == I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK || \
+       (pctype) == I40E_FILTER_PCTYPE_NONF_IPV6_SCTP || \
+       (pctype) == I40E_FILTER_PCTYPE_NONF_IPV6_OTHER || \
+       (pctype) == I40E_FILTER_PCTYPE_L2_PAYLOAD)
+
 #define I40E_VALID_PCTYPE(pctype) \
        ((pctype) == I40E_FILTER_PCTYPE_FRAG_IPV4 || \
        (pctype) == I40E_FILTER_PCTYPE_NONF_IPV4_TCP || \
@@ -712,4 +929,18 @@ i40e_calc_itr_interval(int16_t interval)
        (pctype) == I40E_FILTER_PCTYPE_NONF_IPV6_OTHER || \
        (pctype) == I40E_FILTER_PCTYPE_L2_PAYLOAD)
 
+#define I40E_PHY_TYPE_SUPPORT_40G(phy_type) \
+       (((phy_type) & I40E_CAP_PHY_TYPE_40GBASE_KR4) || \
+       ((phy_type) & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU) || \
+       ((phy_type) & I40E_CAP_PHY_TYPE_40GBASE_AOC) || \
+       ((phy_type) & I40E_CAP_PHY_TYPE_40GBASE_CR4) || \
+       ((phy_type) & I40E_CAP_PHY_TYPE_40GBASE_SR4) || \
+       ((phy_type) & I40E_CAP_PHY_TYPE_40GBASE_LR4))
+
+#define I40E_PHY_TYPE_SUPPORT_25G(phy_type) \
+       (((phy_type) & I40E_CAP_PHY_TYPE_25GBASE_KR) || \
+       ((phy_type) & I40E_CAP_PHY_TYPE_25GBASE_CR) || \
+       ((phy_type) & I40E_CAP_PHY_TYPE_25GBASE_SR) || \
+       ((phy_type) & I40E_CAP_PHY_TYPE_25GBASE_LR))
+
 #endif /* _I40E_ETHDEV_H_ */
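
The 40G/25G PHY-capability macros added above are plain bitmask tests over the adapter's reported PHY types. A minimal sketch of how such macros could be folded into an ethdev speed-capability word, assuming phy_type comes from the PHY abilities query; the helper itself is illustrative and not part of this patch:

#include <rte_ethdev.h>

/* Illustrative only: map I40E_CAP_PHY_TYPE_* bits to ETH_LINK_SPEED_* capa. */
static uint32_t
example_speed_capa(uint64_t phy_type)
{
	uint32_t capa = 0;

	if (I40E_PHY_TYPE_SUPPORT_40G(phy_type))
		capa |= ETH_LINK_SPEED_40G;
	if (I40E_PHY_TYPE_SUPPORT_25G(phy_type))
		capa |= ETH_LINK_SPEED_25G;
	return capa;
}
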
index a616ae0..a606aef 100644 (file)
@@ -126,8 +126,6 @@ static void i40evf_dev_promiscuous_enable(struct rte_eth_dev *dev);
 static void i40evf_dev_promiscuous_disable(struct rte_eth_dev *dev);
 static void i40evf_dev_allmulticast_enable(struct rte_eth_dev *dev);
 static void i40evf_dev_allmulticast_disable(struct rte_eth_dev *dev);
-static int i40evf_get_link_status(struct rte_eth_dev *dev,
-                                 struct rte_eth_link *link);
 static int i40evf_init_vlan(struct rte_eth_dev *dev);
 static int i40evf_dev_rx_queue_start(struct rte_eth_dev *dev,
                                     uint16_t rx_queue_id);
@@ -153,6 +151,9 @@ static int i40evf_dev_rss_hash_update(struct rte_eth_dev *dev,
                                      struct rte_eth_rss_conf *rss_conf);
 static int i40evf_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
                                        struct rte_eth_rss_conf *rss_conf);
+static int i40evf_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
+static void i40evf_set_default_mac_addr(struct rte_eth_dev *dev,
+                                       struct ether_addr *mac_addr);
 static int
 i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id);
 static int
@@ -178,11 +179,11 @@ static const struct rte_i40evf_xstats_name_off rte_i40evf_stats_strings[] = {
        {"rx_unknown_protocol_packets", offsetof(struct i40e_eth_stats,
                rx_unknown_protocol)},
        {"tx_bytes", offsetof(struct i40e_eth_stats, tx_bytes)},
-       {"tx_unicast_packets", offsetof(struct i40e_eth_stats, tx_bytes)},
-       {"tx_multicast_packets", offsetof(struct i40e_eth_stats, tx_bytes)},
-       {"tx_broadcast_packets", offsetof(struct i40e_eth_stats, tx_bytes)},
-       {"tx_dropped_packets", offsetof(struct i40e_eth_stats, tx_bytes)},
-       {"tx_error_packets", offsetof(struct i40e_eth_stats, tx_bytes)},
+       {"tx_unicast_packets", offsetof(struct i40e_eth_stats, tx_unicast)},
+       {"tx_multicast_packets", offsetof(struct i40e_eth_stats, tx_multicast)},
+       {"tx_broadcast_packets", offsetof(struct i40e_eth_stats, tx_broadcast)},
+       {"tx_dropped_packets", offsetof(struct i40e_eth_stats, tx_discards)},
+       {"tx_error_packets", offsetof(struct i40e_eth_stats, tx_errors)},
 };
 
 #define I40EVF_NB_XSTATS (sizeof(rte_i40evf_stats_strings) / \
@@ -227,6 +228,8 @@ static const struct eth_dev_ops i40evf_eth_dev_ops = {
        .reta_query           = i40evf_dev_rss_reta_query,
        .rss_hash_update      = i40evf_dev_rss_hash_update,
        .rss_hash_conf_get    = i40evf_dev_rss_hash_conf_get,
+       .mtu_set              = i40evf_dev_mtu_set,
+       .mac_addr_set         = i40evf_set_default_mac_addr,
 };
 
 /*
@@ -363,6 +366,7 @@ i40evf_execute_vf_cmd(struct rte_eth_dev *dev, struct vf_cmd_info *args)
                err = -1;
                do {
                        ret = i40evf_read_pfmsg(dev, &info);
+                       vf->cmd_retval = info.result;
                        if (ret == I40EVF_MSG_CMD) {
                                err = 0;
                                break;
@@ -641,7 +645,7 @@ i40evf_configure_vsi_queues(struct rte_eth_dev *dev)
        ret = i40evf_execute_vf_cmd(dev, &args);
        if (ret)
                PMD_DRV_LOG(ERR, "Failed to execute command of "
-                       "I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES\n");
+                       "I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES");
 
        return ret;
 }
@@ -694,7 +698,7 @@ i40evf_configure_vsi_queues_ext(struct rte_eth_dev *dev)
        ret = i40evf_execute_vf_cmd(dev, &args);
        if (ret)
                PMD_DRV_LOG(ERR, "Failed to execute command of "
-                       "I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT\n");
+                       "I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT");
 
        return ret;
 }
@@ -720,7 +724,8 @@ i40evf_config_irq_map(struct rte_eth_dev *dev)
        uint8_t cmd_buffer[sizeof(struct i40e_virtchnl_irq_map_info) + \
                sizeof(struct i40e_virtchnl_vector_map)];
        struct i40e_virtchnl_irq_map_info *map_info;
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        uint32_t vector_id;
        int i, err;
 
@@ -888,19 +893,16 @@ i40evf_add_mac_addr(struct rte_eth_dev *dev,
 }
 
 static void
-i40evf_del_mac_addr(struct rte_eth_dev *dev, uint32_t index)
+i40evf_del_mac_addr_by_addr(struct rte_eth_dev *dev,
+                           struct ether_addr *addr)
 {
        struct i40e_virtchnl_ether_addr_list *list;
        struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
-       struct rte_eth_dev_data *data = dev->data;
-       struct ether_addr *addr;
        uint8_t cmd_buffer[sizeof(struct i40e_virtchnl_ether_addr_list) + \
                        sizeof(struct i40e_virtchnl_ether_addr)];
        int err;
        struct vf_cmd_info args;
 
-       addr = &(data->mac_addrs[index]);
-
        if (i40e_validate_mac_addr(addr->addr_bytes) != I40E_SUCCESS) {
                PMD_DRV_LOG(ERR, "Invalid mac:%x-%x-%x-%x-%x-%x",
                            addr->addr_bytes[0], addr->addr_bytes[1],
@@ -927,6 +929,17 @@ i40evf_del_mac_addr(struct rte_eth_dev *dev, uint32_t index)
        return;
 }
 
+static void
+i40evf_del_mac_addr(struct rte_eth_dev *dev, uint32_t index)
+{
+       struct rte_eth_dev_data *data = dev->data;
+       struct ether_addr *addr;
+
+       addr = &data->mac_addrs[index];
+
+       i40evf_del_mac_addr_by_addr(dev, addr);
+}
+
 static int
 i40evf_update_stats(struct rte_eth_dev *dev, struct i40e_eth_stats **pstats)
 {
@@ -954,7 +967,7 @@ i40evf_update_stats(struct rte_eth_dev *dev, struct i40e_eth_stats **pstats)
 }
 
 static int
-i40evf_get_statics(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+i40evf_get_statistics(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
        int ret;
        struct i40e_eth_stats *pstats = NULL;
@@ -1084,37 +1097,11 @@ i40evf_del_vlan(struct rte_eth_dev *dev, uint16_t vlanid)
        return err;
 }
 
-static int
-i40evf_get_link_status(struct rte_eth_dev *dev, struct rte_eth_link *link)
-{
-       struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
-       int err;
-       struct vf_cmd_info args;
-       struct rte_eth_link *new_link;
-
-       args.ops = (enum i40e_virtchnl_ops)I40E_VIRTCHNL_OP_GET_LINK_STAT;
-       args.in_args = NULL;
-       args.in_args_size = 0;
-       args.out_buffer = vf->aq_resp;
-       args.out_size = I40E_AQ_BUF_SZ;
-       err = i40evf_execute_vf_cmd(dev, &args);
-       if (err) {
-               PMD_DRV_LOG(ERR, "fail to execute command OP_GET_LINK_STAT");
-               return err;
-       }
-
-       new_link = (struct rte_eth_link *)args.out_buffer;
-       (void)rte_memcpy(link, new_link, sizeof(*link));
-
-       return 0;
-}
-
 static const struct rte_pci_id pci_id_i40evf_map[] = {
        { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_VF) },
        { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_VF_HV) },
        { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_X722_A0_VF) },
        { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_X722_VF) },
-       { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_X722_VF_HV) },
        { .vendor_id = 0, /* sentinel */ },
 };
 
@@ -1208,7 +1195,6 @@ i40evf_init_vf(struct rte_eth_dev *dev)
        int i, err, bufsz;
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
-       struct ether_addr *p_mac_addr;
        uint16_t interval =
                i40e_calc_itr_interval(I40E_QUEUE_ITR_INTERVAL_MAX);
 
@@ -1285,9 +1271,8 @@ i40evf_init_vf(struct rte_eth_dev *dev)
        vf->vsi.adapter = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 
        /* Store the MAC address configured by host, or generate random one */
-       p_mac_addr = (struct ether_addr *)(vf->vsi_res->default_mac_addr);
-       if (is_valid_assigned_ether_addr(p_mac_addr)) /* Configured by host */
-               ether_addr_copy(p_mac_addr, (struct ether_addr *)hw->mac.addr);
+       if (is_valid_assigned_ether_addr((struct ether_addr *)hw->mac.addr))
+               vf->flags |= I40E_FLAG_VF_MAC_BY_PF;
        else
                eth_random_addr(hw->mac.addr); /* Generate a random one */
 
@@ -1340,16 +1325,16 @@ i40evf_handle_pf_event(__rte_unused struct rte_eth_dev *dev,
 
        switch (pf_msg->event) {
        case I40E_VIRTCHNL_EVENT_RESET_IMPENDING:
-               PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_RESET_IMPENDING event\n");
-               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET);
+               PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_RESET_IMPENDING event");
+               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, NULL);
                break;
        case I40E_VIRTCHNL_EVENT_LINK_CHANGE:
-               PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_LINK_CHANGE event\n");
+               PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_LINK_CHANGE event");
                vf->link_up = pf_msg->event_data.link_event.link_status;
                vf->link_speed = pf_msg->event_data.link_event.link_speed;
                break;
        case I40E_VIRTCHNL_EVENT_PF_DRIVER_CLOSE:
-               PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_PF_DRIVER_CLOSE event\n");
+               PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_PF_DRIVER_CLOSE event");
                break;
        default:
                PMD_DRV_LOG(ERR, " unknown event received %u", pf_msg->event);
@@ -1363,8 +1348,9 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev)
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
        struct i40e_arq_event_info info;
-       struct i40e_virtchnl_msg *v_msg;
-       uint16_t pending, opcode;
+       uint16_t pending, aq_opc;
+       enum i40e_virtchnl_ops msg_opc;
+       enum i40e_status_code msg_ret;
        int ret;
 
        info.buf_len = I40E_AQ_BUF_SZ;
@@ -1373,7 +1359,6 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev)
                return;
        }
        info.msg_buf = vf->aq_resp;
-       v_msg = (struct i40e_virtchnl_msg *)&info.desc;
 
        pending = 1;
        while (pending) {
@@ -1384,32 +1369,39 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev)
                                    "ret: %d", ret);
                        break;
                }
-               opcode = rte_le_to_cpu_16(info.desc.opcode);
-
-               switch (opcode) {
+               aq_opc = rte_le_to_cpu_16(info.desc.opcode);
+               /* For a message sent from the PF to the VF, the opcode is
+                * stored in cookie_high of struct i40e_aq_desc, while the
+                * return error code is stored in cookie_low; this is done by
+                * i40e_aq_send_msg_to_vf in the PF driver. */
+               msg_opc = (enum i40e_virtchnl_ops)rte_le_to_cpu_32(
+                                                 info.desc.cookie_high);
+               msg_ret = (enum i40e_status_code)rte_le_to_cpu_32(
+                                                 info.desc.cookie_low);
+               switch (aq_opc) {
                case i40e_aqc_opc_send_msg_to_vf:
-                       if (v_msg->v_opcode == I40E_VIRTCHNL_OP_EVENT)
+                       if (msg_opc == I40E_VIRTCHNL_OP_EVENT)
                                /* process event*/
                                i40evf_handle_pf_event(dev, info.msg_buf,
                                                       info.msg_len);
                        else {
                                /* read message and it's expected one */
-                               if (v_msg->v_opcode == vf->pend_cmd) {
-                                       vf->cmd_retval = v_msg->v_retval;
+                               if (msg_opc == vf->pend_cmd) {
+                                       vf->cmd_retval = msg_ret;
                                        /* prevent compiler reordering */
                                        rte_compiler_barrier();
                                        _clear_cmd(vf);
                                } else
                                        PMD_DRV_LOG(ERR, "command mismatch,"
                                                "expect %u, get %u",
-                                               vf->pend_cmd, v_msg->v_opcode);
+                                               vf->pend_cmd, msg_opc);
                                PMD_DRV_LOG(DEBUG, "adminq response is received,"
-                                            " opcode = %d\n", v_msg->v_opcode);
+                                            " opcode = %d", msg_opc);
                        }
                        break;
                default:
                        PMD_DRV_LOG(ERR, "Request %u is not supported yet",
-                                   opcode);
+                                   aq_opc);
                        break;
                }
        }
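
The reworked admin-queue handler above stops casting the descriptor to struct i40e_virtchnl_msg and instead decodes the virtchnl opcode and return code from cookie_high/cookie_low, as the comment notes. For context, a hedged sketch of the PF-side call that populates those cookies (the opcode, return value and buffer arguments here are purely illustrative):

/* PF side (base code): the opcode lands in cookie_high, the return
 * value in cookie_low, which the VF reads back in the handler above. */
i40e_aq_send_msg_to_vf(hw, vf_id,
		       I40E_VIRTCHNL_OP_GET_VF_RESOURCES,	/* -> cookie_high */
		       I40E_SUCCESS,				/* -> cookie_low */
		       NULL, 0, NULL);
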
@@ -1428,7 +1420,7 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev)
  *  void
  */
 static void
-i40evf_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
+i40evf_dev_interrupt_handler(struct rte_intr_handle *intr_handle,
                             void *param)
 {
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
@@ -1442,31 +1434,31 @@ i40evf_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
 
        /* No interrupt event indicated */
        if (!(icr0 & I40E_VFINT_ICR01_INTEVENT_MASK)) {
-               PMD_DRV_LOG(DEBUG, "No interrupt event, nothing to do\n");
+               PMD_DRV_LOG(DEBUG, "No interrupt event, nothing to do");
                goto done;
        }
 
        if (icr0 & I40E_VFINT_ICR01_ADMINQ_MASK) {
-               PMD_DRV_LOG(DEBUG, "ICR01_ADMINQ is reported\n");
+               PMD_DRV_LOG(DEBUG, "ICR01_ADMINQ is reported");
                i40evf_handle_aq_msg(dev);
        }
 
        /* Link Status Change interrupt */
        if (icr0 & I40E_VFINT_ICR01_LINK_STAT_CHANGE_MASK)
                PMD_DRV_LOG(DEBUG, "LINK_STAT_CHANGE is reported,"
-                                  " do nothing\n");
+                                  " do nothing");
 
 done:
        i40evf_enable_irq0(hw);
-       rte_intr_enable(&dev->pci_dev->intr_handle);
+       rte_intr_enable(intr_handle);
 }
 
 static int
 i40evf_dev_init(struct rte_eth_dev *eth_dev)
 {
-       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(\
-                       eth_dev->data->dev_private);
-       struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+       struct i40e_hw *hw
+               = I40E_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(eth_dev);
 
        PMD_INIT_FUNC_TRACE();
 
@@ -1485,15 +1477,16 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev)
                return 0;
        }
 
-       rte_eth_copy_pci_info(eth_dev, eth_dev->pci_dev);
+       rte_eth_copy_pci_info(eth_dev, pci_dev);
+       eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE;
 
-       hw->vendor_id = eth_dev->pci_dev->id.vendor_id;
-       hw->device_id = eth_dev->pci_dev->id.device_id;
-       hw->subsystem_vendor_id = eth_dev->pci_dev->id.subsystem_vendor_id;
-       hw->subsystem_device_id = eth_dev->pci_dev->id.subsystem_device_id;
-       hw->bus.device = eth_dev->pci_dev->addr.devid;
-       hw->bus.func = eth_dev->pci_dev->addr.function;
-       hw->hw_addr = (void *)eth_dev->pci_dev->mem_resource[0].addr;
+       hw->vendor_id = pci_dev->id.vendor_id;
+       hw->device_id = pci_dev->id.device_id;
+       hw->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id;
+       hw->subsystem_device_id = pci_dev->id.subsystem_device_id;
+       hw->bus.device = pci_dev->addr.devid;
+       hw->bus.func = pci_dev->addr.function;
+       hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
        hw->adapter_stopped = 0;
 
        if(i40evf_init_vf(eth_dev) != 0) {
@@ -1554,38 +1547,19 @@ i40evf_dev_uninit(struct rte_eth_dev *eth_dev)
  */
 static struct eth_driver rte_i40evf_pmd = {
        .pci_drv = {
-               .name = "rte_i40evf_pmd",
                .id_table = pci_id_i40evf_map,
-               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_DETACHABLE,
+               .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
+               .probe = rte_eth_dev_pci_probe,
+               .remove = rte_eth_dev_pci_remove,
        },
        .eth_dev_init = i40evf_dev_init,
        .eth_dev_uninit = i40evf_dev_uninit,
        .dev_private_size = sizeof(struct i40e_adapter),
 };
 
-/*
- * VF Driver initialization routine.
- * Invoked one at EAL init time.
- * Register itself as the [Virtual Poll Mode] Driver of PCI Fortville devices.
- */
-static int
-rte_i40evf_pmd_init(const char *name __rte_unused,
-                   const char *params __rte_unused)
-{
-       PMD_INIT_FUNC_TRACE();
-
-       rte_eth_driver_register(&rte_i40evf_pmd);
-
-       return 0;
-}
-
-static struct rte_driver rte_i40evf_driver = {
-       .type = PMD_PDEV,
-       .init = rte_i40evf_pmd_init,
-};
-
-PMD_REGISTER_DRIVER(rte_i40evf_driver, i40evf);
-DRIVER_REGISTER_PCI_TABLE(i40evf, pci_id_i40evf_map);
+RTE_PMD_REGISTER_PCI(net_i40e_vf, rte_i40evf_pmd.pci_drv);
+RTE_PMD_REGISTER_PCI_TABLE(net_i40e_vf, pci_id_i40evf_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_i40e_vf, "* igb_uio | vfio");
 
 static int
 i40evf_dev_configure(struct rte_eth_dev *dev)
@@ -1900,7 +1874,8 @@ i40evf_enable_queues_intr(struct rte_eth_dev *dev)
 {
        struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
        if (!rte_intr_allow_others(intr_handle)) {
                I40E_WRITE_REG(hw,
@@ -1932,7 +1907,8 @@ i40evf_disable_queues_intr(struct rte_eth_dev *dev)
 {
        struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
        if (!rte_intr_allow_others(intr_handle)) {
                I40E_WRITE_REG(hw, I40E_VFINT_DYN_CTL01,
@@ -1958,7 +1934,8 @@ i40evf_disable_queues_intr(struct rte_eth_dev *dev)
 static int
 i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
 {
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint16_t interval =
                i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL);
@@ -1984,7 +1961,7 @@ i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
 
        I40EVF_WRITE_FLUSH(hw);
 
-       rte_intr_enable(&dev->pci_dev->intr_handle);
+       rte_intr_enable(&pci_dev->intr_handle);
 
        return 0;
 }
@@ -1992,7 +1969,8 @@ i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
 static int
 i40evf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
 {
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint16_t msix_intr;
 
@@ -2072,7 +2050,8 @@ i40evf_dev_start(struct rte_eth_dev *dev)
 {
        struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        uint32_t intr_vector = 0;
 
        PMD_INIT_FUNC_TRACE();
@@ -2096,7 +2075,7 @@ i40evf_dev_start(struct rte_eth_dev *dev)
                                    dev->data->nb_rx_queues * sizeof(int), 0);
                if (!intr_handle->intr_vec) {
                        PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues"
-                                    " intr_vec\n", dev->data->nb_rx_queues);
+                                    " intr_vec", dev->data->nb_rx_queues);
                        return -ENOMEM;
                }
        }
@@ -2137,7 +2116,8 @@ err_queue:
 static void
 i40evf_dev_stop(struct rte_eth_dev *dev)
 {
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -2166,35 +2146,33 @@ i40evf_dev_link_update(struct rte_eth_dev *dev,
         * The DPDK PF host provides an interface to acquire link status,
         * while the Linux driver does not
         */
-       if (vf->version_major == I40E_DPDK_VERSION_MAJOR)
-               i40evf_get_link_status(dev, &new_link);
-       else {
-               /* Linux driver PF host */
-               switch (vf->link_speed) {
-               case I40E_LINK_SPEED_100MB:
-                       new_link.link_speed = ETH_SPEED_NUM_100M;
-                       break;
-               case I40E_LINK_SPEED_1GB:
-                       new_link.link_speed = ETH_SPEED_NUM_1G;
-                       break;
-               case I40E_LINK_SPEED_10GB:
-                       new_link.link_speed = ETH_SPEED_NUM_10G;
-                       break;
-               case I40E_LINK_SPEED_20GB:
-                       new_link.link_speed = ETH_SPEED_NUM_20G;
-                       break;
-               case I40E_LINK_SPEED_40GB:
-                       new_link.link_speed = ETH_SPEED_NUM_40G;
-                       break;
-               default:
-                       new_link.link_speed = ETH_SPEED_NUM_100M;
-                       break;
-               }
-               /* full duplex only */
-               new_link.link_duplex = ETH_LINK_FULL_DUPLEX;
-               new_link.link_status = vf->link_up ? ETH_LINK_UP :
-                                                    ETH_LINK_DOWN;
+
+       /* Linux driver PF host */
+       switch (vf->link_speed) {
+       case I40E_LINK_SPEED_100MB:
+               new_link.link_speed = ETH_SPEED_NUM_100M;
+               break;
+       case I40E_LINK_SPEED_1GB:
+               new_link.link_speed = ETH_SPEED_NUM_1G;
+               break;
+       case I40E_LINK_SPEED_10GB:
+               new_link.link_speed = ETH_SPEED_NUM_10G;
+               break;
+       case I40E_LINK_SPEED_20GB:
+               new_link.link_speed = ETH_SPEED_NUM_20G;
+               break;
+       case I40E_LINK_SPEED_40GB:
+               new_link.link_speed = ETH_SPEED_NUM_40G;
+               break;
+       default:
+               new_link.link_speed = ETH_SPEED_NUM_100M;
+               break;
        }
+       /* full duplex only */
+       new_link.link_duplex = ETH_LINK_FULL_DUPLEX;
+       new_link.link_status = vf->link_up ? ETH_LINK_UP :
+                                            ETH_LINK_DOWN;
+
        i40evf_dev_atomic_write_link_status(dev, &new_link);
 
        return 0;
@@ -2266,6 +2244,7 @@ i40evf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
        struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 
        memset(dev_info, 0, sizeof(*dev_info));
+       dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device);
        dev_info->max_rx_queues = vf->vsi_res->num_queue_pairs;
        dev_info->max_tx_queues = vf->vsi_res->num_queue_pairs;
        dev_info->min_rx_bufsize = I40E_BUF_SIZE_MIN;
@@ -2326,15 +2305,16 @@ i40evf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 static void
 i40evf_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-       if (i40evf_get_statics(dev, stats))
-               PMD_DRV_LOG(ERR, "Get statics failed");
+       if (i40evf_get_statistics(dev, stats))
+               PMD_DRV_LOG(ERR, "Get statistics failed");
 }
 
 static void
 i40evf_dev_close(struct rte_eth_dev *dev)
 {
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct rte_pci_device *pci_dev = dev->pci_dev;
+       struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
        i40evf_dev_stop(dev);
        hw->adapter_stopped = 1;
@@ -2342,11 +2322,11 @@ i40evf_dev_close(struct rte_eth_dev *dev)
        i40evf_reset_vf(hw);
        i40e_shutdown_adminq(hw);
        /* disable uio intr before callback unregister */
-       rte_intr_disable(&pci_dev->intr_handle);
+       rte_intr_disable(intr_handle);
 
        /* unregister callback func from eal lib */
-       rte_intr_callback_unregister(&pci_dev->intr_handle,
-               i40evf_dev_interrupt_handler, (void *)dev);
+       rte_intr_callback_unregister(intr_handle,
+                                    i40evf_dev_interrupt_handler, dev);
        i40evf_disable_irq0(hw);
 }
 
@@ -2423,7 +2403,7 @@ i40evf_dev_rss_reta_update(struct rte_eth_dev *dev,
        if (reta_size != ETH_RSS_RETA_SIZE_64) {
                PMD_DRV_LOG(ERR, "The size of hash lookup table configured "
                        "(%d) doesn't match the number of hardware can "
-                       "support (%d)\n", reta_size, ETH_RSS_RETA_SIZE_64);
+                       "support (%d)", reta_size, ETH_RSS_RETA_SIZE_64);
                return -EINVAL;
        }
 
@@ -2462,7 +2442,7 @@ i40evf_dev_rss_reta_query(struct rte_eth_dev *dev,
        if (reta_size != ETH_RSS_RETA_SIZE_64) {
                PMD_DRV_LOG(ERR, "The size of hash lookup table configured "
                        "(%d) doesn't match the number of hardware can "
-                       "support (%d)\n", reta_size, ETH_RSS_RETA_SIZE_64);
+                       "support (%d)", reta_size, ETH_RSS_RETA_SIZE_64);
                return -EINVAL;
        }
 
@@ -2568,8 +2548,11 @@ i40evf_hw_rss_hash_set(struct i40e_vf *vf, struct rte_eth_rss_conf *rss_conf)
        rss_hf = rss_conf->rss_hf;
        hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(0));
        hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(1))) << 32;
-       hena &= ~I40E_RSS_HENA_ALL;
-       hena |= i40e_config_hena(rss_hf);
+       if (hw->mac.type == I40E_MAC_X722)
+               hena &= ~I40E_RSS_HENA_ALL_X722;
+       else
+               hena &= ~I40E_RSS_HENA_ALL;
+       hena |= i40e_config_hena(rss_hf, hw->mac.type);
        i40e_write_rx_ctl(hw, I40E_VFQF_HENA(0), (uint32_t)hena);
        i40e_write_rx_ctl(hw, I40E_VFQF_HENA(1), (uint32_t)(hena >> 32));
        I40EVF_WRITE_FLUSH(hw);
@@ -2585,7 +2568,10 @@ i40evf_disable_rss(struct i40e_vf *vf)
 
        hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(0));
        hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(1))) << 32;
-       hena &= ~I40E_RSS_HENA_ALL;
+       if (hw->mac.type == I40E_MAC_X722)
+               hena &= ~I40E_RSS_HENA_ALL_X722;
+       else
+               hena &= ~I40E_RSS_HENA_ALL;
        i40e_write_rx_ctl(hw, I40E_VFQF_HENA(0), (uint32_t)hena);
        i40e_write_rx_ctl(hw, I40E_VFQF_HENA(1), (uint32_t)(hena >> 32));
        I40EVF_WRITE_FLUSH(hw);
@@ -2601,7 +2587,7 @@ i40evf_config_rss(struct i40e_vf *vf)
 
        if (vf->dev_data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_RSS) {
                i40evf_disable_rss(vf);
-               PMD_DRV_LOG(DEBUG, "RSS not configured\n");
+               PMD_DRV_LOG(DEBUG, "RSS not configured");
                return 0;
        }
 
@@ -2618,7 +2604,7 @@ i40evf_config_rss(struct i40e_vf *vf)
        rss_conf = vf->dev_data->dev_conf.rx_adv_conf.rss_conf;
        if ((rss_conf.rss_hf & I40E_RSS_OFFLOAD_ALL) == 0) {
                i40evf_disable_rss(vf);
-               PMD_DRV_LOG(DEBUG, "No hash flag is set\n");
+               PMD_DRV_LOG(DEBUG, "No hash flag is set");
                return 0;
        }
 
@@ -2646,7 +2632,9 @@ i40evf_dev_rss_hash_update(struct rte_eth_dev *dev,
 
        hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(0));
        hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(1))) << 32;
-       if (!(hena & I40E_RSS_HENA_ALL)) { /* RSS disabled */
+       if (!(hena & ((hw->mac.type == I40E_MAC_X722)
+                ? I40E_RSS_HENA_ALL_X722
+                : I40E_RSS_HENA_ALL))) { /* RSS disabled */
                if (rss_hf != 0) /* Enable RSS */
                        return -EINVAL;
                return 0;
@@ -2676,3 +2664,55 @@ i40evf_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
 
        return 0;
 }
+
+static int
+i40evf_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+       struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+       struct rte_eth_dev_data *dev_data = vf->dev_data;
+       uint32_t frame_size = mtu + ETHER_HDR_LEN
+                             + ETHER_CRC_LEN + I40E_VLAN_TAG_SIZE;
+       int ret = 0;
+
+       /* check if mtu is within the allowed range */
+       if ((mtu < ETHER_MIN_MTU) || (frame_size > I40E_FRAME_SIZE_MAX))
+               return -EINVAL;
+
+       /* MTU setting is not allowed while the port is started */
+       if (dev_data->dev_started) {
+               PMD_DRV_LOG(ERR, "port %d must be stopped before configuration",
+                           dev_data->port_id);
+               return -EBUSY;
+       }
+
+       if (frame_size > ETHER_MAX_LEN)
+               dev_data->dev_conf.rxmode.jumbo_frame = 1;
+       else
+               dev_data->dev_conf.rxmode.jumbo_frame = 0;
+
+       dev_data->dev_conf.rxmode.max_rx_pkt_len = frame_size;
+
+       return ret;
+}
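
i40evf_dev_mtu_set() above derives the frame size as mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + I40E_VLAN_TAG_SIZE and rejects values outside [ETHER_MIN_MTU, I40E_FRAME_SIZE_MAX] as well as changes on a started port. A hedged application-side fragment; the port id and MTU value are illustrative:

	/* Fragment of an application's config path; needs <rte_ethdev.h>.
	 * Reaches i40evf_dev_mtu_set() through the generic ethdev API. */
	int ret = rte_eth_dev_set_mtu(0 /* port id, assumed */, 3000);

	if (ret == -EBUSY)
		printf("stop the port before changing the MTU\n");
	else if (ret == -EINVAL)
		printf("MTU outside the supported range\n");
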
+
+static void
+i40evf_set_default_mac_addr(struct rte_eth_dev *dev,
+                           struct ether_addr *mac_addr)
+{
+       struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+
+       if (!is_valid_assigned_ether_addr(mac_addr)) {
+               PMD_DRV_LOG(ERR, "Tried to set invalid MAC address.");
+               return;
+       }
+
+       if (is_same_ether_addr(mac_addr, dev->data->mac_addrs))
+               return;
+
+       if (vf->flags & I40E_FLAG_VF_MAC_BY_PF)
+               return;
+
+       i40evf_del_mac_addr_by_addr(dev, dev->data->mac_addrs);
+
+       i40evf_add_mac_addr(dev, mac_addr, 0, 0);
+}
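
With .mac_addr_set wired up above, an application can replace the VF's default MAC address, unless the address was assigned by the PF (I40E_FLAG_VF_MAC_BY_PF), in which case the op returns without acting. A hedged usage fragment; the port id and address are illustrative and need <rte_ethdev.h> and <rte_ether.h>:

	struct ether_addr new_mac = {
		.addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }
	};

	/* Calls i40evf_set_default_mac_addr() via the ethdev API; a non-zero
	 * return only signals an invalid port or address at the ethdev layer,
	 * driver-level refusals are silent in this release. */
	if (rte_eth_dev_default_mac_addr_set(0 /* port id, assumed */, &new_mac) != 0)
		printf("default MAC update rejected\n");
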
index 33cb6da..0700253 100644 (file)
 #define I40E_FDIR_UDP_DEFAULT_LEN           400
 
 /* Wait count and interval for fdir filter programming */
-#define TREX_PATCH
-// TREX_PATCH - Values were 10 and 1000. These numbers give much better performance when
-// configuring large amount of rules
-#define I40E_FDIR_WAIT_COUNT       100
-#define I40E_FDIR_WAIT_INTERVAL_US 100
+#define I40E_FDIR_WAIT_COUNT       10
+#define I40E_FDIR_WAIT_INTERVAL_US 1000
 
 /* Wait count and interval for fdir filter flush */
 #define I40E_FDIR_FLUSH_RETRY       50
@@ -122,7 +119,13 @@ static int i40e_fdir_filter_programming(struct i40e_pf *pf,
                        enum i40e_filter_pctype pctype,
                        const struct rte_eth_fdir_filter *filter,
                        bool add);
-static int i40e_fdir_flush(struct rte_eth_dev *dev);
+static int i40e_fdir_filter_convert(const struct rte_eth_fdir_filter *input,
+                        struct i40e_fdir_filter *filter);
+static struct i40e_fdir_filter *
+i40e_sw_fdir_filter_lookup(struct i40e_fdir_info *fdir_info,
+                       const struct rte_eth_fdir_input *input);
+static int i40e_sw_fdir_filter_insert(struct i40e_pf *pf,
+                                  struct i40e_fdir_filter *filter);
 
 static int
 i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq)
@@ -254,7 +257,7 @@ i40e_fdir_setup(struct i40e_pf *pf)
 
        /* reserve memory for the fdir programming packet */
        snprintf(z_name, sizeof(z_name), "%s_%s_%d",
-                       eth_dev->driver->pci_drv.name,
+                       eth_dev->driver->pci_drv.driver.name,
                        I40E_FDIR_MZ_NAME,
                        eth_dev->data->port_id);
        mz = i40e_memzone_reserve(z_name, I40E_FDIR_PKT_LEN, SOCKET_ID_ANY);
@@ -356,8 +359,15 @@ i40e_init_flx_pld(struct i40e_pf *pf)
        /* initialize the masks */
        for (pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
             pctype <= I40E_FILTER_PCTYPE_L2_PAYLOAD; pctype++) {
-               if (!I40E_VALID_PCTYPE((enum i40e_filter_pctype)pctype))
-                       continue;
+               if (hw->mac.type == I40E_MAC_X722) {
+                       if (!I40E_VALID_PCTYPE_X722(
+                                (enum i40e_filter_pctype)pctype))
+                               continue;
+               } else {
+                       if (!I40E_VALID_PCTYPE(
+                                (enum i40e_filter_pctype)pctype))
+                               continue;
+               }
                pf->fdir.flex_mask[pctype].word_mask = 0;
                i40e_write_rx_ctl(hw, I40E_PRTQF_FD_FLXINSET(pctype), 0);
                for (i = 0; i < I40E_FDIR_BITMASK_NUM_WORD; i++) {
@@ -667,7 +677,16 @@ i40e_fdir_configure(struct rte_eth_dev *dev)
                i40e_set_flx_pld_cfg(pf, &conf->flex_set[i]);
        /* configure flex mask*/
        for (i = 0; i < conf->nb_flexmasks; i++) {
-               pctype = i40e_flowtype_to_pctype(conf->flex_mask[i].flow_type);
+               if (hw->mac.type == I40E_MAC_X722) {
+                       /* get translated pctype value in fd pctype register */
+                       pctype = (enum i40e_filter_pctype)i40e_read_rx_ctl(
+                               hw, I40E_GLQF_FD_PCTYPES(
+                               (int)i40e_flowtype_to_pctype(
+                               conf->flex_mask[i].flow_type)));
+               } else
+                       pctype = i40e_flowtype_to_pctype(
+                               conf->flex_mask[i].flow_type);
+
                i40e_set_flex_mask_on_pctype(pf, pctype, &conf->flex_mask[i]);
        }
 
@@ -732,9 +751,6 @@ i40e_fdir_fill_eth_ip_head(const struct rte_eth_fdir_input *fdir_input,
                                        fdir_input->flow.ip4_flow.ttl :
                                        I40E_FDIR_IP_DEFAULT_TTL;
                ip->type_of_service = fdir_input->flow.ip4_flow.tos;
-#ifdef TREX_PATCH
-        ip->packet_id = rte_cpu_to_be_16(fdir_input->flow.ip4_flow.ip_id);
-#endif
                /*
                 * The source and destination fields in the transmitted packet
                 * need to be presented in a reversed order with respect
@@ -755,11 +771,7 @@ i40e_fdir_fill_eth_ip_head(const struct rte_eth_fdir_input *fdir_input,
                ip6->vtc_flow =
                        rte_cpu_to_be_32(I40E_FDIR_IPv6_DEFAULT_VTC_FLOW |
                                         (fdir_input->flow.ipv6_flow.tc <<
-                                         I40E_FDIR_IPv6_TC_OFFSET)
-#ifdef TREX_PATCH
-                             | (fdir_input->flow.ipv6_flow.flow_label & 0x000fffff)
-#endif
-                             );
+                                         I40E_FDIR_IPv6_TC_OFFSET));
                ip6->payload_len =
                        rte_cpu_to_be_16(I40E_FDIR_IPv6_PAYLOAD_LEN);
                ip6->proto = fdir_input->flow.ipv6_flow.proto ?
@@ -1011,20 +1023,92 @@ i40e_check_fdir_programming_status(struct i40e_rx_queue *rxq)
        return ret;
 }
 
+static int
+i40e_fdir_filter_convert(const struct rte_eth_fdir_filter *input,
+                        struct i40e_fdir_filter *filter)
+{
+       rte_memcpy(&filter->fdir, input, sizeof(struct rte_eth_fdir_filter));
+       return 0;
+}
+
+/* Check whether the flow director filter already exists */
+static struct i40e_fdir_filter *
+i40e_sw_fdir_filter_lookup(struct i40e_fdir_info *fdir_info,
+                       const struct rte_eth_fdir_input *input)
+{
+       int ret;
+
+       ret = rte_hash_lookup(fdir_info->hash_table, (const void *)input);
+       if (ret < 0)
+               return NULL;
+
+       return fdir_info->hash_map[ret];
+}
+
+/* Add a flow director filter into the SW list */
+static int
+i40e_sw_fdir_filter_insert(struct i40e_pf *pf, struct i40e_fdir_filter *filter)
+{
+       struct i40e_fdir_info *fdir_info = &pf->fdir;
+       int ret;
+
+       ret = rte_hash_add_key(fdir_info->hash_table,
+                              &filter->fdir.input);
+       if (ret < 0) {
+               PMD_DRV_LOG(ERR,
+                           "Failed to insert fdir filter to hash table %d!",
+                           ret);
+               return ret;
+       }
+       fdir_info->hash_map[ret] = filter;
+
+       TAILQ_INSERT_TAIL(&fdir_info->fdir_list, filter, rules);
+
+       return 0;
+}
+
+/* Delete a flow director filter from the SW list */
+int
+i40e_sw_fdir_filter_del(struct i40e_pf *pf, struct rte_eth_fdir_input *input)
+{
+       struct i40e_fdir_info *fdir_info = &pf->fdir;
+       struct i40e_fdir_filter *filter;
+       int ret;
+
+       ret = rte_hash_del_key(fdir_info->hash_table, input);
+       if (ret < 0) {
+                           "Failed to delete fdir filter from hash table: %d!",
+                           "Failed to delete fdir filter to hash table %d!",
+                           ret);
+               return ret;
+       }
+       filter = fdir_info->hash_map[ret];
+       fdir_info->hash_map[ret] = NULL;
+
+       TAILQ_REMOVE(&fdir_info->fdir_list, filter, rules);
+       rte_free(filter);
+
+       return 0;
+}
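
The software bookkeeping above keeps every programmed flow director rule both in an rte_hash keyed on struct rte_eth_fdir_input (for O(1) duplicate and lookup checks) and on a TAILQ (for ordered restore after reset). A hedged sketch of how such a hash table is typically created; the name, capacity and hash function are illustrative and not taken from this patch:

#include <rte_hash.h>
#include <rte_hash_crc.h>
#include <rte_eth_ctrl.h>

static struct rte_hash *
example_fdir_hash_create(void)
{
	struct rte_hash_parameters params = {
		.name = "fdir_hash_example",		/* illustrative name */
		.entries = 1024,			/* illustrative capacity */
		.key_len = sizeof(struct rte_eth_fdir_input),
		.hash_func = rte_hash_crc,
		.hash_func_init_val = 0,
		.socket_id = 0,				/* or SOCKET_ID_ANY */
	};

	return rte_hash_create(&params);
}
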
+
 /*
  * i40e_add_del_fdir_filter - add or remove a flow director filter.
  * @pf: board private structure
  * @filter: fdir filter entry
  * @add: 0 - delete, 1 - add
  */
-static int
+int
 i40e_add_del_fdir_filter(struct rte_eth_dev *dev,
                            const struct rte_eth_fdir_filter *filter,
                            bool add)
 {
+       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
        unsigned char *pkt = (unsigned char *)pf->fdir.prg_pkt;
        enum i40e_filter_pctype pctype;
+       struct i40e_fdir_info *fdir_info = &pf->fdir;
+       struct i40e_fdir_filter *fdir_filter, *node;
+       struct i40e_fdir_filter check_filter; /* Check if the filter exists */
        int ret = 0;
 
        if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_PERFECT) {
@@ -1047,6 +1131,22 @@ i40e_add_del_fdir_filter(struct rte_eth_dev *dev,
                return -EINVAL;
        }
 
+       /* Check if the filter already exists in the SW list */
+       memset(&check_filter, 0, sizeof(check_filter));
+       i40e_fdir_filter_convert(filter, &check_filter);
+       node = i40e_sw_fdir_filter_lookup(fdir_info, &check_filter.fdir.input);
+       if (add && node) {
+               PMD_DRV_LOG(ERR,
+                           "Conflict with existing flow director rules!");
+               return -EINVAL;
+       }
+
+       if (!add && !node) {
+               PMD_DRV_LOG(ERR,
+                           "There's no corresponding flow director filter!");
+               return -EINVAL;
+       }
+
        memset(pkt, 0, I40E_FDIR_PKT_LEN);
 
        ret = i40e_fdir_construct_pkt(pf, &filter->input, pkt);
@@ -1054,13 +1154,32 @@ i40e_add_del_fdir_filter(struct rte_eth_dev *dev,
                PMD_DRV_LOG(ERR, "construct packet for fdir fails.");
                return ret;
        }
-       pctype = i40e_flowtype_to_pctype(filter->input.flow_type);
+
+       if (hw->mac.type == I40E_MAC_X722) {
+               /* get translated pctype value in fd pctype register */
+               pctype = (enum i40e_filter_pctype)i40e_read_rx_ctl(
+                       hw, I40E_GLQF_FD_PCTYPES(
+                       (int)i40e_flowtype_to_pctype(
+                       filter->input.flow_type)));
+       } else
+               pctype = i40e_flowtype_to_pctype(filter->input.flow_type);
+
        ret = i40e_fdir_filter_programming(pf, pctype, filter, add);
        if (ret < 0) {
                PMD_DRV_LOG(ERR, "fdir programming fails for PCTYPE(%u).",
                            pctype);
                return ret;
        }
+
+       if (add) {
+               fdir_filter = rte_zmalloc("fdir_filter",
+                                         sizeof(*fdir_filter), 0);
+               rte_memcpy(fdir_filter, &check_filter, sizeof(check_filter));
+               ret = i40e_sw_fdir_filter_insert(pf, fdir_filter);
+       } else {
+               ret = i40e_sw_fdir_filter_del(pf, &node->fdir.input);
+       }
+
        return ret;
 }
 
@@ -1153,12 +1272,8 @@ i40e_fdir_filter_programming(struct i40e_pf *pf,
        fdirdp->dtype_cmd_cntindex |=
                        rte_cpu_to_le_32(I40E_TXD_FLTR_QW1_CNT_ENA_MASK);
        fdirdp->dtype_cmd_cntindex |=
-#ifdef TREX_PATCH
-                       rte_cpu_to_le_32((fdir_action->stat_count_index <<
-#else
                        rte_cpu_to_le_32(
                        ((uint32_t)pf->fdir.match_counter_index <<
-#endif
                        I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
                        I40E_TXD_FLTR_QW1_CNTINDEX_MASK);
 
@@ -1182,17 +1297,11 @@ i40e_fdir_filter_programming(struct i40e_pf *pf,
        I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
 
        for (i = 0; i < I40E_FDIR_WAIT_COUNT; i++) {
-#ifndef TREX_PATCH
-        /* itay: moved this delay after the check to avoid first check */
                rte_delay_us(I40E_FDIR_WAIT_INTERVAL_US);
-#endif
                if ((txdp->cmd_type_offset_bsz &
                                rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) ==
                                rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
                        break;
-#ifdef TREX_PATCH
-        rte_delay_us(I40E_FDIR_WAIT_INTERVAL_US);
-#endif
        }
        if (i >= I40E_FDIR_WAIT_COUNT) {
                PMD_DRV_LOG(ERR, "Failed to program FDIR filter:"
@@ -1200,10 +1309,7 @@ i40e_fdir_filter_programming(struct i40e_pf *pf,
                return -ETIMEDOUT;
        }
        /* totally delay 10 ms to check programming status*/
-#ifndef TREX_PATCH
-    /* itay: tests show this is not needed */
        rte_delay_us((I40E_FDIR_WAIT_COUNT - i) * I40E_FDIR_WAIT_INTERVAL_US);
-#endif
        if (i40e_check_fdir_programming_status(rxq) < 0) {
                PMD_DRV_LOG(ERR, "Failed to program FDIR filter:"
                            " programming status reported.");
@@ -1217,7 +1323,7 @@ i40e_fdir_filter_programming(struct i40e_pf *pf,
  * i40e_fdir_flush - clear all filters of Flow Director table
  * @pf: board private structure
  */
-static int
+int
 i40e_fdir_flush(struct rte_eth_dev *dev)
 {
        struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
@@ -1296,6 +1402,7 @@ i40e_fdir_info_get_flex_mask(struct i40e_pf *pf,
 {
        struct i40e_fdir_flex_mask *mask;
        struct rte_eth_fdir_flex_mask *ptr = flex_mask;
+       struct i40e_hw *hw = I40E_PF_TO_HW(pf);
        uint16_t flow_type;
        uint8_t i, j;
        uint16_t off_bytes, mask_tmp;
@@ -1304,8 +1411,13 @@ i40e_fdir_info_get_flex_mask(struct i40e_pf *pf,
             i <= I40E_FILTER_PCTYPE_L2_PAYLOAD;
             i++) {
                mask =  &pf->fdir.flex_mask[i];
-               if (!I40E_VALID_PCTYPE((enum i40e_filter_pctype)i))
-                       continue;
+               if (hw->mac.type == I40E_MAC_X722) {
+                       if (!I40E_VALID_PCTYPE_X722((enum i40e_filter_pctype)i))
+                               continue;
+               } else {
+                       if (!I40E_VALID_PCTYPE((enum i40e_filter_pctype)i))
+                               continue;
+               }
                flow_type = i40e_pctype_to_flowtype((enum i40e_filter_pctype)i);
                for (j = 0; j < I40E_FDIR_MAX_FLEXWORD_NUM; j++) {
                        if (mask->word_mask & I40E_FLEX_WORD_MASK(j)) {
@@ -1472,3 +1584,34 @@ i40e_fdir_ctrl_func(struct rte_eth_dev *dev,
        }
        return ret;
 }
+
+/* Restore flow director filter */
+void
+i40e_fdir_filter_restore(struct i40e_pf *pf)
+{
+       struct rte_eth_dev *dev = I40E_VSI_TO_ETH_DEV(pf->main_vsi);
+       struct i40e_fdir_filter_list *fdir_list = &pf->fdir.fdir_list;
+       struct i40e_fdir_filter *f;
+#ifdef RTE_LIBRTE_I40E_DEBUG_DRIVER
+       struct i40e_hw *hw = I40E_PF_TO_HW(pf);
+       uint32_t fdstat;
+       uint32_t guarant_cnt;  /**< Number of filters in guaranteed spaces. */
+       uint32_t best_cnt;     /**< Number of filters in best effort spaces. */
+#endif /* RTE_LIBRTE_I40E_DEBUG_DRIVER */
+
+       TAILQ_FOREACH(f, fdir_list, rules)
+               i40e_add_del_fdir_filter(dev, &f->fdir, TRUE);
+
+#ifdef RTE_LIBRTE_I40E_DEBUG_DRIVER
+       fdstat = I40E_READ_REG(hw, I40E_PFQF_FDSTAT);
+       guarant_cnt =
+               (uint32_t)((fdstat & I40E_PFQF_FDSTAT_GUARANT_CNT_MASK) >>
+                          I40E_PFQF_FDSTAT_GUARANT_CNT_SHIFT);
+       best_cnt =
+               (uint32_t)((fdstat & I40E_PFQF_FDSTAT_BEST_CNT_MASK) >>
+                          I40E_PFQF_FDSTAT_BEST_CNT_SHIFT);
+#endif /* RTE_LIBRTE_I40E_DEBUG_DRIVER */
+
+       PMD_DRV_LOG(INFO, "FDIR: Guarant count: %d,  Best count: %d",
+                   guarant_cnt, best_cnt);
+}
diff --git a/src/dpdk/drivers/net/i40e/i40e_flow.c b/src/dpdk/drivers/net/i40e/i40e_flow.c
new file mode 100644 (file)
index 0000000..76bb332
--- /dev/null
@@ -0,0 +1,1849 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) 2016 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/queue.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdarg.h>
+
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_memzone.h>
+#include <rte_malloc.h>
+#include <rte_eth_ctrl.h>
+#include <rte_tailq.h>
+#include <rte_flow_driver.h>
+
+#include "i40e_logs.h"
+#include "base/i40e_type.h"
+#include "base/i40e_prototype.h"
+#include "i40e_ethdev.h"
+
+#define I40E_IPV4_TC_SHIFT     4
+#define I40E_IPV6_TC_MASK      (0x00FF << I40E_IPV4_TC_SHIFT)
+#define I40E_IPV6_FRAG_HEADER  44
+#define I40E_TENANT_ARRAY_NUM  3
+#define I40E_TCI_MASK          0xFFFF
+
+static int i40e_flow_validate(struct rte_eth_dev *dev,
+                             const struct rte_flow_attr *attr,
+                             const struct rte_flow_item pattern[],
+                             const struct rte_flow_action actions[],
+                             struct rte_flow_error *error);
+static struct rte_flow *i40e_flow_create(struct rte_eth_dev *dev,
+                                        const struct rte_flow_attr *attr,
+                                        const struct rte_flow_item pattern[],
+                                        const struct rte_flow_action actions[],
+                                        struct rte_flow_error *error);
+static int i40e_flow_destroy(struct rte_eth_dev *dev,
+                            struct rte_flow *flow,
+                            struct rte_flow_error *error);
+static int i40e_flow_flush(struct rte_eth_dev *dev,
+                          struct rte_flow_error *error);
+static int
+i40e_flow_parse_ethertype_pattern(struct rte_eth_dev *dev,
+                                 const struct rte_flow_item *pattern,
+                                 struct rte_flow_error *error,
+                                 struct rte_eth_ethertype_filter *filter);
+static int i40e_flow_parse_ethertype_action(struct rte_eth_dev *dev,
+                                   const struct rte_flow_action *actions,
+                                   struct rte_flow_error *error,
+                                   struct rte_eth_ethertype_filter *filter);
+static int i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
+                                       const struct rte_flow_item *pattern,
+                                       struct rte_flow_error *error,
+                                       struct rte_eth_fdir_filter *filter);
+static int i40e_flow_parse_fdir_action(struct rte_eth_dev *dev,
+                                      const struct rte_flow_action *actions,
+                                      struct rte_flow_error *error,
+                                      struct rte_eth_fdir_filter *filter);
+static int i40e_flow_parse_tunnel_pattern(__rte_unused struct rte_eth_dev *dev,
+                                 const struct rte_flow_item *pattern,
+                                 struct rte_flow_error *error,
+                                 struct rte_eth_tunnel_filter_conf *filter);
+static int i40e_flow_parse_tunnel_action(struct rte_eth_dev *dev,
+                                const struct rte_flow_action *actions,
+                                struct rte_flow_error *error,
+                                struct rte_eth_tunnel_filter_conf *filter);
+static int i40e_flow_parse_attr(const struct rte_flow_attr *attr,
+                               struct rte_flow_error *error);
+static int i40e_flow_parse_ethertype_filter(struct rte_eth_dev *dev,
+                                   const struct rte_flow_attr *attr,
+                                   const struct rte_flow_item pattern[],
+                                   const struct rte_flow_action actions[],
+                                   struct rte_flow_error *error,
+                                   union i40e_filter_t *filter);
+static int i40e_flow_parse_fdir_filter(struct rte_eth_dev *dev,
+                                      const struct rte_flow_attr *attr,
+                                      const struct rte_flow_item pattern[],
+                                      const struct rte_flow_action actions[],
+                                      struct rte_flow_error *error,
+                                      union i40e_filter_t *filter);
+static int i40e_flow_parse_tunnel_filter(struct rte_eth_dev *dev,
+                                        const struct rte_flow_attr *attr,
+                                        const struct rte_flow_item pattern[],
+                                        const struct rte_flow_action actions[],
+                                        struct rte_flow_error *error,
+                                        union i40e_filter_t *filter);
+static int i40e_flow_destroy_ethertype_filter(struct i40e_pf *pf,
+                                     struct i40e_ethertype_filter *filter);
+static int i40e_flow_destroy_tunnel_filter(struct i40e_pf *pf,
+                                          struct i40e_tunnel_filter *filter);
+static int i40e_flow_flush_fdir_filter(struct i40e_pf *pf);
+static int i40e_flow_flush_ethertype_filter(struct i40e_pf *pf);
+static int i40e_flow_flush_tunnel_filter(struct i40e_pf *pf);
+
+const struct rte_flow_ops i40e_flow_ops = {
+       .validate = i40e_flow_validate,
+       .create = i40e_flow_create,
+       .destroy = i40e_flow_destroy,
+       .flush = i40e_flow_flush,
+};
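
i40e_flow_ops plugs the driver into the generic rte_flow API, translating validated flows onto the existing ethertype, flow director and tunnel filter paths. A hedged fragment of an application's flow setup that matches one of the FDIR patterns listed below (IPv4 + UDP) and steers hits to queue 1; whether a given spec/mask is accepted depends on the parsers in this file, and every value here is illustrative:

	uint8_t port_id = 0;	/* assumed port */
	struct rte_flow_error flow_err;
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item_ipv4 ip_spec = {
		.hdr.dst_addr = rte_cpu_to_be_32(IPv4(192, 168, 0, 1)),
	};
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &ip_spec },
		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 1 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	/* Needs <rte_flow.h> and <rte_ip.h>; validate first, then create. */
	if (rte_flow_validate(port_id, &attr, pattern, actions, &flow_err) == 0)
		(void)rte_flow_create(port_id, &attr, pattern, actions, &flow_err);
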
+
+union i40e_filter_t cons_filter;
+enum rte_filter_type cons_filter_type = RTE_ETH_FILTER_NONE;
+
+/* Pattern matched ethertype filter */
+static enum rte_flow_item_type pattern_ethertype[] = {
+       RTE_FLOW_ITEM_TYPE_ETH,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+/* Pattern matched flow director filter */
+static enum rte_flow_item_type pattern_fdir_ipv4[] = {
+       RTE_FLOW_ITEM_TYPE_IPV4,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv4_ext[] = {
+       RTE_FLOW_ITEM_TYPE_ETH,
+       RTE_FLOW_ITEM_TYPE_IPV4,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv4_udp[] = {
+       RTE_FLOW_ITEM_TYPE_IPV4,
+       RTE_FLOW_ITEM_TYPE_UDP,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv4_udp_ext[] = {
+       RTE_FLOW_ITEM_TYPE_ETH,
+       RTE_FLOW_ITEM_TYPE_IPV4,
+       RTE_FLOW_ITEM_TYPE_UDP,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv4_tcp[] = {
+       RTE_FLOW_ITEM_TYPE_IPV4,
+       RTE_FLOW_ITEM_TYPE_TCP,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv4_tcp_ext[] = {
+       RTE_FLOW_ITEM_TYPE_ETH,
+       RTE_FLOW_ITEM_TYPE_IPV4,
+       RTE_FLOW_ITEM_TYPE_TCP,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv4_sctp[] = {
+       RTE_FLOW_ITEM_TYPE_IPV4,
+       RTE_FLOW_ITEM_TYPE_SCTP,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv4_sctp_ext[] = {
+       RTE_FLOW_ITEM_TYPE_ETH,
+       RTE_FLOW_ITEM_TYPE_IPV4,
+       RTE_FLOW_ITEM_TYPE_SCTP,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv6[] = {
+       RTE_FLOW_ITEM_TYPE_IPV6,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv6_ext[] = {
+       RTE_FLOW_ITEM_TYPE_ETH,
+       RTE_FLOW_ITEM_TYPE_IPV6,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv6_udp[] = {
+       RTE_FLOW_ITEM_TYPE_IPV6,
+       RTE_FLOW_ITEM_TYPE_UDP,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv6_udp_ext[] = {
+       RTE_FLOW_ITEM_TYPE_ETH,
+       RTE_FLOW_ITEM_TYPE_IPV6,
+       RTE_FLOW_ITEM_TYPE_UDP,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv6_tcp[] = {
+       RTE_FLOW_ITEM_TYPE_IPV6,
+       RTE_FLOW_ITEM_TYPE_TCP,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv6_tcp_ext[] = {
+       RTE_FLOW_ITEM_TYPE_ETH,
+       RTE_FLOW_ITEM_TYPE_IPV6,
+       RTE_FLOW_ITEM_TYPE_TCP,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv6_sctp[] = {
+       RTE_FLOW_ITEM_TYPE_IPV6,
+       RTE_FLOW_ITEM_TYPE_SCTP,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv6_sctp_ext[] = {
+       RTE_FLOW_ITEM_TYPE_ETH,
+       RTE_FLOW_ITEM_TYPE_IPV6,
+       RTE_FLOW_ITEM_TYPE_SCTP,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+/* Pattern matched tunnel filter */
+static enum rte_flow_item_type pattern_vxlan_1[] = {
+       RTE_FLOW_ITEM_TYPE_ETH,
+       RTE_FLOW_ITEM_TYPE_IPV4,
+       RTE_FLOW_ITEM_TYPE_UDP,
+       RTE_FLOW_ITEM_TYPE_VXLAN,
+       RTE_FLOW_ITEM_TYPE_ETH,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_vxlan_2[] = {
+       RTE_FLOW_ITEM_TYPE_ETH,
+       RTE_FLOW_ITEM_TYPE_IPV6,
+       RTE_FLOW_ITEM_TYPE_UDP,
+       RTE_FLOW_ITEM_TYPE_VXLAN,
+       RTE_FLOW_ITEM_TYPE_ETH,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_vxlan_3[] = {
+       RTE_FLOW_ITEM_TYPE_ETH,
+       RTE_FLOW_ITEM_TYPE_IPV4,
+       RTE_FLOW_ITEM_TYPE_UDP,
+       RTE_FLOW_ITEM_TYPE_VXLAN,
+       RTE_FLOW_ITEM_TYPE_ETH,
+       RTE_FLOW_ITEM_TYPE_VLAN,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_vxlan_4[] = {
+       RTE_FLOW_ITEM_TYPE_ETH,
+       RTE_FLOW_ITEM_TYPE_IPV6,
+       RTE_FLOW_ITEM_TYPE_UDP,
+       RTE_FLOW_ITEM_TYPE_VXLAN,
+       RTE_FLOW_ITEM_TYPE_ETH,
+       RTE_FLOW_ITEM_TYPE_VLAN,
+       RTE_FLOW_ITEM_TYPE_END,
+};
+
+static struct i40e_valid_pattern i40e_supported_patterns[] = {
+       /* Ethertype */
+       { pattern_ethertype, i40e_flow_parse_ethertype_filter },
+       /* FDIR */
+       { pattern_fdir_ipv4, i40e_flow_parse_fdir_filter },
+       { pattern_fdir_ipv4_ext, i40e_flow_parse_fdir_filter },
+       { pattern_fdir_ipv4_udp, i40e_flow_parse_fdir_filter },
+       { pattern_fdir_ipv4_udp_ext, i40e_flow_parse_fdir_filter },
+       { pattern_fdir_ipv4_tcp, i40e_flow_parse_fdir_filter },
+       { pattern_fdir_ipv4_tcp_ext, i40e_flow_parse_fdir_filter },
+       { pattern_fdir_ipv4_sctp, i40e_flow_parse_fdir_filter },
+       { pattern_fdir_ipv4_sctp_ext, i40e_flow_parse_fdir_filter },
+       { pattern_fdir_ipv6, i40e_flow_parse_fdir_filter },
+       { pattern_fdir_ipv6_ext, i40e_flow_parse_fdir_filter },
+       { pattern_fdir_ipv6_udp, i40e_flow_parse_fdir_filter },
+       { pattern_fdir_ipv6_udp_ext, i40e_flow_parse_fdir_filter },
+       { pattern_fdir_ipv6_tcp, i40e_flow_parse_fdir_filter },
+       { pattern_fdir_ipv6_tcp_ext, i40e_flow_parse_fdir_filter },
+       { pattern_fdir_ipv6_sctp, i40e_flow_parse_fdir_filter },
+       { pattern_fdir_ipv6_sctp_ext, i40e_flow_parse_fdir_filter },
+       /* tunnel */
+       { pattern_vxlan_1, i40e_flow_parse_tunnel_filter },
+       { pattern_vxlan_2, i40e_flow_parse_tunnel_filter },
+       { pattern_vxlan_3, i40e_flow_parse_tunnel_filter },
+       { pattern_vxlan_4, i40e_flow_parse_tunnel_filter },
+};
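+
+/* Illustrative only (not used by the driver): a caller-side pattern that
+ * would match pattern_fdir_ipv4_udp above.  The ipv4_spec/ipv4_mask and
+ * udp_spec/udp_mask variables are hypothetical and would be filled in by
+ * the application.
+ *
+ *     struct rte_flow_item items[] = {
+ *             { .type = RTE_FLOW_ITEM_TYPE_IPV4,
+ *               .spec = &ipv4_spec, .mask = &ipv4_mask },
+ *             { .type = RTE_FLOW_ITEM_TYPE_UDP,
+ *               .spec = &udp_spec, .mask = &udp_mask },
+ *             { .type = RTE_FLOW_ITEM_TYPE_END },
+ *     };
+ */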
+
+#define NEXT_ITEM_OF_ACTION(act, actions, index)                        \
+       do {                                                            \
+               act = actions + index;                                  \
+               while (act->type == RTE_FLOW_ACTION_TYPE_VOID) {        \
+                       index++;                                        \
+                       act = actions + index;                          \
+               }                                                       \
+       } while (0)
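+
+/* For example (illustrative), with actions = { VOID, QUEUE, END } and
+ * index = 0, NEXT_ITEM_OF_ACTION() leaves 'act' pointing at the QUEUE
+ * action and 'index' at 1.
+ */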
+
+/* Find the first VOID or non-VOID item pointer */
+static const struct rte_flow_item *
+i40e_find_first_item(const struct rte_flow_item *item, bool is_void)
+{
+       bool is_find;
+
+       while (item->type != RTE_FLOW_ITEM_TYPE_END) {
+               if (is_void)
+                       is_find = item->type == RTE_FLOW_ITEM_TYPE_VOID;
+               else
+                       is_find = item->type != RTE_FLOW_ITEM_TYPE_VOID;
+               if (is_find)
+                       break;
+               item++;
+       }
+       return item;
+}
+
+/* Skip all VOID items of the pattern */
+static void
+i40e_pattern_skip_void_item(struct rte_flow_item *items,
+                           const struct rte_flow_item *pattern)
+{
+       uint32_t cpy_count = 0;
+       const struct rte_flow_item *pb = pattern, *pe = pattern;
+
+       for (;;) {
+               /* Find a non-void item first */
+               pb = i40e_find_first_item(pb, false);
+               if (pb->type == RTE_FLOW_ITEM_TYPE_END) {
+                       pe = pb;
+                       break;
+               }
+
+               /* Find a void item */
+               pe = i40e_find_first_item(pb + 1, true);
+
+               cpy_count = pe - pb;
+               rte_memcpy(items, pb, sizeof(struct rte_flow_item) * cpy_count);
+
+               items += cpy_count;
+
+               if (pe->type == RTE_FLOW_ITEM_TYPE_END) {
+                       pb = pe;
+                       break;
+               }
+
+               pb = pe + 1;
+       }
+       /* Copy the END item. */
+       rte_memcpy(items, pe, sizeof(struct rte_flow_item));
+}
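+
+/* For example (illustrative): a pattern of { ETH, VOID, IPV4, VOID, END }
+ * is compacted into { ETH, IPV4, END }.  The 'items' buffer supplied by
+ * the caller must have room for the non-VOID items plus the END item.
+ */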
+
+/* Check if the pattern matches a supported item type array */
+static bool
+i40e_match_pattern(enum rte_flow_item_type *item_array,
+                  struct rte_flow_item *pattern)
+{
+       struct rte_flow_item *item = pattern;
+
+       while ((*item_array == item->type) &&
+              (*item_array != RTE_FLOW_ITEM_TYPE_END)) {
+               item_array++;
+               item++;
+       }
+
+       return (*item_array == RTE_FLOW_ITEM_TYPE_END &&
+               item->type == RTE_FLOW_ITEM_TYPE_END);
+}
+
+/* Find if there's parse filter function matched */
+static parse_filter_t
+i40e_find_parse_filter_func(struct rte_flow_item *pattern)
+{
+       parse_filter_t parse_filter = NULL;
+       uint8_t i = 0;
+
+       for (; i < RTE_DIM(i40e_supported_patterns); i++) {
+               if (i40e_match_pattern(i40e_supported_patterns[i].items,
+                                       pattern)) {
+                       parse_filter = i40e_supported_patterns[i].parse_filter;
+                       break;
+               }
+       }
+
+       return parse_filter;
+}
+
+/* Parse attributes */
+static int
+i40e_flow_parse_attr(const struct rte_flow_attr *attr,
+                    struct rte_flow_error *error)
+{
+       /* Must be input direction */
+       if (!attr->ingress) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+                                  attr, "Only support ingress.");
+               return -rte_errno;
+       }
+
+       /* Not supported */
+       if (attr->egress) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+                                  attr, "Not support egress.");
+               return -rte_errno;
+       }
+
+       /* Not supported */
+       if (attr->priority) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+                                  attr, "Not support priority.");
+               return -rte_errno;
+       }
+
+       /* Not supported */
+       if (attr->group) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+                                  attr, "Not support group.");
+               return -rte_errno;
+       }
+
+       return 0;
+}
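+
+/* Illustrative only: the single attribute layout accepted above is a plain
+ * ingress rule, e.g.
+ *
+ *     struct rte_flow_attr attr = { .ingress = 1 };
+ *
+ * with group, priority and egress all left at 0.
+ */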
+
+static uint16_t
+i40e_get_outer_vlan(struct rte_eth_dev *dev)
+{
+       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       int qinq = dev->data->dev_conf.rxmode.hw_vlan_extend;
+       uint64_t reg_r = 0;
+       uint16_t reg_id;
+       uint16_t tpid;
+
+       if (qinq)
+               reg_id = 2;
+       else
+               reg_id = 3;
+
+       i40e_aq_debug_read_register(hw, I40E_GL_SWT_L2TAGCTRL(reg_id),
+                                   &reg_r, NULL);
+
+       tpid = (reg_r >> I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_SHIFT) & 0xFFFF;
+
+       return tpid;
+}
+
+/* 1. The 'last' field of an item must be NULL (ranges are not supported).
+ * 2. Supported filter types: MAC_ETHTYPE and ETHTYPE.
+ * 3. SRC mac_addr mask should be 00:00:00:00:00:00.
+ * 4. DST mac_addr mask should be 00:00:00:00:00:00 or
+ *    FF:FF:FF:FF:FF:FF
+ * 5. Ether_type mask should be 0xFFFF.
+ */
+static int
+i40e_flow_parse_ethertype_pattern(struct rte_eth_dev *dev,
+                                 const struct rte_flow_item *pattern,
+                                 struct rte_flow_error *error,
+                                 struct rte_eth_ethertype_filter *filter)
+{
+       const struct rte_flow_item *item = pattern;
+       const struct rte_flow_item_eth *eth_spec;
+       const struct rte_flow_item_eth *eth_mask;
+       enum rte_flow_item_type item_type;
+       uint16_t outer_tpid;
+
+       outer_tpid = i40e_get_outer_vlan(dev);
+
+       for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
+               if (item->last) {
+                       rte_flow_error_set(error, EINVAL,
+                                          RTE_FLOW_ERROR_TYPE_ITEM,
+                                          item,
+                                          "Not support range");
+                       return -rte_errno;
+               }
+               item_type = item->type;
+               switch (item_type) {
+               case RTE_FLOW_ITEM_TYPE_ETH:
+                       eth_spec = (const struct rte_flow_item_eth *)item->spec;
+                       eth_mask = (const struct rte_flow_item_eth *)item->mask;
+                       /* Get the MAC info. */
+                       if (!eth_spec || !eth_mask) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "NULL ETH spec/mask");
+                               return -rte_errno;
+                       }
+
+                       /* Mask bits of source MAC address must be full of 0.
+                        * Mask bits of destination MAC address must be full
+                        * of 1 or full of 0.
+                        */
+                       if (!is_zero_ether_addr(&eth_mask->src) ||
+                           (!is_zero_ether_addr(&eth_mask->dst) &&
+                            !is_broadcast_ether_addr(&eth_mask->dst))) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "Invalid MAC_addr mask");
+                               return -rte_errno;
+                       }
+
+                       if ((eth_mask->type & UINT16_MAX) != UINT16_MAX) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "Invalid ethertype mask");
+                               return -rte_errno;
+                       }
+
+                       /* If mask bits of destination MAC address
+                        * are full of 1, set RTE_ETHTYPE_FLAGS_MAC.
+                        */
+                       if (is_broadcast_ether_addr(&eth_mask->dst)) {
+                               filter->mac_addr = eth_spec->dst;
+                               filter->flags |= RTE_ETHTYPE_FLAGS_MAC;
+                       } else {
+                               filter->flags &= ~RTE_ETHTYPE_FLAGS_MAC;
+                       }
+                       filter->ether_type = rte_be_to_cpu_16(eth_spec->type);
+
+                       if (filter->ether_type == ETHER_TYPE_IPv4 ||
+                           filter->ether_type == ETHER_TYPE_IPv6 ||
+                           filter->ether_type == outer_tpid) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "Unsupported ether_type in"
+                                                  " control packet filter.");
+                               return -rte_errno;
+                       }
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       return 0;
+}
+
+/* Ethertype action only supports QUEUE or DROP. */
+static int
+i40e_flow_parse_ethertype_action(struct rte_eth_dev *dev,
+                                const struct rte_flow_action *actions,
+                                struct rte_flow_error *error,
+                                struct rte_eth_ethertype_filter *filter)
+{
+       struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+       const struct rte_flow_action *act;
+       const struct rte_flow_action_queue *act_q;
+       uint32_t index = 0;
+
+       /* Check if the first non-void action is QUEUE or DROP. */
+       NEXT_ITEM_OF_ACTION(act, actions, index);
+       if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE &&
+           act->type != RTE_FLOW_ACTION_TYPE_DROP) {
+               rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+                                  act, "Not supported action.");
+               return -rte_errno;
+       }
+
+       if (act->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
+               act_q = (const struct rte_flow_action_queue *)act->conf;
+               filter->queue = act_q->index;
+               if (filter->queue >= pf->dev_data->nb_rx_queues) {
+                       rte_flow_error_set(error, EINVAL,
+                                          RTE_FLOW_ERROR_TYPE_ACTION,
+                                          act, "Invalid queue ID for"
+                                          " ethertype_filter.");
+                       return -rte_errno;
+               }
+       } else {
+               filter->flags |= RTE_ETHTYPE_FLAGS_DROP;
+       }
+
+       /* Check if the next non-void item is END */
+       index++;
+       NEXT_ITEM_OF_ACTION(act, actions, index);
+       if (act->type != RTE_FLOW_ACTION_TYPE_END) {
+               rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+                                  act, "Not supported action.");
+               return -rte_errno;
+       }
+
+       return 0;
+}
+
+static int
+i40e_flow_parse_ethertype_filter(struct rte_eth_dev *dev,
+                                const struct rte_flow_attr *attr,
+                                const struct rte_flow_item pattern[],
+                                const struct rte_flow_action actions[],
+                                struct rte_flow_error *error,
+                                union i40e_filter_t *filter)
+{
+       struct rte_eth_ethertype_filter *ethertype_filter =
+               &filter->ethertype_filter;
+       int ret;
+
+       ret = i40e_flow_parse_ethertype_pattern(dev, pattern, error,
+                                               ethertype_filter);
+       if (ret)
+               return ret;
+
+       ret = i40e_flow_parse_ethertype_action(dev, actions, error,
+                                              ethertype_filter);
+       if (ret)
+               return ret;
+
+       ret = i40e_flow_parse_attr(attr, error);
+       if (ret)
+               return ret;
+
+       cons_filter_type = RTE_ETH_FILTER_ETHERTYPE;
+
+       return ret;
+}
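+
+/* Illustrative only (application side, not part of the driver): an
+ * ethertype rule steering ARP frames to Rx queue 1.  All names below are
+ * hypothetical caller variables.
+ *
+ *     struct rte_flow_item_eth eth_spec = {
+ *             .type = rte_cpu_to_be_16(ETHER_TYPE_ARP),
+ *     };
+ *     struct rte_flow_item_eth eth_mask = { .type = 0xffff };
+ *     struct rte_flow_item pattern[] = {
+ *             { .type = RTE_FLOW_ITEM_TYPE_ETH,
+ *               .spec = &eth_spec, .mask = &eth_mask },
+ *             { .type = RTE_FLOW_ITEM_TYPE_END },
+ *     };
+ *     struct rte_flow_action_queue queue = { .index = 1 };
+ *     struct rte_flow_action actions[] = {
+ *             { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
+ *             { .type = RTE_FLOW_ACTION_TYPE_END },
+ *     };
+ */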
+
+/* 1. The 'last' field of an item must be NULL (ranges are not supported).
+ * 2. Supported flow type and input set: refer to array
+ *    default_inset_table in i40e_ethdev.c.
+ * 3. Mask of fields which need to be matched should be
+ *    filled with 1.
+ * 4. Mask of fields which need not be matched should be
+ *    filled with 0.
+ */
+static int
+i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
+                            const struct rte_flow_item *pattern,
+                            struct rte_flow_error *error,
+                            struct rte_eth_fdir_filter *filter)
+{
+       struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+       const struct rte_flow_item *item = pattern;
+       const struct rte_flow_item_eth *eth_spec, *eth_mask;
+       const struct rte_flow_item_ipv4 *ipv4_spec, *ipv4_mask;
+       const struct rte_flow_item_ipv6 *ipv6_spec, *ipv6_mask;
+       const struct rte_flow_item_tcp *tcp_spec, *tcp_mask;
+       const struct rte_flow_item_udp *udp_spec, *udp_mask;
+       const struct rte_flow_item_sctp *sctp_spec, *sctp_mask;
+       const struct rte_flow_item_vf *vf_spec;
+       uint32_t flow_type = RTE_ETH_FLOW_UNKNOWN;
+       enum i40e_filter_pctype pctype;
+       uint64_t input_set = I40E_INSET_NONE;
+       uint16_t flag_offset;
+       enum rte_flow_item_type item_type;
+       enum rte_flow_item_type l3 = RTE_FLOW_ITEM_TYPE_END;
+       uint32_t j;
+
+       for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
+               if (item->last) {
+                       rte_flow_error_set(error, EINVAL,
+                                          RTE_FLOW_ERROR_TYPE_ITEM,
+                                          item,
+                                          "Not support range");
+                       return -rte_errno;
+               }
+               item_type = item->type;
+               switch (item_type) {
+               case RTE_FLOW_ITEM_TYPE_ETH:
+                       eth_spec = (const struct rte_flow_item_eth *)item->spec;
+                       eth_mask = (const struct rte_flow_item_eth *)item->mask;
+                       if (eth_spec || eth_mask) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "Invalid ETH spec/mask");
+                               return -rte_errno;
+                       }
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV4:
+                       l3 = RTE_FLOW_ITEM_TYPE_IPV4;
+                       ipv4_spec =
+                               (const struct rte_flow_item_ipv4 *)item->spec;
+                       ipv4_mask =
+                               (const struct rte_flow_item_ipv4 *)item->mask;
+                       if (!ipv4_spec || !ipv4_mask) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "NULL IPv4 spec/mask");
+                               return -rte_errno;
+                       }
+
+                       /* Check IPv4 mask and update input set */
+                       if (ipv4_mask->hdr.version_ihl ||
+                           ipv4_mask->hdr.total_length ||
+                           ipv4_mask->hdr.packet_id ||
+                           ipv4_mask->hdr.fragment_offset ||
+                           ipv4_mask->hdr.hdr_checksum) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "Invalid IPv4 mask.");
+                               return -rte_errno;
+                       }
+
+                       if (ipv4_mask->hdr.src_addr == UINT32_MAX)
+                               input_set |= I40E_INSET_IPV4_SRC;
+                       if (ipv4_mask->hdr.dst_addr == UINT32_MAX)
+                               input_set |= I40E_INSET_IPV4_DST;
+                       if (ipv4_mask->hdr.type_of_service == UINT8_MAX)
+                               input_set |= I40E_INSET_IPV4_TOS;
+                       if (ipv4_mask->hdr.time_to_live == UINT8_MAX)
+                               input_set |= I40E_INSET_IPV4_TTL;
+                       if (ipv4_mask->hdr.next_proto_id == UINT8_MAX)
+                               input_set |= I40E_INSET_IPV4_PROTO;
+
+                       /* Get filter info */
+                       flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_OTHER;
+                       /* Check if it is fragment. */
+                       flag_offset =
+                             rte_be_to_cpu_16(ipv4_spec->hdr.fragment_offset);
+                       if (flag_offset & IPV4_HDR_OFFSET_MASK ||
+                           flag_offset & IPV4_HDR_MF_FLAG)
+                               flow_type = RTE_ETH_FLOW_FRAG_IPV4;
+
+                       /* Get the filter info */
+                       filter->input.flow.ip4_flow.proto =
+                               ipv4_spec->hdr.next_proto_id;
+                       filter->input.flow.ip4_flow.tos =
+                               ipv4_spec->hdr.type_of_service;
+                       filter->input.flow.ip4_flow.ttl =
+                               ipv4_spec->hdr.time_to_live;
+                       filter->input.flow.ip4_flow.src_ip =
+                               ipv4_spec->hdr.src_addr;
+                       filter->input.flow.ip4_flow.dst_ip =
+                               ipv4_spec->hdr.dst_addr;
+
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV6:
+                       l3 = RTE_FLOW_ITEM_TYPE_IPV6;
+                       ipv6_spec =
+                               (const struct rte_flow_item_ipv6 *)item->spec;
+                       ipv6_mask =
+                               (const struct rte_flow_item_ipv6 *)item->mask;
+                       if (!ipv6_spec || !ipv6_mask) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "NULL IPv6 spec/mask");
+                               return -rte_errno;
+                       }
+
+                       /* Check IPv6 mask and update input set */
+                       if (ipv6_mask->hdr.payload_len) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "Invalid IPv6 mask");
+                               return -rte_errno;
+                       }
+
+                       /* SRC and DST addresses of IPv6 must be fully
+                        * masked (all 1s).
+                        */
+                       for (j = 0; j < RTE_DIM(ipv6_mask->hdr.src_addr); j++) {
+                               if (ipv6_mask->hdr.src_addr[j] != UINT8_MAX ||
+                                   ipv6_mask->hdr.dst_addr[j] != UINT8_MAX) {
+                                       rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "Invalid IPv6 mask");
+                                       return -rte_errno;
+                               }
+                       }
+
+                       input_set |= I40E_INSET_IPV6_SRC;
+                       input_set |= I40E_INSET_IPV6_DST;
+
+                       if ((ipv6_mask->hdr.vtc_flow &
+                            rte_cpu_to_be_16(I40E_IPV6_TC_MASK))
+                           == rte_cpu_to_be_16(I40E_IPV6_TC_MASK))
+                               input_set |= I40E_INSET_IPV6_TC;
+                       if (ipv6_mask->hdr.proto == UINT8_MAX)
+                               input_set |= I40E_INSET_IPV6_NEXT_HDR;
+                       if (ipv6_mask->hdr.hop_limits == UINT8_MAX)
+                               input_set |= I40E_INSET_IPV6_HOP_LIMIT;
+
+                       /* Get filter info */
+                       filter->input.flow.ipv6_flow.tc =
+                               (uint8_t)(ipv6_spec->hdr.vtc_flow <<
+                                         I40E_IPV4_TC_SHIFT);
+                       filter->input.flow.ipv6_flow.proto =
+                               ipv6_spec->hdr.proto;
+                       filter->input.flow.ipv6_flow.hop_limits =
+                               ipv6_spec->hdr.hop_limits;
+
+                       rte_memcpy(filter->input.flow.ipv6_flow.src_ip,
+                                  ipv6_spec->hdr.src_addr, 16);
+                       rte_memcpy(filter->input.flow.ipv6_flow.dst_ip,
+                                  ipv6_spec->hdr.dst_addr, 16);
+
+                       /* Check if it is fragment. */
+                       if (ipv6_spec->hdr.proto == I40E_IPV6_FRAG_HEADER)
+                               flow_type = RTE_ETH_FLOW_FRAG_IPV6;
+                       else
+                               flow_type = RTE_ETH_FLOW_NONFRAG_IPV6_OTHER;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_TCP:
+                       tcp_spec = (const struct rte_flow_item_tcp *)item->spec;
+                       tcp_mask = (const struct rte_flow_item_tcp *)item->mask;
+                       if (!tcp_spec || !tcp_mask) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "NULL TCP spec/mask");
+                               return -rte_errno;
+                       }
+
+                       /* Check TCP mask and update input set */
+                       if (tcp_mask->hdr.sent_seq ||
+                           tcp_mask->hdr.recv_ack ||
+                           tcp_mask->hdr.data_off ||
+                           tcp_mask->hdr.tcp_flags ||
+                           tcp_mask->hdr.rx_win ||
+                           tcp_mask->hdr.cksum ||
+                           tcp_mask->hdr.tcp_urp) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "Invalid TCP mask");
+                               return -rte_errno;
+                       }
+
+                       if (tcp_mask->hdr.src_port != UINT16_MAX ||
+                           tcp_mask->hdr.dst_port != UINT16_MAX) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "Invalid TCP mask");
+                               return -rte_errno;
+                       }
+
+                       input_set |= I40E_INSET_SRC_PORT;
+                       input_set |= I40E_INSET_DST_PORT;
+
+                       /* Get filter info */
+                       if (l3 == RTE_FLOW_ITEM_TYPE_IPV4)
+                               flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_TCP;
+                       else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6)
+                               flow_type = RTE_ETH_FLOW_NONFRAG_IPV6_TCP;
+
+                       if (l3 == RTE_FLOW_ITEM_TYPE_IPV4) {
+                               filter->input.flow.tcp4_flow.src_port =
+                                       tcp_spec->hdr.src_port;
+                               filter->input.flow.tcp4_flow.dst_port =
+                                       tcp_spec->hdr.dst_port;
+                       } else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6) {
+                               filter->input.flow.tcp6_flow.src_port =
+                                       tcp_spec->hdr.src_port;
+                               filter->input.flow.tcp6_flow.dst_port =
+                                       tcp_spec->hdr.dst_port;
+                       }
+                       break;
+               case RTE_FLOW_ITEM_TYPE_UDP:
+                       udp_spec = (const struct rte_flow_item_udp *)item->spec;
+                       udp_mask = (const struct rte_flow_item_udp *)item->mask;
+                       if (!udp_spec || !udp_mask) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "NULL UDP spec/mask");
+                               return -rte_errno;
+                       }
+
+                       /* Check UDP mask and update input set*/
+                       if (udp_mask->hdr.dgram_len ||
+                           udp_mask->hdr.dgram_cksum) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "Invalid UDP mask");
+                               return -rte_errno;
+                       }
+
+                       if (udp_mask->hdr.src_port != UINT16_MAX ||
+                           udp_mask->hdr.dst_port != UINT16_MAX) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "Invalid UDP mask");
+                               return -rte_errno;
+                       }
+
+                       input_set |= I40E_INSET_SRC_PORT;
+                       input_set |= I40E_INSET_DST_PORT;
+
+                       /* Get filter info */
+                       if (l3 == RTE_FLOW_ITEM_TYPE_IPV4)
+                               flow_type =
+                                       RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
+                       else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6)
+                               flow_type =
+                                       RTE_ETH_FLOW_NONFRAG_IPV6_UDP;
+
+                       if (l3 == RTE_FLOW_ITEM_TYPE_IPV4) {
+                               filter->input.flow.udp4_flow.src_port =
+                                       udp_spec->hdr.src_port;
+                               filter->input.flow.udp4_flow.dst_port =
+                                       udp_spec->hdr.dst_port;
+                       } else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6) {
+                               filter->input.flow.udp6_flow.src_port =
+                                       udp_spec->hdr.src_port;
+                               filter->input.flow.udp6_flow.dst_port =
+                                       udp_spec->hdr.dst_port;
+                       }
+                       break;
+               case RTE_FLOW_ITEM_TYPE_SCTP:
+                       sctp_spec =
+                               (const struct rte_flow_item_sctp *)item->spec;
+                       sctp_mask =
+                               (const struct rte_flow_item_sctp *)item->mask;
+                       if (!sctp_spec || !sctp_mask) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "NULL SCTP spec/mask");
+                               return -rte_errno;
+                       }
+
+                       /* Check SCTP mask and update input set */
+                       if (sctp_mask->hdr.cksum) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "Invalid SCTP mask");
+                               return -rte_errno;
+                       }
+
+                       if (sctp_mask->hdr.src_port != UINT16_MAX ||
+                           sctp_mask->hdr.dst_port != UINT16_MAX ||
+                           sctp_mask->hdr.tag != UINT32_MAX) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "Invalid SCTP mask");
+                               return -rte_errno;
+                       }
+                       input_set |= I40E_INSET_SRC_PORT;
+                       input_set |= I40E_INSET_DST_PORT;
+                       input_set |= I40E_INSET_SCTP_VT;
+
+                       /* Get filter info */
+                       if (l3 == RTE_FLOW_ITEM_TYPE_IPV4)
+                               flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_SCTP;
+                       else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6)
+                               flow_type = RTE_ETH_FLOW_NONFRAG_IPV6_SCTP;
+
+                       if (l3 == RTE_FLOW_ITEM_TYPE_IPV4) {
+                               filter->input.flow.sctp4_flow.src_port =
+                                       sctp_spec->hdr.src_port;
+                               filter->input.flow.sctp4_flow.dst_port =
+                                       sctp_spec->hdr.dst_port;
+                               filter->input.flow.sctp4_flow.verify_tag =
+                                       sctp_spec->hdr.tag;
+                       } else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6) {
+                               filter->input.flow.sctp6_flow.src_port =
+                                       sctp_spec->hdr.src_port;
+                               filter->input.flow.sctp6_flow.dst_port =
+                                       sctp_spec->hdr.dst_port;
+                               filter->input.flow.sctp6_flow.verify_tag =
+                                       sctp_spec->hdr.tag;
+                       }
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VF:
+                       vf_spec = (const struct rte_flow_item_vf *)item->spec;
+                       filter->input.flow_ext.is_vf = 1;
+                       filter->input.flow_ext.dst_id = vf_spec->id;
+                       if (filter->input.flow_ext.is_vf &&
+                           filter->input.flow_ext.dst_id >= pf->vf_num) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "Invalid VF ID for FDIR.");
+                               return -rte_errno;
+                       }
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       pctype = i40e_flowtype_to_pctype(flow_type);
+       if (pctype == 0 || pctype > I40E_FILTER_PCTYPE_L2_PAYLOAD) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                  "Unsupported flow type");
+               return -rte_errno;
+       }
+
+       if (input_set != i40e_get_default_input_set(pctype)) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                  "Invalid input set.");
+               return -rte_errno;
+       }
+       filter->input.flow_type = flow_type;
+
+       return 0;
+}
+
+/* Parse to get the action info of an FDIR filter.
+ * FDIR supports a QUEUE or DROP action, optionally followed by MARK.
+ */
+static int
+i40e_flow_parse_fdir_action(struct rte_eth_dev *dev,
+                           const struct rte_flow_action *actions,
+                           struct rte_flow_error *error,
+                           struct rte_eth_fdir_filter *filter)
+{
+       struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+       const struct rte_flow_action *act;
+       const struct rte_flow_action_queue *act_q;
+       const struct rte_flow_action_mark *mark_spec;
+       uint32_t index = 0;
+
+       /* Check if the first non-void action is QUEUE or DROP. */
+       NEXT_ITEM_OF_ACTION(act, actions, index);
+       if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE &&
+           act->type != RTE_FLOW_ACTION_TYPE_DROP) {
+               rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+                                  act, "Invalid action.");
+               return -rte_errno;
+       }
+
+       filter->action.flex_off = 0;
+       filter->action.report_status = RTE_ETH_FDIR_REPORT_ID;
+       if (act->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
+               filter->action.behavior = RTE_ETH_FDIR_ACCEPT;
+               act_q = (const struct rte_flow_action_queue *)act->conf;
+               filter->action.rx_queue = act_q->index;
+               if (filter->action.rx_queue >= pf->dev_data->nb_rx_queues) {
+                       rte_flow_error_set(error, EINVAL,
+                                          RTE_FLOW_ERROR_TYPE_ACTION, act,
+                                          "Invalid queue ID for FDIR.");
+                       return -rte_errno;
+               }
+       } else {
+               /* DROP carries no configuration; don't read act->conf. */
+               filter->action.behavior = RTE_ETH_FDIR_REJECT;
+       }
+
+       /* Check if the next non-void item is MARK or END. */
+       index++;
+       NEXT_ITEM_OF_ACTION(act, actions, index);
+       if (act->type != RTE_FLOW_ACTION_TYPE_MARK &&
+           act->type != RTE_FLOW_ACTION_TYPE_END) {
+               rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+                                  act, "Invalid action.");
+               return -rte_errno;
+       }
+
+       if (act->type == RTE_FLOW_ACTION_TYPE_MARK) {
+               mark_spec = (const struct rte_flow_action_mark *)act->conf;
+               filter->soft_id = mark_spec->id;
+
+               /* Check if the next non-void item is END */
+               index++;
+               NEXT_ITEM_OF_ACTION(act, actions, index);
+               if (act->type != RTE_FLOW_ACTION_TYPE_END) {
+                       rte_flow_error_set(error, EINVAL,
+                                          RTE_FLOW_ERROR_TYPE_ACTION,
+                                          act, "Invalid action.");
+                       return -rte_errno;
+               }
+       }
+
+       return 0;
+}
+
+static int
+i40e_flow_parse_fdir_filter(struct rte_eth_dev *dev,
+                           const struct rte_flow_attr *attr,
+                           const struct rte_flow_item pattern[],
+                           const struct rte_flow_action actions[],
+                           struct rte_flow_error *error,
+                           union i40e_filter_t *filter)
+{
+       struct rte_eth_fdir_filter *fdir_filter =
+               &filter->fdir_filter;
+       int ret;
+
+       ret = i40e_flow_parse_fdir_pattern(dev, pattern, error, fdir_filter);
+       if (ret)
+               return ret;
+
+       ret = i40e_flow_parse_fdir_action(dev, actions, error, fdir_filter);
+       if (ret)
+               return ret;
+
+       ret = i40e_flow_parse_attr(attr, error);
+       if (ret)
+               return ret;
+
+       cons_filter_type = RTE_ETH_FILTER_FDIR;
+
+       if (dev->data->dev_conf.fdir_conf.mode !=
+           RTE_FDIR_MODE_PERFECT) {
+               rte_flow_error_set(error, ENOTSUP,
+                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                  NULL,
+                                  "Check the mode in fdir_conf.");
+               return -rte_errno;
+       }
+
+       return 0;
+}
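+
+/* Illustrative only: for pattern_fdir_ipv4_udp the mask must cover exactly
+ * the default input set of the matched PCTYPE (see default_inset_table in
+ * i40e_ethdev.c), typically src/dst IPv4 address and src/dst UDP port all
+ * set to full 1s with every other field left 0, and the action list is
+ * QUEUE or DROP, optionally followed by MARK, then END.
+ */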
+
+/* Parse to get the action info of a tunnel filter.
+ * Tunnel action only supports QUEUE.
+ */
+static int
+i40e_flow_parse_tunnel_action(struct rte_eth_dev *dev,
+                             const struct rte_flow_action *actions,
+                             struct rte_flow_error *error,
+                             struct rte_eth_tunnel_filter_conf *filter)
+{
+       struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+       const struct rte_flow_action *act;
+       const struct rte_flow_action_queue *act_q;
+       uint32_t index = 0;
+
+       /* Check if the first non-void action is QUEUE. */
+       NEXT_ITEM_OF_ACTION(act, actions, index);
+       if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE) {
+               rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+                                  act, "Not supported action.");
+               return -rte_errno;
+       }
+
+       act_q = (const struct rte_flow_action_queue *)act->conf;
+       filter->queue_id = act_q->index;
+       if (filter->queue_id >= pf->dev_data->nb_rx_queues) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ACTION,
+                                  act, "Invalid queue ID for tunnel filter");
+               return -rte_errno;
+       }
+
+       /* Check if the next non-void item is END */
+       index++;
+       NEXT_ITEM_OF_ACTION(act, actions, index);
+       if (act->type != RTE_FLOW_ACTION_TYPE_END) {
+               rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+                                  act, "Not supported action.");
+               return -rte_errno;
+       }
+
+       return 0;
+}
+
+static int
+i40e_check_tenant_id_mask(const uint8_t *mask)
+{
+       uint32_t j;
+       int is_masked = 0;
+
+       for (j = 0; j < I40E_TENANT_ARRAY_NUM; j++) {
+               if (*(mask + j) == UINT8_MAX) {
+                       if (j > 0 && (*(mask + j) != *(mask + j - 1)))
+                               return -EINVAL;
+                       is_masked = 0;
+               } else if (*(mask + j) == 0) {
+                       if (j > 0 && (*(mask + j) != *(mask + j - 1)))
+                               return -EINVAL;
+                       is_masked = 1;
+               } else {
+                       return -EINVAL;
+               }
+       }
+
+       return is_masked;
+}
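+
+/* i40e_check_tenant_id_mask() returns 0 when every VNI byte is 0xFF (VNI is
+ * matched), 1 when every byte is 0 (VNI is wildcarded), and -EINVAL for any
+ * mix of the two.
+ */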
+
+/* 1. The 'last' field of an item must be NULL (ranges are not supported).
+ * 2. Supported filter types: IMAC_IVLAN_TENID, IMAC_IVLAN,
+ *    IMAC_TENID, OMAC_TENID_IMAC and IMAC.
+ * 3. Mask of fields which need to be matched should be
+ *    filled with 1.
+ * 4. Mask of fields which need not be matched should be
+ *    filled with 0.
+ */
+static int
+i40e_flow_parse_vxlan_pattern(const struct rte_flow_item *pattern,
+                             struct rte_flow_error *error,
+                             struct rte_eth_tunnel_filter_conf *filter)
+{
+       const struct rte_flow_item *item = pattern;
+       const struct rte_flow_item_eth *eth_spec;
+       const struct rte_flow_item_eth *eth_mask;
+       const struct rte_flow_item_eth *o_eth_spec = NULL;
+       const struct rte_flow_item_eth *o_eth_mask = NULL;
+       const struct rte_flow_item_vxlan *vxlan_spec = NULL;
+       const struct rte_flow_item_vxlan *vxlan_mask = NULL;
+       const struct rte_flow_item_eth *i_eth_spec = NULL;
+       const struct rte_flow_item_eth *i_eth_mask = NULL;
+       const struct rte_flow_item_vlan *vlan_spec = NULL;
+       const struct rte_flow_item_vlan *vlan_mask = NULL;
+       bool is_vni_masked = 0;
+       enum rte_flow_item_type item_type;
+       bool vxlan_flag = 0;
+
+       for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
+               if (item->last) {
+                       rte_flow_error_set(error, EINVAL,
+                                          RTE_FLOW_ERROR_TYPE_ITEM,
+                                          item,
+                                          "Not support range");
+                       return -rte_errno;
+               }
+               item_type = item->type;
+               switch (item_type) {
+               case RTE_FLOW_ITEM_TYPE_ETH:
+                       eth_spec = (const struct rte_flow_item_eth *)item->spec;
+                       eth_mask = (const struct rte_flow_item_eth *)item->mask;
+                       if ((!eth_spec && eth_mask) ||
+                           (eth_spec && !eth_mask)) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "Invalid ether spec/mask");
+                               return -rte_errno;
+                       }
+
+                       if (eth_spec && eth_mask) {
+                               /* The DST MAC mask must be all 1s (matched),
+                                * the SRC MAC mask must be all 0s (ignored),
+                                * and the ether type mask must be 0.
+                                */
+                               if (!is_broadcast_ether_addr(&eth_mask->dst) ||
+                                   !is_zero_ether_addr(&eth_mask->src) ||
+                                   eth_mask->type) {
+                                       rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "Invalid ether spec/mask");
+                                       return -rte_errno;
+                               }
+
+                               if (!vxlan_flag)
+                                       rte_memcpy(&filter->outer_mac,
+                                                  &eth_spec->dst,
+                                                  ETHER_ADDR_LEN);
+                               else
+                                       rte_memcpy(&filter->inner_mac,
+                                                  &eth_spec->dst,
+                                                  ETHER_ADDR_LEN);
+                       }
+
+                       if (!vxlan_flag) {
+                               o_eth_spec = eth_spec;
+                               o_eth_mask = eth_mask;
+                       } else {
+                               i_eth_spec = eth_spec;
+                               i_eth_mask = eth_mask;
+                       }
+
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VLAN:
+                       vlan_spec =
+                               (const struct rte_flow_item_vlan *)item->spec;
+                       vlan_mask =
+                               (const struct rte_flow_item_vlan *)item->mask;
+                       if (vxlan_flag) {
+                               if (!(vlan_spec && vlan_mask)) {
+                                       rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "Invalid vlan item");
+                                       return -rte_errno;
+                               }
+                       } else {
+                               /* A VLAN item before VXLAN (outer VLAN) is
+                                * not supported.
+                                */
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "Invalid vlan item");
+                               return -rte_errno;
+                       }
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV4:
+               case RTE_FLOW_ITEM_TYPE_IPV6:
+               case RTE_FLOW_ITEM_TYPE_UDP:
+                       /* IPv4/IPv6/UDP are used to describe protocol,
+                        * spec and mask should be NULL.
+                        */
+                       if (item->spec || item->mask) {
+                               rte_flow_error_set(error, EINVAL,
+                                          RTE_FLOW_ERROR_TYPE_ITEM,
+                                          item,
+                                          "Invalid IPv4/IPv6/UDP item");
+                               return -rte_errno;
+                       }
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VXLAN:
+                       vxlan_spec =
+                               (const struct rte_flow_item_vxlan *)item->spec;
+                       vxlan_mask =
+                               (const struct rte_flow_item_vxlan *)item->mask;
+                       /* Check if VXLAN item is used to describe protocol.
+                        * If yes, both spec and mask should be NULL.
+                        * If no, either spec or mask shouldn't be NULL.
+                        */
+                       if ((!vxlan_spec && vxlan_mask) ||
+                           (vxlan_spec && !vxlan_mask)) {
+                               rte_flow_error_set(error, EINVAL,
+                                          RTE_FLOW_ERROR_TYPE_ITEM,
+                                          item,
+                                          "Invalid VXLAN item");
+                               return -rte_errno;
+                       }
+
+                       /* Check if VNI is masked. */
+                       if (vxlan_mask) {
+                               is_vni_masked =
+                               i40e_check_tenant_id_mask(vxlan_mask->vni);
+                               if (is_vni_masked < 0) {
+                                       rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  item,
+                                                  "Invalid VNI mask");
+                                       return -rte_errno;
+                               }
+                       }
+                       vxlan_flag = 1;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       /* Check specification and mask to get the filter type */
+       if (vlan_spec && vlan_mask &&
+           (vlan_mask->tci == rte_cpu_to_be_16(I40E_TCI_MASK))) {
+               /* If there's inner vlan */
+               filter->inner_vlan = rte_be_to_cpu_16(vlan_spec->tci)
+                       & I40E_TCI_MASK;
+               if (vxlan_spec && vxlan_mask && !is_vni_masked) {
+                       /* If there's vxlan */
+                       rte_memcpy(&filter->tenant_id, vxlan_spec->vni,
+                                  RTE_DIM(vxlan_spec->vni));
+                       if (!o_eth_spec && !o_eth_mask &&
+                               i_eth_spec && i_eth_mask)
+                               filter->filter_type =
+                                       RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID;
+                       else {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  NULL,
+                                                  "Invalid filter type");
+                               return -rte_errno;
+                       }
+               } else if (!vxlan_spec && !vxlan_mask) {
+                       /* If there's no vxlan */
+                       if (!o_eth_spec && !o_eth_mask &&
+                               i_eth_spec && i_eth_mask)
+                               filter->filter_type =
+                                       RTE_TUNNEL_FILTER_IMAC_IVLAN;
+                       else {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  NULL,
+                                                  "Invalid filter type");
+                               return -rte_errno;
+                       }
+               } else {
+                       rte_flow_error_set(error, EINVAL,
+                                          RTE_FLOW_ERROR_TYPE_ITEM,
+                                          NULL,
+                                          "Invalid filter type");
+                       return -rte_errno;
+               }
+       } else if ((!vlan_spec && !vlan_mask) ||
+                  (vlan_spec && vlan_mask && vlan_mask->tci == 0x0)) {
+               /* If there's no inner vlan */
+               if (vxlan_spec && vxlan_mask && !is_vni_masked) {
+                       /* If there's vxlan */
+                       rte_memcpy(&filter->tenant_id, vxlan_spec->vni,
+                                  RTE_DIM(vxlan_spec->vni));
+                       if (!o_eth_spec && !o_eth_mask &&
+                               i_eth_spec && i_eth_mask)
+                               filter->filter_type =
+                                       RTE_TUNNEL_FILTER_IMAC_TENID;
+                       else if (o_eth_spec && o_eth_mask &&
+                               i_eth_spec && i_eth_mask)
+                               filter->filter_type =
+                                       RTE_TUNNEL_FILTER_OMAC_TENID_IMAC;
+               } else if (!vxlan_spec && !vxlan_mask) {
+                       /* If there's no vxlan */
+                       if (!o_eth_spec && !o_eth_mask &&
+                               i_eth_spec && i_eth_mask) {
+                               filter->filter_type = ETH_TUNNEL_FILTER_IMAC;
+                       } else {
+                               rte_flow_error_set(error, EINVAL,
+                                          RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+                                          "Invalid filter type");
+                               return -rte_errno;
+                       }
+               } else {
+                       rte_flow_error_set(error, EINVAL,
+                                          RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+                                          "Invalid filter type");
+                       return -rte_errno;
+               }
+       } else {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
+                                  "Not supported by tunnel filter.");
+               return -rte_errno;
+       }
+
+       filter->tunnel_type = RTE_TUNNEL_TYPE_VXLAN;
+
+       return 0;
+}
+
+static int
+i40e_flow_parse_tunnel_pattern(__rte_unused struct rte_eth_dev *dev,
+                              const struct rte_flow_item *pattern,
+                              struct rte_flow_error *error,
+                              struct rte_eth_tunnel_filter_conf *filter)
+{
+       int ret;
+
+       ret = i40e_flow_parse_vxlan_pattern(pattern, error, filter);
+
+       return ret;
+}
+
+static int
+i40e_flow_parse_tunnel_filter(struct rte_eth_dev *dev,
+                             const struct rte_flow_attr *attr,
+                             const struct rte_flow_item pattern[],
+                             const struct rte_flow_action actions[],
+                             struct rte_flow_error *error,
+                             union i40e_filter_t *filter)
+{
+       struct rte_eth_tunnel_filter_conf *tunnel_filter =
+               &filter->tunnel_filter;
+       int ret;
+
+       ret = i40e_flow_parse_tunnel_pattern(dev, pattern,
+                                            error, tunnel_filter);
+       if (ret)
+               return ret;
+
+       ret = i40e_flow_parse_tunnel_action(dev, actions, error, tunnel_filter);
+       if (ret)
+               return ret;
+
+       ret = i40e_flow_parse_attr(attr, error);
+       if (ret)
+               return ret;
+
+       cons_filter_type = RTE_ETH_FILTER_TUNNEL;
+
+       return ret;
+}
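+
+/* Illustrative only: a rule matching pattern_vxlan_1 above uses
+ * ETH / IPV4 / UDP / VXLAN / ETH items, where the protocol items (IPV4, UDP)
+ * carry NULL spec/mask, the inner ETH item gives the inner destination MAC
+ * with an ff:ff:ff:ff:ff:ff mask, and the VXLAN item gives the VNI with an
+ * all-ones or all-zeros mask.  The only action accepted for tunnel filters
+ * is QUEUE.
+ */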
+
+static int
+i40e_flow_validate(struct rte_eth_dev *dev,
+                  const struct rte_flow_attr *attr,
+                  const struct rte_flow_item pattern[],
+                  const struct rte_flow_action actions[],
+                  struct rte_flow_error *error)
+{
+       struct rte_flow_item *items; /* internal pattern w/o VOID items */
+       parse_filter_t parse_filter;
+       uint32_t item_num = 0; /* non-void item number of pattern */
+       uint32_t i = 0;
+       int ret;
+
+       if (!pattern) {
+               rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_NUM,
+                                  NULL, "NULL pattern.");
+               return -rte_errno;
+       }
+
+       if (!actions) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ACTION_NUM,
+                                  NULL, "NULL action.");
+               return -rte_errno;
+       }
+
+       if (!attr) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ATTR,
+                                  NULL, "NULL attribute.");
+               return -rte_errno;
+       }
+
+       memset(&cons_filter, 0, sizeof(cons_filter));
+
+       /* Get the non-void item number of pattern */
+       while ((pattern + i)->type != RTE_FLOW_ITEM_TYPE_END) {
+               if ((pattern + i)->type != RTE_FLOW_ITEM_TYPE_VOID)
+                       item_num++;
+               i++;
+       }
+       item_num++;
+
+       items = rte_zmalloc("i40e_pattern",
+                           item_num * sizeof(struct rte_flow_item), 0);
+       if (!items) {
+               rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_ITEM_NUM,
+                                  NULL, "No memory for PMD internal items.");
+               return -ENOMEM;
+       }
+
+       i40e_pattern_skip_void_item(items, pattern);
+
+       /* Find if there's matched parse filter function */
+       parse_filter = i40e_find_parse_filter_func(items);
+       if (!parse_filter) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                  pattern, "Unsupported pattern");
+               rte_free(items);
+               return -rte_errno;
+       }
+
+       ret = parse_filter(dev, attr, items, actions, error, &cons_filter);
+
+       rte_free(items);
+
+       return ret;
+}
+
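For reference, i40e_flow_validate() above and i40e_flow_create() below are reached through the generic rte_flow API. The following is a minimal, hypothetical application-side sketch (not part of this patch) that installs an ethertype rule steering frames with EtherType 0x88F7 to RX queue 0; the port_id, EtherType and queue index are placeholders.

    #include <rte_flow.h>
    #include <rte_byteorder.h>

    /* Hypothetical sketch, not part of this patch: validate and then create
     * a simple ethertype rule on @port_id through the generic rte_flow API. */
    static struct rte_flow *
    example_install_ethertype_rule(uint8_t port_id)
    {
            struct rte_flow_attr attr = { .ingress = 1 };
            struct rte_flow_item_eth eth_spec = {
                    .type = rte_cpu_to_be_16(0x88F7), /* example EtherType */
            };
            struct rte_flow_item_eth eth_mask = { .type = 0xFFFF };
            struct rte_flow_item pattern[] = {
                    { .type = RTE_FLOW_ITEM_TYPE_ETH,
                      .spec = &eth_spec, .mask = &eth_mask },
                    { .type = RTE_FLOW_ITEM_TYPE_END },
            };
            struct rte_flow_action_queue queue = { .index = 0 };
            struct rte_flow_action actions[] = {
                    { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
                    { .type = RTE_FLOW_ACTION_TYPE_END },
            };
            struct rte_flow_error err;

            if (rte_flow_validate(port_id, &attr, pattern, actions, &err) != 0)
                    return NULL;
            return rte_flow_create(port_id, &attr, pattern, actions, &err);
    }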
+static struct rte_flow *
+i40e_flow_create(struct rte_eth_dev *dev,
+                const struct rte_flow_attr *attr,
+                const struct rte_flow_item pattern[],
+                const struct rte_flow_action actions[],
+                struct rte_flow_error *error)
+{
+       struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+       struct rte_flow *flow;
+       int ret;
+
+       flow = rte_zmalloc("i40e_flow", sizeof(struct rte_flow), 0);
+       if (!flow) {
+               rte_flow_error_set(error, ENOMEM,
+                                  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+                                  "Failed to allocate memory");
+               return flow;
+       }
+
+       ret = i40e_flow_validate(dev, attr, pattern, actions, error);
+       if (ret < 0) {
+               rte_free(flow);
+               return NULL;
+       }
+
+       switch (cons_filter_type) {
+       case RTE_ETH_FILTER_ETHERTYPE:
+               ret = i40e_ethertype_filter_set(pf,
+                                       &cons_filter.ethertype_filter, 1);
+               if (ret)
+                       goto free_flow;
+               flow->rule = TAILQ_LAST(&pf->ethertype.ethertype_list,
+                                       i40e_ethertype_filter_list);
+               break;
+       case RTE_ETH_FILTER_FDIR:
+               ret = i40e_add_del_fdir_filter(dev,
+                                      &cons_filter.fdir_filter, 1);
+               if (ret)
+                       goto free_flow;
+               flow->rule = TAILQ_LAST(&pf->fdir.fdir_list,
+                                       i40e_fdir_filter_list);
+               break;
+       case RTE_ETH_FILTER_TUNNEL:
+               ret = i40e_dev_tunnel_filter_set(pf,
+                                        &cons_filter.tunnel_filter, 1);
+               if (ret)
+                       goto free_flow;
+               flow->rule = TAILQ_LAST(&pf->tunnel.tunnel_list,
+                                       i40e_tunnel_filter_list);
+               break;
+       default:
+               goto free_flow;
+       }
+
+       flow->filter_type = cons_filter_type;
+       TAILQ_INSERT_TAIL(&pf->flow_list, flow, node);
+       return flow;
+
+free_flow:
+       rte_flow_error_set(error, -ret,
+                          RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+                          "Failed to create flow.");
+       rte_free(flow);
+       return NULL;
+}
+
+static int
+i40e_flow_destroy(struct rte_eth_dev *dev,
+                 struct rte_flow *flow,
+                 struct rte_flow_error *error)
+{
+       struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+       enum rte_filter_type filter_type = flow->filter_type;
+       int ret = 0;
+
+       switch (filter_type) {
+       case RTE_ETH_FILTER_ETHERTYPE:
+               ret = i40e_flow_destroy_ethertype_filter(pf,
+                        (struct i40e_ethertype_filter *)flow->rule);
+               break;
+       case RTE_ETH_FILTER_TUNNEL:
+               ret = i40e_flow_destroy_tunnel_filter(pf,
+                             (struct i40e_tunnel_filter *)flow->rule);
+               break;
+       case RTE_ETH_FILTER_FDIR:
+               ret = i40e_add_del_fdir_filter(dev,
+                      &((struct i40e_fdir_filter *)flow->rule)->fdir, 0);
+               break;
+       default:
+               PMD_DRV_LOG(WARNING, "Filter type (%d) not supported",
+                           filter_type);
+               ret = -EINVAL;
+               break;
+       }
+
+       if (!ret) {
+               TAILQ_REMOVE(&pf->flow_list, flow, node);
+               rte_free(flow);
+       } else
+               rte_flow_error_set(error, -ret,
+                                  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+                                  "Failed to destroy flow.");
+
+       return ret;
+}
+
+static int
+i40e_flow_destroy_ethertype_filter(struct i40e_pf *pf,
+                                  struct i40e_ethertype_filter *filter)
+{
+       struct i40e_hw *hw = I40E_PF_TO_HW(pf);
+       struct i40e_ethertype_rule *ethertype_rule = &pf->ethertype;
+       struct i40e_ethertype_filter *node;
+       struct i40e_control_filter_stats stats;
+       uint16_t flags = 0;
+       int ret = 0;
+
+       if (!(filter->flags & RTE_ETHTYPE_FLAGS_MAC))
+               flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC;
+       if (filter->flags & RTE_ETHTYPE_FLAGS_DROP)
+               flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_DROP;
+       flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TO_QUEUE;
+
+       memset(&stats, 0, sizeof(stats));
+       ret = i40e_aq_add_rem_control_packet_filter(hw,
+                                   filter->input.mac_addr.addr_bytes,
+                                   filter->input.ether_type,
+                                   flags, pf->main_vsi->seid,
+                                   filter->queue, 0, &stats, NULL);
+       if (ret < 0)
+               return ret;
+
+       node = i40e_sw_ethertype_filter_lookup(ethertype_rule, &filter->input);
+       if (!node)
+               return -EINVAL;
+
+       ret = i40e_sw_ethertype_filter_del(pf, &node->input);
+
+       return ret;
+}
+
+static int
+i40e_flow_destroy_tunnel_filter(struct i40e_pf *pf,
+                               struct i40e_tunnel_filter *filter)
+{
+       struct i40e_hw *hw = I40E_PF_TO_HW(pf);
+       struct i40e_vsi *vsi = pf->main_vsi;
+       struct i40e_aqc_add_remove_cloud_filters_element_data cld_filter;
+       struct i40e_tunnel_rule *tunnel_rule = &pf->tunnel;
+       struct i40e_tunnel_filter *node;
+       int ret = 0;
+
+       memset(&cld_filter, 0, sizeof(cld_filter));
+       ether_addr_copy((struct ether_addr *)&filter->input.outer_mac,
+                       (struct ether_addr *)&cld_filter.outer_mac);
+       ether_addr_copy((struct ether_addr *)&filter->input.inner_mac,
+                       (struct ether_addr *)&cld_filter.inner_mac);
+       cld_filter.inner_vlan = filter->input.inner_vlan;
+       cld_filter.flags = filter->input.flags;
+       cld_filter.tenant_id = filter->input.tenant_id;
+       cld_filter.queue_number = filter->queue;
+
+       ret = i40e_aq_remove_cloud_filters(hw, vsi->seid,
+                                          &cld_filter, 1);
+       if (ret < 0)
+               return ret;
+
+       node = i40e_sw_tunnel_filter_lookup(tunnel_rule, &filter->input);
+       if (!node)
+               return -EINVAL;
+
+       ret = i40e_sw_tunnel_filter_del(pf, &node->input);
+
+       return ret;
+}
+
+static int
+i40e_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
+{
+       struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+       int ret;
+
+       ret = i40e_flow_flush_fdir_filter(pf);
+       if (ret) {
+               rte_flow_error_set(error, -ret,
+                                  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+                                  "Failed to flush FDIR flows.");
+               return -rte_errno;
+       }
+
+       ret = i40e_flow_flush_ethertype_filter(pf);
+       if (ret) {
+               rte_flow_error_set(error, -ret,
+                                  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+                                  "Failed to ethertype flush flows.");
+               return -rte_errno;
+       }
+
+       ret = i40e_flow_flush_tunnel_filter(pf);
+       if (ret) {
+               rte_flow_error_set(error, -ret,
+                                  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+                                  "Failed to flush tunnel flows.");
+               return -rte_errno;
+       }
+
+       return ret;
+}
+
+static int
+i40e_flow_flush_fdir_filter(struct i40e_pf *pf)
+{
+       struct rte_eth_dev *dev = pf->adapter->eth_dev;
+       struct i40e_fdir_info *fdir_info = &pf->fdir;
+       struct i40e_fdir_filter *fdir_filter;
+       struct rte_flow *flow;
+       void *temp;
+       int ret;
+
+       ret = i40e_fdir_flush(dev);
+       if (!ret) {
+               /* Delete FDIR filters in FDIR list. */
+               while ((fdir_filter = TAILQ_FIRST(&fdir_info->fdir_list))) {
+                       ret = i40e_sw_fdir_filter_del(pf,
+                                                     &fdir_filter->fdir.input);
+                       if (ret < 0)
+                               return ret;
+               }
+
+               /* Delete FDIR flows in flow list. */
+               TAILQ_FOREACH_SAFE(flow, &pf->flow_list, node, temp) {
+                       if (flow->filter_type == RTE_ETH_FILTER_FDIR) {
+                               TAILQ_REMOVE(&pf->flow_list, flow, node);
+                               rte_free(flow);
+                       }
+               }
+       }
+
+       return ret;
+}
+
+/* Flush all ethertype filters */
+static int
+i40e_flow_flush_ethertype_filter(struct i40e_pf *pf)
+{
+       struct i40e_ethertype_filter_list
+               *ethertype_list = &pf->ethertype.ethertype_list;
+       struct i40e_ethertype_filter *filter;
+       struct rte_flow *flow;
+       void *temp;
+       int ret = 0;
+
+       while ((filter = TAILQ_FIRST(ethertype_list))) {
+               ret = i40e_flow_destroy_ethertype_filter(pf, filter);
+               if (ret)
+                       return ret;
+       }
+
+       /* Delete ethertype flows in flow list. */
+       TAILQ_FOREACH_SAFE(flow, &pf->flow_list, node, temp) {
+               if (flow->filter_type == RTE_ETH_FILTER_ETHERTYPE) {
+                       TAILQ_REMOVE(&pf->flow_list, flow, node);
+                       rte_free(flow);
+               }
+       }
+
+       return ret;
+}
+
+/* Flush all tunnel filters */
+static int
+i40e_flow_flush_tunnel_filter(struct i40e_pf *pf)
+{
+       struct i40e_tunnel_filter_list
+               *tunnel_list = &pf->tunnel.tunnel_list;
+       struct i40e_tunnel_filter *filter;
+       struct rte_flow *flow;
+       void *temp;
+       int ret = 0;
+
+       while ((filter = TAILQ_FIRST(tunnel_list))) {
+               ret = i40e_flow_destroy_tunnel_filter(pf, filter);
+               if (ret)
+                       return ret;
+       }
+
+       /* Delete tunnel flows in flow list. */
+       TAILQ_FOREACH_SAFE(flow, &pf->flow_list, node, temp) {
+               if (flow->filter_type == RTE_ETH_FILTER_TUNNEL) {
+                       TAILQ_REMOVE(&pf->flow_list, flow, node);
+                       rte_free(flow);
+               }
+       }
+
+       return ret;
+}
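The three flush helpers above back the driver's rte_flow flush callback. A small, hypothetical usage sketch (not part of this patch); port_id is a placeholder:

    #include <stdio.h>
    #include <rte_flow.h>

    /* Hypothetical helper: drop every rte_flow rule installed on @port_id
     * and print the PMD-provided error message if the flush fails. */
    static int
    example_flush_all_flows(uint8_t port_id)
    {
            struct rte_flow_error err;

            if (rte_flow_flush(port_id, &err) != 0) {
                    printf("flow flush failed: %s\n",
                           err.message ? err.message : "unspecified error");
                    return -1;
            }
            return 0;
    }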
index d5b2d45..f771dfb 100644 (file)
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -55,6 +55,7 @@
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
 #include "i40e_pf.h"
+#include "rte_pmd_i40e.h"
 
 #define I40E_CFG_CRCSTRIP_DEFAULT 1
 
@@ -138,7 +139,7 @@ i40e_pf_host_vf_reset(struct i40e_pf_vf *vf, bool do_hw_reset)
        abs_vf_id = vf_id + hw->func_caps.vf_base_id;
 
        /* Notify VF that we are in VFR progress */
-       I40E_WRITE_REG(hw, I40E_VFGEN_RSTAT1(vf_id), I40E_PF_VFR_INPROGRESS);
+       I40E_WRITE_REG(hw, I40E_VFGEN_RSTAT1(vf_id), I40E_VFR_INPROGRESS);
 
        /*
         * If require a SW VF reset, a VFLR interrupt will be generated,
@@ -219,7 +220,7 @@ i40e_pf_host_vf_reset(struct i40e_pf_vf *vf, bool do_hw_reset)
        }
 
        /* Reset done, Set COMPLETE flag and clear reset bit */
-       I40E_WRITE_REG(hw, I40E_VFGEN_RSTAT1(vf_id), I40E_PF_VFR_COMPLETED);
+       I40E_WRITE_REG(hw, I40E_VFGEN_RSTAT1(vf_id), I40E_VFR_COMPLETED);
        val = I40E_READ_REG(hw, I40E_VPGEN_VFRTRIG(vf_id));
        val &= ~I40E_VPGEN_VFRTRIG_VFSWR_MASK;
        I40E_WRITE_REG(hw, I40E_VPGEN_VFRTRIG(vf_id), val);
@@ -247,10 +248,12 @@ i40e_pf_host_vf_reset(struct i40e_pf_vf *vf, bool do_hw_reset)
                return -EFAULT;
        }
 
+       I40E_WRITE_REG(hw, I40E_VFGEN_RSTAT1(vf_id), I40E_VFR_VFACTIVE);
+
        return ret;
 }
 
-static int
+int
 i40e_pf_host_send_msg_to_vf(struct i40e_pf_vf *vf,
                            uint32_t opcode,
                            uint32_t retval,
@@ -272,14 +275,30 @@ i40e_pf_host_send_msg_to_vf(struct i40e_pf_vf *vf,
 }
 
 static void
-i40e_pf_host_process_cmd_version(struct i40e_pf_vf *vf)
+i40e_pf_host_process_cmd_version(struct i40e_pf_vf *vf, bool b_op)
 {
        struct i40e_virtchnl_version_info info;
 
-       info.major = I40E_DPDK_VERSION_MAJOR;
-       info.minor = I40E_DPDK_VERSION_MINOR;
-       i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_VERSION,
-               I40E_SUCCESS, (uint8_t *)&info, sizeof(info));
+       /* Respond like a Linux PF host in order to support both DPDK VF and
+        * Linux VF drivers. The cost is that DPDK-host-specific features such
+        * as CFG_VLAN_PVID and CONFIG_VSI_QUEUES_EXT will not be available.
+        *
+        * The DPDK VF also cannot identify the host driver by the returned
+        * version number; it always assumes it is talking to a Linux PF.
+        */
+       info.major = I40E_VIRTCHNL_VERSION_MAJOR;
+       info.minor = I40E_VIRTCHNL_VERSION_MINOR_NO_VF_CAPS;
+
+       if (b_op)
+               i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_VERSION,
+                                           I40E_SUCCESS,
+                                           (uint8_t *)&info,
+                                           sizeof(info));
+       else
+               i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_VERSION,
+                                           I40E_NOT_SUPPORTED,
+                                           (uint8_t *)&info,
+                                           sizeof(info));
 }
 
 static int
@@ -292,13 +311,20 @@ i40e_pf_host_process_cmd_reset_vf(struct i40e_pf_vf *vf)
 }
 
 static int
-i40e_pf_host_process_cmd_get_vf_resource(struct i40e_pf_vf *vf)
+i40e_pf_host_process_cmd_get_vf_resource(struct i40e_pf_vf *vf, bool b_op)
 {
        struct i40e_virtchnl_vf_resource *vf_res = NULL;
        struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf);
        uint32_t len = 0;
        int ret = I40E_SUCCESS;
 
+       if (!b_op) {
+               i40e_pf_host_send_msg_to_vf(vf,
+                                           I40E_VIRTCHNL_OP_GET_VF_RESOURCES,
+                                           I40E_NOT_SUPPORTED, NULL, 0);
+               return ret;
+       }
+
        /* only have 1 VSI by default */
        len =  sizeof(struct i40e_virtchnl_vf_resource) +
                                I40E_DEFAULT_VF_VSI_NUM *
@@ -321,8 +347,7 @@ i40e_pf_host_process_cmd_get_vf_resource(struct i40e_pf_vf *vf)
 
        /* Change below setting if PF host can support more VSIs for VF */
        vf_res->vsi_res[0].vsi_type = I40E_VSI_SRIOV;
-       /* As assume Vf only has single VSI now, always return 0 */
-       vf_res->vsi_res[0].vsi_id = 0;
+       vf_res->vsi_res[0].vsi_id = vf->vsi->vsi_id;
        vf_res->vsi_res[0].num_queue_pairs = vf->vsi->nb_qps;
        ether_addr_copy(&vf->mac_addr,
                (struct ether_addr *)vf_res->vsi_res[0].default_mac_addr);
@@ -393,10 +418,12 @@ i40e_pf_host_hmc_config_txq(struct i40e_hw *hw,
 
        /* clear the context structure first */
        memset(&tx_ctx, 0, sizeof(tx_ctx));
-       tx_ctx.new_context = 1;
        tx_ctx.base = txq->dma_ring_addr / I40E_QUEUE_BASE_ADDR_UNIT;
        tx_ctx.qlen = txq->ring_len;
        tx_ctx.rdylist = rte_le_to_cpu_16(vf->vsi->info.qs_handle[0]);
+       tx_ctx.head_wb_ena = txq->headwb_enabled;
+       tx_ctx.head_wb_addr = txq->dma_headwb_addr;
+
        err = i40e_clear_lan_tx_queue_context(hw, abs_queue_id);
        if (err != I40E_SUCCESS)
                return err;
@@ -423,7 +450,8 @@ i40e_pf_host_hmc_config_txq(struct i40e_hw *hw,
 static int
 i40e_pf_host_process_cmd_config_vsi_queues(struct i40e_pf_vf *vf,
                                           uint8_t *msg,
-                                          uint16_t msglen)
+                                          uint16_t msglen,
+                                          bool b_op)
 {
        struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf);
        struct i40e_vsi *vsi = vf->vsi;
@@ -432,11 +460,18 @@ i40e_pf_host_process_cmd_config_vsi_queues(struct i40e_pf_vf *vf,
        struct i40e_virtchnl_queue_pair_info *vc_qpi;
        int i, ret = I40E_SUCCESS;
 
+       if (!b_op) {
+               i40e_pf_host_send_msg_to_vf(vf,
+                                           I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+                                           I40E_NOT_SUPPORTED, NULL, 0);
+               return ret;
+       }
+
        if (!msg || vc_vqci->num_queue_pairs > vsi->nb_qps ||
                vc_vqci->num_queue_pairs > I40E_MAX_VSI_QP ||
                msglen < I40E_VIRTCHNL_CONFIG_VSI_QUEUES_SIZE(vc_vqci,
                                        vc_vqci->num_queue_pairs)) {
-               PMD_DRV_LOG(ERR, "vsi_queue_config_info argument wrong\n");
+               PMD_DRV_LOG(ERR, "vsi_queue_config_info argument wrong");
                ret = I40E_ERR_PARAM;
                goto send_msg;
        }
@@ -482,7 +517,8 @@ send_msg:
 static int
 i40e_pf_host_process_cmd_config_vsi_queues_ext(struct i40e_pf_vf *vf,
                                               uint8_t *msg,
-                                              uint16_t msglen)
+                                              uint16_t msglen,
+                                              bool b_op)
 {
        struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf);
        struct i40e_vsi *vsi = vf->vsi;
@@ -491,11 +527,19 @@ i40e_pf_host_process_cmd_config_vsi_queues_ext(struct i40e_pf_vf *vf,
        struct i40e_virtchnl_queue_pair_ext_info *vc_qpei;
        int i, ret = I40E_SUCCESS;
 
+       if (!b_op) {
+               i40e_pf_host_send_msg_to_vf(
+                       vf,
+                       I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT,
+                       I40E_NOT_SUPPORTED, NULL, 0);
+               return ret;
+       }
+
        if (!msg || vc_vqcei->num_queue_pairs > vsi->nb_qps ||
                vc_vqcei->num_queue_pairs > I40E_MAX_VSI_QP ||
                msglen < I40E_VIRTCHNL_CONFIG_VSI_QUEUES_SIZE(vc_vqcei,
                                        vc_vqcei->num_queue_pairs)) {
-               PMD_DRV_LOG(ERR, "vsi_queue_config_ext_info argument wrong\n");
+               PMD_DRV_LOG(ERR, "vsi_queue_config_ext_info argument wrong");
                ret = I40E_ERR_PARAM;
                goto send_msg;
        }
@@ -537,13 +581,125 @@ send_msg:
        return ret;
 }
 
+static void
+i40e_pf_config_irq_link_list(struct i40e_pf_vf *vf,
+                             struct i40e_virtchnl_vector_map *vvm)
+{
+#define BITS_PER_CHAR 8
+       uint64_t linklistmap = 0, tempmap;
+       struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf);
+       uint16_t qid;
+       bool b_first_q = true;
+       enum i40e_queue_type qtype;
+       uint16_t vector_id;
+       uint32_t reg, reg_idx;
+       uint16_t itr_idx = 0, i;
+
+       vector_id = vvm->vector_id;
+       /* setup the head */
+       if (!vector_id)
+               reg_idx = I40E_VPINT_LNKLST0(vf->vf_idx);
+       else
+               reg_idx = I40E_VPINT_LNKLSTN(
+               ((hw->func_caps.num_msix_vectors_vf - 1) * vf->vf_idx)
+               + (vector_id - 1));
+
+       if (vvm->rxq_map == 0 && vvm->txq_map == 0) {
+               I40E_WRITE_REG(hw, reg_idx,
+                       I40E_VPINT_LNKLST0_FIRSTQ_INDX_MASK);
+               goto cfg_irq_done;
+       }
+
+       /* sort all rx and tx queues */
+       tempmap = vvm->rxq_map;
+       for (i = 0; i < sizeof(vvm->rxq_map) * BITS_PER_CHAR; i++) {
+               if (tempmap & 0x1)
+                       linklistmap |= (1 << (2 * i));
+               tempmap >>= 1;
+       }
+
+       tempmap = vvm->txq_map;
+       for (i = 0; i < sizeof(vvm->txq_map) * BITS_PER_CHAR; i++) {
+               if (tempmap & 0x1)
+                       linklistmap |= (1 << (2 * i + 1));
+               tempmap >>= 1;
+       }
+
+       /* Link all rx and tx queues into a chained list */
+       tempmap = linklistmap;
+       i = 0;
+       b_first_q = true;
+       do {
+               if (tempmap & 0x1) {
+                       qtype = (enum i40e_queue_type)(i % 2);
+                       qid = vf->vsi->base_queue + i / 2;
+                       if (b_first_q) {
+                               /* This is header */
+                               b_first_q = false;
+                               reg = ((qtype <<
+                               I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT)
+                               | qid);
+                       } else {
+                               /* element in the link list */
+                               reg = (vector_id) |
+                               (qtype << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
+                               (qid << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
+                               BIT(I40E_QINT_RQCTL_CAUSE_ENA_SHIFT) |
+                               (itr_idx << I40E_QINT_RQCTL_ITR_INDX_SHIFT);
+                       }
+                       I40E_WRITE_REG(hw, reg_idx, reg);
+                       /* find next register to program */
+                       switch (qtype) {
+                       case I40E_QUEUE_TYPE_RX:
+                               reg_idx = I40E_QINT_RQCTL(qid);
+                               itr_idx = vvm->rxitr_idx;
+                               break;
+                       case I40E_QUEUE_TYPE_TX:
+                               reg_idx = I40E_QINT_TQCTL(qid);
+                               itr_idx = vvm->txitr_idx;
+                               break;
+                       default:
+                               break;
+                       }
+               }
+               i++;
+               tempmap >>= 1;
+       } while (tempmap);
+
+       /* Terminate the link list */
+       reg = (vector_id) |
+               (0 << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
+               (0x7FF << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
+               BIT(I40E_QINT_RQCTL_CAUSE_ENA_SHIFT) |
+               (itr_idx << I40E_QINT_RQCTL_ITR_INDX_SHIFT);
+       I40E_WRITE_REG(hw, reg_idx, reg);
+
+cfg_irq_done:
+       I40E_WRITE_FLUSH(hw);
+}
+
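The interleaving above puts RX queue i on bit 2*i of linklistmap and TX queue i on bit 2*i+1, so the chained list programmed afterwards visits the queues in RX0, TX0, RX1, TX1, ... order. A stand-alone sketch of the same interleaving (illustration only, not driver code):

    #include <stdint.h>

    /* Illustration only: interleave 16-bit RX/TX queue bitmaps the same way
     * the function above builds linklistmap (RX queue i -> bit 2*i,
     * TX queue i -> bit 2*i + 1). */
    static uint64_t
    example_interleave_queue_maps(uint16_t rxq_map, uint16_t txq_map)
    {
            uint64_t linklistmap = 0;
            unsigned int i;

            for (i = 0; i < 16; i++) {
                    if (rxq_map & (1u << i))
                            linklistmap |= 1ULL << (2 * i);
                    if (txq_map & (1u << i))
                            linklistmap |= 1ULL << (2 * i + 1);
            }
            return linklistmap;
    }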
 static int
 i40e_pf_host_process_cmd_config_irq_map(struct i40e_pf_vf *vf,
-                                       uint8_t *msg, uint16_t msglen)
+                                       uint8_t *msg, uint16_t msglen,
+                                       bool b_op)
 {
        int ret = I40E_SUCCESS;
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf);
        struct i40e_virtchnl_irq_map_info *irqmap =
            (struct i40e_virtchnl_irq_map_info *)msg;
+       struct i40e_virtchnl_vector_map *map;
+       int i;
+       uint16_t vector_id;
+       unsigned long qbit_max;
+
+       if (!b_op) {
+               i40e_pf_host_send_msg_to_vf(
+                       vf,
+                       I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP,
+                       I40E_NOT_SUPPORTED, NULL, 0);
+               return ret;
+       }
 
        if (msg == NULL || msglen < sizeof(struct i40e_virtchnl_irq_map_info)) {
                PMD_DRV_LOG(ERR, "buffer too short");
@@ -551,23 +707,46 @@ i40e_pf_host_process_cmd_config_irq_map(struct i40e_pf_vf *vf,
                goto send_msg;
        }
 
-       /* Assume VF only have 1 vector to bind all queues */
-       if (irqmap->num_vectors != 1) {
-               PMD_DRV_LOG(ERR, "DKDK host only support 1 vector");
-               ret = I40E_ERR_PARAM;
+       /* The PF host supports both the DPDK VF and the Linux VF driver;
+        * they are told apart by the number of vectors requested.
+        */
+
+       /* The DPDK VF only requires a single vector */
+       if (irqmap->num_vectors == 1) {
+               /* This MSIX intr store the intr in VF range */
+               vf->vsi->msix_intr = irqmap->vecmap[0].vector_id;
+               vf->vsi->nb_msix = irqmap->num_vectors;
+               vf->vsi->nb_used_qps = vf->vsi->nb_qps;
+
+               /* We don't care how the TX/RX queues map to this vector:
+                * just link all VF RX queues together. Only the mapping
+                * work is done here; the VF can enable/disable the
+                * interrupt by itself.
+                */
+               i40e_vsi_queues_bind_intr(vf->vsi);
                goto send_msg;
        }
 
-       /* This MSIX intr store the intr in VF range */
-       vf->vsi->msix_intr = irqmap->vecmap[0].vector_id;
-       vf->vsi->nb_msix = irqmap->num_vectors;
-       vf->vsi->nb_used_qps = vf->vsi->nb_qps;
+       /* Otherwise, it is the Linux VF driver */
+       qbit_max = 1 << pf->vf_nb_qp_max;
+       for (i = 0; i < irqmap->num_vectors; i++) {
+               map = &irqmap->vecmap[i];
+
+               vector_id = map->vector_id;
+               /* validate msg params */
+               if (vector_id >= hw->func_caps.num_msix_vectors_vf) {
+                       ret = I40E_ERR_PARAM;
+                       goto send_msg;
+               }
+
+               if ((map->rxq_map < qbit_max) && (map->txq_map < qbit_max)) {
+                       i40e_pf_config_irq_link_list(vf, map);
+               } else {
+                       /* configured queue size exceeds the limit */
+                       ret = I40E_ERR_PARAM;
+                       goto send_msg;
+               }
+       }
 
-       /* Don't care how the TX/RX queue mapping with this vector.
-        * Link all VF RX queues together. Only did mapping work.
-        * VF can disable/enable the intr by itself.
-        */
-       i40e_vsi_queues_bind_intr(vf->vsi);
 send_msg:
        i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP,
                                                        ret, NULL, 0);
@@ -646,12 +825,21 @@ send_msg:
 static int
 i40e_pf_host_process_cmd_disable_queues(struct i40e_pf_vf *vf,
                                        uint8_t *msg,
-                                       uint16_t msglen)
+                                       uint16_t msglen,
+                                       bool b_op)
 {
        int ret = I40E_SUCCESS;
        struct i40e_virtchnl_queue_select *q_sel =
                (struct i40e_virtchnl_queue_select *)msg;
 
+       if (!b_op) {
+               i40e_pf_host_send_msg_to_vf(
+                       vf,
+                       I40E_VIRTCHNL_OP_DISABLE_QUEUES,
+                       I40E_NOT_SUPPORTED, NULL, 0);
+               return ret;
+       }
+
        if (msg == NULL || msglen != sizeof(*q_sel)) {
                ret = I40E_ERR_PARAM;
                goto send_msg;
@@ -669,7 +857,8 @@ send_msg:
 static int
 i40e_pf_host_process_cmd_add_ether_address(struct i40e_pf_vf *vf,
                                           uint8_t *msg,
-                                          uint16_t msglen)
+                                          uint16_t msglen,
+                                          bool b_op)
 {
        int ret = I40E_SUCCESS;
        struct i40e_virtchnl_ether_addr_list *addr_list =
@@ -678,6 +867,14 @@ i40e_pf_host_process_cmd_add_ether_address(struct i40e_pf_vf *vf,
        int i;
        struct ether_addr *mac;
 
+       if (!b_op) {
+               i40e_pf_host_send_msg_to_vf(
+                       vf,
+                       I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS,
+                       I40E_NOT_SUPPORTED, NULL, 0);
+               return ret;
+       }
+
        memset(&filter, 0 , sizeof(struct i40e_mac_filter_info));
 
        if (msg == NULL || msglen <= sizeof(*addr_list)) {
@@ -690,8 +887,8 @@ i40e_pf_host_process_cmd_add_ether_address(struct i40e_pf_vf *vf,
                mac = (struct ether_addr *)(addr_list->list[i].addr);
                (void)rte_memcpy(&filter.mac_addr, mac, ETHER_ADDR_LEN);
                filter.filter_type = RTE_MACVLAN_PERFECT_MATCH;
-               if(!is_valid_assigned_ether_addr(mac) ||
-                       i40e_vsi_add_mac(vf->vsi, &filter)) {
+               if (is_zero_ether_addr(mac) ||
+                   i40e_vsi_add_mac(vf->vsi, &filter)) {
                        ret = I40E_ERR_INVALID_MAC_ADDR;
                        goto send_msg;
                }
@@ -707,7 +904,8 @@ send_msg:
 static int
 i40e_pf_host_process_cmd_del_ether_address(struct i40e_pf_vf *vf,
                                           uint8_t *msg,
-                                          uint16_t msglen)
+                                          uint16_t msglen,
+                                          bool b_op)
 {
        int ret = I40E_SUCCESS;
        struct i40e_virtchnl_ether_addr_list *addr_list =
@@ -715,6 +913,14 @@ i40e_pf_host_process_cmd_del_ether_address(struct i40e_pf_vf *vf,
        int i;
        struct ether_addr *mac;
 
+       if (!b_op) {
+               i40e_pf_host_send_msg_to_vf(
+                       vf,
+                       I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS,
+                       I40E_NOT_SUPPORTED, NULL, 0);
+               return ret;
+       }
+
        if (msg == NULL || msglen <= sizeof(*addr_list)) {
                PMD_DRV_LOG(ERR, "delete_ether_address argument too short");
                ret = I40E_ERR_PARAM;
@@ -723,7 +929,7 @@ i40e_pf_host_process_cmd_del_ether_address(struct i40e_pf_vf *vf,
 
        for (i = 0; i < addr_list->num_elements; i++) {
                mac = (struct ether_addr *)(addr_list->list[i].addr);
-               if(!is_valid_assigned_ether_addr(mac) ||
+               if (is_zero_ether_addr(mac) ||
                        i40e_vsi_delete_mac(vf->vsi, mac)) {
                        ret = I40E_ERR_INVALID_MAC_ADDR;
                        goto send_msg;
@@ -739,7 +945,8 @@ send_msg:
 
 static int
 i40e_pf_host_process_cmd_add_vlan(struct i40e_pf_vf *vf,
-                               uint8_t *msg, uint16_t msglen)
+                               uint8_t *msg, uint16_t msglen,
+                               bool b_op)
 {
        int ret = I40E_SUCCESS;
        struct i40e_virtchnl_vlan_filter_list *vlan_filter_list =
@@ -747,6 +954,14 @@ i40e_pf_host_process_cmd_add_vlan(struct i40e_pf_vf *vf,
        int i;
        uint16_t *vid;
 
+       if (!b_op) {
+               i40e_pf_host_send_msg_to_vf(
+                       vf,
+                       I40E_VIRTCHNL_OP_ADD_VLAN,
+                       I40E_NOT_SUPPORTED, NULL, 0);
+               return ret;
+       }
+
        if (msg == NULL || msglen <= sizeof(*vlan_filter_list)) {
                PMD_DRV_LOG(ERR, "add_vlan argument too short");
                ret = I40E_ERR_PARAM;
@@ -771,7 +986,8 @@ send_msg:
 static int
 i40e_pf_host_process_cmd_del_vlan(struct i40e_pf_vf *vf,
                                  uint8_t *msg,
-                                 uint16_t msglen)
+                                 uint16_t msglen,
+                                 bool b_op)
 {
        int ret = I40E_SUCCESS;
        struct i40e_virtchnl_vlan_filter_list *vlan_filter_list =
@@ -779,6 +995,14 @@ i40e_pf_host_process_cmd_del_vlan(struct i40e_pf_vf *vf,
        int i;
        uint16_t *vid;
 
+       if (!b_op) {
+               i40e_pf_host_send_msg_to_vf(
+                       vf,
+                       I40E_VIRTCHNL_OP_DEL_VLAN,
+                       I40E_NOT_SUPPORTED, NULL, 0);
+               return ret;
+       }
+
        if (msg == NULL || msglen <= sizeof(*vlan_filter_list)) {
                PMD_DRV_LOG(ERR, "delete_vlan argument too short");
                ret = I40E_ERR_PARAM;
@@ -803,7 +1027,8 @@ static int
 i40e_pf_host_process_cmd_config_promisc_mode(
                                        struct i40e_pf_vf *vf,
                                        uint8_t *msg,
-                                       uint16_t msglen)
+                                       uint16_t msglen,
+                                       bool b_op)
 {
        int ret = I40E_SUCCESS;
        struct i40e_virtchnl_promisc_info *promisc =
@@ -811,6 +1036,14 @@ i40e_pf_host_process_cmd_config_promisc_mode(
        struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf);
        bool unicast = FALSE, multicast = FALSE;
 
+       if (!b_op) {
+               i40e_pf_host_send_msg_to_vf(
+                       vf,
+                       I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+                       I40E_NOT_SUPPORTED, NULL, 0);
+               return ret;
+       }
+
        if (msg == NULL || msglen != sizeof(*promisc)) {
                ret = I40E_ERR_PARAM;
                goto send_msg;
@@ -836,39 +1069,43 @@ send_msg:
 }
 
 static int
-i40e_pf_host_process_cmd_get_stats(struct i40e_pf_vf *vf)
+i40e_pf_host_process_cmd_get_stats(struct i40e_pf_vf *vf, bool b_op)
 {
        i40e_update_vsi_stats(vf->vsi);
 
-       i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_STATS,
-               I40E_SUCCESS, (uint8_t *)&vf->vsi->eth_stats,
-                               sizeof(vf->vsi->eth_stats));
+       if (b_op)
+               i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_STATS,
+                                           I40E_SUCCESS,
+                                           (uint8_t *)&vf->vsi->eth_stats,
+                                           sizeof(vf->vsi->eth_stats));
+       else
+               i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_STATS,
+                                           I40E_NOT_SUPPORTED,
+                                           (uint8_t *)&vf->vsi->eth_stats,
+                                           sizeof(vf->vsi->eth_stats));
 
        return I40E_SUCCESS;
 }
 
-static void
-i40e_pf_host_process_cmd_get_link_status(struct i40e_pf_vf *vf)
-{
-       struct rte_eth_dev *dev = I40E_VSI_TO_ETH_DEV(vf->pf->main_vsi);
-
-       /* Update link status first to acquire latest link change */
-       i40e_dev_link_update(dev, 1);
-       i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_LINK_STAT,
-               I40E_SUCCESS, (uint8_t *)&dev->data->dev_link,
-                               sizeof(struct rte_eth_link));
-}
-
 static int
 i40e_pf_host_process_cmd_cfg_vlan_offload(
                                        struct i40e_pf_vf *vf,
                                        uint8_t *msg,
-                                       uint16_t msglen)
+                                       uint16_t msglen,
+                                       bool b_op)
 {
        int ret = I40E_SUCCESS;
        struct i40e_virtchnl_vlan_offload_info *offload =
                        (struct i40e_virtchnl_vlan_offload_info *)msg;
 
+       if (!b_op) {
+               i40e_pf_host_send_msg_to_vf(
+                       vf,
+                       I40E_VIRTCHNL_OP_CFG_VLAN_OFFLOAD,
+                       I40E_NOT_SUPPORTED, NULL, 0);
+               return ret;
+       }
+
        if (msg == NULL || msglen != sizeof(*offload)) {
                ret = I40E_ERR_PARAM;
                goto send_msg;
@@ -889,12 +1126,21 @@ send_msg:
 static int
 i40e_pf_host_process_cmd_cfg_pvid(struct i40e_pf_vf *vf,
                                        uint8_t *msg,
-                                       uint16_t msglen)
+                                       uint16_t msglen,
+                                       bool b_op)
 {
        int ret = I40E_SUCCESS;
        struct i40e_virtchnl_pvid_info  *tpid_info =
                        (struct i40e_virtchnl_pvid_info *)msg;
 
+       if (!b_op) {
+               i40e_pf_host_send_msg_to_vf(
+                       vf,
+                       I40E_VIRTCHNL_OP_CFG_VLAN_PVID,
+                       I40E_NOT_SUPPORTED, NULL, 0);
+               return ret;
+       }
+
        if (msg == NULL || msglen != sizeof(*tpid_info)) {
                ret = I40E_ERR_PARAM;
                goto send_msg;
@@ -909,6 +1155,20 @@ send_msg:
        return ret;
 }
 
+void
+i40e_notify_vf_link_status(struct rte_eth_dev *dev, struct i40e_pf_vf *vf)
+{
+       struct i40e_virtchnl_pf_event event;
+
+       event.event = I40E_VIRTCHNL_EVENT_LINK_CHANGE;
+       event.event_data.link_event.link_status =
+               dev->data->dev_link.link_status;
+       event.event_data.link_event.link_speed =
+               (enum i40e_aq_link_speed)dev->data->dev_link.link_speed;
+       i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_EVENT,
+               I40E_SUCCESS, (uint8_t *)&event, sizeof(event));
+}
+
 void
 i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev,
                           uint16_t abs_vf_id, uint32_t opcode,
@@ -921,6 +1181,8 @@ i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev,
        struct i40e_pf_vf *vf;
        /* AdminQ will pass absolute VF id, transfer to internal vf id */
        uint16_t vf_id = abs_vf_id - hw->func_caps.vf_base_id;
+       struct rte_pmd_i40e_mb_event_param cb_param;
+       bool b_op = TRUE;
 
        if (vf_id > pf->vf_num - 1 || !pf->vfs) {
                PMD_DRV_LOG(ERR, "invalid argument");
@@ -935,10 +1197,35 @@ i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev,
                return;
        }
 
+       /**
+        * Initialise the structure sent to the user application;
+        * the user's response comes back in the retval field.
+        */
+       cb_param.retval = RTE_PMD_I40E_MB_EVENT_PROCEED;
+       cb_param.vfid = vf_id;
+       cb_param.msg_type = opcode;
+       cb_param.msg = (void *)msg;
+       cb_param.msglen = msglen;
+
+       /**
+        * Ask the user application whether we are allowed to perform this
+        * operation. If cb_param.retval == RTE_PMD_I40E_MB_EVENT_PROCEED,
+        * proceed as usual.
+        * If RTE_PMD_I40E_MB_EVENT_NOOP_ACK or RTE_PMD_I40E_MB_EVENT_NOOP_NACK,
+        * do nothing and reply NOT_SUPPORTED to the VF, because the PF must
+        * send a response and plain ACK/NACK replies are not defined.
+        */
+       _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_VF_MBOX, &cb_param);
+       if (cb_param.retval != RTE_PMD_I40E_MB_EVENT_PROCEED) {
+               PMD_DRV_LOG(WARNING, "VF to PF message(%d) is not permitted!",
+                           opcode);
+               b_op = FALSE;
+       }
+
        switch (opcode) {
        case I40E_VIRTCHNL_OP_VERSION :
                PMD_DRV_LOG(INFO, "OP_VERSION received");
-               i40e_pf_host_process_cmd_version(vf);
+               i40e_pf_host_process_cmd_version(vf, b_op);
                break;
        case I40E_VIRTCHNL_OP_RESET_VF :
                PMD_DRV_LOG(INFO, "OP_RESET_VF received");
@@ -946,64 +1233,72 @@ i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev,
                break;
        case I40E_VIRTCHNL_OP_GET_VF_RESOURCES:
                PMD_DRV_LOG(INFO, "OP_GET_VF_RESOURCES received");
-               i40e_pf_host_process_cmd_get_vf_resource(vf);
+               i40e_pf_host_process_cmd_get_vf_resource(vf, b_op);
                break;
        case I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES:
                PMD_DRV_LOG(INFO, "OP_CONFIG_VSI_QUEUES received");
-               i40e_pf_host_process_cmd_config_vsi_queues(vf, msg, msglen);
+               i40e_pf_host_process_cmd_config_vsi_queues(vf, msg,
+                                                          msglen, b_op);
                break;
        case I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT:
                PMD_DRV_LOG(INFO, "OP_CONFIG_VSI_QUEUES_EXT received");
                i40e_pf_host_process_cmd_config_vsi_queues_ext(vf, msg,
-                                                               msglen);
+                                                              msglen, b_op);
                break;
        case I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP:
                PMD_DRV_LOG(INFO, "OP_CONFIG_IRQ_MAP received");
-               i40e_pf_host_process_cmd_config_irq_map(vf, msg, msglen);
+               i40e_pf_host_process_cmd_config_irq_map(vf, msg, msglen, b_op);
                break;
        case I40E_VIRTCHNL_OP_ENABLE_QUEUES:
                PMD_DRV_LOG(INFO, "OP_ENABLE_QUEUES received");
-               i40e_pf_host_process_cmd_enable_queues(vf, msg, msglen);
+               if (b_op) {
+                       i40e_pf_host_process_cmd_enable_queues(vf, msg, msglen);
+                       i40e_notify_vf_link_status(dev, vf);
+               } else {
+                       i40e_pf_host_send_msg_to_vf(
+                               vf, I40E_VIRTCHNL_OP_ENABLE_QUEUES,
+                               I40E_NOT_SUPPORTED, NULL, 0);
+               }
                break;
        case I40E_VIRTCHNL_OP_DISABLE_QUEUES:
                PMD_DRV_LOG(INFO, "OP_DISABLE_QUEUE received");
-               i40e_pf_host_process_cmd_disable_queues(vf, msg, msglen);
+               i40e_pf_host_process_cmd_disable_queues(vf, msg, msglen, b_op);
                break;
        case I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS:
                PMD_DRV_LOG(INFO, "OP_ADD_ETHER_ADDRESS received");
-               i40e_pf_host_process_cmd_add_ether_address(vf, msg, msglen);
+               i40e_pf_host_process_cmd_add_ether_address(vf, msg,
+                                                          msglen, b_op);
                break;
        case I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS:
                PMD_DRV_LOG(INFO, "OP_DEL_ETHER_ADDRESS received");
-               i40e_pf_host_process_cmd_del_ether_address(vf, msg, msglen);
+               i40e_pf_host_process_cmd_del_ether_address(vf, msg,
+                                                          msglen, b_op);
                break;
        case I40E_VIRTCHNL_OP_ADD_VLAN:
                PMD_DRV_LOG(INFO, "OP_ADD_VLAN received");
-               i40e_pf_host_process_cmd_add_vlan(vf, msg, msglen);
+               i40e_pf_host_process_cmd_add_vlan(vf, msg, msglen, b_op);
                break;
        case I40E_VIRTCHNL_OP_DEL_VLAN:
                PMD_DRV_LOG(INFO, "OP_DEL_VLAN received");
-               i40e_pf_host_process_cmd_del_vlan(vf, msg, msglen);
+               i40e_pf_host_process_cmd_del_vlan(vf, msg, msglen, b_op);
                break;
        case I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
                PMD_DRV_LOG(INFO, "OP_CONFIG_PROMISCUOUS_MODE received");
-               i40e_pf_host_process_cmd_config_promisc_mode(vf, msg, msglen);
+               i40e_pf_host_process_cmd_config_promisc_mode(vf, msg,
+                                                            msglen, b_op);
                break;
        case I40E_VIRTCHNL_OP_GET_STATS:
                PMD_DRV_LOG(INFO, "OP_GET_STATS received");
-               i40e_pf_host_process_cmd_get_stats(vf);
-               break;
-       case I40E_VIRTCHNL_OP_GET_LINK_STAT:
-               PMD_DRV_LOG(INFO, "OP_GET_LINK_STAT received");
-               i40e_pf_host_process_cmd_get_link_status(vf);
+               i40e_pf_host_process_cmd_get_stats(vf, b_op);
                break;
        case I40E_VIRTCHNL_OP_CFG_VLAN_OFFLOAD:
                PMD_DRV_LOG(INFO, "OP_CFG_VLAN_OFFLOAD received");
-               i40e_pf_host_process_cmd_cfg_vlan_offload(vf, msg, msglen);
+               i40e_pf_host_process_cmd_cfg_vlan_offload(vf, msg,
+                                                         msglen, b_op);
                break;
        case I40E_VIRTCHNL_OP_CFG_VLAN_PVID:
                PMD_DRV_LOG(INFO, "OP_CFG_VLAN_PVID received");
-               i40e_pf_host_process_cmd_cfg_pvid(vf, msg, msglen);
+               i40e_pf_host_process_cmd_cfg_pvid(vf, msg, msglen, b_op);
                break;
        /* Don't add command supported below, which will
         * return an error code.
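The mailbox handler above now asks the application, via the RTE_ETH_EVENT_VF_MBOX callback, whether each VF request may be serviced. A hedged sketch of what such a callback might look like, assuming the DPDK 17.02 convention that the rte_pmd_i40e_mb_event_param descriptor arrives through the callback's parameter pointer; the callback name and registration point are hypothetical:

    #include <stdio.h>
    #include <rte_ethdev.h>
    #include <rte_pmd_i40e.h>

    /* Hypothetical application callback: log each VF mailbox request and let
     * the PF service it. Returning NOOP_ACK/NOOP_NACK in retval would instead
     * make the PF answer NOT_SUPPORTED. */
    static void
    example_vf_mbox_cb(uint8_t port_id __rte_unused,
                       enum rte_eth_event_type type __rte_unused,
                       void *param)
    {
            struct rte_pmd_i40e_mb_event_param *p = param;

            printf("VF %u sent mailbox opcode %u\n",
                   (unsigned int)p->vfid, (unsigned int)p->msg_type);
            p->retval = RTE_PMD_I40E_MB_EVENT_PROCEED;
    }

    /* Registration, e.g. during port setup:
     *   rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_VF_MBOX,
     *                                 example_vf_mbox_cb, NULL);
     */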
index 9c01829..b4c2287 100644 (file)
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
 
 #define I40E_DPDK_OFFSET  0x100
 
-enum i40e_pf_vfr_state {
-       I40E_PF_VFR_INPROGRESS = 0,
-       I40E_PF_VFR_COMPLETED = 1,
-};
-
 /* DPDK pf driver specific command to VF */
 enum i40e_virtchnl_ops_dpdk {
        /*
         * Keep some gap between Linux PF commands and
         * DPDK PF extended commands.
         */
-       I40E_VIRTCHNL_OP_GET_LINK_STAT = I40E_VIRTCHNL_OP_VERSION +
+       I40E_VIRTCHNL_OP_CFG_VLAN_OFFLOAD = I40E_VIRTCHNL_OP_VERSION +
                                                I40E_DPDK_OFFSET,
-       I40E_VIRTCHNL_OP_CFG_VLAN_OFFLOAD,
        I40E_VIRTCHNL_OP_CFG_VLAN_PVID,
        I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT,
 };
@@ -124,5 +118,7 @@ void i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev,
                                uint8_t *msg, uint16_t msglen);
 int i40e_pf_host_init(struct rte_eth_dev *dev);
 int i40e_pf_host_uninit(struct rte_eth_dev *dev);
+void i40e_notify_vf_link_status(struct rte_eth_dev *dev,
+                               struct i40e_pf_vf *vf);
 
 #endif /* _I40E_PF_H_ */
index 19b431c..608685f 100644 (file)
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,8 @@
 #include <rte_tcp.h>
 #include <rte_sctp.h>
 #include <rte_udp.h>
+#include <rte_ip.h>
+#include <rte_net.h>
 
 #include "i40e_logs.h"
 #include "base/i40e_prototype.h"
                PKT_TX_TCP_SEG |                 \
                PKT_TX_OUTER_IP_CKSUM)
 
+#define I40E_TX_OFFLOAD_MASK (  \
+               PKT_TX_IP_CKSUM |       \
+               PKT_TX_L4_MASK |        \
+               PKT_TX_OUTER_IP_CKSUM | \
+               PKT_TX_TCP_SEG |        \
+               PKT_TX_QINQ_PKT |       \
+               PKT_TX_VLAN_PKT)
+
+#define I40E_TX_OFFLOAD_NOTSUP_MASK \
+               (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK)
+
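I40E_TX_OFFLOAD_NOTSUP_MASK collects every PKT_TX_* bit outside the set the driver supports, which is the usual way a tx_prepare-style check rejects unsupported offload requests. A minimal sketch of such a check (illustration only, not the driver's code):

    #include <rte_mbuf.h>

    /* Illustration only: an mbuf's requested TX offloads are acceptable when
     * no bit outside I40E_TX_OFFLOAD_MASK is set. */
    static inline int
    example_tx_offload_supported(const struct rte_mbuf *m)
    {
            return (m->ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) == 0;
    }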
 static uint16_t i40e_xmit_pkts_simple(void *tx_queue,
                                      struct rte_mbuf **tx_pkts,
                                      uint16_t nb_pkts);
@@ -138,12 +151,21 @@ i40e_rxd_error_to_pkt_flags(uint64_t qword)
        uint64_t error_bits = (qword >> I40E_RXD_QW1_ERROR_SHIFT);
 
 #define I40E_RX_ERR_BITS 0x3f
-       if (likely((error_bits & I40E_RX_ERR_BITS) == 0))
+       if (likely((error_bits & I40E_RX_ERR_BITS) == 0)) {
+               flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
                return flags;
+       }
+
        if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT)))
                flags |= PKT_RX_IP_CKSUM_BAD;
+       else
+               flags |= PKT_RX_IP_CKSUM_GOOD;
+
        if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT)))
                flags |= PKT_RX_L4_CKSUM_BAD;
+       else
+               flags |= PKT_RX_L4_CKSUM_GOOD;
+
        if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT)))
                flags |= PKT_RX_EIP_CKSUM_BAD;
 
@@ -174,569 +196,6 @@ i40e_get_iee15888_flags(struct rte_mbuf *mb, uint64_t qword)
 }
 #endif
 
-/* For each value it means, datasheet of hardware can tell more details
- *
- * @note: fix i40e_dev_supported_ptypes_get() if any change here.
- */
-static inline uint32_t
-i40e_rxd_pkt_type_mapping(uint8_t ptype)
-{
-       static const uint32_t type_table[UINT8_MAX + 1] __rte_cache_aligned = {
-               /* L2 types */
-               /* [0] reserved */
-               [1] = RTE_PTYPE_L2_ETHER,
-               [2] = RTE_PTYPE_L2_ETHER_TIMESYNC,
-               /* [3] - [5] reserved */
-               [6] = RTE_PTYPE_L2_ETHER_LLDP,
-               /* [7] - [10] reserved */
-               [11] = RTE_PTYPE_L2_ETHER_ARP,
-               /* [12] - [21] reserved */
-
-               /* Non tunneled IPv4 */
-               [22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_FRAG,
-               [23] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_NONFRAG,
-               [24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_UDP,
-               /* [25] reserved */
-               [26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_TCP,
-               [27] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_SCTP,
-               [28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_ICMP,
-
-               /* IPv4 --> IPv4 */
-               [29] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_FRAG,
-               [30] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_NONFRAG,
-               [31] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_UDP,
-               /* [32] reserved */
-               [33] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_TCP,
-               [34] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_SCTP,
-               [35] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_ICMP,
-
-               /* IPv4 --> IPv6 */
-               [36] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_FRAG,
-               [37] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_NONFRAG,
-               [38] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_UDP,
-               /* [39] reserved */
-               [40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_TCP,
-               [41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_SCTP,
-               [42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_ICMP,
-
-               /* IPv4 --> GRE/Teredo/VXLAN */
-               [43] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT,
-
-               /* IPv4 --> GRE/Teredo/VXLAN --> IPv4 */
-               [44] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_FRAG,
-               [45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_NONFRAG,
-               [46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_UDP,
-               /* [47] reserved */
-               [48] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_TCP,
-               [49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_SCTP,
-               [50] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_ICMP,
-
-               /* IPv4 --> GRE/Teredo/VXLAN --> IPv6 */
-               [51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_FRAG,
-               [52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_NONFRAG,
-               [53] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_UDP,
-               /* [54] reserved */
-               [55] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_TCP,
-               [56] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_SCTP,
-               [57] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_ICMP,
-
-               /* IPv4 --> GRE/Teredo/VXLAN --> MAC */
-               [58] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER,
-
-               /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */
-               [59] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_FRAG,
-               [60] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_NONFRAG,
-               [61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_UDP,
-               /* [62] reserved */
-               [63] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_TCP,
-               [64] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_SCTP,
-               [65] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_ICMP,
-
-               /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv6 */
-               [66] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_FRAG,
-               [67] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_NONFRAG,
-               [68] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_UDP,
-               /* [69] reserved */
-               [70] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_TCP,
-               [71] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_SCTP,
-               [72] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_ICMP,
-
-               /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN */
-               [73] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN,
-
-               /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv4 */
-               [74] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_FRAG,
-               [75] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_NONFRAG,
-               [76] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_UDP,
-               /* [77] reserved */
-               [78] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_TCP,
-               [79] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_SCTP,
-               [80] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_ICMP,
-
-               /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv6 */
-               [81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_FRAG,
-               [82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_NONFRAG,
-               [83] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_UDP,
-               /* [84] reserved */
-               [85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_TCP,
-               [86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_SCTP,
-               [87] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_ICMP,
-
-               /* Non tunneled IPv6 */
-               [88] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_FRAG,
-               [89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_NONFRAG,
-               [90] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_UDP,
-               /* [91] reserved */
-               [92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_TCP,
-               [93] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_SCTP,
-               [94] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_ICMP,
-
-               /* IPv6 --> IPv4 */
-               [95] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_FRAG,
-               [96] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_NONFRAG,
-               [97] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_UDP,
-               /* [98] reserved */
-               [99] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_TCP,
-               [100] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_SCTP,
-               [101] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_ICMP,
-
-               /* IPv6 --> IPv6 */
-               [102] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_FRAG,
-               [103] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_NONFRAG,
-               [104] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_UDP,
-               /* [105] reserved */
-               [106] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_TCP,
-               [107] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_SCTP,
-               [108] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_IP |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_ICMP,
-
-               /* IPv6 --> GRE/Teredo/VXLAN */
-               [109] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT,
-
-               /* IPv6 --> GRE/Teredo/VXLAN --> IPv4 */
-               [110] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_FRAG,
-               [111] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_NONFRAG,
-               [112] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_UDP,
-               /* [113] reserved */
-               [114] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_TCP,
-               [115] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_SCTP,
-               [116] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_ICMP,
-
-               /* IPv6 --> GRE/Teredo/VXLAN --> IPv6 */
-               [117] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_FRAG,
-               [118] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_NONFRAG,
-               [119] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_UDP,
-               /* [120] reserved */
-               [121] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_TCP,
-               [122] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_SCTP,
-               [123] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_ICMP,
-
-               /* IPv6 --> GRE/Teredo/VXLAN --> MAC */
-               [124] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER,
-
-               /* IPv6 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */
-               [125] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_FRAG,
-               [126] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_NONFRAG,
-               [127] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_UDP,
-               /* [128] reserved */
-               [129] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_TCP,
-               [130] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_SCTP,
-               [131] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_ICMP,
-
-               /* IPv6 --> GRE/Teredo/VXLAN --> MAC --> IPv6 */
-               [132] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_FRAG,
-               [133] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_NONFRAG,
-               [134] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_UDP,
-               /* [135] reserved */
-               [136] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_TCP,
-               [137] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_SCTP,
-               [138] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_ICMP,
-
-               /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN */
-               [139] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN,
-
-               /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv4 */
-               [140] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_FRAG,
-               [141] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_NONFRAG,
-               [142] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_UDP,
-               /* [143] reserved */
-               [144] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_TCP,
-               [145] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_SCTP,
-               [146] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_ICMP,
-
-               /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv6 */
-               [147] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_FRAG,
-               [148] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_NONFRAG,
-               [149] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_UDP,
-               /* [150] reserved */
-               [151] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_TCP,
-               [152] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_SCTP,
-               [153] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_TUNNEL_GRENAT |
-                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
-                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_INNER_L4_ICMP,
-
-               /* L2 NSH packet type */
-               [154] = RTE_PTYPE_L2_ETHER_NSH,
-               [155] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_FRAG,
-               [156] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_NONFRAG,
-               [157] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_UDP,
-               [158] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_TCP,
-               [159] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_SCTP,
-               [160] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_ICMP,
-               [161] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_FRAG,
-               [162] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_NONFRAG,
-               [163] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_UDP,
-               [164] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_TCP,
-               [165] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_SCTP,
-               [166] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-                       RTE_PTYPE_L4_ICMP,
-
-               /* All others reserved */
-       };
-
-       return type_table[ptype];
-}
-
 #define I40E_RX_DESC_EXT_STATUS_FLEXBH_MASK   0x03
 #define I40E_RX_DESC_EXT_STATUS_FLEXBH_FD_ID  0x01
 #define I40E_RX_DESC_EXT_STATUS_FLEXBH_FLEX   0x02
@@ -779,33 +238,65 @@ i40e_rxd_build_fdir(volatile union i40e_rx_desc *rxdp, struct rte_mbuf *mb)
 #endif
        return flags;
 }
+
+static inline void
+i40e_parse_tunneling_params(uint64_t ol_flags,
+                           union i40e_tx_offload tx_offload,
+                           uint32_t *cd_tunneling)
+{
+       /* EIPT: External (outer) IP header type */
+       if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
+               *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
+       else if (ol_flags & PKT_TX_OUTER_IPV4)
+               *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
+       else if (ol_flags & PKT_TX_OUTER_IPV6)
+               *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
+
+       /* EIPLEN: External (outer) IP header length, in DWords */
+       *cd_tunneling |= (tx_offload.outer_l3_len >> 2) <<
+               I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT;
+
+       /* L4TUNT: L4 Tunneling Type */
+       switch (ol_flags & PKT_TX_TUNNEL_MASK) {
+       case PKT_TX_TUNNEL_IPIP:
+               /* for non-UDP/non-GRE (i.e. IP in IP) tunneling, set to 00b */
+               break;
+       case PKT_TX_TUNNEL_VXLAN:
+       case PKT_TX_TUNNEL_GENEVE:
+               *cd_tunneling |= I40E_TXD_CTX_UDP_TUNNELING;
+               break;
+       case PKT_TX_TUNNEL_GRE:
+               *cd_tunneling |= I40E_TXD_CTX_GRE_TUNNELING;
+               break;
+       default:
+               PMD_TX_LOG(ERR, "Tunnel type not supported");
+               return;
+       }
+
+       /* L4TUNLEN: L4 Tunneling Length, in Words
+        *
+        * We depend on the application to set rte_mbuf.l2_len correctly.
+        * For IP in GRE it should be set to the length of the GRE
+        * header;
+        * for MAC in GRE or MAC in UDP it should be set to the length
+        * of the GRE or UDP headers plus the inner MAC up to including
+        * its last Ethertype.
+        */
+       *cd_tunneling |= (tx_offload.l2_len >> 1) <<
+               I40E_TXD_CTX_QW0_NATLEN_SHIFT;
+}
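
A short worked example may help with the bit packing above. This is a hedged sketch, not part of the driver: the header lengths are illustrative values for a VXLAN-in-IPv4 packet, and only fields and macros already used by i40e_parse_tunneling_params() are referenced.

	/* Illustrative inputs: outer IPv4 (20 B, checksummed by HW), then
	 * outer UDP (8 B) + VXLAN (8 B) + inner Ethernet (14 B) = 30 B of "L2". */
	union i40e_tx_offload off = { .data = 0 };
	uint32_t cd = 0;

	off.outer_l3_len = 20;
	off.l2_len = 8 + 8 + 14;

	i40e_parse_tunneling_params(PKT_TX_OUTER_IP_CKSUM | PKT_TX_OUTER_IPV4 |
				    PKT_TX_TUNNEL_VXLAN, off, &cd);
	/* cd now carries: EIPT     = I40E_TX_CTX_EXT_IP_IPV4 (outer IPv4 + csum),
	 *                 EIPLEN   = 20 / 4 = 5 DWords,
	 *                 L4TUNT   = I40E_TXD_CTX_UDP_TUNNELING,
	 *                 L4TUNLEN = 30 / 2 = 15 Words. */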
+
 static inline void
 i40e_txd_enable_checksum(uint64_t ol_flags,
                        uint32_t *td_cmd,
                        uint32_t *td_offset,
-                       union i40e_tx_offload tx_offload,
-                       uint32_t *cd_tunneling)
+                       union i40e_tx_offload tx_offload)
 {
-       /* UDP tunneling packet TX checksum offload */
-       if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
-
+       /* Set MACLEN */
+       if (ol_flags & PKT_TX_TUNNEL_MASK)
                *td_offset |= (tx_offload.outer_l2_len >> 1)
                                << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
-
-               if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
-                       *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
-               else if (ol_flags & PKT_TX_OUTER_IPV4)
-                       *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
-               else if (ol_flags & PKT_TX_OUTER_IPV6)
-                       *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
-
-               /* Now set the ctx descriptor fields */
-               *cd_tunneling |= (tx_offload.outer_l3_len >> 2) <<
-                               I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
-                               (tx_offload.l2_len >> 1) <<
-                               I40E_TXD_CTX_QW0_NATLEN_SHIFT;
-
-       } else
+       else
                *td_offset |= (tx_offload.l2_len >> 1)
                        << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
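
Note that MACLEN is expressed in 2-byte Words and, for tunneled packets, it describes the outer Ethernet header rather than the inner one. A hedged, equivalent one-expression form of the branch above, using the same function arguments, purely for illustration:

	/* e.g. a plain 14-byte Ethernet header yields MACLEN = 14 >> 1 = 7 Words */
	*td_offset |= (((ol_flags & PKT_TX_TUNNEL_MASK) ?
			tx_offload.outer_l2_len : tx_offload.l2_len) >> 1)
			<< I40E_TX_DESC_LENGTH_MACLEN_SHIFT;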
 
@@ -934,15 +425,6 @@ check_rx_burst_bulk_alloc_preconditions(__rte_unused struct i40e_rx_queue *rxq)
                             "rxq->rx_free_thresh=%d",
                             rxq->nb_rx_desc, rxq->rx_free_thresh);
                ret = -EINVAL;
-       } else if (!(rxq->nb_rx_desc < (I40E_MAX_RING_DESC -
-                               RTE_PMD_I40E_RX_MAX_BURST))) {
-               PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
-                            "rxq->nb_rx_desc=%d, "
-                            "I40E_MAX_RING_DESC=%d, "
-                            "RTE_PMD_I40E_RX_MAX_BURST=%d",
-                            rxq->nb_rx_desc, I40E_MAX_RING_DESC,
-                            RTE_PMD_I40E_RX_MAX_BURST);
-               ret = -EINVAL;
        }
 #else
        ret = -EINVAL;
@@ -994,6 +476,8 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
                                        I40E_RXD_QW1_STATUS_SHIFT;
                }
 
+               rte_smp_rmb();
+
                /* Compute how many status bits were set */
                for (j = 0, nb_dd = 0; j < I40E_LOOK_AHEAD; j++)
                        nb_dd += s[j] & (1 << I40E_RX_DESC_STATUS_DD_SHIFT);
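
The rte_smp_rmb() added above orders the status-word reads before any later reads of the descriptor payload (length, ptype, and so on) on weakly ordered CPUs. A hedged, generic sketch of that consume pattern; desc_status() and fill_mbuf_from_desc() are hypothetical placeholders, not driver functions:

	/* 1. read the DD/status word of a descriptor,
	 * 2. rte_smp_rmb() so step 3 cannot be reordered before step 1,
	 * 3. only then read the rest of the descriptor. */
	if (desc_status(&rxdp[j]) & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) {
		rte_smp_rmb();
		fill_mbuf_from_desc(rxep[j].mbuf, &rxdp[j]);	/* hypothetical */
	}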
@@ -1104,7 +588,7 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
 
        /* Update rx tail register */
        rte_wmb();
-       I40E_PCI_REG_WRITE(rxq->qrx_tail, rxq->rx_free_trigger);
+       I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rxq->rx_free_trigger);
 
        rxq->rx_free_trigger =
                (uint16_t)(rxq->rx_free_trigger + rxq->rx_free_thresh);
@@ -1484,7 +968,8 @@ i40e_calc_context_desc(uint64_t flags)
 {
        static uint64_t mask = PKT_TX_OUTER_IP_CKSUM |
                PKT_TX_TCP_SEG |
-               PKT_TX_QINQ_PKT;
+               PKT_TX_QINQ_PKT |
+               PKT_TX_TUNNEL_MASK;
 
 #ifdef RTE_LIBRTE_IEEE1588
        mask |= PKT_TX_IEEE1588_TMST;
@@ -1506,7 +991,7 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
        }
 
        /**
-        * in case of tunneling packet, the outer_l2_len and
+        * in the case of a non-tunneling packet, the outer_l2_len and
         * outer_l3_len must be 0.
         */
        hdr_len = tx_offload.outer_l2_len +
@@ -1623,12 +1108,15 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                /* Always enable CRC offload insertion */
                td_cmd |= I40E_TX_DESC_CMD_ICRC;
 
-               /* Enable checksum offloading */
+               /* Fill in tunneling parameters if necessary */
                cd_tunneling_params = 0;
-               if (ol_flags & I40E_TX_CKSUM_OFFLOAD_MASK) {
-                       i40e_txd_enable_checksum(ol_flags, &td_cmd, &td_offset,
-                               tx_offload, &cd_tunneling_params);
-               }
+               if (ol_flags & PKT_TX_TUNNEL_MASK)
+                       i40e_parse_tunneling_params(ol_flags, tx_offload,
+                                                   &cd_tunneling_params);
+               /* Enable checksum offloading */
+               if (ol_flags & I40E_TX_CKSUM_OFFLOAD_MASK)
+                       i40e_txd_enable_checksum(ol_flags, &td_cmd,
+                                                &td_offset, tx_offload);
 
                if (nb_ctx) {
                        /* Setup TX context descriptor if required */
@@ -1747,7 +1235,7 @@ end_of_tx:
                   (unsigned) txq->port_id, (unsigned) txq->queue_id,
                   (unsigned) tx_id, (unsigned) nb_tx);
 
-       I40E_PCI_REG_WRITE(txq->qtx_tail, tx_id);
+       I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id);
        txq->tx_tail = tx_id;
 
        return nb_tx;
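
For orientation, here is a hedged sketch of how an application would describe a VXLAN-encapsulated TCP packet so that it takes the tunneling and checksum paths above. The helper name and the header lengths are illustrative, not part of the driver; the ol_flags and length fields are standard rte_mbuf API.

	static void
	set_vxlan_tx_offloads(struct rte_mbuf *m)	/* hypothetical helper */
	{
		m->outer_l2_len = 14;			/* outer Ethernet */
		m->outer_l3_len = 20;			/* outer IPv4 */
		m->l2_len = 8 + 8 + 14;			/* UDP + VXLAN + inner Ethernet */
		m->l3_len = 20;				/* inner IPv4 */
		m->l4_len = 20;				/* inner TCP */
		m->ol_flags |= PKT_TX_OUTER_IP_CKSUM | PKT_TX_OUTER_IPV4 |
			       PKT_TX_TUNNEL_VXLAN |
			       PKT_TX_IP_CKSUM | PKT_TX_IPV4 | PKT_TX_TCP_CKSUM;
	}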
@@ -1899,7 +1387,7 @@ tx_xmit_pkts(struct i40e_tx_queue *txq,
 
        /* Update the tx tail register */
        rte_wmb();
-       I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
+       I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, txq->tx_tail);
 
        return nb_pkts;
 }
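
The switch from I40E_PCI_REG_WRITE to the _RELAXED variant here (and in the other burst paths) relies on the explicit rte_wmb() for ordering: the descriptor-ring stores must be visible before the tail (doorbell) write, and once that barrier is in place the register write itself does not need its own fence. A hedged sketch of the pattern as a standalone helper; i40e_tx_doorbell() is hypothetical, not a driver API:

	static inline void
	i40e_tx_doorbell(struct i40e_tx_queue *txq)
	{
		rte_wmb();	/* descriptor writes become visible first */
		I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, txq->tx_tail);
	}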
@@ -1930,6 +1418,63 @@ i40e_xmit_pkts_simple(void *tx_queue,
        return nb_tx;
 }
 
+/*********************************************************************
+ *
+ *  TX prep functions
+ *
+ **********************************************************************/
+uint16_t
+i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+               uint16_t nb_pkts)
+{
+       int i, ret;
+       uint64_t ol_flags;
+       struct rte_mbuf *m;
+
+       for (i = 0; i < nb_pkts; i++) {
+               m = tx_pkts[i];
+               ol_flags = m->ol_flags;
+
+               /**
+                * m->nb_segs is a uint8_t, so nb_segs can never exceed
+                * I40E_TX_MAX_SEG; only the nb_segs > I40E_TX_MAX_MTU_SEG
+                * condition needs to be checked here.
+                */
+               if (!(ol_flags & PKT_TX_TCP_SEG)) {
+                       if (m->nb_segs > I40E_TX_MAX_MTU_SEG) {
+                               rte_errno = -EINVAL;
+                               return i;
+                       }
+               } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) ||
+                               (m->tso_segsz > I40E_MAX_TSO_MSS)) {
+                       /* An MSS outside the range (256B - 9674B) is considered
+                        * malicious
+                        */
+                       rte_errno = -EINVAL;
+                       return i;
+               }
+
+               if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) {
+                       rte_errno = -ENOTSUP;
+                       return i;
+               }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+               ret = rte_validate_tx_offload(m);
+               if (ret != 0) {
+                       rte_errno = ret;
+                       return i;
+               }
+#endif
+               ret = rte_net_intel_cksum_prepare(m);
+               if (ret != 0) {
+                       rte_errno = ret;
+                       return i;
+               }
+       }
+       return i;
+}
+
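
A hedged usage sketch for the new prepare hook: once dev->tx_pkt_prepare points at i40e_prep_pkts (see i40e_set_tx_function below), an application can validate offload requests before transmitting. port_id, queue_id, pkts and nb_pkts are assumed to exist in the caller; dropping the offending mbuf is just one possible policy.

	uint16_t nb_prep, nb_tx;

	nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
	if (nb_prep < nb_pkts) {
		/* pkts[nb_prep] failed validation; rte_errno says why */
		rte_pktmbuf_free(pkts[nb_prep]);
	}
	nb_tx = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);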
 /*
  * Find the VSI the queue belongs to. 'queue_idx' is the queue index
  * the application uses, which assumes sequential numbering. But from the driver's
@@ -2136,7 +1681,9 @@ i40e_dev_supported_ptypes_get(struct rte_eth_dev *dev)
 #ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
            dev->rx_pkt_burst == i40e_recv_pkts_bulk_alloc ||
 #endif
-           dev->rx_pkt_burst == i40e_recv_scattered_pkts)
+           dev->rx_pkt_burst == i40e_recv_scattered_pkts ||
+           dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec ||
+           dev->rx_pkt_burst == i40e_recv_pkts_vec)
                return ptypes;
        return NULL;
 }
@@ -2161,21 +1708,12 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
        uint16_t base, bsf, tc_mapping;
        int use_def_burst_func = 1;
 
-#define TREX_PATCH_LOW_LATENCY
-#ifdef TREX_PATCH_LOW_LATENCY
-    int is_vf = 0;
-#endif
-
        if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF) {
                struct i40e_vf *vf =
                        I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
                vsi = &vf->vsi;
-#ifdef TREX_PATCH_LOW_LATENCY
-        is_vf = 1;
-#endif
-       } else {
+       } else
                vsi = i40e_pf_get_vsi_by_qindex(pf, queue_idx);
-    }
 
        if (vsi == NULL) {
                PMD_DRV_LOG(ERR, "VSI not available or queue "
@@ -2224,8 +1762,19 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
        rxq->rx_deferred_start = rx_conf->rx_deferred_start;
 
        /* Allocate the maximum number of RX ring hardware descriptors. */
-       ring_size = sizeof(union i40e_rx_desc) * I40E_MAX_RING_DESC;
-       ring_size = RTE_ALIGN(ring_size, I40E_DMA_MEM_ALIGN);
+       len = I40E_MAX_RING_DESC;
+
+#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
+       /**
+        * Allocating a little more memory because vectorized/bulk_alloc Rx
+        * Allocate a little more memory because the vectorized/bulk_alloc Rx
+        * functions don't check ring boundaries on every access.
+       len += RTE_PMD_I40E_RX_MAX_BURST;
+#endif
+
+       ring_size = RTE_ALIGN(len * sizeof(union i40e_rx_desc),
+                             I40E_DMA_MEM_ALIGN);
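
Illustrative arithmetic for the allocation above, assuming typical values for this driver (I40E_MAX_RING_DESC = 4096, RTE_PMD_I40E_RX_MAX_BURST = 32, 32-byte Rx descriptors, I40E_DMA_MEM_ALIGN = 4096; verify against the actual defines in this tree):

	len       = 4096 + 32		/* = 4128 descriptor slots        */
	ring_size = ALIGN(4128 * 32)	/* = ALIGN(132096) = 135168 bytes */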
+
        rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
                              ring_size, I40E_RING_BASE_ALIGN, socket_id);
        if (!rz) {
@@ -2280,12 +1829,6 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
                ad->rx_bulk_alloc_allowed = false;
        }
 
-#ifdef TREX_PATCH_LOW_LATENCY
-    if (! is_vf)
-        rxq->dcb_tc =0;
-    else // The entire for below is in the else
-#endif
-
        for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
                if (!(vsi->enabled_tc & (1 << i)))
                        continue;
@@ -2393,25 +1936,12 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
        uint16_t tx_rs_thresh, tx_free_thresh;
        uint16_t i, base, bsf, tc_mapping;
 
-#ifdef TREX_PATCH_LOW_LATENCY
-    u8 low_latency = 0;
-    int is_vf = 1;
-#endif
-    
        if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF) {
                struct i40e_vf *vf =
                        I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
                vsi = &vf->vsi;
-       } else {
+       } else
                vsi = i40e_pf_get_vsi_by_qindex(pf, queue_idx);
-#ifdef TREX_PATCH_LOW_LATENCY
-        if (queue_idx == pf->dev_data->nb_tx_queues-1) {
-            low_latency = 1;
-        }
-        is_vf = 0;
-#endif
-    }
-
 
        if (vsi == NULL) {
                PMD_DRV_LOG(ERR, "VSI is NULL, or queue index (%u) "
@@ -2461,8 +1991,7 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
                return I40E_ERR_PARAM;
        }
        if (tx_free_thresh >= (nb_desc - 3)) {
-               PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the "
-                            "tx_free_thresh must be less than the "
+               PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
                             "number of TX descriptors minus 3. "
                             "(tx_free_thresh=%u port=%d queue=%d)",
                             (unsigned int)tx_free_thresh,
@@ -2567,15 +2096,6 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
        /* Use a simple TX queue without offloads or multi segs if possible */
        i40e_set_tx_function_flag(dev, txq);
 
-#ifdef TREX_PATCH_LOW_LATENCY
-    if (! is_vf) {
-        if (low_latency) {
-            txq->dcb_tc=1;
-        }else{
-            txq->dcb_tc=0;
-        }
-    } else // The entire for below is in the else
-#endif
        for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
                if (!(vsi->enabled_tc & (1 << i)))
                        continue;
@@ -2985,11 +2505,15 @@ i40e_dev_clear_queues(struct rte_eth_dev *dev)
        PMD_INIT_FUNC_TRACE();
 
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
+               if (!dev->data->tx_queues[i])
+                       continue;
                i40e_tx_queue_release_mbufs(dev->data->tx_queues[i]);
                i40e_reset_tx_queue(dev->data->tx_queues[i]);
        }
 
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
+               if (!dev->data->rx_queues[i])
+                       continue;
                i40e_rx_queue_release_mbufs(dev->data->rx_queues[i]);
                i40e_reset_rx_queue(dev->data->rx_queues[i]);
        }
@@ -3003,12 +2527,16 @@ i40e_dev_free_queues(struct rte_eth_dev *dev)
        PMD_INIT_FUNC_TRACE();
 
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
+               if (!dev->data->rx_queues[i])
+                       continue;
                i40e_dev_rx_queue_release(dev->data->rx_queues[i]);
                dev->data->rx_queues[i] = NULL;
        }
        dev->data->nb_rx_queues = 0;
 
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
+               if (!dev->data->tx_queues[i])
+                       continue;
                i40e_dev_tx_queue_release(dev->data->tx_queues[i]);
                dev->data->tx_queues[i] = NULL;
        }
@@ -3191,7 +2719,7 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
                                struct i40e_rx_queue *rxq =
                                        dev->data->rx_queues[i];
 
-                               if (i40e_rxq_vec_setup(rxq)) {
+                               if (rxq && i40e_rxq_vec_setup(rxq)) {
                                        ad->rx_vec_allowed = false;
                                        break;
                                }
@@ -3253,7 +2781,8 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
                for (i = 0; i < dev->data->nb_rx_queues; i++) {
                        struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
 
-                       rxq->rx_using_sse = rx_using_sse;
+                       if (rxq)
+                               rxq->rx_using_sse = rx_using_sse;
                }
        }
 }
@@ -3292,7 +2821,7 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
                                struct i40e_tx_queue *txq =
                                        dev->data->tx_queues[i];
 
-                               if (i40e_txq_vec_setup(txq)) {
+                               if (txq && i40e_txq_vec_setup(txq)) {
                                        ad->tx_vec_allowed = false;
                                        break;
                                }
@@ -3308,9 +2837,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
                        PMD_INIT_LOG(DEBUG, "Simple tx path will be used.");
                        dev->tx_pkt_burst = i40e_xmit_pkts_simple;
                }
+               dev->tx_pkt_prepare = NULL;
        } else {
                PMD_INIT_LOG(DEBUG, "Xmit tx path will be used.");
                dev->tx_pkt_burst = i40e_xmit_pkts;
+               dev->tx_pkt_prepare = i40e_prep_pkts;
        }
 }
 
index 98179f0..9df8a56 100644 (file)
 #define        I40E_MIN_RING_DESC      64
 #define        I40E_MAX_RING_DESC      4096
 
+#define I40E_MIN_TSO_MSS          256
+#define I40E_MAX_TSO_MSS          9674
+
+#define I40E_TX_MAX_SEG     UINT8_MAX
+#define I40E_TX_MAX_MTU_SEG 8
+
 #undef container_of
 #define container_of(ptr, type, member) ({ \
                typeof(((type *)0)->member)(*__mptr) = (ptr); \
@@ -223,6 +229,8 @@ uint16_t i40e_recv_scattered_pkts(void *rx_queue,
 uint16_t i40e_xmit_pkts(void *tx_queue,
                        struct rte_mbuf **tx_pkts,
                        uint16_t nb_pkts);
+uint16_t i40e_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+               uint16_t nb_pkts);
 int i40e_tx_queue_init(struct i40e_tx_queue *txq);
 int i40e_rx_queue_init(struct i40e_rx_queue *rxq);
 void i40e_free_tx_resources(struct i40e_tx_queue *txq);
@@ -255,4 +263,567 @@ void i40e_set_tx_function_flag(struct rte_eth_dev *dev,
                               struct i40e_tx_queue *txq);
 void i40e_set_tx_function(struct rte_eth_dev *dev);
 
+/* The hardware datasheet describes the meaning of each value in more detail.
+ *
+ * @note: update i40e_dev_supported_ptypes_get() if anything changes here.
+ */
+static inline uint32_t
+i40e_rxd_pkt_type_mapping(uint8_t ptype)
+{
+       static const uint32_t type_table[UINT8_MAX + 1] __rte_cache_aligned = {
+               /* L2 types */
+               /* [0] reserved */
+               [1] = RTE_PTYPE_L2_ETHER,
+               [2] = RTE_PTYPE_L2_ETHER_TIMESYNC,
+               /* [3] - [5] reserved */
+               [6] = RTE_PTYPE_L2_ETHER_LLDP,
+               /* [7] - [10] reserved */
+               [11] = RTE_PTYPE_L2_ETHER_ARP,
+               /* [12] - [21] reserved */
+
+               /* Non tunneled IPv4 */
+               [22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_FRAG,
+               [23] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_NONFRAG,
+               [24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               /* [25] reserved */
+               [26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_TCP,
+               [27] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_SCTP,
+               [28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_ICMP,
+
+               /* IPv4 --> IPv4 */
+               [29] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_FRAG,
+               [30] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_NONFRAG,
+               [31] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_UDP,
+               /* [32] reserved */
+               [33] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_TCP,
+               [34] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_SCTP,
+               [35] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_ICMP,
+
+               /* IPv4 --> IPv6 */
+               [36] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_FRAG,
+               [37] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_NONFRAG,
+               [38] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_UDP,
+               /* [39] reserved */
+               [40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_TCP,
+               [41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_SCTP,
+               [42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_ICMP,
+
+               /* IPv4 --> GRE/Teredo/VXLAN */
+               [43] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT,
+
+               /* IPv4 --> GRE/Teredo/VXLAN --> IPv4 */
+               [44] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_FRAG,
+               [45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_NONFRAG,
+               [46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_UDP,
+               /* [47] reserved */
+               [48] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_TCP,
+               [49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_SCTP,
+               [50] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_ICMP,
+
+               /* IPv4 --> GRE/Teredo/VXLAN --> IPv6 */
+               [51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_FRAG,
+               [52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_NONFRAG,
+               [53] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_UDP,
+               /* [54] reserved */
+               [55] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_TCP,
+               [56] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_SCTP,
+               [57] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_ICMP,
+
+               /* IPv4 --> GRE/Teredo/VXLAN --> MAC */
+               [58] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER,
+
+               /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */
+               [59] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_FRAG,
+               [60] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_NONFRAG,
+               [61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_UDP,
+               /* [62] reserved */
+               [63] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_TCP,
+               [64] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_SCTP,
+               [65] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_ICMP,
+
+               /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv6 */
+               [66] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_FRAG,
+               [67] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_NONFRAG,
+               [68] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_UDP,
+               /* [69] reserved */
+               [70] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_TCP,
+               [71] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_SCTP,
+               [72] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_ICMP,
+
+               /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN */
+               [73] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN,
+
+               /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv4 */
+               [74] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_FRAG,
+               [75] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_NONFRAG,
+               [76] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_UDP,
+               /* [77] reserved */
+               [78] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_TCP,
+               [79] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_SCTP,
+               [80] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_ICMP,
+
+               /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv6 */
+               [81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_FRAG,
+               [82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_NONFRAG,
+               [83] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_UDP,
+               /* [84] reserved */
+               [85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_TCP,
+               [86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_SCTP,
+               [87] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_ICMP,
+
+               /* Non tunneled IPv6 */
+               [88] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_FRAG,
+               [89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_NONFRAG,
+               [90] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               /* [91] reserved */
+               [92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_TCP,
+               [93] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_SCTP,
+               [94] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_ICMP,
+
+               /* IPv6 --> IPv4 */
+               [95] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_FRAG,
+               [96] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_NONFRAG,
+               [97] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_UDP,
+               /* [98] reserved */
+               [99] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_TCP,
+               [100] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_SCTP,
+               [101] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_ICMP,
+
+               /* IPv6 --> IPv6 */
+               [102] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_FRAG,
+               [103] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_NONFRAG,
+               [104] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_UDP,
+               /* [105] reserved */
+               [106] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_TCP,
+               [107] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_SCTP,
+               [108] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_IP |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_ICMP,
+
+               /* IPv6 --> GRE/Teredo/VXLAN */
+               [109] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT,
+
+               /* IPv6 --> GRE/Teredo/VXLAN --> IPv4 */
+               [110] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_FRAG,
+               [111] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_NONFRAG,
+               [112] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_UDP,
+               /* [113] reserved */
+               [114] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_TCP,
+               [115] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_SCTP,
+               [116] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_ICMP,
+
+               /* IPv6 --> GRE/Teredo/VXLAN --> IPv6 */
+               [117] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_FRAG,
+               [118] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_NONFRAG,
+               [119] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_UDP,
+               /* [120] reserved */
+               [121] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_TCP,
+               [122] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_SCTP,
+               [123] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_ICMP,
+
+               /* IPv6 --> GRE/Teredo/VXLAN --> MAC */
+               [124] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER,
+
+               /* IPv6 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */
+               [125] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_FRAG,
+               [126] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_NONFRAG,
+               [127] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_UDP,
+               /* [128] reserved */
+               [129] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_TCP,
+               [130] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_SCTP,
+               [131] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_ICMP,
+
+               /* IPv6 --> GRE/Teredo/VXLAN --> MAC --> IPv6 */
+               [132] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_FRAG,
+               [133] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_NONFRAG,
+               [134] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_UDP,
+               /* [135] reserved */
+               [136] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_TCP,
+               [137] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_SCTP,
+               [138] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_ICMP,
+
+               /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN */
+               [139] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN,
+
+               /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv4 */
+               [140] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_FRAG,
+               [141] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_NONFRAG,
+               [142] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_UDP,
+               /* [143] reserved */
+               [144] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_TCP,
+               [145] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_SCTP,
+               [146] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_ICMP,
+
+               /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv6 */
+               [147] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_FRAG,
+               [148] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_NONFRAG,
+               [149] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_UDP,
+               /* [150] reserved */
+               [151] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_TCP,
+               [152] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_SCTP,
+               [153] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_TUNNEL_GRENAT |
+                       RTE_PTYPE_INNER_L2_ETHER_VLAN |
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_INNER_L4_ICMP,
+
+               /* L2 NSH packet type */
+               [154] = RTE_PTYPE_L2_ETHER_NSH,
+               [155] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_FRAG,
+               [156] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_NONFRAG,
+               [157] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [158] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_TCP,
+               [159] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_SCTP,
+               [160] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_ICMP,
+               [161] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_FRAG,
+               [162] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_NONFRAG,
+               [163] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_UDP,
+               [164] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_TCP,
+               [165] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_SCTP,
+               [166] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                       RTE_PTYPE_L4_ICMP,
+
+               /* All others reserved */
+       };
+
+       return type_table[ptype];
+}
+
 #endif /* _I40E_RXTX_H_ */
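
The table above is the value i40e_rxd_pkt_type_mapping() writes into mbuf->packet_type for each hardware ptype index. A minimal, hypothetical usage sketch (not taken from this patch; the helper name is illustrative and only the RTE_PTYPE_* macros already referenced in the table are assumed):

#include <rte_mbuf.h>

/* Hypothetical helper: true when the outer header is a GRE/Teredo/VXLAN
 * tunnel and the inner L4 protocol is TCP, per the mapping above. */
static inline int
is_tcp_inside_grenat(const struct rte_mbuf *m)
{
        uint32_t pt = m->packet_type;

        return (pt & RTE_PTYPE_TUNNEL_MASK) == RTE_PTYPE_TUNNEL_GRENAT &&
               (pt & RTE_PTYPE_INNER_L4_MASK) == RTE_PTYPE_INNER_L4_TCP;
}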
diff --git a/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_common.h b/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_common.h
new file mode 100644 (file)
index 0000000..3745558
--- /dev/null
@@ -0,0 +1,251 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _I40E_RXTX_VEC_COMMON_H_
+#define _I40E_RXTX_VEC_COMMON_H_
+#include <stdint.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+
+#include "i40e_ethdev.h"
+#include "i40e_rxtx.h"
+
+static inline uint16_t
+reassemble_packets(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_bufs,
+                  uint16_t nb_bufs, uint8_t *split_flags)
+{
+       struct rte_mbuf *pkts[RTE_I40E_VPMD_RX_BURST]; /*finished pkts*/
+       struct rte_mbuf *start = rxq->pkt_first_seg;
+       struct rte_mbuf *end =  rxq->pkt_last_seg;
+       unsigned pkt_idx, buf_idx;
+
+       for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {
+               if (end != NULL) {
+                       /* processing a split packet */
+                       end->next = rx_bufs[buf_idx];
+                       rx_bufs[buf_idx]->data_len += rxq->crc_len;
+
+                       start->nb_segs++;
+                       start->pkt_len += rx_bufs[buf_idx]->data_len;
+                       end = end->next;
+
+                       if (!split_flags[buf_idx]) {
+                               /* it's the last packet of the set */
+                               start->hash = end->hash;
+                               start->ol_flags = end->ol_flags;
+                               /* we need to strip crc for the whole packet */
+                               start->pkt_len -= rxq->crc_len;
+                               if (end->data_len > rxq->crc_len)
+                                       end->data_len -= rxq->crc_len;
+                               else {
+                                       /* free up last mbuf */
+                                       struct rte_mbuf *secondlast = start;
+
+                                       start->nb_segs--;
+                                       while (secondlast->next != end)
+                                               secondlast = secondlast->next;
+                                       secondlast->data_len -= (rxq->crc_len -
+                                                       end->data_len);
+                                       secondlast->next = NULL;
+                                       rte_pktmbuf_free_seg(end);
+                               }
+                               pkts[pkt_idx++] = start;
+                               start = end = NULL;
+                       }
+               } else {
+                       /* not processing a split packet */
+                       if (!split_flags[buf_idx]) {
+                               /* not a split packet, save and skip */
+                               pkts[pkt_idx++] = rx_bufs[buf_idx];
+                               continue;
+                       }
+                       end = start = rx_bufs[buf_idx];
+                       rx_bufs[buf_idx]->data_len += rxq->crc_len;
+                       rx_bufs[buf_idx]->pkt_len += rxq->crc_len;
+               }
+       }
+
+       /* save the partial packet for next time */
+       rxq->pkt_first_seg = start;
+       rxq->pkt_last_seg = end;
+       memcpy(rx_bufs, pkts, pkt_idx * (sizeof(*pkts)));
+       return pkt_idx;
+}
+
+static inline int __attribute__((always_inline))
+i40e_tx_free_bufs(struct i40e_tx_queue *txq)
+{
+       struct i40e_tx_entry *txep;
+       uint32_t n;
+       uint32_t i;
+       int nb_free = 0;
+       struct rte_mbuf *m, *free[RTE_I40E_TX_MAX_FREE_BUF_SZ];
+
+       /* check DD bits on threshold descriptor */
+       if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
+                       rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
+                       rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
+               return 0;
+
+       n = txq->tx_rs_thresh;
+
+        /* first buffer to free from S/W ring is at index
+         * tx_next_dd - (tx_rs_thresh-1)
+         */
+       txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+       m = __rte_pktmbuf_prefree_seg(txep[0].mbuf);
+       if (likely(m != NULL)) {
+               free[0] = m;
+               nb_free = 1;
+               for (i = 1; i < n; i++) {
+                       m = __rte_pktmbuf_prefree_seg(txep[i].mbuf);
+                       if (likely(m != NULL)) {
+                               if (likely(m->pool == free[0]->pool)) {
+                                       free[nb_free++] = m;
+                               } else {
+                                       rte_mempool_put_bulk(free[0]->pool,
+                                                            (void *)free,
+                                                            nb_free);
+                                       free[0] = m;
+                                       nb_free = 1;
+                               }
+                       }
+               }
+               rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
+       } else {
+               for (i = 1; i < n; i++) {
+                       m = __rte_pktmbuf_prefree_seg(txep[i].mbuf);
+                       if (m != NULL)
+                               rte_mempool_put(m->pool, m);
+               }
+       }
+
+       /* buffers were freed, update counters */
+       txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+       txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+       if (txq->tx_next_dd >= txq->nb_tx_desc)
+               txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+
+       return txq->tx_rs_thresh;
+}
+
+static inline void __attribute__((always_inline))
+tx_backlog_entry(struct i40e_tx_entry *txep,
+                struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+       int i;
+
+       for (i = 0; i < (int)nb_pkts; ++i)
+               txep[i].mbuf = tx_pkts[i];
+}
+
+static inline void
+_i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+{
+       const unsigned mask = rxq->nb_rx_desc - 1;
+       unsigned i;
+
+       if (rxq->sw_ring == NULL || rxq->rxrearm_nb >= rxq->nb_rx_desc)
+               return;
+
+       /* free all mbufs that are valid in the ring */
+       if (rxq->rxrearm_nb == 0) {
+               for (i = 0; i < rxq->nb_rx_desc; i++) {
+                       if (rxq->sw_ring[i].mbuf != NULL)
+                               rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
+               }
+       } else {
+               for (i = rxq->rx_tail;
+                    i != rxq->rxrearm_start;
+                    i = (i + 1) & mask) {
+                       if (rxq->sw_ring[i].mbuf != NULL)
+                               rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
+               }
+       }
+
+       rxq->rxrearm_nb = rxq->nb_rx_desc;
+
+       /* set all entries to NULL */
+       memset(rxq->sw_ring, 0, sizeof(rxq->sw_ring[0]) * rxq->nb_rx_desc);
+}
+
+static inline int
+i40e_rxq_vec_setup_default(struct i40e_rx_queue *rxq)
+{
+       uintptr_t p;
+       struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */
+
+       mb_def.nb_segs = 1;
+       mb_def.data_off = RTE_PKTMBUF_HEADROOM;
+       mb_def.port = rxq->port_id;
+       rte_mbuf_refcnt_set(&mb_def, 1);
+
+       /* prevent compiler reordering: rearm_data covers previous fields */
+       rte_compiler_barrier();
+       p = (uintptr_t)&mb_def.rearm_data;
+       rxq->mbuf_initializer = *(uint64_t *)p;
+       return 0;
+}
+
+static inline int
+i40e_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
+{
+#ifndef RTE_LIBRTE_IEEE1588
+       struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
+       struct rte_fdir_conf *fconf = &dev->data->dev_conf.fdir_conf;
+
+#ifndef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE
+       /* without rx ol_flags, no VLAN/checksum flag report */
+       if (rxmode->hw_vlan_strip != 0 ||
+           rxmode->hw_vlan_extend != 0 ||
+           rxmode->hw_ip_checksum != 0)
+               return -1;
+#endif
+
+       /* no fdir support */
+       if (fconf->mode != RTE_FDIR_MODE_NONE)
+               return -1;
+
+        /* - no csum error report support
+        * - no header split support
+        */
+       if (rxmode->header_split == 1)
+               return -1;
+
+       return 0;
+#else
+       RTE_SET_USED(dev);
+       return -1;
+#endif
+}
+#endif
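
A hypothetical sketch (not part of the patch) of how a PMD setup path could consume the two defaults above; the wrapper name and the scalar fallback wiring are illustrative only:

/* Illustrative only: pick the vector RX burst function when the device
 * configuration allows it, otherwise fall back to the scalar routine. */
static int
select_rx_burst(struct rte_eth_dev *dev, struct i40e_rx_queue *rxq)
{
        if (i40e_rx_vec_dev_conf_condition_check_default(dev) == 0 &&
            i40e_rxq_vec_setup_default(rxq) == 0)
                dev->rx_pkt_burst = i40e_recv_pkts_vec;  /* vector path */
        else
                dev->rx_pkt_burst = i40e_recv_pkts;      /* scalar path */
        return 0;
}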
diff --git a/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_neon.c b/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_neon.c
new file mode 100644 (file)
index 0000000..011c54e
--- /dev/null
@@ -0,0 +1,614 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2016, Linaro Limited
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+
+#include "base/i40e_prototype.h"
+#include "base/i40e_type.h"
+#include "i40e_ethdev.h"
+#include "i40e_rxtx.h"
+#include "i40e_rxtx_vec_common.h"
+
+#include <arm_neon.h>
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
+static inline void
+i40e_rxq_rearm(struct i40e_rx_queue *rxq)
+{
+       int i;
+       uint16_t rx_id;
+       volatile union i40e_rx_desc *rxdp;
+       struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+       struct rte_mbuf *mb0, *mb1;
+       uint64x2_t dma_addr0, dma_addr1;
+       uint64x2_t zero = vdupq_n_u64(0);
+       uint64_t paddr;
+       uint8x8_t p;
+
+       rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+       /* Pull 'n' more MBUFs into the software ring */
+       if (unlikely(rte_mempool_get_bulk(rxq->mp,
+                                         (void *)rxep,
+                                         RTE_I40E_RXQ_REARM_THRESH) < 0)) {
+               if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+                   rxq->nb_rx_desc) {
+                       for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+                               rxep[i].mbuf = &rxq->fake_mbuf;
+                               vst1q_u64((uint64_t *)&rxdp[i].read, zero);
+                       }
+               }
+               rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+                       RTE_I40E_RXQ_REARM_THRESH;
+               return;
+       }
+
+       p = vld1_u8((uint8_t *)&rxq->mbuf_initializer);
+
+       /* Initialize the mbufs in vector, process 2 mbufs in one loop */
+       for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+               mb0 = rxep[0].mbuf;
+               mb1 = rxep[1].mbuf;
+
+               /* Flush mbuf with pkt template.
+                * Data to be rearmed is 6 bytes long.
+                * RX will overwrite ol_flags, which come right after, anyway,
+                * so overwrite the whole 8 bytes with one store:
+                * 6 bytes of rearm_data plus the first 2 bytes of ol_flags.
+                */
+               vst1_u8((uint8_t *)&mb0->rearm_data, p);
+               paddr = mb0->buf_physaddr + RTE_PKTMBUF_HEADROOM;
+               dma_addr0 = vdupq_n_u64(paddr);
+
+               /* flush desc with pa dma_addr */
+               vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);
+
+               vst1_u8((uint8_t *)&mb1->rearm_data, p);
+               paddr = mb1->buf_physaddr + RTE_PKTMBUF_HEADROOM;
+               dma_addr1 = vdupq_n_u64(paddr);
+               vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1);
+       }
+
+       rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
+       if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+               rxq->rxrearm_start = 0;
+
+       rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
+
+       rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+                            (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+       /* Update the tail pointer on the NIC */
+       I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+}
+
+/* Handling the offload flags (olflags) field takes computation
+ * time when receiving packets. Therefore we provide a flag to disable
+ * the processing of the olflags field when they are not needed. This
+ * gives improved performance, at the cost of losing the offload info
+ * in the received packet
+ */
+#ifdef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE
+
+static inline void
+desc_to_olflags_v(uint64x2_t descs[4], struct rte_mbuf **rx_pkts)
+{
+       uint32x4_t vlan0, vlan1, rss, l3_l4e;
+
+       /* mask everything except RSS, flow director and VLAN flags
+        * bit2 is for VLAN tag, bit11 for flow director indication
+        * bit13:12 for RSS indication.
+        */
+       const uint32x4_t rss_vlan_msk = {
+                       0x1c03804, 0x1c03804, 0x1c03804, 0x1c03804};
+
+       /* map rss and vlan type to rss hash and vlan flag */
+       const uint8x16_t vlan_flags = {
+                       0, 0, 0, 0,
+                       PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED, 0, 0, 0,
+                       0, 0, 0, 0,
+                       0, 0, 0, 0};
+
+       const uint8x16_t rss_flags = {
+                       0, PKT_RX_FDIR, 0, 0,
+                       0, 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH | PKT_RX_FDIR,
+                       0, 0, 0, 0,
+                       0, 0, 0, 0};
+
+       const uint8x16_t l3_l4e_flags = {
+                       0,
+                       PKT_RX_IP_CKSUM_BAD,
+                       PKT_RX_L4_CKSUM_BAD,
+                       PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
+                       PKT_RX_EIP_CKSUM_BAD,
+                       PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
+                       PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
+                       PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
+                       0, 0, 0, 0, 0, 0, 0, 0};
+
+       vlan0 = vzipq_u32(vreinterpretq_u32_u64(descs[0]),
+                         vreinterpretq_u32_u64(descs[2])).val[1];
+       vlan1 = vzipq_u32(vreinterpretq_u32_u64(descs[1]),
+                         vreinterpretq_u32_u64(descs[3])).val[1];
+       vlan0 = vzipq_u32(vlan0, vlan1).val[0];
+
+       vlan1 = vandq_u32(vlan0, rss_vlan_msk);
+       vlan0 = vreinterpretq_u32_u8(vqtbl1q_u8(vlan_flags,
+                                               vreinterpretq_u8_u32(vlan1)));
+
+       rss = vshrq_n_u32(vlan1, 11);
+       rss = vreinterpretq_u32_u8(vqtbl1q_u8(rss_flags,
+                                             vreinterpretq_u8_u32(rss)));
+
+       l3_l4e = vshrq_n_u32(vlan1, 22);
+       l3_l4e = vreinterpretq_u32_u8(vqtbl1q_u8(l3_l4e_flags,
+                                             vreinterpretq_u8_u32(l3_l4e)));
+
+
+       vlan0 = vorrq_u32(vlan0, rss);
+       vlan0 = vorrq_u32(vlan0, l3_l4e);
+
+       rx_pkts[0]->ol_flags = vgetq_lane_u32(vlan0, 0);
+       rx_pkts[1]->ol_flags = vgetq_lane_u32(vlan0, 1);
+       rx_pkts[2]->ol_flags = vgetq_lane_u32(vlan0, 2);
+       rx_pkts[3]->ol_flags = vgetq_lane_u32(vlan0, 3);
+}
+#else
+#define desc_to_olflags_v(descs, rx_pkts) do {} while (0)
+#endif
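
For readability, the three vqtbl1q_u8() lookups above amount to the scalar table lookups below, applied to one masked status/error word per packet (a sketch, not part of the patch; it assumes the bit layout described in the rss_vlan_msk comment):

/* Scalar equivalent of desc_to_olflags_v() for a single descriptor's
 * status/error word; illustrative only. */
static inline uint64_t
olflags_from_status(uint32_t stat_err)
{
        static const uint64_t vlan_tbl[16] = {
                [4] = PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED,
        };
        static const uint64_t rss_tbl[16] = {
                [1] = PKT_RX_FDIR,
                [6] = PKT_RX_RSS_HASH,
                [7] = PKT_RX_RSS_HASH | PKT_RX_FDIR,
        };
        static const uint64_t l3_l4e_tbl[16] = {
                [1] = PKT_RX_IP_CKSUM_BAD,
                [2] = PKT_RX_L4_CKSUM_BAD,
                [3] = PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
                [4] = PKT_RX_EIP_CKSUM_BAD,
                [5] = PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
                [6] = PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
                [7] = PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
                      PKT_RX_IP_CKSUM_BAD,
        };
        uint32_t masked = stat_err & 0x1c03804;   /* same mask as rss_vlan_msk */

        return vlan_tbl[masked & 0xf] |           /* bit 2: VLAN tag present  */
               rss_tbl[(masked >> 11) & 0xf] |    /* bits 13:12, 11: RSS/FDIR */
               l3_l4e_tbl[(masked >> 22) & 0xf];  /* bits 24:22: cksum errors */
}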
+
+#define PKTLEN_SHIFT     10
+
+#define I40E_VPMD_DESC_DD_MASK 0x0001000100010001ULL
+
+static inline void
+desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **rx_pkts)
+{
+       int i;
+       uint8_t ptype;
+       uint8x16_t tmp;
+
+       for (i = 0; i < 4; i++) {
+               tmp = vreinterpretq_u8_u64(vshrq_n_u64(descs[i], 30));
+               ptype = vgetq_lane_u8(tmp, 8);
+               rx_pkts[i]->packet_type = i40e_rxd_pkt_type_mapping(ptype);
+       }
+
+}
+
+ /*
+ * Notice:
+ * - if nb_pkts < RTE_I40E_DESCS_PER_LOOP, no packets are returned
+ * - if nb_pkts > RTE_I40E_VPMD_RX_BURST, only RTE_I40E_VPMD_RX_BURST
+ *   descriptors' DD bits are scanned
+ */
+static inline uint16_t
+_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+                  uint16_t nb_pkts, uint8_t *split_packet)
+{
+       volatile union i40e_rx_desc *rxdp;
+       struct i40e_rx_entry *sw_ring;
+       uint16_t nb_pkts_recd;
+       int pos;
+       uint64_t var;
+
+       /* mask to shuffle from desc. to mbuf */
+       uint8x16_t shuf_msk = {
+               0xFF, 0xFF,   /* pkt_type set as unknown */
+               0xFF, 0xFF,   /* pkt_type set as unknown */
+               14, 15,       /* octet 15~14, low 16 bits pkt_len */
+               0xFF, 0xFF,   /* skip high 16 bits pkt_len, zero out */
+               14, 15,       /* octet 15~14, 16 bits data_len */
+               2, 3,         /* octet 2~3, low 16 bits vlan_macip */
+               4, 5, 6, 7    /* octet 4~7, 32bits rss */
+               };
+
+       uint8x16_t eop_check = {
+               0x02, 0x00, 0x02, 0x00,
+               0x02, 0x00, 0x02, 0x00,
+               0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00
+               };
+
+       uint16x8_t crc_adjust = {
+               0, 0,         /* ignore pkt_type field */
+               rxq->crc_len, /* sub crc on pkt_len */
+               0,            /* ignore high-16bits of pkt_len */
+               rxq->crc_len, /* sub crc on data_len */
+               0, 0, 0       /* ignore non-length fields */
+               };
+
+       /* nb_pkts shall be less than or equal to RTE_I40E_MAX_RX_BURST */
+       nb_pkts = RTE_MIN(nb_pkts, RTE_I40E_MAX_RX_BURST);
+
+       /* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */
+       nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP);
+
+       /* Just the act of getting into the function from the application is
+        * going to cost about 7 cycles
+        */
+       rxdp = rxq->rx_ring + rxq->rx_tail;
+
+       rte_prefetch_non_temporal(rxdp);
+
+       /* See if we need to rearm the RX queue - gives the prefetch a bit
+        * of time to act
+        */
+       if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
+               i40e_rxq_rearm(rxq);
+
+       /* Before we start moving massive data around, check to see if
+        * there is actually a packet available
+        */
+       if (!(rxdp->wb.qword1.status_error_len &
+                       rte_cpu_to_le_32(1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
+               return 0;
+
+       /* Cache is empty -> need to scan the buffer rings, but first move
+        * the next 'n' mbufs into the cache
+        */
+       sw_ring = &rxq->sw_ring[rxq->rx_tail];
+
+       /* A. load 4 packet in one loop
+        * [A*. mask out 4 unused dirty field in desc]
+        * B. copy 4 mbuf point from swring to rx_pkts
+        * C. calc the number of DD bits among the 4 packets
+        * [C*. extract the end-of-packet bit, if requested]
+        * D. fill info. from desc to mbuf
+        */
+
+       for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
+                       pos += RTE_I40E_DESCS_PER_LOOP,
+                       rxdp += RTE_I40E_DESCS_PER_LOOP) {
+               uint64x2_t descs[RTE_I40E_DESCS_PER_LOOP];
+               uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+               uint16x8x2_t sterr_tmp1, sterr_tmp2;
+               uint64x2_t mbp1, mbp2;
+               uint16x8_t staterr;
+               uint16x8_t tmp;
+               uint64_t stat;
+
+               int32x4_t len_shl = {0, 0, 0, PKTLEN_SHIFT};
+
+               /* B.1 load 1 mbuf point */
+               mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
+               /* Read desc statuses backwards to avoid race condition */
+               /* A.1 load 4 pkts desc */
+               descs[3] =  vld1q_u64((uint64_t *)(rxdp + 3));
+               rte_rmb();
+
+               /* B.2 copy 2 mbuf point into rx_pkts  */
+               vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
+
+               /* B.1 load 1 mbuf point */
+               mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);
+
+               descs[2] =  vld1q_u64((uint64_t *)(rxdp + 2));
+               /* B.1 load 2 mbuf point */
+               descs[1] =  vld1q_u64((uint64_t *)(rxdp + 1));
+               descs[0] =  vld1q_u64((uint64_t *)(rxdp));
+
+               /* B.2 copy 2 mbuf point into rx_pkts  */
+               vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);
+
+               if (split_packet) {
+                       rte_mbuf_prefetch_part2(rx_pkts[pos]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
+               }
+
+               /* avoid compiler reorder optimization */
+               rte_compiler_barrier();
+
+               /* pkt 3,4 shift the pktlen field to be 16-bit aligned */
+               uint32x4_t len3 = vshlq_u32(vreinterpretq_u32_u64(descs[3]),
+                                           len_shl);
+               descs[3] = vreinterpretq_u64_u32(len3);
+               uint32x4_t len2 = vshlq_u32(vreinterpretq_u32_u64(descs[2]),
+                                           len_shl);
+               descs[2] = vreinterpretq_u64_u32(len2);
+
+               /* D.1 pkt 3,4 convert format from desc to pktmbuf */
+               pkt_mb4 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[3]), shuf_msk);
+               pkt_mb3 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[2]), shuf_msk);
+
+               /* C.1 4=>2 filter staterr info only */
+               sterr_tmp2 = vzipq_u16(vreinterpretq_u16_u64(descs[1]),
+                                      vreinterpretq_u16_u64(descs[3]));
+               /* C.1 4=>2 filter staterr info only */
+               sterr_tmp1 = vzipq_u16(vreinterpretq_u16_u64(descs[0]),
+                                      vreinterpretq_u16_u64(descs[2]));
+
+               /* C.2 get 4 pkts staterr value  */
+               staterr = vzipq_u16(sterr_tmp1.val[1],
+                                   sterr_tmp2.val[1]).val[0];
+               stat = vgetq_lane_u64(vreinterpretq_u64_u16(staterr), 0);
+
+               desc_to_olflags_v(descs, &rx_pkts[pos]);
+
+               /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
+               tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
+               pkt_mb4 = vreinterpretq_u8_u16(tmp);
+               tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
+               pkt_mb3 = vreinterpretq_u8_u16(tmp);
+
+               /* pkt 1,2 shift the pktlen field to be 16-bit aligned */
+               uint32x4_t len1 = vshlq_u32(vreinterpretq_u32_u64(descs[1]),
+                                           len_shl);
+               descs[1] = vreinterpretq_u64_u32(len1);
+               uint32x4_t len0 = vshlq_u32(vreinterpretq_u32_u64(descs[0]),
+                                           len_shl);
+               descs[0] = vreinterpretq_u64_u32(len0);
+
+               /* D.1 pkt 1,2 convert format from desc to pktmbuf */
+               pkt_mb2 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[1]), shuf_msk);
+               pkt_mb1 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[0]), shuf_msk);
+
+               /* D.3 copy final 3,4 data to rx_pkts */
+               vst1q_u8((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
+                                pkt_mb4);
+               vst1q_u8((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
+                                pkt_mb3);
+
+               /* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
+               tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust);
+               pkt_mb2 = vreinterpretq_u8_u16(tmp);
+               tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust);
+               pkt_mb1 = vreinterpretq_u8_u16(tmp);
+
+               /* C* extract and record EOP bit */
+               if (split_packet) {
+                       uint8x16_t eop_shuf_mask = {
+                                       0x00, 0x02, 0x04, 0x06,
+                                       0xFF, 0xFF, 0xFF, 0xFF,
+                                       0xFF, 0xFF, 0xFF, 0xFF,
+                                       0xFF, 0xFF, 0xFF, 0xFF};
+                       uint8x16_t eop_bits;
+
+                       /* and with mask to extract bits, flipping 1-0 */
+                       eop_bits = vmvnq_u8(vreinterpretq_u8_u16(staterr));
+                       eop_bits = vandq_u8(eop_bits, eop_check);
+                       /* the staterr values are not in order, as the count
+                        * of dd bits doesn't care. However, for end of
+                        * packet tracking, we do care, so shuffle. This also
+                        * compresses the 32-bit values to 8-bit
+                        */
+                       eop_bits = vqtbl1q_u8(eop_bits, eop_shuf_mask);
+
+                       /* store the resulting 32-bit value */
+                       vst1q_lane_u32((uint32_t *)split_packet,
+                                      vreinterpretq_u32_u8(eop_bits), 0);
+                       split_packet += RTE_I40E_DESCS_PER_LOOP;
+
+                       /* zero-out next pointers */
+                       rx_pkts[pos]->next = NULL;
+                       rx_pkts[pos + 1]->next = NULL;
+                       rx_pkts[pos + 2]->next = NULL;
+                       rx_pkts[pos + 3]->next = NULL;
+               }
+
+               rte_prefetch_non_temporal(rxdp + RTE_I40E_DESCS_PER_LOOP);
+
+               /* D.3 copy final 1,2 data to rx_pkts */
+               vst1q_u8((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
+                        pkt_mb2);
+               vst1q_u8((void *)&rx_pkts[pos]->rx_descriptor_fields1,
+                        pkt_mb1);
+               desc_to_ptype_v(descs, &rx_pkts[pos]);
+               /* C.4 calc available number of desc */
+               var = __builtin_popcountll(stat & I40E_VPMD_DESC_DD_MASK);
+               nb_pkts_recd += var;
+               if (likely(var != RTE_I40E_DESCS_PER_LOOP))
+                       break;
+       }
+
+       /* Update our internal tail pointer */
+       rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
+       rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
+       rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);
+
+       return nb_pkts_recd;
+}
+
+ /*
+ * Notice:
+ * - if nb_pkts < RTE_I40E_DESCS_PER_LOOP, no packets are returned
+ * - if nb_pkts > RTE_I40E_VPMD_RX_BURST, only RTE_I40E_VPMD_RX_BURST
+ *   descriptors' DD bits are scanned
+ */
+uint16_t
+i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+                  uint16_t nb_pkts)
+{
+       return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
+}
+
+ /* vPMD receive routine that reassembles scattered packets
+ * Notice:
+ * - if nb_pkts < RTE_I40E_DESCS_PER_LOOP, no packets are returned
+ * - if nb_pkts > RTE_I40E_VPMD_RX_BURST, only RTE_I40E_VPMD_RX_BURST
+ *   descriptors' DD bits are scanned
+ */
+uint16_t
+i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+                            uint16_t nb_pkts)
+{
+
+       struct i40e_rx_queue *rxq = rx_queue;
+       uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};
+
+       /* get some new buffers */
+       uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
+                       split_flags);
+       if (nb_bufs == 0)
+               return 0;
+
+       /* happy day case, full burst + no packets to be joined */
+       const uint64_t *split_fl64 = (uint64_t *)split_flags;
+
+       if (rxq->pkt_first_seg == NULL &&
+                       split_fl64[0] == 0 && split_fl64[1] == 0 &&
+                       split_fl64[2] == 0 && split_fl64[3] == 0)
+               return nb_bufs;
+
+       /* reassemble any packets that need reassembly */
+       unsigned i = 0;
+
+       if (rxq->pkt_first_seg == NULL) {
+               /* find the first split flag, and only reassemble from there */
+               while (i < nb_bufs && !split_flags[i])
+                       i++;
+               if (i == nb_bufs)
+                       return nb_bufs;
+       }
+       return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
+               &split_flags[i]);
+}
+
+static inline void
+vtx1(volatile struct i40e_tx_desc *txdp,
+               struct rte_mbuf *pkt, uint64_t flags)
+{
+       uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA |
+                       ((uint64_t)flags  << I40E_TXD_QW1_CMD_SHIFT) |
+                       ((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT));
+
+       uint64x2_t descriptor = {pkt->buf_physaddr + pkt->data_off, high_qw};
+       vst1q_u64((uint64_t *)txdp, descriptor);
+}
+
+static inline void
+vtx(volatile struct i40e_tx_desc *txdp,
+               struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)
+{
+       int i;
+
+       for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt)
+               vtx1(txdp, *pkt, flags);
+}
+
+uint16_t
+i40e_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
+                  uint16_t nb_pkts)
+{
+       struct i40e_tx_queue *txq = (struct i40e_tx_queue *)tx_queue;
+       volatile struct i40e_tx_desc *txdp;
+       struct i40e_tx_entry *txep;
+       uint16_t n, nb_commit, tx_id;
+       uint64_t flags = I40E_TD_CMD;
+       uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
+       int i;
+
+       /* crossing the tx_rs_thresh boundary is not allowed */
+       nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
+
+       if (txq->nb_tx_free < txq->tx_free_thresh)
+               i40e_tx_free_bufs(txq);
+
+       nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
+       if (unlikely(nb_pkts == 0))
+               return 0;
+
+       tx_id = txq->tx_tail;
+       txdp = &txq->tx_ring[tx_id];
+       txep = &txq->sw_ring[tx_id];
+
+       txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
+
+       n = (uint16_t)(txq->nb_tx_desc - tx_id);
+       if (nb_commit >= n) {
+               tx_backlog_entry(txep, tx_pkts, n);
+
+               for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
+                       vtx1(txdp, *tx_pkts, flags);
+
+               vtx1(txdp, *tx_pkts++, rs);
+
+               nb_commit = (uint16_t)(nb_commit - n);
+
+               tx_id = 0;
+               txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
+
+               /* avoid reaching the end of the ring */
+               txdp = &txq->tx_ring[tx_id];
+               txep = &txq->sw_ring[tx_id];
+       }
+
+       tx_backlog_entry(txep, tx_pkts, nb_commit);
+
+       vtx(txdp, tx_pkts, nb_commit, flags);
+
+       tx_id = (uint16_t)(tx_id + nb_commit);
+       if (tx_id > txq->tx_next_rs) {
+               txq->tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
+                       rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
+                                               I40E_TXD_QW1_CMD_SHIFT);
+               txq->tx_next_rs =
+                       (uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
+       }
+
+       txq->tx_tail = tx_id;
+
+       I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
+
+       return nb_pkts;
+}
+
+void __attribute__((cold))
+i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
+{
+       _i40e_rx_queue_release_mbufs_vec(rxq);
+}
+
+int __attribute__((cold))
+i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
+{
+       return i40e_rxq_vec_setup_default(rxq);
+}
+
+int __attribute__((cold))
+i40e_txq_vec_setup(struct i40e_tx_queue __rte_unused *txq)
+{
+       return 0;
+}
+
+int __attribute__((cold))
+i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
+{
+       return i40e_rx_vec_dev_conf_condition_check_default(dev);
+}
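
For context, a hypothetical application-side view (not part of this patch) of how the routines above are reached once the PMD selects them; port, queue and burst size are illustrative:

#include <rte_ethdev.h>
#include <rte_mbuf.h>

#define BURST 32        /* illustrative burst size */

static void
forward_one_burst(uint8_t port, uint16_t queue)
{
        struct rte_mbuf *pkts[BURST];
        uint16_t nb_rx, nb_tx;

        /* may dispatch to i40e_recv_pkts_vec()/i40e_recv_scattered_pkts_vec() */
        nb_rx = rte_eth_rx_burst(port, queue, pkts, BURST);

        /* may dispatch to i40e_xmit_pkts_vec() */
        nb_tx = rte_eth_tx_burst(port, queue, pkts, nb_rx);

        /* free anything the TX ring could not accept */
        while (nb_tx < nb_rx)
                rte_pktmbuf_free(pkts[nb_tx++]);
}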
similarity index 75%
rename from src/dpdk/drivers/net/i40e/i40e_rxtx_vec.c
rename to src/dpdk/drivers/net/i40e/i40e_rxtx_vec_sse.c
index 51fb282..b95cc8e 100644 (file)
@@ -39,6 +39,7 @@
 #include "base/i40e_type.h"
 #include "i40e_ethdev.h"
 #include "i40e_rxtx.h"
+#include "i40e_rxtx_vec_common.h"
 
 #include <tmmintrin.h>
 
@@ -138,19 +139,28 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 static inline void
 desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
 {
-       __m128i vlan0, vlan1, rss;
-       union {
-               uint16_t e[4];
-               uint64_t dword;
-       } vol;
+       __m128i vlan0, vlan1, rss, l3_l4e;
 
        /* mask everything except RSS, flow director and VLAN flags
         * bit2 is for VLAN tag, bit11 for flow director indication
         * bit13:12 for RSS indication.
         */
-       const __m128i rss_vlan_msk = _mm_set_epi16(
-                       0x0000, 0x0000, 0x0000, 0x0000,
-                       0x3804, 0x3804, 0x3804, 0x3804);
+       const __m128i rss_vlan_msk = _mm_set_epi32(
+                       0x1c03804, 0x1c03804, 0x1c03804, 0x1c03804);
+
+       const __m128i cksum_mask = _mm_set_epi32(
+                       PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
+                       PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
+                       PKT_RX_EIP_CKSUM_BAD,
+                       PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
+                       PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
+                       PKT_RX_EIP_CKSUM_BAD,
+                       PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
+                       PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
+                       PKT_RX_EIP_CKSUM_BAD,
+                       PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
+                       PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
+                       PKT_RX_EIP_CKSUM_BAD);
 
        /* map rss and vlan type to rss hash and vlan flag */
        const __m128i vlan_flags = _mm_set_epi8(0, 0, 0, 0,
@@ -163,23 +173,43 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
                        PKT_RX_RSS_HASH | PKT_RX_FDIR, PKT_RX_RSS_HASH, 0, 0,
                        0, 0, PKT_RX_FDIR, 0);
 
-       vlan0 = _mm_unpackhi_epi16(descs[0], descs[1]);
-       vlan1 = _mm_unpackhi_epi16(descs[2], descs[3]);
-       vlan0 = _mm_unpacklo_epi32(vlan0, vlan1);
+       const __m128i l3_l4e_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+                       /* shift right 1 bit to make sure it does not exceed 255 */
+                       (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+                        PKT_RX_IP_CKSUM_BAD) >> 1,
+                       (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD |
+                        PKT_RX_L4_CKSUM_BAD) >> 1,
+                       (PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+                       (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD) >> 1,
+                       (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+                       (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1,
+                       PKT_RX_IP_CKSUM_BAD >> 1,
+                       (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1);
+
+       vlan0 = _mm_unpackhi_epi32(descs[0], descs[1]);
+       vlan1 = _mm_unpackhi_epi32(descs[2], descs[3]);
+       vlan0 = _mm_unpacklo_epi64(vlan0, vlan1);
 
        vlan1 = _mm_and_si128(vlan0, rss_vlan_msk);
        vlan0 = _mm_shuffle_epi8(vlan_flags, vlan1);
 
-       rss = _mm_srli_epi16(vlan1, 11);
+       rss = _mm_srli_epi32(vlan1, 11);
        rss = _mm_shuffle_epi8(rss_flags, rss);
 
+       l3_l4e = _mm_srli_epi32(vlan1, 22);
+       l3_l4e = _mm_shuffle_epi8(l3_l4e_flags, l3_l4e);
+       /* then we shift left 1 bit */
+       l3_l4e = _mm_slli_epi32(l3_l4e, 1);
+       /* we need to mask out the redundant bits */
+       l3_l4e = _mm_and_si128(l3_l4e, cksum_mask);
+
        vlan0 = _mm_or_si128(vlan0, rss);
-       vol.dword = _mm_cvtsi128_si64(vlan0);
+       vlan0 = _mm_or_si128(vlan0, l3_l4e);
 
-       rx_pkts[0]->ol_flags = vol.e[0];
-       rx_pkts[1]->ol_flags = vol.e[1];
-       rx_pkts[2]->ol_flags = vol.e[2];
-       rx_pkts[3]->ol_flags = vol.e[3];
+       rx_pkts[0]->ol_flags = _mm_extract_epi16(vlan0, 0);
+       rx_pkts[1]->ol_flags = _mm_extract_epi16(vlan0, 2);
+       rx_pkts[2]->ol_flags = _mm_extract_epi16(vlan0, 4);
+       rx_pkts[3]->ol_flags = _mm_extract_epi16(vlan0, 6);
 }
 #else
 #define desc_to_olflags_v(desc, rx_pkts) do {} while (0)
@@ -187,6 +217,21 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
 
 #define PKTLEN_SHIFT     10
 
+static inline void
+desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
+{
+       __m128i ptype0 = _mm_unpackhi_epi64(descs[0], descs[1]);
+       __m128i ptype1 = _mm_unpackhi_epi64(descs[2], descs[3]);
+
+       ptype0 = _mm_srli_epi64(ptype0, 30);
+       ptype1 = _mm_srli_epi64(ptype1, 30);
+
+       rx_pkts[0]->packet_type = i40e_rxd_pkt_type_mapping(_mm_extract_epi8(ptype0, 0));
+       rx_pkts[1]->packet_type = i40e_rxd_pkt_type_mapping(_mm_extract_epi8(ptype0, 8));
+       rx_pkts[2]->packet_type = i40e_rxd_pkt_type_mapping(_mm_extract_epi8(ptype1, 0));
+       rx_pkts[3]->packet_type = i40e_rxd_pkt_type_mapping(_mm_extract_epi8(ptype1, 8));
+}
+
  /*
  * Notice:
  * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
@@ -224,7 +269,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
         */
        rxdp = rxq->rx_ring + rxq->rx_tail;
 
-       _mm_prefetch((const void *)rxdp, _MM_HINT_T0);
+       rte_prefetch0(rxdp);
 
        /* See if we need to rearm the RX queue - gives the prefetch a bit
         * of time to act
@@ -282,6 +327,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                /* Read desc statuses backwards to avoid race condition */
                /* A.1 load 4 pkts desc */
                descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
+               rte_compiler_barrier();
 
                /* B.2 copy 2 mbuf pointers into rx_pkts */
                _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);
@@ -290,8 +336,10 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]);
 
                descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
+               rte_compiler_barrier();
                /* B.1 load 2 mbuf pointers */
                descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
+               rte_compiler_barrier();
                descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
 
                /* B.2 copy 2 mbuf pointers into rx_pkts */
@@ -393,6 +441,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                                 pkt_mb2);
                _mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
                                 pkt_mb1);
+               desc_to_ptype_v(descs, &rx_pkts[pos]);
                /* C.4 calc available number of desc */
                var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
                nb_pkts_recd += var;
@@ -421,68 +470,6 @@ i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
        return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
-static inline uint16_t
-reassemble_packets(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_bufs,
-                  uint16_t nb_bufs, uint8_t *split_flags)
-{
-       struct rte_mbuf *pkts[RTE_I40E_VPMD_RX_BURST]; /*finished pkts*/
-       struct rte_mbuf *start = rxq->pkt_first_seg;
-       struct rte_mbuf *end =  rxq->pkt_last_seg;
-       unsigned pkt_idx, buf_idx;
-
-       for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {
-               if (end != NULL) {
-                       /* processing a split packet */
-                       end->next = rx_bufs[buf_idx];
-                       rx_bufs[buf_idx]->data_len += rxq->crc_len;
-
-                       start->nb_segs++;
-                       start->pkt_len += rx_bufs[buf_idx]->data_len;
-                       end = end->next;
-
-                       if (!split_flags[buf_idx]) {
-                               /* it's the last packet of the set */
-                               start->hash = end->hash;
-                               start->ol_flags = end->ol_flags;
-                               /* we need to strip crc for the whole packet */
-                               start->pkt_len -= rxq->crc_len;
-                               if (end->data_len > rxq->crc_len) {
-                                       end->data_len -= rxq->crc_len;
-                               } else {
-                                       /* free up last mbuf */
-                                       struct rte_mbuf *secondlast = start;
-
-                                       while (secondlast->next != end)
-                                               secondlast = secondlast->next;
-                                       secondlast->data_len -= (rxq->crc_len -
-                                                       end->data_len);
-                                       secondlast->next = NULL;
-                                       rte_pktmbuf_free_seg(end);
-                                       end = secondlast;
-                               }
-                               pkts[pkt_idx++] = start;
-                               start = end = NULL;
-                       }
-               } else {
-                       /* not processing a split packet */
-                       if (!split_flags[buf_idx]) {
-                               /* not a split packet, save and skip */
-                               pkts[pkt_idx++] = rx_bufs[buf_idx];
-                               continue;
-                       }
-                       end = start = rx_bufs[buf_idx];
-                       rx_bufs[buf_idx]->data_len += rxq->crc_len;
-                       rx_bufs[buf_idx]->pkt_len += rxq->crc_len;
-               }
-       }
-
-       /* save the partial packet for next time */
-       rxq->pkt_first_seg = start;
-       rxq->pkt_last_seg = end;
-       memcpy(rx_bufs, pkts, pkt_idx * (sizeof(*pkts)));
-       return pkt_idx;
-}
-
  /* vPMD receive routine that reassembles scattered packets
  * Notice:
  * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
@@ -548,73 +535,6 @@ vtx(volatile struct i40e_tx_desc *txdp,
                vtx1(txdp, *pkt, flags);
 }
 
-static inline int __attribute__((always_inline))
-i40e_tx_free_bufs(struct i40e_tx_queue *txq)
-{
-       struct i40e_tx_entry *txep;
-       uint32_t n;
-       uint32_t i;
-       int nb_free = 0;
-       struct rte_mbuf *m, *free[RTE_I40E_TX_MAX_FREE_BUF_SZ];
-
-       /* check DD bits on threshold descriptor */
-       if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
-                       rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
-                       rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
-               return 0;
-
-       n = txq->tx_rs_thresh;
-
-        /* first buffer to free from S/W ring is at index
-         * tx_next_dd - (tx_rs_thresh-1)
-         */
-       txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
-       m = __rte_pktmbuf_prefree_seg(txep[0].mbuf);
-       if (likely(m != NULL)) {
-               free[0] = m;
-               nb_free = 1;
-               for (i = 1; i < n; i++) {
-                       m = __rte_pktmbuf_prefree_seg(txep[i].mbuf);
-                       if (likely(m != NULL)) {
-                               if (likely(m->pool == free[0]->pool)) {
-                                       free[nb_free++] = m;
-                               } else {
-                                       rte_mempool_put_bulk(free[0]->pool,
-                                                            (void *)free,
-                                                            nb_free);
-                                       free[0] = m;
-                                       nb_free = 1;
-                               }
-                       }
-               }
-               rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
-       } else {
-               for (i = 1; i < n; i++) {
-                       m = __rte_pktmbuf_prefree_seg(txep[i].mbuf);
-                       if (m != NULL)
-                               rte_mempool_put(m->pool, m);
-               }
-       }
-
-       /* buffers were freed, update counters */
-       txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
-       txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
-       if (txq->tx_next_dd >= txq->nb_tx_desc)
-               txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
-
-       return txq->tx_rs_thresh;
-}
-
-static inline void __attribute__((always_inline))
-tx_backlog_entry(struct i40e_tx_entry *txep,
-                struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
-{
-       int i;
-
-       for (i = 0; i < (int)nb_pkts; ++i)
-               txep[i].mbuf = tx_pkts[i];
-}
-
 uint16_t
 i40e_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
                   uint16_t nb_pkts)
@@ -685,37 +605,13 @@ i40e_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 void __attribute__((cold))
 i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
 {
-       const unsigned mask = rxq->nb_rx_desc - 1;
-       unsigned i;
-
-       if (rxq->sw_ring == NULL || rxq->rxrearm_nb >= rxq->nb_rx_desc)
-               return;
-
-       /* free all mbufs that are valid in the ring */
-       for (i = rxq->rx_tail; i != rxq->rxrearm_start; i = (i + 1) & mask)
-               rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
-       rxq->rxrearm_nb = rxq->nb_rx_desc;
-
-       /* set all entries to NULL */
-       memset(rxq->sw_ring, 0, sizeof(rxq->sw_ring[0]) * rxq->nb_rx_desc);
+       _i40e_rx_queue_release_mbufs_vec(rxq);
 }
 
 int __attribute__((cold))
 i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
 {
-       uintptr_t p;
-       struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */
-
-       mb_def.nb_segs = 1;
-       mb_def.data_off = RTE_PKTMBUF_HEADROOM;
-       mb_def.port = rxq->port_id;
-       rte_mbuf_refcnt_set(&mb_def, 1);
-
-       /* prevent compiler reordering: rearm_data covers previous fields */
-       rte_compiler_barrier();
-       p = (uintptr_t)&mb_def.rearm_data;
-       rxq->mbuf_initializer = *(uint64_t *)p;
-       return 0;
+       return i40e_rxq_vec_setup_default(rxq);
 }
 
 int __attribute__((cold))
@@ -728,34 +624,10 @@ int __attribute__((cold))
 i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
 {
 #ifndef RTE_LIBRTE_IEEE1588
-       struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
-       struct rte_fdir_conf *fconf = &dev->data->dev_conf.fdir_conf;
-
        /* need SSE4.1 support */
        if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1))
                return -1;
-
-#ifndef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE
-       /* whithout rx ol_flags, no VP flag report */
-       if (rxmode->hw_vlan_strip != 0 ||
-           rxmode->hw_vlan_extend != 0)
-               return -1;
 #endif
 
-       /* no fdir support */
-       if (fconf->mode != RTE_FDIR_MODE_NONE)
-               return -1;
-
-        /* - no csum error report support
-        * - no header split support
-        */
-       if (rxmode->hw_ip_checksum == 1 ||
-           rxmode->header_split == 1)
-               return -1;
-
-       return 0;
-#else
-       RTE_SET_USED(dev);
-       return -1;
-#endif
+       return i40e_rx_vec_dev_conf_condition_check_default(dev);
 }
diff --git a/src/dpdk/drivers/net/i40e/rte_pmd_i40e.h b/src/dpdk/drivers/net/i40e/rte_pmd_i40e.h
new file mode 100644 (file)
index 0000000..a0ad88c
--- /dev/null
@@ -0,0 +1,335 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) 2017 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _PMD_I40E_H_
+#define _PMD_I40E_H_
+
+/**
+ * @file rte_pmd_i40e.h
+ *
+ * i40e PMD specific functions.
+ *
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ */
+
+#include <rte_ethdev.h>
+
+/**
+ * Response sent back to i40e driver from user app after callback
+ */
+enum rte_pmd_i40e_mb_event_rsp {
+       RTE_PMD_I40E_MB_EVENT_NOOP_ACK,  /**< skip mbox request and ACK */
+       RTE_PMD_I40E_MB_EVENT_NOOP_NACK, /**< skip mbox request and NACK */
+       RTE_PMD_I40E_MB_EVENT_PROCEED,  /**< proceed with mbox request  */
+       RTE_PMD_I40E_MB_EVENT_MAX       /**< max value of this enum */
+};
+
+/**
+ * Data sent to the user application when the callback is executed.
+ */
+struct rte_pmd_i40e_mb_event_param {
+       uint16_t vfid;     /**< Virtual Function number */
+       uint16_t msg_type; /**< VF to PF message type, see i40e_virtchnl_ops */
+       uint16_t retval;   /**< return value */
+       void *msg;         /**< pointer to message */
+       uint16_t msglen;   /**< length of the message */
+};
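+
+An application registered for the VF mailbox event is expected to fill in the retval member with one of the rte_pmd_i40e_mb_event_rsp values before returning from its callback. A hedged sketch of such a decision helper (how the parameter pointer reaches the application callback depends on the ethdev callback API of the DPDK release in use; the helper name and the policy are illustrative):
+
+static void
+decide_vf_mbox_request(struct rte_pmd_i40e_mb_event_param *p)
+{
+       /* example policy: let VF 0 proceed, NACK requests from any other VF */
+       if (p->vfid == 0)
+               p->retval = RTE_PMD_I40E_MB_EVENT_PROCEED;
+       else
+               p->retval = RTE_PMD_I40E_MB_EVENT_NOOP_NACK;
+}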
+
+/**
+ * Notify VF when PF link status changes.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param vf
+ *   VF id.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if *vf* invalid.
+ */
+int rte_pmd_i40e_ping_vfs(uint8_t port, uint16_t vf);
+
+/**
+ * Enable/Disable VF MAC anti spoofing.
+ *
+ * @param port
+ *    The port identifier of the Ethernet device.
+ * @param vf_id
+ *    VF on which to set MAC anti spoofing.
+ * @param on
+ *    1 - Enable VFs MAC anti spoofing.
+ *    0 - Disable VFs MAC anti spoofing.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_pmd_i40e_set_vf_mac_anti_spoof(uint8_t port,
+                                      uint16_t vf_id,
+                                      uint8_t on);
+
+/**
+ * Enable/Disable VF VLAN anti spoofing.
+ *
+ * @param port
+ *    The port identifier of the Ethernet device.
+ * @param vf_id
+ *    VF on which to set VLAN anti spoofing.
+ * @param on
+ *    1 - Enable VFs VLAN anti spoofing.
+ *    0 - Disable VFs VLAN anti spoofing.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_pmd_i40e_set_vf_vlan_anti_spoof(uint8_t port,
+                                       uint16_t vf_id,
+                                       uint8_t on);
+
+/**
+ * Enable/Disable TX loopback on all the PF and VFs.
+ *
+ * @param port
+ *    The port identifier of the Ethernet device.
+ * @param on
+ *    1 - Enable TX loopback.
+ *    0 - Disable TX loopback.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_pmd_i40e_set_tx_loopback(uint8_t port,
+                                uint8_t on);
+
+/**
+ * Enable/Disable VF unicast promiscuous mode.
+ *
+ * @param port
+ *    The port identifier of the Ethernet device.
+ * @param vf_id
+ *    VF on which to set.
+ * @param on
+ *    1 - Enable.
+ *    0 - Disable.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_pmd_i40e_set_vf_unicast_promisc(uint8_t port,
+                                       uint16_t vf_id,
+                                       uint8_t on);
+
+/**
+ * Enable/Disable VF multicast promiscuous mode.
+ *
+ * @param port
+ *    The port identifier of the Ethernet device.
+ * @param vf_id
+ *    VF on which to set.
+ * @param on
+ *    1 - Enable.
+ *    0 - Disable.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_pmd_i40e_set_vf_multicast_promisc(uint8_t port,
+                                         uint16_t vf_id,
+                                         uint8_t on);
+
+/**
+ * Set the VF MAC address.
+ *
+ * The PF should set the MAC address before the VF is initialized. If the
+ * PF sets the MAC address after the VF has been initialized, the new MAC
+ * address will not take effect until the VF is reinitialized.
+ *
+ * This will remove all existing MAC filters.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param vf_id
+ *   VF id.
+ * @param mac_addr
+ *   VF MAC address.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if *vf* or *mac_addr* is invalid.
+ */
+int rte_pmd_i40e_set_vf_mac_addr(uint8_t port, uint16_t vf_id,
+                                struct ether_addr *mac_addr);
+
+/**
+ * Enable/Disable vf vlan strip for all queues in a pool
+ *
+ * @param port
+ *    The port identifier of the Ethernet device.
+ * @param vf
+ *    ID specifying VF.
+ * @param on
+ *    1 - Enable VF's vlan strip on RX queues.
+ *    0 - Disable VF's vlan strip on RX queues.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int
+rte_pmd_i40e_set_vf_vlan_stripq(uint8_t port, uint16_t vf, uint8_t on);
+
+/**
+ * Enable/Disable vf vlan insert
+ *
+ * @param port
+ *    The port identifier of the Ethernet device.
+ * @param vf_id
+ *    ID specifying VF.
+ * @param vlan_id
+ *    0 - Disable VF's vlan insert.
+ *    n - Enable; n is inserted as the vlan id.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_pmd_i40e_set_vf_vlan_insert(uint8_t port, uint16_t vf_id,
+                                   uint16_t vlan_id);
+
+/**
+ * Enable/Disable vf broadcast mode
+ *
+ * @param port
+ *    The port identifier of the Ethernet device.
+ * @param vf_id
+ *    ID specifying VF.
+ * @param on
+ *    0 - Disable broadcast.
+ *    1 - Enable broadcast.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_pmd_i40e_set_vf_broadcast(uint8_t port, uint16_t vf_id,
+                                 uint8_t on);
+
+/**
+ * Enable/Disable vf vlan tag
+ *
+ * @param port
+ *    The port identifier of the Ethernet device.
+ * @param vf_id
+ *    ID specifying VF.
+ * @param on
+ *    0 - Disable VF's vlan tag.
+ *    n - Enable VF's vlan tag.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_pmd_i40e_set_vf_vlan_tag(uint8_t port, uint16_t vf_id, uint8_t on);
+
+/**
+ * Enable/Disable VF VLAN filter
+ *
+ * @param port
+ *    The port identifier of the Ethernet device.
+ * @param vlan_id
+ *    ID specifying VLAN
+ * @param vf_mask
+ *    Mask to filter VFs
+ * @param on
+ *    0 - Disable VF's VLAN filter.
+ *    1 - Enable VF's VLAN filter.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if bad parameter.
+ *   - (-ENOTSUP) not supported by firmware.
+ */
+int rte_pmd_i40e_set_vf_vlan_filter(uint8_t port, uint16_t vlan_id,
+                                   uint64_t vf_mask, uint8_t on);
+
+/**
+ * Get VF's statistics
+ *
+ * @param port
+ *    The port identifier of the Ethernet device.
+ * @param vf_id
+ *    VF on which to get statistics.
+ * @param stats
+ *    A pointer to a structure of type *rte_eth_stats* to be filled with
+ *    the values of device counters for the following set of statistics:
+ *   - *ipackets* with the total of successfully received packets.
+ *   - *opackets* with the total of successfully transmitted packets.
+ *   - *ibytes*   with the total of successfully received bytes.
+ *   - *obytes*   with the total of successfully transmitted bytes.
+ *   - *ierrors*  with the total of erroneous received packets.
+ *   - *oerrors*  with the total of failed transmitted packets.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+
+int rte_pmd_i40e_get_vf_stats(uint8_t port,
+                             uint16_t vf_id,
+                             struct rte_eth_stats *stats);
+
+/**
+ * Clear VF's statistics
+ *
+ * @param port
+ *    The port identifier of the Ethernet device.
+ * @param vf_id
+ *    VF on which to clear statistics.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_pmd_i40e_reset_vf_stats(uint8_t port,
+                               uint16_t vf_id);
+
+#endif /* _PMD_I40E_H_ */
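+
+A short usage sketch for a few of the declarations above, assuming an i40e PF port with SR-IOV VFs already created (the port and VF ids are placeholders and error handling is abbreviated):
+
+#include <rte_ethdev.h>
+#include <rte_pmd_i40e.h>
+
+static int
+harden_vf0(uint8_t port)
+{
+       struct rte_eth_stats st;
+       int ret;
+
+       ret = rte_pmd_i40e_set_vf_mac_anti_spoof(port, 0, 1);
+       if (ret == 0)
+               ret = rte_pmd_i40e_set_vf_vlan_anti_spoof(port, 0, 1);
+       if (ret == 0)
+               ret = rte_pmd_i40e_set_vf_unicast_promisc(port, 0, 0);
+       if (ret == 0)
+               ret = rte_pmd_i40e_get_vf_stats(port, 0, &st);
+       return ret;
+}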
index db80880..724dcbb 100644 (file)
@@ -995,19 +995,19 @@ STATIC s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
  *  @vlan: VLAN id to write to VLAN filter
  *  @vind: VMDq output index that maps queue to VLAN id in VFTA
  *  @vlan_on: boolean flag to turn on/off VLAN in VFTA
- *  @bypass_vlvf: boolean flag - unused
+ *  @vlvf_bypass: boolean flag - unused
  *
  *  Turn on/off specified VLAN in the VLAN filter table.
  **/
 s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind,
-                        bool vlan_on, bool bypass_vlvf)
+                        bool vlan_on, bool vlvf_bypass)
 {
        u32 regindex;
        u32 bitindex;
        u32 bits;
        u32 vftabyte;
 
-       UNREFERENCED_1PARAMETER(bypass_vlvf);
+       UNREFERENCED_1PARAMETER(vlvf_bypass);
 
        DEBUGFUNC("ixgbe_set_vfta_82598");
 
index 5bc7c2b..832242e 100644 (file)
@@ -1178,6 +1178,7 @@ mac_reset_top:
        if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) {
                /* Save the SAN MAC RAR index */
                hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1;
+
                hw->mac.ops.set_rar(hw, hw->mac.san_mac_rar_index,
                                    hw->mac.san_addr, 0, IXGBE_RAH_AV);
 
@@ -1809,14 +1810,23 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
                }
                IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIP6M, fdirip6m);
 
-               /* Set all bits in FDIRTCPM, FDIRUDPM, FDIRSIP4M and
-                * FDIRDIP4M in cloud mode to allow L3/L3 packets to
-                * tunnel.
+               /* Set all bits in FDIRTCPM, FDIRUDPM, FDIRSCTPM,
+                * FDIRSIP4M and FDIRDIP4M in cloud mode to allow
+                * L3/L3 packets to tunnel.
                 */
                IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, 0xFFFFFFFF);
                IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM, 0xFFFFFFFF);
                IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRDIP4M, 0xFFFFFFFF);
                IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIP4M, 0xFFFFFFFF);
+               switch (hw->mac.type) {
+               case ixgbe_mac_X550:
+               case ixgbe_mac_X550EM_x:
+               case ixgbe_mac_X550EM_a:
+                       IXGBE_WRITE_REG(hw, IXGBE_FDIRSCTPM, 0xFFFFFFFF);
+                       break;
+               default:
+                       break;
+               }
        }
 
        /* Now mask VM pool and destination IPv6 - bits 5 and 2 */
index 1786867..270a97d 100644 (file)
@@ -106,8 +106,10 @@ s32 ixgbe_init_shared_code(struct ixgbe_hw *hw)
                status = ixgbe_init_ops_X550(hw);
                break;
        case ixgbe_mac_X550EM_x:
+               status = ixgbe_init_ops_X550EM_x(hw);
+               break;
        case ixgbe_mac_X550EM_a:
-               status = ixgbe_init_ops_X550EM(hw);
+               status = ixgbe_init_ops_X550EM_a(hw);
                break;
        case ixgbe_mac_82599_vf:
        case ixgbe_mac_X540_vf:
@@ -203,6 +205,7 @@ s32 ixgbe_set_mac_type(struct ixgbe_hw *hw)
        case IXGBE_DEV_ID_X550EM_X_10G_T:
        case IXGBE_DEV_ID_X550EM_X_1G_T:
        case IXGBE_DEV_ID_X550EM_X_SFP:
+       case IXGBE_DEV_ID_X550EM_X_XFI:
                hw->mac.type = ixgbe_mac_X550EM_x;
                hw->mvals = ixgbe_mvals_X550EM_x;
                break;
@@ -1090,7 +1093,7 @@ s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on,
                   bool vlvf_bypass)
 {
        return ixgbe_call_func(hw, hw->mac.ops.set_vfta, (hw, vlan, vind,
-                                 vlan_on, vlvf_bypass), IXGBE_NOT_IMPLEMENTED);
+                              vlan_on, vlvf_bypass), IXGBE_NOT_IMPLEMENTED);
 }
 
 /**
@@ -1100,7 +1103,7 @@ s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on,
  *  @vind: VMDq output index that maps queue to VLAN id in VLVFB
  *  @vlan_on: boolean flag to turn on/off VLAN in VLVF
  *  @vfta_delta: pointer to the difference between the current value of VFTA
- *               and the desired value
+ *              and the desired value
  *  @vfta: the desired value of the VFTA
  *  @vlvf_bypass: boolean flag indicating updating the default pool is okay
  *
@@ -1110,7 +1113,7 @@ s32 ixgbe_set_vlvf(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on,
                   u32 *vfta_delta, u32 vfta, bool vlvf_bypass)
 {
        return ixgbe_call_func(hw, hw->mac.ops.set_vlvf, (hw, vlan, vind,
-                               vlan_on, vfta_delta, vfta, vlvf_bypass),
+                              vlan_on, vfta_delta, vfta, vlvf_bypass),
                               IXGBE_NOT_IMPLEMENTED);
 }
 
@@ -1145,12 +1148,15 @@ s32 ixgbe_setup_fc(struct ixgbe_hw *hw)
  * @min: driver minor number to be sent to firmware
  * @build: driver build number to be sent to firmware
  * @ver: driver version number to be sent to firmware
+ * @len: length of driver_ver string
+ * @driver_ver: driver string
  **/
 s32 ixgbe_set_fw_drv_ver(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build,
-                        u8 ver)
+                        u8 ver, u16 len, char *driver_ver)
 {
        return ixgbe_call_func(hw, hw->mac.ops.set_fw_drv_ver, (hw, maj, min,
-                              build, ver), IXGBE_NOT_IMPLEMENTED);
+                              build, ver, len, driver_ver),
+                              IXGBE_NOT_IMPLEMENTED);
 }
 
 
@@ -1659,6 +1665,7 @@ void ixgbe_init_swfw_semaphore(struct ixgbe_hw *hw)
                hw->mac.ops.init_swfw_sync(hw);
 }
 
+
 void ixgbe_disable_rx(struct ixgbe_hw *hw)
 {
        if (hw->mac.ops.disable_rx)
index 3aad1da..af85d4e 100644 (file)
@@ -45,6 +45,8 @@ extern s32 ixgbe_init_ops_82599(struct ixgbe_hw *hw);
 extern s32 ixgbe_init_ops_X540(struct ixgbe_hw *hw);
 extern s32 ixgbe_init_ops_X550(struct ixgbe_hw *hw);
 extern s32 ixgbe_init_ops_X550EM(struct ixgbe_hw *hw);
+extern s32 ixgbe_init_ops_X550EM_x(struct ixgbe_hw *hw);
+extern s32 ixgbe_init_ops_X550EM_a(struct ixgbe_hw *hw);
 extern s32 ixgbe_init_ops_vf(struct ixgbe_hw *hw);
 
 s32 ixgbe_set_mac_type(struct ixgbe_hw *hw);
@@ -131,7 +133,7 @@ s32 ixgbe_set_vlvf(struct ixgbe_hw *hw, u32 vlan, u32 vind,
 s32 ixgbe_fc_enable(struct ixgbe_hw *hw);
 s32 ixgbe_setup_fc(struct ixgbe_hw *hw);
 s32 ixgbe_set_fw_drv_ver(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build,
-                        u8 ver);
+                        u8 ver, u16 len, char *driver_ver);
 s32 ixgbe_get_thermal_sensor_data(struct ixgbe_hw *hw);
 s32 ixgbe_init_thermal_sensor_thresh(struct ixgbe_hw *hw);
 void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr);
index 811875a..9645667 100644 (file)
@@ -113,6 +113,7 @@ s32 ixgbe_init_ops_generic(struct ixgbe_hw *hw)
        mac->ops.led_off = ixgbe_led_off_generic;
        mac->ops.blink_led_start = ixgbe_blink_led_start_generic;
        mac->ops.blink_led_stop = ixgbe_blink_led_stop_generic;
+       mac->ops.init_led_link_act = ixgbe_init_led_link_act_generic;
 
        /* RAR, Multicast, VLAN */
        mac->ops.set_rar = ixgbe_set_rar_generic;
@@ -168,13 +169,24 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
        switch (hw->phy.media_type) {
        case ixgbe_media_type_fiber_qsfp:
        case ixgbe_media_type_fiber:
-               hw->mac.ops.check_link(hw, &speed, &link_up, false);
-               /* if link is down, assume supported */
-               if (link_up)
-                       supported = speed == IXGBE_LINK_SPEED_1GB_FULL ?
+               /* flow control autoneg black list */
+               switch (hw->device_id) {
+               case IXGBE_DEV_ID_X550EM_A_SFP:
+               case IXGBE_DEV_ID_X550EM_A_SFP_N:
+               case IXGBE_DEV_ID_X550EM_A_QSFP:
+               case IXGBE_DEV_ID_X550EM_A_QSFP_N:
+                       supported = false;
+                       break;
+               default:
+                       hw->mac.ops.check_link(hw, &speed, &link_up, false);
+                       /* if link is down, assume supported */
+                       if (link_up)
+                               supported = speed == IXGBE_LINK_SPEED_1GB_FULL ?
                                true : false;
-               else
-                       supported = true;
+                       else
+                               supported = true;
+               }
+
                break;
        case ixgbe_media_type_backplane:
                supported = true;
@@ -188,6 +200,9 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
                case IXGBE_DEV_ID_X550T:
                case IXGBE_DEV_ID_X550T1:
                case IXGBE_DEV_ID_X550EM_X_10G_T:
+               case IXGBE_DEV_ID_X550EM_A_10G_T:
+               case IXGBE_DEV_ID_X550EM_A_1G_T:
+               case IXGBE_DEV_ID_X550EM_A_1G_T_L:
                        supported = true;
                        break;
                default:
@@ -197,9 +212,10 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
                break;
        }
 
-       ERROR_REPORT2(IXGBE_ERROR_UNSUPPORTED,
-                     "Device %x does not support flow control autoneg",
-                     hw->device_id);
+       if (!supported)
+               ERROR_REPORT2(IXGBE_ERROR_UNSUPPORTED,
+                             "Device %x does not support flow control autoneg",
+                             hw->device_id);
        return supported;
 }
 
@@ -371,6 +387,7 @@ s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw)
 {
        s32 ret_val;
        u32 ctrl_ext;
+       u16 device_caps;
 
        DEBUGFUNC("ixgbe_start_hw_generic");
 
@@ -393,14 +410,31 @@ s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw)
 
        /* Setup flow control */
        ret_val = ixgbe_setup_fc(hw);
-       if (ret_val != IXGBE_SUCCESS)
-               goto out;
+       if (ret_val != IXGBE_SUCCESS && ret_val != IXGBE_NOT_IMPLEMENTED) {
+               DEBUGOUT1("Flow control setup failed, returning %d\n", ret_val);
+               return ret_val;
+       }
+
+       /* Cache bit indicating need for crosstalk fix */
+       switch (hw->mac.type) {
+       case ixgbe_mac_82599EB:
+       case ixgbe_mac_X550EM_x:
+       case ixgbe_mac_X550EM_a:
+               hw->mac.ops.get_device_caps(hw, &device_caps);
+               if (device_caps & IXGBE_DEVICE_CAPS_NO_CROSSTALK_WR)
+                       hw->need_crosstalk_fix = false;
+               else
+                       hw->need_crosstalk_fix = true;
+               break;
+       default:
+               hw->need_crosstalk_fix = false;
+               break;
+       }
 
        /* Clear adapter stopped flag */
        hw->adapter_stopped = false;
 
-out:
-       return ret_val;
+       return IXGBE_SUCCESS;
 }
 
 /**
@@ -466,6 +500,12 @@ s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw)
                status = hw->mac.ops.start_hw(hw);
        }
 
+       /* Initialize the LED link active for LED blink support */
+       hw->mac.ops.init_led_link_act(hw);
+
+       if (status != IXGBE_SUCCESS)
+               DEBUGOUT1("Failed to initialize HW, STATUS = %d\n", status);
+
        return status;
 }
 
@@ -1046,7 +1086,7 @@ void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw)
        if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP) {
                hw->eeprom.ops.read(hw, IXGBE_EEPROM_CTRL_4, &ee_ctrl_4);
                bus->instance_id = (ee_ctrl_4 & IXGBE_EE_CTRL_4_INST_ID) >>
-                       IXGBE_EE_CTRL_4_INST_ID_SHIFT;
+                                  IXGBE_EE_CTRL_4_INST_ID_SHIFT;
        }
 }
 
@@ -1104,6 +1144,47 @@ s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw)
        return ixgbe_disable_pcie_master(hw);
 }
 
+/**
+ *  ixgbe_init_led_link_act_generic - Store the LED index link/activity.
+ *  @hw: pointer to hardware structure
+ *
+ *  Store the index for the link active LED. This will be used to support
+ *  blinking the LED.
+ **/
+s32 ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw)
+{
+       struct ixgbe_mac_info *mac = &hw->mac;
+       u32 led_reg, led_mode;
+       u8 i;
+
+       led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+
+       /* Get LED link active from the LEDCTL register */
+       for (i = 0; i < 4; i++) {
+               led_mode = led_reg >> IXGBE_LED_MODE_SHIFT(i);
+
+               if ((led_mode & IXGBE_LED_MODE_MASK_BASE) ==
+                    IXGBE_LED_LINK_ACTIVE) {
+                       mac->led_link_act = i;
+                       return IXGBE_SUCCESS;
+               }
+       }
+
+       /*
+        * If LEDCTL register does not have the LED link active set, then use
+        * known MAC defaults.
+        */
+       switch (hw->mac.type) {
+       case ixgbe_mac_X550EM_a:
+       case ixgbe_mac_X550EM_x:
+               mac->led_link_act = 1;
+               break;
+       default:
+               mac->led_link_act = 2;
+       }
+       return IXGBE_SUCCESS;
+}
+
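+
+The index cached in mac->led_link_act by the helper above is what the LED on/off/blink routines are meant to be fed; a hedged sketch of that call pattern (the wrapper name is illustrative):
+
+static s32
+blink_link_led(struct ixgbe_hw *hw, bool start)
+{
+       /* led_link_act was stored by ixgbe_init_led_link_act_generic() */
+       u32 idx = hw->mac.led_link_act;
+
+       return start ? ixgbe_blink_led_start_generic(hw, idx) :
+                      ixgbe_blink_led_stop_generic(hw, idx);
+}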
 /**
  *  ixgbe_led_on_generic - Turns on the software controllable LEDs.
  *  @hw: pointer to hardware structure
@@ -1115,6 +1196,9 @@ s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index)
 
        DEBUGFUNC("ixgbe_led_on_generic");
 
+       if (index > 3)
+               return IXGBE_ERR_PARAM;
+
        /* To turn on the LED, set mode to ON. */
        led_reg &= ~IXGBE_LED_MODE_MASK(index);
        led_reg |= IXGBE_LED_ON << IXGBE_LED_MODE_SHIFT(index);
@@ -1135,6 +1219,9 @@ s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index)
 
        DEBUGFUNC("ixgbe_led_off_generic");
 
+       if (index > 3)
+               return IXGBE_ERR_PARAM;
+
        /* To turn off the LED, set mode to OFF. */
        led_reg &= ~IXGBE_LED_MODE_MASK(index);
        led_reg |= IXGBE_LED_OFF << IXGBE_LED_MODE_SHIFT(index);
@@ -2851,7 +2938,7 @@ out:
  *  advertised settings
  **/
 s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
-                             u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm)
+                      u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm)
 {
        if ((!(adv_reg)) ||  (!(lp_reg))) {
                ERROR_REPORT3(IXGBE_ERROR_UNSUPPORTED,
@@ -3323,7 +3410,7 @@ s32 prot_autoc_write_generic(struct ixgbe_hw *hw, u32 reg_val, bool locked)
  **/
 s32 ixgbe_enable_sec_rx_path_generic(struct ixgbe_hw *hw)
 {
-       int secrxreg;
+       u32 secrxreg;
 
        DEBUGFUNC("ixgbe_enable_sec_rx_path_generic");
 
@@ -3370,6 +3457,9 @@ s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index)
 
        DEBUGFUNC("ixgbe_blink_led_start_generic");
 
+       if (index > 3)
+               return IXGBE_ERR_PARAM;
+
        /*
         * Link must be up to auto-blink the LEDs;
         * Force it if link is down.
@@ -3415,6 +3505,10 @@ s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index)
 
        DEBUGFUNC("ixgbe_blink_led_stop_generic");
 
+       if (index > 3)
+               return IXGBE_ERR_PARAM;
+
+
        ret_val = hw->mac.ops.prot_autoc_read(hw, &locked, &autoc_reg);
        if (ret_val != IXGBE_SUCCESS)
                goto out;
@@ -3720,7 +3814,8 @@ s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
        }
 
        /* was that the last pool using this rar? */
-       if (mpsar_lo == 0 && mpsar_hi == 0 && rar != 0)
+       if (mpsar_lo == 0 && mpsar_hi == 0 &&
+           rar != 0 && rar != hw->mac.san_mac_rar_index)
                hw->mac.ops.clear_rar(hw, rar);
 done:
        return IXGBE_SUCCESS;
@@ -3887,7 +3982,8 @@ s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
        vfta_delta = 1 << (vlan % 32);
        vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(regidx));
 
-       /* vfta_delta represents the difference between the current value
+       /*
+        * vfta_delta represents the difference between the current value
         * of vfta and the value we want in the register.  Since the diff
         * is an XOR mask we can just update the vfta using an XOR
         */
@@ -3920,7 +4016,7 @@ vfta_update:
  *  @vind: VMDq output index that maps queue to VLAN id in VLVFB
  *  @vlan_on: boolean flag to turn on/off VLAN in VLVF
  *  @vfta_delta: pointer to the difference between the current value of VFTA
- * and the desired value
+ *              and the desired value
  *  @vfta: the desired value of the VFTA
  *  @vlvf_bypass: boolean flag indicating updating default pool is okay
  *
@@ -3947,6 +4043,7 @@ s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
         */
        if (!(IXGBE_READ_REG(hw, IXGBE_VT_CTL) & IXGBE_VT_CTL_VT_ENABLE))
                return IXGBE_SUCCESS;
+
        vlvf_index = ixgbe_find_vlvf_slot(hw, vlan, vlvf_bypass);
        if (vlvf_index < 0)
                return vlvf_index;
@@ -3967,7 +4064,7 @@ s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
                 * we run the risk of stray packets leaking into
                 * the PF via the default pool
                 */
-               if (vfta_delta)
+               if (*vfta_delta)
                        IXGBE_WRITE_REG(hw, IXGBE_VFTA(vlan / 32), vfta);
 
                /* disable VLVF and clear remaining bit from pool */
@@ -3976,6 +4073,7 @@ s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
 
                return IXGBE_SUCCESS;
        }
+
        /* If there are still bits set in the VLVFB registers
         * for the VLAN ID indicated we need to see if the
         * caller is requesting that we clear the VFTA entry bit.
@@ -4024,6 +4122,32 @@ s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw)
        return IXGBE_SUCCESS;
 }
 
+/**
+ *  ixgbe_need_crosstalk_fix - Determine if we need to do cross talk fix
+ *  @hw: pointer to hardware structure
+ *
+ *  Contains the logic to identify if we need to verify link for the
+ *  crosstalk fix
+ **/
+static bool ixgbe_need_crosstalk_fix(struct ixgbe_hw *hw)
+{
+
+       /* Does FW say we need the fix */
+       if (!hw->need_crosstalk_fix)
+               return false;
+
+       /* Only consider SFP+ PHYs i.e. media type fiber */
+       switch (hw->mac.ops.get_media_type(hw)) {
+       case ixgbe_media_type_fiber:
+       case ixgbe_media_type_fiber_qsfp:
+               break;
+       default:
+               return false;
+       }
+
+       return true;
+}
+
 /**
  *  ixgbe_check_mac_link_generic - Determine link and speed status
  *  @hw: pointer to hardware structure
@@ -4041,6 +4165,35 @@ s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
 
        DEBUGFUNC("ixgbe_check_mac_link_generic");
 
+       /* If the crosstalk fix is enabled, do the sanity check of making
+        * sure the SFP+ cage is full.
+        */
+       if (ixgbe_need_crosstalk_fix(hw)) {
+               u32 sfp_cage_full;
+
+               switch (hw->mac.type) {
+               case ixgbe_mac_82599EB:
+                       sfp_cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) &
+                                       IXGBE_ESDP_SDP2;
+                       break;
+               case ixgbe_mac_X550EM_x:
+               case ixgbe_mac_X550EM_a:
+                       sfp_cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) &
+                                       IXGBE_ESDP_SDP0;
+                       break;
+               default:
+                       /* sanity check - No SFP+ devices here */
+                       sfp_cage_full = false;
+                       break;
+               }
+
+               if (!sfp_cage_full) {
+                       *link_up = false;
+                       *speed = IXGBE_LINK_SPEED_UNKNOWN;
+                       return IXGBE_SUCCESS;
+               }
+       }
+
        /* clear the old state */
        links_orig = IXGBE_READ_REG(hw, IXGBE_LINKS);
 
@@ -4082,11 +4235,18 @@ s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
                break;
        case IXGBE_LINKS_SPEED_100_82599:
                *speed = IXGBE_LINK_SPEED_100_FULL;
-               if (hw->mac.type >= ixgbe_mac_X550) {
+               if (hw->mac.type == ixgbe_mac_X550) {
                        if (links_reg & IXGBE_LINKS_SPEED_NON_STD)
                                *speed = IXGBE_LINK_SPEED_5GB_FULL;
                }
                break;
+       case IXGBE_LINKS_SPEED_10_X550EM_A:
+               *speed = IXGBE_LINK_SPEED_UNKNOWN;
+               if (hw->device_id == IXGBE_DEV_ID_X550EM_A_1G_T ||
+                   hw->device_id == IXGBE_DEV_ID_X550EM_A_1G_T_L) {
+                       *speed = IXGBE_LINK_SPEED_10_FULL;
+               }
+               break;
        default:
                *speed = IXGBE_LINK_SPEED_UNKNOWN;
        }
@@ -4318,43 +4478,31 @@ u8 ixgbe_calculate_checksum(u8 *buffer, u32 length)
 }
 
 /**
- *  ixgbe_host_interface_command - Issue command to manageability block
+ *  ixgbe_hic_unlocked - Issue command to manageability block unlocked
  *  @hw: pointer to the HW structure
- *  @buffer: contains the command to write and where the return status will
- *   be placed
+ *  @buffer: command to write and where the return status will be placed
  *  @length: length of buffer, must be multiple of 4 bytes
  *  @timeout: time in ms to wait for command completion
- *  @return_data: read and return data from the buffer (true) or not (false)
- *   Needed because FW structures are big endian and decoding of
- *   these fields can be 8 bit or 16 bit based on command. Decoding
- *   is not easily understood without making a table of commands.
- *   So we will leave this up to the caller to read back the data
- *   in these cases.
  *
  *  Communicates with the manageability block. On success return IXGBE_SUCCESS
  *  else returns semaphore error when encountering an error acquiring
  *  semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails.
+ *
+ *  This function assumes that the IXGBE_GSSR_SW_MNG_SM semaphore is held
+ *  by the caller.
  **/
-s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
-                                u32 length, u32 timeout, bool return_data)
+s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length,
+                      u32 timeout)
 {
-       u32 hicr, i, bi, fwsts;
-       u32 hdr_size = sizeof(struct ixgbe_hic_hdr);
-       u16 buf_len;
+       u32 hicr, i, fwsts;
        u16 dword_len;
-       s32 status;
 
-       DEBUGFUNC("ixgbe_host_interface_command");
+       DEBUGFUNC("ixgbe_hic_unlocked");
 
-       if (length == 0 || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) {
+       if (!length || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) {
                DEBUGOUT1("Buffer length failure buffersize=%d.\n", length);
                return IXGBE_ERR_HOST_INTERFACE_COMMAND;
        }
-       /* Take management host interface semaphore */
-       status = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM);
-
-       if (status)
-               return status;
 
        /* Set bit 9 of FWSTS clearing FW reset indication */
        fwsts = IXGBE_READ_REG(hw, IXGBE_FWSTS);
@@ -4362,17 +4510,15 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
 
        /* Check that the host interface is enabled. */
        hicr = IXGBE_READ_REG(hw, IXGBE_HICR);
-       if ((hicr & IXGBE_HICR_EN) == 0) {
+       if (!(hicr & IXGBE_HICR_EN)) {
                DEBUGOUT("IXGBE_HOST_EN bit disabled.\n");
-               status = IXGBE_ERR_HOST_INTERFACE_COMMAND;
-               goto rel_out;
+               return IXGBE_ERR_HOST_INTERFACE_COMMAND;
        }
 
        /* Calculate length in DWORDs. We must be DWORD aligned */
-       if ((length % (sizeof(u32))) != 0) {
+       if (length % sizeof(u32)) {
                DEBUGOUT("Buffer length failure, not aligned to dword");
-               status = IXGBE_ERR_INVALID_ARGUMENT;
-               goto rel_out;
+               return IXGBE_ERR_INVALID_ARGUMENT;
        }
 
        dword_len = length >> 2;
@@ -4395,14 +4541,59 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
        }
 
        /* Check command completion */
-       if ((timeout != 0 && i == timeout) ||
+       if ((timeout && i == timeout) ||
            !(IXGBE_READ_REG(hw, IXGBE_HICR) & IXGBE_HICR_SV)) {
                ERROR_REPORT1(IXGBE_ERROR_CAUTION,
                             "Command has failed with no status valid.\n");
-               status = IXGBE_ERR_HOST_INTERFACE_COMMAND;
-               goto rel_out;
+               return IXGBE_ERR_HOST_INTERFACE_COMMAND;
        }
 
+       return IXGBE_SUCCESS;
+}
+
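+
+ixgbe_hic_unlocked() assumes the IXGBE_GSSR_SW_MNG_SM semaphore is already held, which lets a caller batch several commands under one acquisition. A minimal sketch of the expected pattern, mirroring what ixgbe_host_interface_command() below does (the wrapper name is illustrative):
+
+static s32
+issue_one_hic_command(struct ixgbe_hw *hw, u32 *buffer, u32 length, u32 timeout)
+{
+       s32 status;
+
+       /* take the management host interface semaphore first */
+       status = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM);
+       if (status)
+               return status;
+
+       status = ixgbe_hic_unlocked(hw, buffer, length, timeout);
+
+       hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM);
+       return status;
+}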
+/**
+ *  ixgbe_host_interface_command - Issue command to manageability block
+ *  @hw: pointer to the HW structure
+ *  @buffer: contains the command to write and where the return status will
+ *   be placed
+ *  @length: length of buffer, must be multiple of 4 bytes
+ *  @timeout: time in ms to wait for command completion
+ *  @return_data: read and return data from the buffer (true) or not (false)
+ *   Needed because FW structures are big endian and decoding of
+ *   these fields can be 8 bit or 16 bit based on command. Decoding
+ *   is not easily understood without making a table of commands.
+ *   So we will leave this up to the caller to read back the data
+ *   in these cases.
+ *
+ *  Communicates with the manageability block. On success return IXGBE_SUCCESS
+ *  else returns semaphore error when encountering an error acquiring
+ *  semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails.
+ **/
+s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
+                                u32 length, u32 timeout, bool return_data)
+{
+       u32 hdr_size = sizeof(struct ixgbe_hic_hdr);
+       u16 dword_len;
+       u16 buf_len;
+       s32 status;
+       u32 bi;
+
+       DEBUGFUNC("ixgbe_host_interface_command");
+
+       if (length == 0 || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) {
+               DEBUGOUT1("Buffer length failure buffersize=%d.\n", length);
+               return IXGBE_ERR_HOST_INTERFACE_COMMAND;
+       }
+
+       /* Take management host interface semaphore */
+       status = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM);
+       if (status)
+               return status;
+
+       status = ixgbe_hic_unlocked(hw, buffer, length, timeout);
+       if (status)
+               goto rel_out;
+
        if (!return_data)
                goto rel_out;
 
@@ -4417,7 +4608,7 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
 
        /* If there is anything in the data position, pull it in */
        buf_len = ((struct ixgbe_hic_hdr *)buffer)->buf_len;
-       if (buf_len == 0)
+       if (!buf_len)
                goto rel_out;
 
        if (length < buf_len + hdr_size) {
@@ -4455,13 +4646,15 @@ rel_out:
  *  semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails.
  **/
 s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
-                                u8 build, u8 sub)
+                                u8 build, u8 sub, u16 len,
+                                const char *driver_ver)
 {
        struct ixgbe_hic_drv_info fw_cmd;
        int i;
        s32 ret_val = IXGBE_SUCCESS;
 
        DEBUGFUNC("ixgbe_set_fw_drv_ver_generic");
+       UNREFERENCED_2PARAMETER(len, driver_ver);
 
        fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO;
        fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN;
@@ -4923,14 +5116,6 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
                speedcnt++;
                highest_link_speed = IXGBE_LINK_SPEED_10GB_FULL;
 
-               /* If we already have link at this speed, just jump out */
-               status = ixgbe_check_link(hw, &link_speed, &link_up, false);
-               if (status != IXGBE_SUCCESS)
-                       return status;
-
-               if ((link_speed == IXGBE_LINK_SPEED_10GB_FULL) && link_up)
-                       goto out;
-
                /* Set the module link speed */
                switch (hw->phy.media_type) {
                case ixgbe_media_type_fiber:
@@ -4981,14 +5166,6 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
                if (highest_link_speed == IXGBE_LINK_SPEED_UNKNOWN)
                        highest_link_speed = IXGBE_LINK_SPEED_1GB_FULL;
 
-               /* If we already have link at this speed, just jump out */
-               status = ixgbe_check_link(hw, &link_speed, &link_up, false);
-               if (status != IXGBE_SUCCESS)
-                       return status;
-
-               if ((link_speed == IXGBE_LINK_SPEED_1GB_FULL) && link_up)
-                       goto out;
-
                /* Set the module link speed */
                switch (hw->phy.media_type) {
                case ixgbe_media_type_fiber:
index 0545f85..903f34d 100644 (file)
@@ -72,6 +72,7 @@ s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw);
 
 s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index);
 s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw);
 
 s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw);
 s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data);
@@ -133,7 +134,7 @@ s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
 s32 ixgbe_insert_mac_addr_generic(struct ixgbe_hw *hw, u8 *addr, u32 vmdq);
 s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw);
 s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan,
-                          u32 vind, bool vlan_on, bool vlvf_bypass);
+                        u32 vind, bool vlan_on, bool vlvf_bypass);
 s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
                           bool vlan_on, u32 *vfta_delta, u32 vfta,
                           bool vlvf_bypass);
@@ -155,11 +156,14 @@ void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw, int num_pb, u32 headroom,
                             int strategy);
 void ixgbe_enable_relaxed_ordering_gen2(struct ixgbe_hw *hw);
 s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
-                                u8 build, u8 ver);
+                                u8 build, u8 ver, u16 len, const char *str);
 u8 ixgbe_calculate_checksum(u8 *buffer, u32 length);
 s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
                                 u32 length, u32 timeout, bool return_data);
-
+s32 ixgbe_hic_unlocked(struct ixgbe_hw *, u32 *buffer, u32 length, u32 timeout);
+s32 ixgbe_shutdown_fw_phy(struct ixgbe_hw *);
+s32 ixgbe_fw_phy_activity(struct ixgbe_hw *, u16 activity,
+                         u32 (*data)[FW_PHY_ACT_DATA_COUNT]);
 void ixgbe_clear_tx_pending(struct ixgbe_hw *hw);
 
 extern s32 ixgbe_reset_pipeline_82599(struct ixgbe_hw *hw);
diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.c b/src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.c
new file mode 100644 (file)
index 0000000..47143a2
--- /dev/null
@@ -0,0 +1,240 @@
+/*******************************************************************************
+
+Copyright (c) 2001-2015, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived from
+    this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+
+#include "ixgbe_vf.h"
+#include "ixgbe_hv_vf.h"
+
+/**
+ * Hyper-V variant - just a stub.
+ */
+static s32 ixgbevf_hv_update_mc_addr_list_vf(struct ixgbe_hw *hw, u8 *mc_addr_list,
+                                u32 mc_addr_count, ixgbe_mc_addr_itr next,
+                                bool clear)
+{
+       UNREFERENCED_5PARAMETER(hw, mc_addr_list, mc_addr_count, next, clear);
+
+       return IXGBE_ERR_FEATURE_NOT_SUPPORTED;
+}
+
+/**
+ * Hyper-V variant - just a stub.
+ */
+static s32 ixgbevf_hv_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode)
+{
+       UNREFERENCED_2PARAMETER(hw, xcast_mode);
+
+       return IXGBE_ERR_FEATURE_NOT_SUPPORTED;
+}
+
+/**
+ * Hyper-V variant - just a stub.
+ */
+static s32 ixgbevf_hv_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind,
+                                 bool vlan_on, bool vlvf_bypass)
+{
+       UNREFERENCED_5PARAMETER(hw, vlan, vind, vlan_on, vlvf_bypass);
+
+       return IXGBE_ERR_FEATURE_NOT_SUPPORTED;
+}
+
+static s32 ixgbevf_hv_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr)
+{
+       UNREFERENCED_3PARAMETER(hw, index, addr);
+
+       return IXGBE_ERR_FEATURE_NOT_SUPPORTED;
+}
+
+/**
+ * Hyper-V variant - just a stub.
+ */
+static s32 ixgbevf_hv_reset_hw_vf(struct ixgbe_hw *hw)
+{
+       UNREFERENCED_PARAMETER(hw);
+
+       return IXGBE_ERR_FEATURE_NOT_SUPPORTED;
+}
+
+/**
+ * Hyper-V variant - just a stub.
+ */
+static s32 ixgbevf_hv_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vlan, u32 vind)
+{
+       UNREFERENCED_5PARAMETER(hw, index, addr, vlan, vind);
+
+       return IXGBE_ERR_FEATURE_NOT_SUPPORTED;
+}
+
+/**
+ * Hyper-V variant; there is no mailbox communication.
+ */
+static s32 ixgbevf_hv_check_mac_link_vf(struct ixgbe_hw *hw,
+                                       ixgbe_link_speed *speed,
+                                       bool *link_up,
+                                       bool autoneg_wait_to_complete)
+{
+       struct ixgbe_mbx_info *mbx = &hw->mbx;
+       struct ixgbe_mac_info *mac = &hw->mac;
+       u32 links_reg;
+       UNREFERENCED_1PARAMETER(autoneg_wait_to_complete);
+
+       /* If we were hit with a reset, drop the link */
+       if (!mbx->ops.check_for_rst(hw, 0) || !mbx->timeout)
+               mac->get_link_status = true;
+
+       if (!mac->get_link_status)
+               goto out;
+
+       /* if link status is down no point in checking to see if pf is up */
+       links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS);
+       if (!(links_reg & IXGBE_LINKS_UP))
+               goto out;
+
+       /* for SFP+ modules and DA cables on 82599 it can take up to 500usecs
+        * before the link status is correct
+        */
+       if (mac->type == ixgbe_mac_82599_vf) {
+               int i;
+
+               for (i = 0; i < 5; i++) {
+                       DELAY(100);
+                       links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS);
+
+                       if (!(links_reg & IXGBE_LINKS_UP))
+                               goto out;
+               }
+       }
+
+       switch (links_reg & IXGBE_LINKS_SPEED_82599) {
+       case IXGBE_LINKS_SPEED_10G_82599:
+               *speed = IXGBE_LINK_SPEED_10GB_FULL;
+               if (hw->mac.type >= ixgbe_mac_X550) {
+                       if (links_reg & IXGBE_LINKS_SPEED_NON_STD)
+                               *speed = IXGBE_LINK_SPEED_2_5GB_FULL;
+               }
+               break;
+       case IXGBE_LINKS_SPEED_1G_82599:
+               *speed = IXGBE_LINK_SPEED_1GB_FULL;
+               break;
+       case IXGBE_LINKS_SPEED_100_82599:
+               *speed = IXGBE_LINK_SPEED_100_FULL;
+               if (hw->mac.type == ixgbe_mac_X550) {
+                       if (links_reg & IXGBE_LINKS_SPEED_NON_STD)
+                               *speed = IXGBE_LINK_SPEED_5GB_FULL;
+               }
+               break;
+       case IXGBE_LINKS_SPEED_10_X550EM_A:
+               *speed = IXGBE_LINK_SPEED_UNKNOWN;
+               /* Reserved for pre-x550 devices */
+               if (hw->mac.type >= ixgbe_mac_X550)
+                       *speed = IXGBE_LINK_SPEED_10_FULL;
+               break;
+       default:
+               *speed = IXGBE_LINK_SPEED_UNKNOWN;
+       }
+
+       /* if we passed all the tests above then the link is up and we no
+        * longer need to check for link
+        */
+       mac->get_link_status = false;
+
+out:
+       *link_up = !mac->get_link_status;
+       return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbevf_hv_set_rlpml_vf - Set the maximum receive packet length
+ * @hw: pointer to the HW structure
+ * @max_size: value to assign to max frame size
+ * Hyper-V variant.
+ **/
+static s32 ixgbevf_hv_set_rlpml_vf(struct ixgbe_hw *hw, u16 max_size)
+{
+       u32 reg;
+
+       /* If we are on Hyper-V, we implement this functionality
+        * differently.
+        */
+       reg =  IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(0));
+       /* CRC == 4 */
+       reg |= ((max_size + 4) | IXGBE_RXDCTL_RLPML_EN);
+       IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(0), reg);
+
+       return IXGBE_SUCCESS;
+}
+
+/**
+ *  ixgbevf_hv_negotiate_api_version_vf - Negotiate supported API version
+ *  @hw: pointer to the HW structure
+ *  @api: integer containing requested API version
+ *  Hyper-V version - only ixgbe_mbox_api_10 supported.
+ **/
+static int ixgbevf_hv_negotiate_api_version_vf(struct ixgbe_hw *hw, int api)
+{
+       UNREFERENCED_1PARAMETER(hw);
+
+       /* Hyper-V only supports api version ixgbe_mbox_api_10 */
+       if (api != ixgbe_mbox_api_10)
+               return IXGBE_ERR_INVALID_ARGUMENT;
+
+       return IXGBE_SUCCESS;
+}
+
+/**
+ *  ixgbevf_hv_init_ops_vf - Initialize the pointers for vf
+ *  @hw: pointer to hardware structure
+ *
+ *  This will assign function pointers, adapter-specific functions can
+ *  override the assignment of generic function pointers by assigning
+ *  their own adapter-specific function pointers.
+ *  Does not touch the hardware.
+ **/
+s32 ixgbevf_hv_init_ops_vf(struct ixgbe_hw *hw)
+{
+       /* Set defaults for VF then override applicable Hyper-V
+        * specific functions
+        */
+       ixgbe_init_ops_vf(hw);
+
+       hw->mac.ops.reset_hw = ixgbevf_hv_reset_hw_vf;
+       hw->mac.ops.check_link = ixgbevf_hv_check_mac_link_vf;
+       hw->mac.ops.negotiate_api_version = ixgbevf_hv_negotiate_api_version_vf;
+       hw->mac.ops.set_rar = ixgbevf_hv_set_rar_vf;
+       hw->mac.ops.update_mc_addr_list = ixgbevf_hv_update_mc_addr_list_vf;
+       hw->mac.ops.update_xcast_mode = ixgbevf_hv_update_xcast_mode;
+       hw->mac.ops.set_uc_addr = ixgbevf_hv_set_uc_addr_vf;
+       hw->mac.ops.set_vfta = ixgbevf_hv_set_vfta_vf;
+       hw->mac.ops.set_rlpml = ixgbevf_hv_set_rlpml_vf;
+
+       return IXGBE_SUCCESS;
+}
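
Note: the Hyper-V code above follows a simple pattern: start from the generic VF function-pointer table and overwrite only the entries that cannot rely on PF mailbox traffic, with ixgbevf_hv_set_rlpml_vf() programming VFRXDCTL(0) directly (frame size plus 4 bytes of CRC, with the RLPML enable bit set). The standalone sketch below illustrates that pattern; the struct, the register variable and the RLPML_EN bit position are simplified stand-ins, not the driver's real definitions.

#include <stdint.h>
#include <stdio.h>

#define RXDCTL_RLPML_EN (1u << 15)            /* stand-in bit, not the real IXGBE value */

struct vf_ops {                               /* simplified stand-in for ixgbe_mac_operations */
        int (*set_rlpml)(uint32_t *vfrxdctl0, uint16_t max_size);
};

/* Generic VF path: would go through the PF mailbox (omitted in this sketch). */
static int set_rlpml_mbx(uint32_t *vfrxdctl0, uint16_t max_size)
{
        (void)vfrxdctl0;
        (void)max_size;
        return -1;                            /* no mailbox available on Hyper-V */
}

/* Hyper-V override: program the ring register directly, as in the patch above. */
static int set_rlpml_hv(uint32_t *vfrxdctl0, uint16_t max_size)
{
        *vfrxdctl0 |= (uint32_t)(max_size + 4) | RXDCTL_RLPML_EN;   /* +4 bytes of CRC */
        return 0;
}

int main(void)
{
        struct vf_ops ops = { .set_rlpml = set_rlpml_mbx };
        uint32_t vfrxdctl0 = 0;

        ops.set_rlpml = set_rlpml_hv;         /* the ixgbevf_hv_init_ops_vf() override step */
        ops.set_rlpml(&vfrxdctl0, 1518);
        printf("VFRXDCTL(0) = 0x%08x\n", (unsigned)vfrxdctl0);
        return 0;
}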
diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.h b/src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.h
new file mode 100644 (file)
index 0000000..9119f29
--- /dev/null
@@ -0,0 +1,41 @@
+/*******************************************************************************
+
+Copyright (c) 2001-2016, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived from
+    this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+
+#ifndef _IXGBE_HV_VF_H_
+#define _IXGBE_HV_VF_H_
+
+#include "ixgbe_type.h"
+
+s32 ixgbevf_hv_init_ops_vf(struct ixgbe_hw *hw);
+
+#endif /* _IXGBE_HV_VF_H_ */
index d775142..7556a81 100644 (file)
@@ -90,6 +90,7 @@ enum ixgbe_pfvf_api_rev {
        ixgbe_mbox_api_20,      /* API version 2.0, solaris Phase1 VF driver */
        ixgbe_mbox_api_11,      /* API version 1.1, linux/freebsd VF driver */
        ixgbe_mbox_api_12,      /* API version 1.2, linux/freebsd VF driver */
+       ixgbe_mbox_api_13,      /* API version 1.3, linux/freebsd VF driver */
        /* This value should always be last */
        ixgbe_mbox_api_unknown, /* indicates that API version is not known */
 };
@@ -109,9 +110,9 @@ enum ixgbe_pfvf_api_rev {
 #define IXGBE_VF_GET_QUEUES    0x09 /* get queue configuration */
 
 /* mailbox API, version 1.2 VF requests */
-#define IXGBE_VF_GET_RETA              0x0a /* VF request for RETA */
-#define IXGBE_VF_GET_RSS_KEY           0x0b /* get RSS key */
-#define IXGBE_VF_UPDATE_XCAST_MODE     0x0C
+#define IXGBE_VF_GET_RETA      0x0a    /* VF request for RETA */
+#define IXGBE_VF_GET_RSS_KEY   0x0b    /* get RSS key */
+#define IXGBE_VF_UPDATE_XCAST_MODE     0x0c
 
 /* GET_QUEUES return data indices within the mailbox */
 #define IXGBE_VF_TX_QUEUES     1       /* number of Tx queues supported */
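
Note: with ixgbe_mbox_api_13 added to the enum above, a VF driver typically requests the newest mailbox API it understands and falls back to older revisions if the PF refuses. The loop below is a minimal sketch of that newest-first fallback using a mocked negotiate call; the real negotiation goes through ixgbevf_negotiate_api_version() further down in this patch, and the enum ordering here is illustrative only.

#include <stdio.h>

enum mbox_api { api_10, api_20, api_11, api_12, api_13 };  /* illustrative ordering */

/* Mocked PF answer: pretend the PF only understands up to API 1.2. */
static int negotiate(enum mbox_api api)
{
        return (api == api_12 || api == api_11 || api == api_10) ? 0 : -1;
}

int main(void)
{
        /* Try newest first, fall back to older revisions. */
        const enum mbox_api try_order[] = { api_13, api_12, api_11, api_10 };
        unsigned i;

        for (i = 0; i < sizeof(try_order) / sizeof(try_order[0]); i++) {
                if (negotiate(try_order[i]) == 0) {
                        printf("negotiated API entry %u of the try list\n", i);
                        break;
                }
        }
        return 0;
}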
index 06d1ee1..4aab278 100644 (file)
@@ -44,6 +44,7 @@
 #include <rte_cycles.h>
 #include <rte_log.h>
 #include <rte_byteorder.h>
+#include <rte_io.h>
 
 #include "../ixgbe_logs.h"
 #include "../ixgbe_bypass_defines.h"
@@ -81,6 +82,7 @@
 #define UNREFERENCED_2PARAMETER(_p, _q)
 #define UNREFERENCED_3PARAMETER(_p, _q, _r) 
 #define UNREFERENCED_4PARAMETER(_p, _q, _r, _s) 
+#define UNREFERENCED_5PARAMETER(_p, _q, _r, _s, _t)
 
 /* Shared code error reporting */
 enum {
@@ -95,11 +97,13 @@ enum {
 #define STATIC static
 #define IXGBE_NTOHL(_i)        rte_be_to_cpu_32(_i)
 #define IXGBE_NTOHS(_i)        rte_be_to_cpu_16(_i)
+#define IXGBE_CPU_TO_LE16(_i)  rte_cpu_to_le_16(_i)
 #define IXGBE_CPU_TO_LE32(_i)  rte_cpu_to_le_32(_i)
-#define IXGBE_LE32_TO_CPU(_i) rte_le_to_cpu_32(_i)
+#define IXGBE_LE32_TO_CPU(_i)  rte_le_to_cpu_32(_i)
 #define IXGBE_LE32_TO_CPUS(_i) rte_le_to_cpu_32(_i)
 #define IXGBE_CPU_TO_BE16(_i)  rte_cpu_to_be_16(_i)
 #define IXGBE_CPU_TO_BE32(_i)  rte_cpu_to_be_32(_i)
+#define IXGBE_BE32_TO_CPU(_i)  rte_be_to_cpu_32(_i)
 
 typedef uint8_t                u8;
 typedef int8_t         s8;
@@ -120,16 +124,18 @@ typedef int               bool;
 
 #define prefetch(x) rte_prefetch0(x)
 
-#define IXGBE_PCI_REG(reg) (*((volatile uint32_t *)(reg)))
+#define IXGBE_PCI_REG(reg) rte_read32(reg)
 
 static inline uint32_t ixgbe_read_addr(volatile void* addr)
 {
        return rte_le_to_cpu_32(IXGBE_PCI_REG(addr));
 }
 
-#define IXGBE_PCI_REG_WRITE(reg, value) do { \
-       IXGBE_PCI_REG((reg)) = (rte_cpu_to_le_32(value)); \
-} while(0)
+#define IXGBE_PCI_REG_WRITE(reg, value)                        \
+       rte_write32((rte_cpu_to_le_32(value)), reg)
+
+#define IXGBE_PCI_REG_WRITE_RELAXED(reg, value)                \
+       rte_write32_relaxed((rte_cpu_to_le_32(value)), reg)
 
 #define IXGBE_PCI_REG_ADDR(hw, reg) \
        ((volatile uint32_t *)((char *)(hw)->hw_addr + (reg)))
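
Note: the osdep change above routes register accesses through rte_read32()/rte_write32() from <rte_io.h> instead of raw volatile dereferences, while keeping the explicit CPU-to-little-endian conversion in the macros. The sketch below shows only the byte-order part, on a plain memory buffer standing in for a device register; it does not use DPDK and ignores the I/O barrier semantics that the rte_io helpers add.

#include <stdint.h>
#include <stdio.h>

/* Store a 32-bit value into "register" memory in little-endian byte order,
 * mirroring what IXGBE_PCI_REG_WRITE does around rte_write32(). */
static void reg_write_le32(uint8_t *reg, uint32_t val)
{
        reg[0] = (uint8_t)(val & 0xff);
        reg[1] = (uint8_t)((val >> 8) & 0xff);
        reg[2] = (uint8_t)((val >> 16) & 0xff);
        reg[3] = (uint8_t)((val >> 24) & 0xff);
}

/* Read it back and convert to CPU order, as ixgbe_read_addr() does. */
static uint32_t reg_read_le32(const uint8_t *reg)
{
        return (uint32_t)reg[0] | ((uint32_t)reg[1] << 8) |
               ((uint32_t)reg[2] << 16) | ((uint32_t)reg[3] << 24);
}

int main(void)
{
        uint8_t fake_reg[4];

        reg_write_le32(fake_reg, 0x12345678u);
        printf("read back 0x%08x\n", (unsigned)reg_read_le32(fake_reg));
        return 0;
}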
index ed1b14f..c953805 100644 (file)
@@ -113,7 +113,7 @@ s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, u16 reg,
                                        u16 *val, bool lock)
 {
        u32 swfw_mask = hw->phy.phy_semaphore_mask;
-       int max_retry = 10;
+       int max_retry = 3;
        int retry = 0;
        u8 csum_byte;
        u8 high_bits;
@@ -121,8 +121,6 @@ s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, u16 reg,
        u8 reg_high;
        u8 csum;
 
-       if (hw->mac.type >= ixgbe_mac_X550)
-               max_retry = 3;
        reg_high = ((reg >> 7) & 0xFE) | 1;     /* Indicate read combined */
        csum = ixgbe_ones_comp_byte_add(reg_high, reg & 0xFF);
        csum = ~csum;
@@ -282,6 +280,42 @@ s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw)
        return IXGBE_SUCCESS;
 }
 
+/**
+ * ixgbe_probe_phy - Probe a single address for a PHY
+ * @hw: pointer to hardware structure
+ * @phy_addr: PHY address to probe
+ *
+ * Returns true if PHY found
+ */
+static bool ixgbe_probe_phy(struct ixgbe_hw *hw, u16 phy_addr)
+{
+       u16 ext_ability = 0;
+
+       if (!ixgbe_validate_phy_addr(hw, phy_addr)) {
+               DEBUGOUT1("Unable to validate PHY address 0x%04X\n",
+                       phy_addr);
+               return false;
+       }
+
+       if (ixgbe_get_phy_id(hw))
+               return false;
+
+       hw->phy.type = ixgbe_get_phy_type_from_id(hw->phy.id);
+
+       if (hw->phy.type == ixgbe_phy_unknown) {
+               hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY,
+                                    IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability);
+               if (ext_ability &
+                   (IXGBE_MDIO_PHY_10GBASET_ABILITY |
+                    IXGBE_MDIO_PHY_1000BASET_ABILITY))
+                       hw->phy.type = ixgbe_phy_cu_unknown;
+               else
+                       hw->phy.type = ixgbe_phy_generic;
+       }
+
+       return true;
+}
+
 /**
  *  ixgbe_identify_phy_generic - Get physical layer module
  *  @hw: pointer to hardware structure
@@ -291,8 +325,7 @@ s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw)
 s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw)
 {
        s32 status = IXGBE_ERR_PHY_ADDR_INVALID;
-       u32 phy_addr;
-       u16 ext_ability = 0;
+       u16 phy_addr;
 
        DEBUGFUNC("ixgbe_identify_phy_generic");
 
@@ -303,45 +336,33 @@ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw)
                        hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY0_SM;
        }
 
-       if (hw->phy.type == ixgbe_phy_unknown) {
-               for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) {
-                       if (ixgbe_validate_phy_addr(hw, phy_addr)) {
-                               hw->phy.addr = phy_addr;
-                               ixgbe_get_phy_id(hw);
-                               hw->phy.type =
-                                       ixgbe_get_phy_type_from_id(hw->phy.id);
-
-                               if (hw->phy.type == ixgbe_phy_unknown) {
-                                       hw->phy.ops.read_reg(hw,
-                                                 IXGBE_MDIO_PHY_EXT_ABILITY,
-                                                 IXGBE_MDIO_PMA_PMD_DEV_TYPE,
-                                                 &ext_ability);
-                                       if (ext_ability &
-                                           (IXGBE_MDIO_PHY_10GBASET_ABILITY |
-                                            IXGBE_MDIO_PHY_1000BASET_ABILITY))
-                                               hw->phy.type =
-                                                        ixgbe_phy_cu_unknown;
-                                       else
-                                               hw->phy.type =
-                                                        ixgbe_phy_generic;
-                               }
+       if (hw->phy.type != ixgbe_phy_unknown)
+               return IXGBE_SUCCESS;
 
-                               status = IXGBE_SUCCESS;
-                               break;
-                       }
-               }
+       if (hw->phy.nw_mng_if_sel) {
+               phy_addr = (hw->phy.nw_mng_if_sel &
+                           IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >>
+                          IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT;
+               if (ixgbe_probe_phy(hw, phy_addr))
+                       return IXGBE_SUCCESS;
+               else
+                       return IXGBE_ERR_PHY_ADDR_INVALID;
+       }
 
-               /* Certain media types do not have a phy so an address will not
-                * be found and the code will take this path.  Caller has to
-                * decide if it is an error or not.
-                */
-               if (status != IXGBE_SUCCESS) {
-                       hw->phy.addr = 0;
+       for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) {
+               if (ixgbe_probe_phy(hw, phy_addr)) {
+                       status = IXGBE_SUCCESS;
+                       break;
                }
-       } else {
-               status = IXGBE_SUCCESS;
        }
 
+       /* Certain media types do not have a PHY, so an address will not
+        * be found and the code will take this path. The caller has to
+        * decide whether that is an error.
+        */
+       if (status != IXGBE_SUCCESS)
+               hw->phy.addr = 0;
+
        return status;
 }
 
@@ -393,6 +414,8 @@ bool ixgbe_validate_phy_addr(struct ixgbe_hw *hw, u32 phy_addr)
        if (phy_id != 0xFFFF && phy_id != 0x0)
                valid = true;
 
+       DEBUGOUT1("PHY ID HIGH is 0x%04X\n", phy_id);
+
        return valid;
 }
 
@@ -421,6 +444,9 @@ s32 ixgbe_get_phy_id(struct ixgbe_hw *hw)
                hw->phy.id |= (u32)(phy_id_low & IXGBE_PHY_REVISION_MASK);
                hw->phy.revision = (u32)(phy_id_low & ~IXGBE_PHY_REVISION_MASK);
        }
+       DEBUGOUT2("PHY_ID_HIGH 0x%04X, PHY_ID_LOW 0x%04X\n",
+                 phy_id_high, phy_id_low);
+
        return status;
 }
 
@@ -439,7 +465,6 @@ enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id)
        case TN1010_PHY_ID:
                phy_type = ixgbe_phy_tn;
                break;
-       case X550_PHY_ID1:
        case X550_PHY_ID2:
        case X550_PHY_ID3:
        case X540_PHY_ID:
@@ -452,10 +477,12 @@ enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id)
                phy_type = ixgbe_phy_nl;
                break;
        case X557_PHY_ID:
+       case X557_PHY_ID2:
                phy_type = ixgbe_phy_x550em_ext_t;
                break;
        case IXGBE_M88E1500_E_PHY_ID:
-               phy_type = ixgbe_phy_m88;
+       case IXGBE_M88E1543_E_PHY_ID:
+               phy_type = ixgbe_phy_ext_1g_t;
                break;
        default:
                phy_type = ixgbe_phy_unknown;
@@ -506,11 +533,30 @@ s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw)
         */
        for (i = 0; i < 30; i++) {
                msec_delay(100);
-               hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL,
-                                    IXGBE_MDIO_PHY_XS_DEV_TYPE, &ctrl);
-               if (!(ctrl & IXGBE_MDIO_PHY_XS_RESET)) {
-                       usec_delay(2);
-                       break;
+               if (hw->phy.type == ixgbe_phy_x550em_ext_t) {
+                       status = hw->phy.ops.read_reg(hw,
+                                                 IXGBE_MDIO_TX_VENDOR_ALARMS_3,
+                                                 IXGBE_MDIO_PMA_PMD_DEV_TYPE,
+                                                 &ctrl);
+                       if (status != IXGBE_SUCCESS)
+                               return status;
+
+                       if (ctrl & IXGBE_MDIO_TX_VENDOR_ALARMS_3_RST_MASK) {
+                               usec_delay(2);
+                               break;
+                       }
+               } else {
+                       status = hw->phy.ops.read_reg(hw,
+                                                    IXGBE_MDIO_PHY_XS_CONTROL,
+                                                    IXGBE_MDIO_PHY_XS_DEV_TYPE,
+                                                    &ctrl);
+                       if (status != IXGBE_SUCCESS)
+                               return status;
+
+                       if (!(ctrl & IXGBE_MDIO_PHY_XS_RESET)) {
+                               usec_delay(2);
+                               break;
+                       }
                }
        }
 
@@ -532,7 +578,7 @@ out:
  *  @phy_data: Pointer to read data from PHY register
  **/
 s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
-                      u16 *phy_data)
+                          u16 *phy_data)
 {
        u32 i, data, command;
 
@@ -554,12 +600,13 @@ s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
 
                command = IXGBE_READ_REG(hw, IXGBE_MSCA);
                if ((command & IXGBE_MSCA_MDI_COMMAND) == 0)
-                               break;
+                       break;
        }
 
 
        if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) {
                ERROR_REPORT1(IXGBE_ERROR_POLLING, "PHY address command did not complete.\n");
+               DEBUGOUT("PHY address command did not complete, returning IXGBE_ERR_PHY\n");
                return IXGBE_ERR_PHY;
        }
 
@@ -589,6 +636,7 @@ s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
 
        if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) {
                ERROR_REPORT1(IXGBE_ERROR_POLLING, "PHY read command didn't complete\n");
+               DEBUGOUT("PHY read command didn't complete, returning IXGBE_ERR_PHY\n");
                return IXGBE_ERR_PHY;
        }
 
@@ -719,7 +767,7 @@ s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
        DEBUGFUNC("ixgbe_write_phy_reg_generic");
 
        if (hw->mac.ops.acquire_swfw_sync(hw, gssr) == IXGBE_SUCCESS) {
-               status = ixgbe_write_phy_reg_mdi(hw, reg_addr, device_type,
+               status = hw->phy.ops.write_reg_mdi(hw, reg_addr, device_type,
                                                 phy_data);
                hw->mac.ops.release_swfw_sync(hw, gssr);
        } else {
@@ -746,91 +794,63 @@ s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw)
 
        ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg);
 
-       if (speed & IXGBE_LINK_SPEED_10GB_FULL) {
-               /* Set or unset auto-negotiation 10G advertisement */
-               hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG,
-                                    IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
-                                    &autoneg_reg);
+       /* Set or unset auto-negotiation 10G advertisement */
+       hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG,
+                            IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+                            &autoneg_reg);
 
-               autoneg_reg &= ~IXGBE_MII_10GBASE_T_ADVERTISE;
-               if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL)
-                       autoneg_reg |= IXGBE_MII_10GBASE_T_ADVERTISE;
+       autoneg_reg &= ~IXGBE_MII_10GBASE_T_ADVERTISE;
+       if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) &&
+           (speed & IXGBE_LINK_SPEED_10GB_FULL))
+               autoneg_reg |= IXGBE_MII_10GBASE_T_ADVERTISE;
 
-               hw->phy.ops.write_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG,
-                                     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
-                                     autoneg_reg);
-       }
+       hw->phy.ops.write_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG,
+                             IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+                             autoneg_reg);
 
-       if (hw->mac.type == ixgbe_mac_X550) {
-               if (speed & IXGBE_LINK_SPEED_5GB_FULL) {
-                       /* Set or unset auto-negotiation 5G advertisement */
-                       hw->phy.ops.read_reg(hw,
-                               IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
-                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
-                               &autoneg_reg);
-
-                       autoneg_reg &= ~IXGBE_MII_5GBASE_T_ADVERTISE;
-                       if (hw->phy.autoneg_advertised &
-                            IXGBE_LINK_SPEED_5GB_FULL)
-                               autoneg_reg |= IXGBE_MII_5GBASE_T_ADVERTISE;
-
-                       hw->phy.ops.write_reg(hw,
-                               IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
-                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
-                               autoneg_reg);
-               }
+       hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
+                            IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+                            &autoneg_reg);
 
-               if (speed & IXGBE_LINK_SPEED_2_5GB_FULL) {
-                       /* Set or unset auto-negotiation 2.5G advertisement */
-                       hw->phy.ops.read_reg(hw,
-                               IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
-                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
-                               &autoneg_reg);
-
-                       autoneg_reg &= ~IXGBE_MII_2_5GBASE_T_ADVERTISE;
-                       if (hw->phy.autoneg_advertised &
-                           IXGBE_LINK_SPEED_2_5GB_FULL)
-                               autoneg_reg |= IXGBE_MII_2_5GBASE_T_ADVERTISE;
-
-                       hw->phy.ops.write_reg(hw,
-                               IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
-                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
-                               autoneg_reg);
-               }
+       if (hw->mac.type == ixgbe_mac_X550) {
+               /* Set or unset auto-negotiation 5G advertisement */
+               autoneg_reg &= ~IXGBE_MII_5GBASE_T_ADVERTISE;
+               if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_5GB_FULL) &&
+                   (speed & IXGBE_LINK_SPEED_5GB_FULL))
+                       autoneg_reg |= IXGBE_MII_5GBASE_T_ADVERTISE;
+
+               /* Set or unset auto-negotiation 2.5G advertisement */
+               autoneg_reg &= ~IXGBE_MII_2_5GBASE_T_ADVERTISE;
+               if ((hw->phy.autoneg_advertised &
+                    IXGBE_LINK_SPEED_2_5GB_FULL) &&
+                   (speed & IXGBE_LINK_SPEED_2_5GB_FULL))
+                       autoneg_reg |= IXGBE_MII_2_5GBASE_T_ADVERTISE;
        }
 
-       if (speed & IXGBE_LINK_SPEED_1GB_FULL) {
-               /* Set or unset auto-negotiation 1G advertisement */
-               hw->phy.ops.read_reg(hw,
-                                    IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
-                                    IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
-                                    &autoneg_reg);
+       /* Set or unset auto-negotiation 1G advertisement */
+       autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE;
+       if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) &&
+           (speed & IXGBE_LINK_SPEED_1GB_FULL))
+               autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE;
 
-               autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE;
-               if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL)
-                       autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE;
+       hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
+                             IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+                             autoneg_reg);
 
-               hw->phy.ops.write_reg(hw,
-                                     IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
-                                     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
-                                     autoneg_reg);
-       }
+       /* Set or unset auto-negotiation 100M advertisement */
+       hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG,
+                            IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+                            &autoneg_reg);
 
-       if (speed & IXGBE_LINK_SPEED_100_FULL) {
-               /* Set or unset auto-negotiation 100M advertisement */
-               hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG,
-                                    IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
-                                    &autoneg_reg);
+       autoneg_reg &= ~(IXGBE_MII_100BASE_T_ADVERTISE |
+                        IXGBE_MII_100BASE_T_ADVERTISE_HALF);
+       if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) &&
+           (speed & IXGBE_LINK_SPEED_100_FULL))
+               autoneg_reg |= IXGBE_MII_100BASE_T_ADVERTISE;
 
-               autoneg_reg &= ~(IXGBE_MII_100BASE_T_ADVERTISE |
-                                IXGBE_MII_100BASE_T_ADVERTISE_HALF);
-               if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL)
-                       autoneg_reg |= IXGBE_MII_100BASE_T_ADVERTISE;
-
-               hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG,
-                                     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
-                                     autoneg_reg);
-       }
+       hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG,
+                             IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+                             autoneg_reg);
 
        /* Blocked by MNG FW so don't reset PHY */
        if (ixgbe_check_reset_blocked(hw))
@@ -882,6 +902,9 @@ s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw,
        if (speed & IXGBE_LINK_SPEED_100_FULL)
                hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_100_FULL;
 
+       if (speed & IXGBE_LINK_SPEED_10_FULL)
+               hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10_FULL;
+
        /* Setup link based on the new speed settings */
        ixgbe_setup_phy_link(hw);
 
@@ -919,6 +942,7 @@ static s32 ixgbe_get_copper_speeds_supported(struct ixgbe_hw *hw)
                hw->phy.speeds_supported |= IXGBE_LINK_SPEED_5GB_FULL;
                break;
        case ixgbe_mac_X550EM_x:
+       case ixgbe_mac_X550EM_a:
                hw->phy.speeds_supported &= ~IXGBE_LINK_SPEED_100_FULL;
                break;
        default:
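
Note: the rewritten ixgbe_identify_phy_generic() above first tries the MDIO PHY address that firmware advertises in NW_MNG_IF_SEL (bits 7:3, per the IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD definitions later in this patch) before falling back to scanning all addresses. Below is a small standalone sketch of that field extraction; the register value used is made up for illustration.

#include <stdint.h>
#include <stdio.h>

#define NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT 3
#define NW_MNG_IF_SEL_MDIO_PHY_ADD       (0x1Fu << NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT)

int main(void)
{
        uint32_t nw_mng_if_sel = 0x000000D8u;   /* example value only: PHY address 0x1B */
        uint16_t phy_addr;

        phy_addr = (uint16_t)((nw_mng_if_sel & NW_MNG_IF_SEL_MDIO_PHY_ADD) >>
                              NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT);
        printf("firmware-provided PHY address: 0x%02x\n", phy_addr);
        return 0;
}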
index 281f9fa..820d471 100644 (file)
@@ -92,8 +92,9 @@ POSSIBILITY OF SUCH DAMAGE.
 #define IXGBE_CS4227_GLOBAL_ID_MSB     1
 #define IXGBE_CS4227_SCRATCH           2
 #define IXGBE_CS4227_GLOBAL_ID_VALUE   0x03E5
-#define IXGBE_CS4223_PHY_ID            0x7003/* Quad port */
-#define IXGBE_CS4227_PHY_ID            0x3003/* Dual port */
+#define IXGBE_CS4227_EFUSE_PDF_SKU     0x19F
+#define IXGBE_CS4223_SKU_ID            0x0010  /* Quad port */
+#define IXGBE_CS4227_SKU_ID            0x0014  /* Dual port */
 #define IXGBE_CS4227_RESET_PENDING     0x1357
 #define IXGBE_CS4227_RESET_COMPLETE    0x5AA5
 #define IXGBE_CS4227_RETRIES           15
index 83818a9..bb1f85b 100644 (file)
@@ -105,11 +105,11 @@ POSSIBILITY OF SUCH DAMAGE.
 #define IXGBE_SUBDEV_ID_82599_560FLR           0x17D0
 #define IXGBE_SUBDEV_ID_82599_ECNA_DP          0x0470
 #define IXGBE_SUBDEV_ID_82599_SP_560FLR                0x211B
-#define IXGBE_SUBDEV_ID_82599_LOM_SFP          0x8976
 #define IXGBE_SUBDEV_ID_82599_LOM_SNAP6                0x2159
 #define IXGBE_SUBDEV_ID_82599_SFP_1OCP         0x000D
 #define IXGBE_SUBDEV_ID_82599_SFP_2OCP         0x0008
-#define IXGBE_SUBDEV_ID_82599_SFP_LOM          0x06EE
+#define IXGBE_SUBDEV_ID_82599_SFP_LOM_OEM1     0x8976
+#define IXGBE_SUBDEV_ID_82599_SFP_LOM_OEM2     0x06EE
 #define IXGBE_DEV_ID_82599_BACKPLANE_FCOE      0x152A
 #define IXGBE_DEV_ID_82599_SFP_FCOE            0x1529
 #define IXGBE_DEV_ID_82599_SFP_EM              0x1507
@@ -146,6 +146,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define IXGBE_DEV_ID_X550EM_X_SFP              0x15AC
 #define IXGBE_DEV_ID_X550EM_X_10G_T            0x15AD
 #define IXGBE_DEV_ID_X550EM_X_1G_T             0x15AE
+#define IXGBE_DEV_ID_X550EM_X_XFI              0x15B0
 #define IXGBE_DEV_ID_X550_VF_HV                        0x1564
 #define IXGBE_DEV_ID_X550_VF                   0x1565
 #define IXGBE_DEV_ID_X550EM_A_VF               0x15C5
@@ -565,6 +566,13 @@ struct ixgbe_thermal_sensor_data {
 #define IXGBE_PROXYFC  0x05F64 /* Proxying Filter Control Register */
 #define IXGBE_VXLANCTRL        0x0000507C /* Rx filter VXLAN UDPPORT Register */
 
+/* masks for accessing VXLAN and GENEVE UDP ports */
+#define IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK     0x0000ffff /* VXLAN port */
+#define IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK    0xffff0000 /* GENEVE port */
+#define IXGBE_VXLANCTRL_ALL_UDPPORT_MASK       0xffffffff /* GENEVE/VXLAN */
+
+#define IXGBE_VXLANCTRL_GENEVE_UDPPORT_SHIFT   16
+
 #define IXGBE_FHFT(_n) (0x09000 + ((_n) * 0x100)) /* Flex host filter table */
 /* Ext Flexible Host Filter Table */
 #define IXGBE_FHFT_EXT(_n)     (0x09800 + ((_n) * 0x100))
@@ -1038,7 +1046,7 @@ struct ixgbe_dmac_config {
 #define IXGBE_FTFT             0x09400 /* 0x9400-0x97FC */
 #define IXGBE_METF(_i)         (0x05190 + ((_i) * 4)) /* 4 of these (0-3) */
 #define IXGBE_MDEF_EXT(_i)     (0x05160 + ((_i) * 4)) /* 8 of these (0-7) */
-#define IXGBE_LSWFW            0x15014
+#define IXGBE_LSWFW            0x15F14
 #define IXGBE_BMCIP(_i)                (0x05050 + ((_i) * 4)) /* 0x5050-0x505C */
 #define IXGBE_BMCIPVAL         0x05060
 #define IXGBE_BMCIP_IPADDR_TYPE        0x00000001
@@ -1640,17 +1648,17 @@ struct ixgbe_dmac_config {
 #define TN1010_PHY_ID  0x00A19410
 #define TNX_FW_REV     0xB
 #define X540_PHY_ID    0x01540200
-#define X550_PHY_ID1   0x01540220
 #define X550_PHY_ID2   0x01540223
 #define X550_PHY_ID3   0x01540221
 #define X557_PHY_ID    0x01540240
+#define X557_PHY_ID2   0x01540250
 #define AQ_FW_REV      0x20
 #define QT2022_PHY_ID  0x0043A400
 #define ATH_PHY_ID     0x03429050
 
 /* PHY Types */
-#define IXGBE_M88E1500_E_PHY_ID                0x01410DD0
-#define IXGBE_M88E1543_E_PHY_ID                0x01410EA0
+#define IXGBE_M88E1500_E_PHY_ID        0x01410DD0
+#define IXGBE_M88E1543_E_PHY_ID        0x01410EA0
 
 /* Special PHY Init Routine */
 #define IXGBE_PHY_INIT_OFFSET_NL       0x002B
@@ -1765,6 +1773,8 @@ enum {
 #define IXGBE_VT_CTL_POOL_MASK         (0x3F << IXGBE_VT_CTL_POOL_SHIFT)
 
 /* VMOLR bitmasks */
+#define IXGBE_VMOLR_UPE                0x00400000 /* unicast promiscuous */
+#define IXGBE_VMOLR_VPE                0x00800000 /* VLAN promiscuous */
 #define IXGBE_VMOLR_AUPE       0x01000000 /* accept untagged packets */
 #define IXGBE_VMOLR_ROMPE      0x02000000 /* accept packets in MTA tbl */
 #define IXGBE_VMOLR_ROPE       0x04000000 /* accept packets in UC tbl */
@@ -2203,6 +2213,7 @@ enum {
 #define IXGBE_LINKS_SPEED_10G_82599    0x30000000
 #define IXGBE_LINKS_SPEED_1G_82599     0x20000000
 #define IXGBE_LINKS_SPEED_100_82599    0x10000000
+#define IXGBE_LINKS_SPEED_10_X550EM_A  0x00000000
 #define IXGBE_LINK_UP_TIME             90 /* 9.0 Seconds */
 #define IXGBE_AUTO_NEG_TIME            45 /* 4.5 Seconds */
 
@@ -2335,7 +2346,9 @@ enum {
 
 #define IXGBE_SAN_MAC_ADDR_PTR         0x28
 #define IXGBE_DEVICE_CAPS              0x2C
-#define IXGBE_SERIAL_NUMBER_MAC_ADDR   0x11
+#define IXGBE_82599_SERIAL_NUMBER_MAC_ADDR     0x11
+#define IXGBE_X550_SERIAL_NUMBER_MAC_ADDR      0x04
+
 #define IXGBE_PCIE_MSIX_82599_CAPS     0x72
 #define IXGBE_MAX_MSIX_VECTORS_82599   0x40
 #define IXGBE_PCIE_MSIX_82598_CAPS     0x62
@@ -2780,6 +2793,7 @@ enum {
 #define IXGBE_RXDADV_PKTTYPE_UDP       0x00000200 /* UDP hdr present */
 #define IXGBE_RXDADV_PKTTYPE_SCTP      0x00000400 /* SCTP hdr present */
 #define IXGBE_RXDADV_PKTTYPE_NFS       0x00000800 /* NFS hdr present */
+#define IXGBE_RXDADV_PKTTYPE_GENEVE    0x00000800 /* GENEVE hdr present */
 #define IXGBE_RXDADV_PKTTYPE_VXLAN     0x00000800 /* VXLAN hdr present */
 #define IXGBE_RXDADV_PKTTYPE_TUNNEL    0x00010000 /* Tunnel type */
 #define IXGBE_RXDADV_PKTTYPE_IPSEC_ESP 0x00001000 /* IPSec ESP */
@@ -3023,6 +3037,7 @@ enum ixgbe_fdir_pballoc_type {
 #define FW_CEM_UNUSED_VER              0x0
 #define FW_CEM_MAX_RETRIES             3
 #define FW_CEM_RESP_STATUS_SUCCESS     0x1
+#define FW_CEM_DRIVER_VERSION_SIZE     39 /* +9 would send 48 bytes to fw */
 #define FW_READ_SHADOW_RAM_CMD         0x31
 #define FW_READ_SHADOW_RAM_LEN         0x6
 #define FW_WRITE_SHADOW_RAM_CMD                0x33
@@ -3048,13 +3063,66 @@ enum ixgbe_fdir_pballoc_type {
 #define FW_INT_PHY_REQ_LEN             10
 #define FW_INT_PHY_REQ_READ            0
 #define FW_INT_PHY_REQ_WRITE           1
+#define FW_PHY_ACT_REQ_CMD             5
+#define FW_PHY_ACT_DATA_COUNT          4
+#define FW_PHY_ACT_REQ_LEN             (4 + 4 * FW_PHY_ACT_DATA_COUNT)
+#define FW_PHY_ACT_INIT_PHY            1
+#define FW_PHY_ACT_SETUP_LINK          2
+#define FW_PHY_ACT_LINK_SPEED_10       (1u << 0)
+#define FW_PHY_ACT_LINK_SPEED_100      (1u << 1)
+#define FW_PHY_ACT_LINK_SPEED_1G       (1u << 2)
+#define FW_PHY_ACT_LINK_SPEED_2_5G     (1u << 3)
+#define FW_PHY_ACT_LINK_SPEED_5G       (1u << 4)
+#define FW_PHY_ACT_LINK_SPEED_10G      (1u << 5)
+#define FW_PHY_ACT_LINK_SPEED_20G      (1u << 6)
+#define FW_PHY_ACT_LINK_SPEED_25G      (1u << 7)
+#define FW_PHY_ACT_LINK_SPEED_40G      (1u << 8)
+#define FW_PHY_ACT_LINK_SPEED_50G      (1u << 9)
+#define FW_PHY_ACT_LINK_SPEED_100G     (1u << 10)
+#define FW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT 16
+#define FW_PHY_ACT_SETUP_LINK_PAUSE_MASK (3u << \
+                                         FW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT)
+#define FW_PHY_ACT_SETUP_LINK_PAUSE_NONE 0u
+#define FW_PHY_ACT_SETUP_LINK_PAUSE_TX 1u
+#define FW_PHY_ACT_SETUP_LINK_PAUSE_RX 2u
+#define FW_PHY_ACT_SETUP_LINK_PAUSE_RXTX 3u
+#define FW_PHY_ACT_SETUP_LINK_LP       (1u << 18)
+#define FW_PHY_ACT_SETUP_LINK_HP       (1u << 19)
+#define FW_PHY_ACT_SETUP_LINK_EEE      (1u << 20)
+#define FW_PHY_ACT_SETUP_LINK_AN       (1u << 22)
+#define FW_PHY_ACT_SETUP_LINK_RSP_DOWN (1u << 0)
+#define FW_PHY_ACT_GET_LINK_INFO       3
+#define FW_PHY_ACT_GET_LINK_INFO_EEE   (1u << 19)
+#define FW_PHY_ACT_GET_LINK_INFO_FC_TX (1u << 20)
+#define FW_PHY_ACT_GET_LINK_INFO_FC_RX (1u << 21)
+#define FW_PHY_ACT_GET_LINK_INFO_POWER (1u << 22)
+#define FW_PHY_ACT_GET_LINK_INFO_AN_COMPLETE   (1u << 24)
+#define FW_PHY_ACT_GET_LINK_INFO_TEMP  (1u << 25)
+#define FW_PHY_ACT_GET_LINK_INFO_LP_FC_TX      (1u << 28)
+#define FW_PHY_ACT_GET_LINK_INFO_LP_FC_RX      (1u << 29)
+#define FW_PHY_ACT_FORCE_LINK_DOWN     4
+#define FW_PHY_ACT_FORCE_LINK_DOWN_OFF (1u << 0)
+#define FW_PHY_ACT_PHY_SW_RESET                5
+#define FW_PHY_ACT_PHY_HW_RESET                6
+#define FW_PHY_ACT_GET_PHY_INFO                7
+#define FW_PHY_ACT_UD_2                        0x1002
+#define FW_PHY_ACT_UD_2_10G_KR_EEE     (1u << 6)
+#define FW_PHY_ACT_UD_2_10G_KX4_EEE    (1u << 5)
+#define FW_PHY_ACT_UD_2_1G_KX_EEE      (1u << 4)
+#define FW_PHY_ACT_UD_2_10G_T_EEE      (1u << 3)
+#define FW_PHY_ACT_UD_2_1G_T_EEE       (1u << 2)
+#define FW_PHY_ACT_UD_2_100M_TX_EEE    (1u << 1)
+#define FW_PHY_ACT_RETRIES             50
+#define FW_PHY_INFO_SPEED_MASK         0xFFFu
+#define FW_PHY_INFO_ID_HI_MASK         0xFFFF0000u
+#define FW_PHY_INFO_ID_LO_MASK         0x0000FFFFu
 
 /* Host Interface Command Structures */
 
 #ifdef C99
 #pragma pack(push, 1)
 #else
-#pragma pack(1)
+#pragma pack (1)
 #endif /* C99 */
 
 struct ixgbe_hic_hdr {
@@ -3097,6 +3165,16 @@ struct ixgbe_hic_drv_info {
        u16 pad2; /* end spacing to ensure length is mult. of dword2 */
 };
 
+struct ixgbe_hic_drv_info2 {
+       struct ixgbe_hic_hdr hdr;
+       u8 port_num;
+       u8 ver_sub;
+       u8 ver_build;
+       u8 ver_min;
+       u8 ver_maj;
+       char driver_string[FW_CEM_DRIVER_VERSION_SIZE];
+};
+
 /* These need to be dword aligned */
 struct ixgbe_hic_read_shadow_ram {
        union ixgbe_hic_hdr2 hdr;
@@ -3136,13 +3214,26 @@ struct ixgbe_hic_internal_phy_req {
        u8 command_type;
        __be16 address;
        u16 rsv1;
-       __le32 write_data;
+       __be32 write_data;
        u16 pad;
 };
 
 struct ixgbe_hic_internal_phy_resp {
        struct ixgbe_hic_hdr hdr;
-       __le32 read_data;
+       __be32 read_data;
+};
+
+struct ixgbe_hic_phy_activity_req {
+       struct ixgbe_hic_hdr hdr;
+       u8 port_number;
+       u8 pad;
+       __le16 activity_id;
+       __be32 data[FW_PHY_ACT_DATA_COUNT];
+};
+
+struct ixgbe_hic_phy_activity_resp {
+       struct ixgbe_hic_hdr hdr;
+       __be32 data[FW_PHY_ACT_DATA_COUNT];
 };
 
 #ifdef C99
@@ -3305,7 +3396,7 @@ typedef u32 ixgbe_autoneg_advertised;
 /* Link speed */
 typedef u32 ixgbe_link_speed;
 #define IXGBE_LINK_SPEED_UNKNOWN       0
-#define IXGBE_LINK_SPEED_10_FULL       0x0004
+#define IXGBE_LINK_SPEED_10_FULL       0x0002
 #define IXGBE_LINK_SPEED_100_FULL      0x0008
 #define IXGBE_LINK_SPEED_1GB_FULL      0x0020
 #define IXGBE_LINK_SPEED_2_5GB_FULL    0x0400
@@ -3335,6 +3426,7 @@ typedef u32 ixgbe_physical_layer;
 #define IXGBE_PHYSICAL_LAYER_10GBASE_XAUI      0x1000
 #define IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA     0x2000
 #define IXGBE_PHYSICAL_LAYER_1000BASE_SX       0x4000
+#define IXGBE_PHYSICAL_LAYER_10BASE_T          0x8000
 
 /* Flow Control Data Sheet defined values
  * Calculation and defines taken from 802.1bb Annex O
@@ -3553,7 +3645,9 @@ enum ixgbe_phy_type {
        ixgbe_phy_aq,
        ixgbe_phy_x550em_kr,
        ixgbe_phy_x550em_kx4,
+       ixgbe_phy_x550em_xfi,
        ixgbe_phy_x550em_ext_t,
+       ixgbe_phy_ext_1g_t,
        ixgbe_phy_cu_unknown,
        ixgbe_phy_qt,
        ixgbe_phy_xaui,
@@ -3572,7 +3666,7 @@ enum ixgbe_phy_type {
        ixgbe_phy_qsfp_unknown,
        ixgbe_phy_sfp_unsupported, /*Enforce bit set with unsupported module*/
        ixgbe_phy_sgmii,
-       ixgbe_phy_m88,
+       ixgbe_phy_fw,
        ixgbe_phy_generic
 };
 
@@ -3629,14 +3723,6 @@ enum ixgbe_fc_mode {
        ixgbe_fc_default
 };
 
-/* Master/slave control */
-enum ixgbe_ms_type {
-       ixgbe_ms_hw_default = 0,
-       ixgbe_ms_force_master,
-       ixgbe_ms_force_slave,
-       ixgbe_ms_auto
-};
-
 /* Smart Speed Settings */
 #define IXGBE_SMARTSPEED_MAX_RETRIES   3
 enum ixgbe_smart_speed {
@@ -3840,6 +3926,7 @@ struct ixgbe_mac_operations {
        void (*init_swfw_sync)(struct ixgbe_hw *);
        s32 (*prot_autoc_read)(struct ixgbe_hw *, bool *, u32 *);
        s32 (*prot_autoc_write)(struct ixgbe_hw *, u32, bool);
+       s32 (*negotiate_api_version)(struct ixgbe_hw *hw, int api);
 
        /* Link */
        void (*disable_tx_laser)(struct ixgbe_hw *);
@@ -3860,6 +3947,7 @@ struct ixgbe_mac_operations {
        s32 (*led_off)(struct ixgbe_hw *, u32);
        s32 (*blink_led_start)(struct ixgbe_hw *, u32);
        s32 (*blink_led_stop)(struct ixgbe_hw *, u32);
+       s32 (*init_led_link_act)(struct ixgbe_hw *);
 
        /* RAR, Multicast, VLAN */
        s32 (*set_rar)(struct ixgbe_hw *, u32, u8 *, u32, u32);
@@ -3883,6 +3971,8 @@ struct ixgbe_mac_operations {
        s32 (*init_uta_tables)(struct ixgbe_hw *);
        void (*set_mac_anti_spoofing)(struct ixgbe_hw *, bool, int);
        void (*set_vlan_anti_spoofing)(struct ixgbe_hw *, bool, int);
+       s32 (*update_xcast_mode)(struct ixgbe_hw *, int);
+       s32 (*set_rlpml)(struct ixgbe_hw *, u16);
 
        /* Flow Control */
        s32 (*fc_enable)(struct ixgbe_hw *);
@@ -3890,7 +3980,8 @@ struct ixgbe_mac_operations {
        void (*fc_autoneg)(struct ixgbe_hw *);
 
        /* Manageability interface */
-       s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8);
+       s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8, u16,
+                             const char *);
        s32 (*get_thermal_sensor_data)(struct ixgbe_hw *);
        s32 (*init_thermal_sensor_thresh)(struct ixgbe_hw *hw);
        void (*get_rtrup2tc)(struct ixgbe_hw *hw, u8 *map);
@@ -4000,6 +4091,7 @@ struct ixgbe_mac_info {
        struct ixgbe_dmac_config dmac_config;
        bool set_lben;
        u32  max_link_up_time;
+       u8   led_link_act;
 };
 
 struct ixgbe_phy_info {
@@ -4015,8 +4107,8 @@ struct ixgbe_phy_info {
        bool reset_disable;
        ixgbe_autoneg_advertised autoneg_advertised;
        ixgbe_link_speed speeds_supported;
-       enum ixgbe_ms_type ms_type;
-       enum ixgbe_ms_type original_ms_type;
+       ixgbe_link_speed eee_speeds_supported;
+       ixgbe_link_speed eee_speeds_advertised;
        enum ixgbe_smart_speed smart_speed;
        bool smart_speed_active;
        bool multispeed_fiber;
@@ -4078,6 +4170,7 @@ struct ixgbe_hw {
        bool force_full_reset;
        bool allow_unsupported_sfp;
        bool wol_enabled;
+       bool need_crosstalk_fix;
 };
 
 #define ixgbe_call_func(hw, func, params, error) \
@@ -4136,16 +4229,35 @@ struct ixgbe_hw {
 #define IXGBE_KRM_LINK_S1(P)           ((P) ? 0x8200 : 0x4200)
 #define IXGBE_KRM_LINK_CTRL_1(P)       ((P) ? 0x820C : 0x420C)
 #define IXGBE_KRM_AN_CNTL_1(P)         ((P) ? 0x822C : 0x422C)
+#define IXGBE_KRM_AN_CNTL_4(P)         ((P) ? 0x8238 : 0x4238)
 #define IXGBE_KRM_AN_CNTL_8(P)         ((P) ? 0x8248 : 0x4248)
+#define IXGBE_KRM_PCS_KX_AN(P)         ((P) ? 0x9918 : 0x5918)
+#define IXGBE_KRM_PCS_KX_AN_LP(P)      ((P) ? 0x991C : 0x591C)
 #define IXGBE_KRM_SGMII_CTRL(P)                ((P) ? 0x82A0 : 0x42A0)
 #define IXGBE_KRM_LP_BASE_PAGE_HIGH(P) ((P) ? 0x836C : 0x436C)
 #define IXGBE_KRM_DSP_TXFFE_STATE_4(P) ((P) ? 0x8634 : 0x4634)
 #define IXGBE_KRM_DSP_TXFFE_STATE_5(P) ((P) ? 0x8638 : 0x4638)
 #define IXGBE_KRM_RX_TRN_LINKUP_CTRL(P)        ((P) ? 0x8B00 : 0x4B00)
 #define IXGBE_KRM_PMD_DFX_BURNIN(P)    ((P) ? 0x8E00 : 0x4E00)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20(P) ((P) ? 0x9054 : 0x5054)
 #define IXGBE_KRM_TX_COEFF_CTRL_1(P)   ((P) ? 0x9520 : 0x5520)
 #define IXGBE_KRM_RX_ANA_CTL(P)                ((P) ? 0x9A00 : 0x5A00)
 
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_DA         ~(0x3 << 20)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_SR         (1u << 20)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_LR         (0x2 << 20)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN           (1u << 25)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN            (1u << 26)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN              (1u << 27)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10M          ~(0x7 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_100M         (1u << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G           (0x2 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10G          (0x3 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN           (0x4 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_2_5G         (0x7 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK         (0x7 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_FW_AN_RESTART      (1u << 31)
+
 #define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_32B           (1 << 9)
 #define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_KRPCS         (1 << 11)
 
@@ -4166,9 +4278,14 @@ struct ixgbe_hw {
 
 #define IXGBE_KRM_AN_CNTL_1_SYM_PAUSE                  (1 << 28)
 #define IXGBE_KRM_AN_CNTL_1_ASM_PAUSE                  (1 << 29)
-
+#define IXGBE_KRM_PCS_KX_AN_SYM_PAUSE                  (1 << 1)
+#define IXGBE_KRM_PCS_KX_AN_ASM_PAUSE                  (1 << 2)
+#define IXGBE_KRM_PCS_KX_AN_LP_SYM_PAUSE               (1 << 2)
+#define IXGBE_KRM_PCS_KX_AN_LP_ASM_PAUSE               (1 << 3)
+#define IXGBE_KRM_AN_CNTL_4_ECSR_AN37_OVER_73          (1 << 29)
 #define IXGBE_KRM_AN_CNTL_8_LINEAR                     (1 << 0)
 #define IXGBE_KRM_AN_CNTL_8_LIMITING                   (1 << 1)
+
 #define IXGBE_KRM_LP_BASE_PAGE_HIGH_SYM_PAUSE          (1 << 10)
 #define IXGBE_KRM_LP_BASE_PAGE_HIGH_ASM_PAUSE          (1 << 11)
 
@@ -4207,11 +4324,18 @@ struct ixgbe_hw {
 #define IXGBE_SB_IOSF_TARGET_KR_PHY    0
 
 #define IXGBE_NW_MNG_IF_SEL            0x00011178
-#define IXGBE_NW_MNG_IF_SEL_MDIO_ACT   (1 << 1)
-#define IXGBE_NW_MNG_IF_SEL_ENABLE_10_100M (1 << 23)
-#define IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE (1 << 24)
+#define IXGBE_NW_MNG_IF_SEL_MDIO_ACT   (1u << 1)
+#define IXGBE_NW_MNG_IF_SEL_MDIO_IF_MODE       (1u << 2)
+#define IXGBE_NW_MNG_IF_SEL_EN_SHARED_MDIO     (1u << 13)
+#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_10M      (1u << 17)
+#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_100M     (1u << 18)
+#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_1G       (1u << 19)
+#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_2_5G     (1u << 20)
+#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_10G      (1u << 21)
+#define IXGBE_NW_MNG_IF_SEL_SGMII_ENABLE       (1u << 25)
+#define IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE (1 << 24) /* X552 reg field only */
 #define IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT 3
 #define IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD       \
-               (0x1F << IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT)
+                               (0x1F << IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT)
 
 #endif /* _IXGBE_TYPE_H_ */
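
Note: the new IXGBE_LINKS_SPEED_10_X550EM_A encoding above is what lets the VF link-check functions in this patch (ixgbe_check_mac_link_vf and its Hyper-V copy) report 10 Mb/s on X550EM_a. The sketch below decodes just the main speed field; the mask is simply the union of the per-speed values listed above, and the NON_STD adjustment and the link-up bit are intentionally left out for brevity.

#include <stdint.h>
#include <stdio.h>

#define LINKS_SPEED_10G_82599   0x30000000u
#define LINKS_SPEED_1G_82599    0x20000000u
#define LINKS_SPEED_100_82599   0x10000000u
#define LINKS_SPEED_10_X550EM_A 0x00000000u
#define LINKS_SPEED_FIELD_MASK  (LINKS_SPEED_10G_82599 | LINKS_SPEED_1G_82599 | \
                                 LINKS_SPEED_100_82599)

static const char *decode_speed(uint32_t links_reg)
{
        switch (links_reg & LINKS_SPEED_FIELD_MASK) {
        case LINKS_SPEED_10G_82599:
                return "10G";
        case LINKS_SPEED_1G_82599:
                return "1G";
        case LINKS_SPEED_100_82599:
                return "100M";
        case LINKS_SPEED_10_X550EM_A:
                return "10M on X550EM_a, reserved on older MACs";
        default:
                return "unknown";
        }
}

int main(void)
{
        printf("0x20000000 -> %s\n", decode_speed(0x20000000u));
        return 0;
}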
index a75074a..8775ee5 100644 (file)
@@ -64,6 +64,7 @@ s32 ixgbe_init_ops_vf(struct ixgbe_hw *hw)
        hw->mac.ops.get_mac_addr = ixgbe_get_mac_addr_vf;
        hw->mac.ops.stop_adapter = ixgbe_stop_adapter_vf;
        hw->mac.ops.get_bus_info = NULL;
+       hw->mac.ops.negotiate_api_version = ixgbevf_negotiate_api_version;
 
        /* Link */
        hw->mac.ops.setup_link = ixgbe_setup_mac_link_vf;
@@ -75,10 +76,12 @@ s32 ixgbe_init_ops_vf(struct ixgbe_hw *hw)
        hw->mac.ops.set_uc_addr = ixgbevf_set_uc_addr_vf;
        hw->mac.ops.init_rx_addrs = NULL;
        hw->mac.ops.update_mc_addr_list = ixgbe_update_mc_addr_list_vf;
+       hw->mac.ops.update_xcast_mode = ixgbevf_update_xcast_mode;
        hw->mac.ops.enable_mc = NULL;
        hw->mac.ops.disable_mc = NULL;
        hw->mac.ops.clear_vfta = NULL;
        hw->mac.ops.set_vfta = ixgbe_set_vfta_vf;
+       hw->mac.ops.set_rlpml = ixgbevf_rlpml_set_vf;
 
        hw->mac.max_tx_queues = 1;
        hw->mac.max_rx_queues = 1;
@@ -322,15 +325,16 @@ STATIC s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr)
        return vector;
 }
 
-STATIC void ixgbevf_write_msg_read_ack(struct ixgbe_hw *hw,
-                                       u32 *msg, u16 size)
+STATIC s32 ixgbevf_write_msg_read_ack(struct ixgbe_hw *hw, u32 *msg,
+                                     u32 *retmsg, u16 size)
 {
        struct ixgbe_mbx_info *mbx = &hw->mbx;
-       u32 retmsg[IXGBE_VFMAILBOX_SIZE];
        s32 retval = mbx->ops.write_posted(hw, msg, size, 0);
 
-       if (!retval)
-               mbx->ops.read_posted(hw, retmsg, size, 0);
+       if (retval)
+               return retval;
+
+       return mbx->ops.read_posted(hw, retmsg, size, 0);
 }
 
 /**
@@ -344,7 +348,6 @@ STATIC void ixgbevf_write_msg_read_ack(struct ixgbe_hw *hw,
 s32 ixgbe_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
                     u32 enable_addr)
 {
-       struct ixgbe_mbx_info *mbx = &hw->mbx;
        u32 msgbuf[3];
        u8 *msg_addr = (u8 *)(&msgbuf[1]);
        s32 ret_val;
@@ -353,10 +356,7 @@ s32 ixgbe_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
        memset(msgbuf, 0, 12);
        msgbuf[0] = IXGBE_VF_SET_MAC_ADDR;
        memcpy(msg_addr, addr, 6);
-       ret_val = mbx->ops.write_posted(hw, msgbuf, 3, 0);
-
-       if (!ret_val)
-               ret_val = mbx->ops.read_posted(hw, msgbuf, 3, 0);
+       ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 3);
 
        msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
 
@@ -418,6 +418,39 @@ s32 ixgbe_update_mc_addr_list_vf(struct ixgbe_hw *hw, u8 *mc_addr_list,
        return mbx->ops.write_posted(hw, msgbuf, IXGBE_VFMAILBOX_SIZE, 0);
 }
 
+/**
+ *  ixgbevf_update_xcast_mode - Update Multicast mode
+ *  @hw: pointer to the HW structure
+ *  @xcast_mode: new multicast mode
+ *
+ *  Updates the Multicast Mode of VF.
+ **/
+s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode)
+{
+       u32 msgbuf[2];
+       s32 err;
+
+       switch (hw->api_version) {
+       case ixgbe_mbox_api_12:
+       case ixgbe_mbox_api_13:
+               break;
+       default:
+               return IXGBE_ERR_FEATURE_NOT_SUPPORTED;
+       }
+
+       msgbuf[0] = IXGBE_VF_UPDATE_XCAST_MODE;
+       msgbuf[1] = xcast_mode;
+
+       err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2);
+       if (err)
+               return err;
+
+       msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
+       if (msgbuf[0] == (IXGBE_VF_UPDATE_XCAST_MODE | IXGBE_VT_MSGTYPE_NACK))
+               return IXGBE_ERR_FEATURE_NOT_SUPPORTED;
+       return IXGBE_SUCCESS;
+}
+
 /**
  *  ixgbe_set_vfta_vf - Set/Unset vlan filter table address
  *  @hw: pointer to the HW structure
@@ -425,11 +458,12 @@ s32 ixgbe_update_mc_addr_list_vf(struct ixgbe_hw *hw, u8 *mc_addr_list,
  *  @vind: unused by VF drivers
  *  @vlan_on: if true then set bit, else clear bit
  *  @vlvf_bypass: boolean flag indicating updating default pool is okay
+ *
+ *  Turn on/off specified VLAN in the VLAN filter table.
  **/
 s32 ixgbe_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind,
                      bool vlan_on, bool vlvf_bypass)
 {
-       struct ixgbe_mbx_info *mbx = &hw->mbx;
        u32 msgbuf[2];
        s32 ret_val;
        UNREFERENCED_2PARAMETER(vind, vlvf_bypass);
@@ -439,10 +473,7 @@ s32 ixgbe_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind,
        /* Setting the 8 bit field MSG INFO to TRUE indicates "add" */
        msgbuf[0] |= vlan_on << IXGBE_VT_MSGINFO_SHIFT;
 
-       ret_val = mbx->ops.write_posted(hw, msgbuf, 2, 0);
-       if (!ret_val)
-               ret_val = mbx->ops.read_posted(hw, msgbuf, 1, 0);
-
+       ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2);
        if (!ret_val && (msgbuf[0] & IXGBE_VT_MSGTYPE_ACK))
                return IXGBE_SUCCESS;
 
@@ -489,8 +520,7 @@ s32 ixgbe_get_mac_addr_vf(struct ixgbe_hw *hw, u8 *mac_addr)
 
 s32 ixgbevf_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr)
 {
-       struct ixgbe_mbx_info *mbx = &hw->mbx;
-       u32 msgbuf[3];
+       u32 msgbuf[3], msgbuf_chk;
        u8 *msg_addr = (u8 *)(&msgbuf[1]);
        s32 ret_val;
 
@@ -503,18 +533,17 @@ s32 ixgbevf_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr)
         */
        msgbuf[0] |= index << IXGBE_VT_MSGINFO_SHIFT;
        msgbuf[0] |= IXGBE_VF_SET_MACVLAN;
+       msgbuf_chk = msgbuf[0];
        if (addr)
                memcpy(msg_addr, addr, 6);
-       ret_val = mbx->ops.write_posted(hw, msgbuf, 3, 0);
 
-       if (!ret_val)
-               ret_val = mbx->ops.read_posted(hw, msgbuf, 3, 0);
+       ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 3);
+       if (!ret_val) {
+               msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
 
-       msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
-
-       if (!ret_val)
-               if (msgbuf[0] == (IXGBE_VF_SET_MACVLAN | IXGBE_VT_MSGTYPE_NACK))
-                       ret_val = IXGBE_ERR_OUT_OF_MEM;
+               if (msgbuf[0] == (msgbuf_chk | IXGBE_VT_MSGTYPE_NACK))
+                       return IXGBE_ERR_OUT_OF_MEM;
+       }
 
        return ret_val;
 }
@@ -584,13 +613,29 @@ s32 ixgbe_check_mac_link_vf(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
        switch (links_reg & IXGBE_LINKS_SPEED_82599) {
        case IXGBE_LINKS_SPEED_10G_82599:
                *speed = IXGBE_LINK_SPEED_10GB_FULL;
+               if (hw->mac.type >= ixgbe_mac_X550) {
+                       if (links_reg & IXGBE_LINKS_SPEED_NON_STD)
+                               *speed = IXGBE_LINK_SPEED_2_5GB_FULL;
+               }
                break;
        case IXGBE_LINKS_SPEED_1G_82599:
                *speed = IXGBE_LINK_SPEED_1GB_FULL;
                break;
        case IXGBE_LINKS_SPEED_100_82599:
                *speed = IXGBE_LINK_SPEED_100_FULL;
+               if (hw->mac.type == ixgbe_mac_X550) {
+                       if (links_reg & IXGBE_LINKS_SPEED_NON_STD)
+                               *speed = IXGBE_LINK_SPEED_5GB_FULL;
+               }
+               break;
+       case IXGBE_LINKS_SPEED_10_X550EM_A:
+               *speed = IXGBE_LINK_SPEED_UNKNOWN;
+               /* Reserved on pre-X550 devices */
+               if (hw->mac.type >= ixgbe_mac_X550)
+                       *speed = IXGBE_LINK_SPEED_10_FULL;
                break;
+       default:
+               *speed = IXGBE_LINK_SPEED_UNKNOWN;
        }
 
        /* if the read failed it could just be a mailbox collision, best wait
@@ -627,13 +672,22 @@ out:
  *  @hw: pointer to the HW structure
  *  @max_size: value to assign to max frame size
  **/
-void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size)
+s32 ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size)
 {
        u32 msgbuf[2];
+       s32 retval;
 
        msgbuf[0] = IXGBE_VF_SET_LPE;
        msgbuf[1] = max_size;
-       ixgbevf_write_msg_read_ack(hw, msgbuf, 2);
+
+       retval = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2);
+       if (retval)
+               return retval;
+       if ((msgbuf[0] & IXGBE_VF_SET_LPE) &&
+           (msgbuf[0] & IXGBE_VT_MSGTYPE_NACK))
+               return IXGBE_ERR_MBX;
+
+       return 0;
 }
 
 /**
@@ -650,11 +704,8 @@ int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api)
        msg[0] = IXGBE_VF_API_NEGOTIATE;
        msg[1] = api;
        msg[2] = 0;
-       err = hw->mbx.ops.write_posted(hw, msg, 3, 0);
-
-       if (!err)
-               err = hw->mbx.ops.read_posted(hw, msg, 3, 0);
 
+       err = ixgbevf_write_msg_read_ack(hw, msg, msg, 3);
        if (!err) {
                msg[0] &= ~IXGBE_VT_MSGTYPE_CTS;
 
@@ -680,6 +731,7 @@ int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs,
        switch (hw->api_version) {
        case ixgbe_mbox_api_11:
        case ixgbe_mbox_api_12:
+       case ixgbe_mbox_api_13:
                break;
        default:
                return 0;
@@ -688,11 +740,8 @@ int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs,
        /* Fetch queue configuration from the PF */
        msg[0] = IXGBE_VF_GET_QUEUES;
        msg[1] = msg[2] = msg[3] = msg[4] = 0;
-       err = hw->mbx.ops.write_posted(hw, msg, 5, 0);
-
-       if (!err)
-               err = hw->mbx.ops.read_posted(hw, msg, 5, 0);
 
+       err = ixgbevf_write_msg_read_ack(hw, msg, msg, 5);
        if (!err) {
                msg[0] &= ~IXGBE_VT_MSGTYPE_CTS;
 
index 8851cb8..3efffe8 100644 (file)
@@ -34,6 +34,8 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef _IXGBE_VF_H_
 #define _IXGBE_VF_H_
 
+#include "ixgbe_type.h"
+
 #define IXGBE_VF_IRQ_CLEAR_MASK        7
 #define IXGBE_VF_MAX_TX_QUEUES 8
 #define IXGBE_VF_MAX_RX_QUEUES 8
@@ -114,6 +116,7 @@ struct ixgbevf_hw_stats {
        u64 saved_reset_vfmprc;
 };
 
+s32 ixgbe_init_ops_vf(struct ixgbe_hw *hw);
 s32 ixgbe_init_hw_vf(struct ixgbe_hw *hw);
 s32 ixgbe_start_hw_vf(struct ixgbe_hw *hw);
 s32 ixgbe_reset_hw_vf(struct ixgbe_hw *hw);
@@ -131,9 +134,10 @@ s32 ixgbevf_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr);
 s32 ixgbe_update_mc_addr_list_vf(struct ixgbe_hw *hw, u8 *mc_addr_list,
                                 u32 mc_addr_count, ixgbe_mc_addr_itr,
                                 bool clear);
+s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode);
 s32 ixgbe_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind,
                      bool vlan_on, bool vlvf_bypass);
-void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size);
+s32 ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size);
 int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api);
 int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs,
                       unsigned int *default_tc);
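
Note: several of the vf.c helpers above were converted to the shared ixgbevf_write_msg_read_ack() helper and now check the PF's reply for a NACK after masking off the CTS flag. The sketch below mimics that request/acknowledge handshake against a mocked PF; the IXGBE_VF_UPDATE_XCAST_MODE command value matches the mailbox definition earlier in this patch, while the ACK/NACK/CTS bit positions are placeholders for the real values in ixgbe_mbx.h.

#include <stdint.h>
#include <stdio.h>

#define CMD_UPDATE_XCAST_MODE 0x0c          /* matches IXGBE_VF_UPDATE_XCAST_MODE above */
#define MSGTYPE_ACK   0x80000000u           /* placeholder bit positions */
#define MSGTYPE_NACK  0x40000000u
#define MSGTYPE_CTS   0x20000000u

/* Mock PF: acknowledge the request and echo the command back. */
static int pf_exchange(uint32_t *msg, unsigned len)
{
        (void)len;
        msg[0] |= MSGTYPE_ACK | MSGTYPE_CTS;
        return 0;
}

static int vf_update_xcast_mode(int xcast_mode)
{
        uint32_t msgbuf[2];
        int err;

        msgbuf[0] = CMD_UPDATE_XCAST_MODE;
        msgbuf[1] = (uint32_t)xcast_mode;

        err = pf_exchange(msgbuf, 2);       /* write_posted + read_posted in the driver */
        if (err)
                return err;

        msgbuf[0] &= ~MSGTYPE_CTS;          /* ignore the clear-to-send flag */
        if (msgbuf[0] == (CMD_UPDATE_XCAST_MODE | MSGTYPE_NACK))
                return -1;                  /* PF refused the request */
        return 0;
}

int main(void)
{
        printf("xcast update %s\n", vf_update_xcast_mode(2) ? "rejected" : "accepted");
        return 0;
}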
index 31dead0..499b1fa 100644 (file)
@@ -271,6 +271,7 @@ mac_reset_top:
        if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) {
                /* Save the SAN MAC RAR index */
                hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1;
+
                hw->mac.ops.set_rar(hw, hw->mac.san_mac_rar_index,
                                    hw->mac.san_addr, 0, IXGBE_RAH_AV);
 
@@ -490,7 +491,6 @@ s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw)
        u16 length = 0;
        u16 pointer = 0;
        u16 word = 0;
-       u16 checksum_last_word = IXGBE_EEPROM_CHECKSUM;
        u16 ptr_start = IXGBE_PCIE_ANALOG_PTR;
 
        /* Do not use hw->eeprom.ops.read because we do not want to take
@@ -500,14 +500,15 @@ s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw)
 
        DEBUGFUNC("ixgbe_calc_eeprom_checksum_X540");
 
-       /* Include 0x0-0x3F in the checksum */
-       for (i = 0; i <= checksum_last_word; i++) {
+       /* Include 0x0 up to IXGBE_EEPROM_CHECKSUM; do not include the
+        * checksum itself
+        */
+       for (i = 0; i < IXGBE_EEPROM_CHECKSUM; i++) {
                if (ixgbe_read_eerd_generic(hw, i, &word)) {
                        DEBUGOUT("EEPROM read failed\n");
                        return IXGBE_ERR_EEPROM;
                }
-               if (i != IXGBE_EEPROM_CHECKSUM)
-                       checksum += word;
+               checksum += word;
        }
 
        /* Include all data from pointers 0x3, 0x6-0xE.  This excludes the
@@ -774,8 +775,10 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask)
                /* SW NVM semaphore bit is used for access to all
                 * SW_FW_SYNC bits (not just NVM)
                 */
-               if (ixgbe_get_swfw_sync_semaphore(hw))
+               if (ixgbe_get_swfw_sync_semaphore(hw)) {
+                       DEBUGOUT("Failed to get NVM access and register semaphore, returning IXGBE_ERR_SWFW_SYNC\n");
                        return IXGBE_ERR_SWFW_SYNC;
+               }
 
                swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC_BY_MAC(hw));
                if (!(swfw_sync & (fwmask | swmask | hwmask))) {
@@ -783,7 +786,6 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask)
                        IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC_BY_MAC(hw),
                                        swfw_sync);
                        ixgbe_release_swfw_sync_semaphore(hw);
-                       msec_delay(5);
                        return IXGBE_SUCCESS;
                }
                /* Firmware currently using resource (fwmask), hardware
@@ -798,6 +800,7 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask)
        if (swmask == IXGBE_GSSR_SW_MNG_SM) {
                ERROR_REPORT1(IXGBE_ERROR_POLLING,
                             "Failed to get SW only semaphore");
+               DEBUGOUT("Failed to get SW only semaphore, returning IXGBE_ERR_SWFW_SYNC\n");
                return IXGBE_ERR_SWFW_SYNC;
        }
 
@@ -806,8 +809,10 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask)
         * of the requested resource(s) while ignoring the corresponding FW/HW
         * bits in the SW_FW_SYNC register.
         */
-       if (ixgbe_get_swfw_sync_semaphore(hw))
+       if (ixgbe_get_swfw_sync_semaphore(hw)) {
+                       DEBUGOUT("Failed to get NVM semaphore and register semaphore while forcefully ignoring FW semaphore bit(s) and setting SW semaphore bit(s), returning IXGBE_ERR_SWFW_SYNC\n");
                return IXGBE_ERR_SWFW_SYNC;
+       }
        swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC_BY_MAC(hw));
        if (swfw_sync & (fwmask | hwmask)) {
                swfw_sync |= swmask;
@@ -829,9 +834,11 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask)
                        rmask |= IXGBE_GSSR_I2C_MASK;
                ixgbe_release_swfw_sync_X540(hw, rmask);
                ixgbe_release_swfw_sync_semaphore(hw);
+               DEBUGOUT("Resource not released by other SW, returning IXGBE_ERR_SWFW_SYNC\n");
                return IXGBE_ERR_SWFW_SYNC;
        }
        ixgbe_release_swfw_sync_semaphore(hw);
+       DEBUGOUT("Returning error IXGBE_ERR_SWFW_SYNC\n");
 
        return IXGBE_ERR_SWFW_SYNC;
 }
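(The DEBUGOUT calls added above all sit on failure paths of the SW/FW semaphore acquisition. For reference, a hedged sketch of the usual caller pattern — example_locked_eeprom_read is hypothetical; ixgbe_read_eerd_generic and IXGBE_GSSR_EEP_SM are existing ixgbe symbols:)

static s32 example_locked_eeprom_read(struct ixgbe_hw *hw, u16 offset,
                                      u16 *data)
{
        s32 status;

        /* Take the shared SW/FW EEPROM semaphore before touching EERD. */
        if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM))
                return IXGBE_ERR_SWFW_SYNC;

        status = ixgbe_read_eerd_generic(hw, offset, data);

        /* Release on every path, success or failure alike. */
        hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
        return status;
}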
@@ -860,7 +867,7 @@ void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask)
        IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC_BY_MAC(hw), swfw_sync);
 
        ixgbe_release_swfw_sync_semaphore(hw);
-       msec_delay(5);
+       msec_delay(2);
 }
 
 /**
@@ -982,6 +989,9 @@ s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index)
 
        DEBUGFUNC("ixgbe_blink_led_start_X540");
 
+       if (index > 3)
+               return IXGBE_ERR_PARAM;
+
        /*
         * Link should be up in order for the blink bit in the LED control
         * register to work. Force link and speed in the MAC if link is down.
@@ -1016,6 +1026,9 @@ s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index)
        u32 macc_reg;
        u32 ledctl_reg;
 
+       if (index > 3)
+               return IXGBE_ERR_PARAM;
+
        DEBUGFUNC("ixgbe_blink_led_stop_X540");
 
        /* Restore the LED to its default value. */
index aa6e859..6f9c034 100644 (file)
@@ -41,6 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
 STATIC s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed);
 STATIC s32 ixgbe_acquire_swfw_sync_X550a(struct ixgbe_hw *, u32 mask);
 STATIC void ixgbe_release_swfw_sync_X550a(struct ixgbe_hw *, u32 mask);
+STATIC s32 ixgbe_read_mng_if_sel_x550em(struct ixgbe_hw *hw);
 
 /**
  *  ixgbe_init_ops_X550 - Inits func ptrs and MAC type
@@ -61,7 +62,7 @@ s32 ixgbe_init_ops_X550(struct ixgbe_hw *hw)
        mac->ops.dmac_config = ixgbe_dmac_config_X550;
        mac->ops.dmac_config_tcs = ixgbe_dmac_config_tcs_X550;
        mac->ops.dmac_update_tcs = ixgbe_dmac_update_tcs_X550;
-       mac->ops.setup_eee = ixgbe_setup_eee_X550;
+       mac->ops.setup_eee = NULL;
        mac->ops.set_source_address_pruning =
                        ixgbe_set_source_address_pruning_X550;
        mac->ops.set_ethertype_anti_spoofing =
@@ -82,6 +83,8 @@ s32 ixgbe_init_ops_X550(struct ixgbe_hw *hw)
        mac->ops.mdd_event = ixgbe_mdd_event_X550;
        mac->ops.restore_mdd_vf = ixgbe_restore_mdd_vf_X550;
        mac->ops.disable_rx = ixgbe_disable_rx_x550;
+       /* Manageability interface */
+       mac->ops.set_fw_drv_ver = ixgbe_set_fw_drv_ver_x550;
        switch (hw->device_id) {
        case IXGBE_DEV_ID_X550EM_X_10G_T:
        case IXGBE_DEV_ID_X550EM_A_10G_T:
@@ -342,11 +345,10 @@ STATIC s32 ixgbe_read_phy_reg_mdi_22(struct ixgbe_hw *hw, u32 reg_addr,
        UNREFERENCED_1PARAMETER(dev_type);
 
        /* Setup and write the read command */
-       command = (reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT)  |
-               (reg_addr << IXGBE_MSCA_DEV_TYPE_SHIFT) |
-               (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) |
-               IXGBE_MSCA_OLD_PROTOCOL | IXGBE_MSCA_READ |
-               IXGBE_MSCA_MDI_COMMAND;
+       command = (reg_addr << IXGBE_MSCA_DEV_TYPE_SHIFT) |
+                 (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) |
+                 IXGBE_MSCA_OLD_PROTOCOL | IXGBE_MSCA_READ_AUTOINC |
+                 IXGBE_MSCA_MDI_COMMAND;
 
        IXGBE_WRITE_REG(hw, IXGBE_MSCA, command);
 
@@ -393,11 +395,10 @@ STATIC s32 ixgbe_write_phy_reg_mdi_22(struct ixgbe_hw *hw, u32 reg_addr,
        IXGBE_WRITE_REG(hw, IXGBE_MSRWD, (u32)phy_data);
 
        /* Setup and write the write command */
-       command = (reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT)  |
-               (reg_addr << IXGBE_MSCA_DEV_TYPE_SHIFT) |
-               (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) |
-               IXGBE_MSCA_OLD_PROTOCOL | IXGBE_MSCA_WRITE |
-               IXGBE_MSCA_MDI_COMMAND;
+       command = (reg_addr << IXGBE_MSCA_DEV_TYPE_SHIFT) |
+                 (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) |
+                 IXGBE_MSCA_OLD_PROTOCOL | IXGBE_MSCA_WRITE |
+                 IXGBE_MSCA_MDI_COMMAND;
 
        IXGBE_WRITE_REG(hw, IXGBE_MSCA, command);
 
@@ -422,43 +423,6 @@ STATIC s32 ixgbe_write_phy_reg_mdi_22(struct ixgbe_hw *hw, u32 reg_addr,
        return IXGBE_SUCCESS;
 }
 
-/**
- * ixgbe_identify_phy_1g - Get 1g PHY type based on device id
- * @hw: pointer to hardware structure
- *
- * Returns error code
- */
-STATIC s32 ixgbe_identify_phy_1g(struct ixgbe_hw *hw)
-{
-       u32 swfw_mask = hw->phy.phy_semaphore_mask;
-       u16 phy_id_high;
-       u16 phy_id_low;
-       s32 rc;
-
-       rc = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask);
-       if (rc)
-               return rc;
-
-       rc = ixgbe_read_phy_reg_mdi_22(hw, IXGBE_MDIO_PHY_ID_HIGH, 0,
-                                      &phy_id_high);
-       if (rc)
-               goto rel_out;
-
-       rc = ixgbe_read_phy_reg_mdi_22(hw, IXGBE_MDIO_PHY_ID_LOW, 0,
-                                      &phy_id_low);
-       if (rc)
-               goto rel_out;
-
-       hw->phy.id = (u32)phy_id_high << 16;
-       hw->phy.id |= phy_id_low & IXGBE_PHY_REVISION_MASK;
-       hw->phy.revision = (u32)phy_id_low & ~IXGBE_PHY_REVISION_MASK;
-
-rel_out:
-       hw->mac.ops.release_swfw_sync(hw, swfw_mask);
-
-       return rc;
-}
-
 /**
  * ixgbe_identify_phy_x550em - Get PHY type based on device id
  * @hw: pointer to hardware structure
@@ -467,18 +431,15 @@ rel_out:
  */
 STATIC s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw)
 {
+       hw->mac.ops.set_lan_id(hw);
+
+       ixgbe_read_mng_if_sel_x550em(hw);
+
        switch (hw->device_id) {
        case IXGBE_DEV_ID_X550EM_A_SFP:
-               hw->phy.ops.read_reg = ixgbe_read_phy_reg_x550a;
-               hw->phy.ops.write_reg = ixgbe_write_phy_reg_x550a;
-               if (hw->bus.lan_id)
-                       hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY1_SM;
-               else
-                       hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY0_SM;
                return ixgbe_identify_module_generic(hw);
        case IXGBE_DEV_ID_X550EM_X_SFP:
                /* set up for CS4227 usage */
-               hw->phy.phy_semaphore_mask = IXGBE_GSSR_SHARED_I2C_SM;
                ixgbe_setup_mux_ctl(hw);
                ixgbe_check_cs4227(hw);
                /* Fallthrough */
@@ -489,30 +450,161 @@ STATIC s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw)
        case IXGBE_DEV_ID_X550EM_X_KX4:
                hw->phy.type = ixgbe_phy_x550em_kx4;
                break;
+       case IXGBE_DEV_ID_X550EM_X_XFI:
+               hw->phy.type = ixgbe_phy_x550em_xfi;
+               break;
        case IXGBE_DEV_ID_X550EM_X_KR:
        case IXGBE_DEV_ID_X550EM_A_KR:
        case IXGBE_DEV_ID_X550EM_A_KR_L:
                hw->phy.type = ixgbe_phy_x550em_kr;
                break;
+       case IXGBE_DEV_ID_X550EM_A_10G_T:
        case IXGBE_DEV_ID_X550EM_X_1G_T:
        case IXGBE_DEV_ID_X550EM_X_10G_T:
-       case IXGBE_DEV_ID_X550EM_A_10G_T:
                return ixgbe_identify_phy_generic(hw);
        case IXGBE_DEV_ID_X550EM_A_1G_T:
        case IXGBE_DEV_ID_X550EM_A_1G_T_L:
-               hw->phy.ops.read_reg = ixgbe_read_phy_reg_x550a;
-               hw->phy.ops.write_reg = ixgbe_write_phy_reg_x550a;
+               hw->phy.type = ixgbe_phy_fw;
+               hw->phy.ops.read_reg = NULL;
+               hw->phy.ops.write_reg = NULL;
                if (hw->bus.lan_id)
                        hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY1_SM;
                else
                        hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY0_SM;
-               return ixgbe_identify_phy_1g(hw);
+               break;
        default:
                break;
        }
        return IXGBE_SUCCESS;
 }
 
+/**
+ * ixgbe_fw_phy_activity - Perform an activity on a PHY
+ * @hw: pointer to hardware structure
+ * @activity: activity to perform
+ * @data: Pointer to 4 32-bit words of data
+ */
+s32 ixgbe_fw_phy_activity(struct ixgbe_hw *hw, u16 activity,
+                         u32 (*data)[FW_PHY_ACT_DATA_COUNT])
+{
+       union {
+               struct ixgbe_hic_phy_activity_req cmd;
+               struct ixgbe_hic_phy_activity_resp rsp;
+       } hic;
+       u16 retries = FW_PHY_ACT_RETRIES;
+       s32 rc;
+       u16 i;
+
+       do {
+               memset(&hic, 0, sizeof(hic));
+               hic.cmd.hdr.cmd = FW_PHY_ACT_REQ_CMD;
+               hic.cmd.hdr.buf_len = FW_PHY_ACT_REQ_LEN;
+               hic.cmd.hdr.checksum = FW_DEFAULT_CHECKSUM;
+               hic.cmd.port_number = hw->bus.lan_id;
+               hic.cmd.activity_id = IXGBE_CPU_TO_LE16(activity);
+               for (i = 0; i < FW_PHY_ACT_DATA_COUNT; ++i)
+                       hic.cmd.data[i] = IXGBE_CPU_TO_BE32((*data)[i]);
+
+               rc = ixgbe_host_interface_command(hw, (u32 *)&hic.cmd,
+                                                 sizeof(hic.cmd),
+                                                 IXGBE_HI_COMMAND_TIMEOUT,
+                                                 true);
+               if (rc != IXGBE_SUCCESS)
+                       return rc;
+               if (hic.rsp.hdr.cmd_or_resp.ret_status ==
+                   FW_CEM_RESP_STATUS_SUCCESS) {
+                       for (i = 0; i < FW_PHY_ACT_DATA_COUNT; ++i)
+                               (*data)[i] = IXGBE_BE32_TO_CPU(hic.rsp.data[i]);
+                       return IXGBE_SUCCESS;
+               }
+               usec_delay(20);
+               --retries;
+       } while (retries > 0);
+
+       return IXGBE_ERR_HOST_INTERFACE_COMMAND;
+}
+
+static const struct {
+       u16 fw_speed;
+       ixgbe_link_speed phy_speed;
+} ixgbe_fw_map[] = {
+       { FW_PHY_ACT_LINK_SPEED_10, IXGBE_LINK_SPEED_10_FULL },
+       { FW_PHY_ACT_LINK_SPEED_100, IXGBE_LINK_SPEED_100_FULL },
+       { FW_PHY_ACT_LINK_SPEED_1G, IXGBE_LINK_SPEED_1GB_FULL },
+       { FW_PHY_ACT_LINK_SPEED_2_5G, IXGBE_LINK_SPEED_2_5GB_FULL },
+       { FW_PHY_ACT_LINK_SPEED_5G, IXGBE_LINK_SPEED_5GB_FULL },
+       { FW_PHY_ACT_LINK_SPEED_10G, IXGBE_LINK_SPEED_10GB_FULL },
+};
+
+/**
+ * ixgbe_get_phy_id_fw - Get the phy ID via firmware command
+ * @hw: pointer to hardware structure
+ *
+ * Returns error code
+ */
+static s32 ixgbe_get_phy_id_fw(struct ixgbe_hw *hw)
+{
+       u32 info[FW_PHY_ACT_DATA_COUNT] = { 0 };
+       u16 phy_speeds;
+       u16 phy_id_lo;
+       s32 rc;
+       u16 i;
+
+       rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_GET_PHY_INFO, &info);
+       if (rc)
+               return rc;
+
+       hw->phy.speeds_supported = 0;
+       phy_speeds = info[0] & FW_PHY_INFO_SPEED_MASK;
+       for (i = 0; i < sizeof(ixgbe_fw_map) / sizeof(ixgbe_fw_map[0]); ++i) {
+               if (phy_speeds & ixgbe_fw_map[i].fw_speed)
+                       hw->phy.speeds_supported |= ixgbe_fw_map[i].phy_speed;
+       }
+       if (!hw->phy.autoneg_advertised)
+               hw->phy.autoneg_advertised = hw->phy.speeds_supported;
+
+       hw->phy.id = info[0] & FW_PHY_INFO_ID_HI_MASK;
+       phy_id_lo = info[1] & FW_PHY_INFO_ID_LO_MASK;
+       hw->phy.id |= phy_id_lo & IXGBE_PHY_REVISION_MASK;
+       hw->phy.revision = phy_id_lo & ~IXGBE_PHY_REVISION_MASK;
+       if (!hw->phy.id || hw->phy.id == IXGBE_PHY_REVISION_MASK)
+               return IXGBE_ERR_PHY_ADDR_INVALID;
+       return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_identify_phy_fw - Get PHY type based on firmware command
+ * @hw: pointer to hardware structure
+ *
+ * Returns error code
+ */
+static s32 ixgbe_identify_phy_fw(struct ixgbe_hw *hw)
+{
+       if (hw->bus.lan_id)
+               hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY1_SM;
+       else
+               hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY0_SM;
+
+       hw->phy.type = ixgbe_phy_fw;
+       hw->phy.ops.read_reg = NULL;
+       hw->phy.ops.write_reg = NULL;
+       return ixgbe_get_phy_id_fw(hw);
+}
+
+/**
+ * ixgbe_shutdown_fw_phy - Shutdown a firmware-controlled PHY
+ * @hw: pointer to hardware structure
+ *
+ * Returns error code
+ */
+s32 ixgbe_shutdown_fw_phy(struct ixgbe_hw *hw)
+{
+       u32 setup[FW_PHY_ACT_DATA_COUNT] = { 0 };
+
+       setup[0] = FW_PHY_ACT_FORCE_LINK_DOWN_OFF;
+       return ixgbe_fw_phy_activity(hw, FW_PHY_ACT_FORCE_LINK_DOWN, &setup);
+}
+
 STATIC s32 ixgbe_read_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr,
                                     u32 device_type, u16 *phy_data)
 {
@@ -601,7 +693,6 @@ s32 ixgbe_init_ops_X550EM(struct ixgbe_hw *hw)
        struct ixgbe_mac_info *mac = &hw->mac;
        struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
        struct ixgbe_phy_info *phy = &hw->phy;
-       struct ixgbe_link_info *link = &hw->link;
        s32 ret_val;
 
        DEBUGFUNC("ixgbe_init_ops_X550EM");
@@ -637,25 +728,6 @@ s32 ixgbe_init_ops_X550EM(struct ixgbe_hw *hw)
        hw->bus.type = ixgbe_bus_type_internal;
        mac->ops.get_bus_info = ixgbe_get_bus_info_X550em;
 
-       if (hw->mac.type == ixgbe_mac_X550EM_x) {
-               mac->ops.read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550;
-               mac->ops.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550;
-               mac->ops.acquire_swfw_sync = ixgbe_acquire_swfw_sync_X550em;
-               mac->ops.release_swfw_sync = ixgbe_release_swfw_sync_X550em;
-               link->ops.read_link = ixgbe_read_i2c_combined_generic;
-               link->ops.read_link_unlocked =
-                               ixgbe_read_i2c_combined_generic_unlocked;
-               link->ops.write_link = ixgbe_write_i2c_combined_generic;
-               link->ops.write_link_unlocked =
-                               ixgbe_write_i2c_combined_generic_unlocked;
-               link->addr = IXGBE_CS4227;
-       }
-       if (hw->mac.type == ixgbe_mac_X550EM_a) {
-               mac->ops.read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550;
-               mac->ops.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550;
-               mac->ops.acquire_swfw_sync = ixgbe_acquire_swfw_sync_X550a;
-               mac->ops.release_swfw_sync = ixgbe_release_swfw_sync_X550a;
-       }
 
        mac->ops.get_media_type = ixgbe_get_media_type_X550em;
        mac->ops.setup_sfp = ixgbe_setup_sfp_modules_X550em;
@@ -666,25 +738,23 @@ s32 ixgbe_init_ops_X550EM(struct ixgbe_hw *hw)
 
        if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper)
                mac->ops.setup_fc = ixgbe_setup_fc_generic;
-       else if (hw->mac.type == ixgbe_mac_X550EM_a) {
-               mac->ops.setup_fc = ixgbe_setup_fc_x550a;
-               mac->ops.fc_autoneg = ixgbe_fc_autoneg_x550a;
-       }
        else
                mac->ops.setup_fc = ixgbe_setup_fc_X550em;
 
+       /* PHY */
+       phy->ops.init = ixgbe_init_phy_ops_X550em;
        switch (hw->device_id) {
-       case IXGBE_DEV_ID_X550EM_X_KR:
-       case IXGBE_DEV_ID_X550EM_A_KR:
-       case IXGBE_DEV_ID_X550EM_A_KR_L:
+       case IXGBE_DEV_ID_X550EM_A_1G_T:
+       case IXGBE_DEV_ID_X550EM_A_1G_T_L:
+               mac->ops.setup_fc = NULL;
+               phy->ops.identify = ixgbe_identify_phy_fw;
+               phy->ops.set_phy_power = NULL;
+               phy->ops.get_firmware_version = NULL;
                break;
        default:
-               mac->ops.setup_eee = NULL;
+               phy->ops.identify = ixgbe_identify_phy_x550em;
        }
 
-       /* PHY */
-       phy->ops.init = ixgbe_init_phy_ops_X550em;
-       phy->ops.identify = ixgbe_identify_phy_x550em;
        if (mac->ops.get_media_type(hw) != ixgbe_media_type_copper)
                phy->ops.set_phy_power = NULL;
 
@@ -702,6 +772,183 @@ s32 ixgbe_init_ops_X550EM(struct ixgbe_hw *hw)
        return ret_val;
 }
 
+/**
+ * ixgbe_setup_fw_link - Setup firmware-controlled PHYs
+ * @hw: pointer to hardware structure
+ */
+static s32 ixgbe_setup_fw_link(struct ixgbe_hw *hw)
+{
+       u32 setup[FW_PHY_ACT_DATA_COUNT] = { 0 };
+       s32 rc;
+       u16 i;
+
+       if (hw->phy.reset_disable || ixgbe_check_reset_blocked(hw))
+               return 0;
+
+       if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) {
+               ERROR_REPORT1(IXGBE_ERROR_UNSUPPORTED,
+                             "ixgbe_fc_rx_pause not valid in strict IEEE mode\n");
+               return IXGBE_ERR_INVALID_LINK_SETTINGS;
+       }
+
+       switch (hw->fc.requested_mode) {
+       case ixgbe_fc_full:
+               setup[0] |= FW_PHY_ACT_SETUP_LINK_PAUSE_RXTX <<
+                           FW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT;
+               break;
+       case ixgbe_fc_rx_pause:
+               setup[0] |= FW_PHY_ACT_SETUP_LINK_PAUSE_RX <<
+                           FW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT;
+               break;
+       case ixgbe_fc_tx_pause:
+               setup[0] |= FW_PHY_ACT_SETUP_LINK_PAUSE_TX <<
+                           FW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT;
+               break;
+       default:
+               break;
+       }
+
+       for (i = 0; i < sizeof(ixgbe_fw_map) / sizeof(ixgbe_fw_map[0]); ++i) {
+               if (hw->phy.autoneg_advertised & ixgbe_fw_map[i].phy_speed)
+                       setup[0] |= ixgbe_fw_map[i].fw_speed;
+       }
+       setup[0] |= FW_PHY_ACT_SETUP_LINK_HP | FW_PHY_ACT_SETUP_LINK_AN;
+
+       if (hw->phy.eee_speeds_advertised)
+               setup[0] |= FW_PHY_ACT_SETUP_LINK_EEE;
+
+       rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_SETUP_LINK, &setup);
+       if (rc)
+               return rc;
+       if (setup[0] == FW_PHY_ACT_SETUP_LINK_RSP_DOWN)
+               return IXGBE_ERR_OVERTEMP;
+       return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_fc_autoneg_fw - Set up flow control for FW-controlled PHYs
+ * @hw: pointer to hardware structure
+ *
+ *  Called at init time to set up flow control.
+ */
+static s32 ixgbe_fc_autoneg_fw(struct ixgbe_hw *hw)
+{
+       if (hw->fc.requested_mode == ixgbe_fc_default)
+               hw->fc.requested_mode = ixgbe_fc_full;
+
+       return ixgbe_setup_fw_link(hw);
+}
+
+/**
+ * ixgbe_setup_eee_fw - Enable/disable EEE support
+ * @hw: pointer to the HW structure
+ * @enable_eee: boolean flag to enable EEE
+ *
+ * Enable/disable EEE based on enable_eee flag.
+ * This function controls EEE for firmware-based PHY implementations.
+ */
+static s32 ixgbe_setup_eee_fw(struct ixgbe_hw *hw, bool enable_eee)
+{
+       if (!!hw->phy.eee_speeds_advertised == enable_eee)
+               return IXGBE_SUCCESS;
+       if (enable_eee)
+               hw->phy.eee_speeds_advertised = hw->phy.eee_speeds_supported;
+       else
+               hw->phy.eee_speeds_advertised = 0;
+       return hw->phy.ops.setup_link(hw);
+}
+
+/**
+*  ixgbe_init_ops_X550EM_a - Inits func ptrs and MAC type
+*  @hw: pointer to hardware structure
+*
+*  Initialize the function pointers for MAC type X550EM_a.
+*  Does not touch the hardware.
+**/
+s32 ixgbe_init_ops_X550EM_a(struct ixgbe_hw *hw)
+{
+       struct ixgbe_mac_info *mac = &hw->mac;
+       s32 ret_val;
+
+       DEBUGFUNC("ixgbe_init_ops_X550EM_a");
+
+       /* Start with generic X550EM init */
+       ret_val = ixgbe_init_ops_X550EM(hw);
+
+       if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SGMII ||
+           hw->device_id == IXGBE_DEV_ID_X550EM_A_SGMII_L) {
+               mac->ops.read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550;
+               mac->ops.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550;
+       } else {
+               mac->ops.read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550a;
+               mac->ops.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550a;
+       }
+       mac->ops.acquire_swfw_sync = ixgbe_acquire_swfw_sync_X550a;
+       mac->ops.release_swfw_sync = ixgbe_release_swfw_sync_X550a;
+
+       switch (mac->ops.get_media_type(hw)) {
+       case ixgbe_media_type_fiber:
+               mac->ops.setup_fc = NULL;
+               mac->ops.fc_autoneg = ixgbe_fc_autoneg_fiber_x550em_a;
+               break;
+       case ixgbe_media_type_backplane:
+               mac->ops.fc_autoneg = ixgbe_fc_autoneg_backplane_x550em_a;
+               mac->ops.setup_fc = ixgbe_setup_fc_backplane_x550em_a;
+               break;
+       default:
+               break;
+       }
+
+       switch (hw->device_id) {
+       case IXGBE_DEV_ID_X550EM_A_1G_T:
+       case IXGBE_DEV_ID_X550EM_A_1G_T_L:
+               mac->ops.fc_autoneg = ixgbe_fc_autoneg_sgmii_x550em_a;
+               mac->ops.setup_fc = ixgbe_fc_autoneg_fw;
+               mac->ops.setup_eee = ixgbe_setup_eee_fw;
+               hw->phy.eee_speeds_supported = IXGBE_LINK_SPEED_100_FULL |
+                                              IXGBE_LINK_SPEED_1GB_FULL;
+               hw->phy.eee_speeds_advertised = hw->phy.eee_speeds_supported;
+               break;
+       default:
+               break;
+       }
+
+       return ret_val;
+}
+
+/**
+*  ixgbe_init_ops_X550EM_x - Inits func ptrs and MAC type
+*  @hw: pointer to hardware structure
+*
+*  Initialize the function pointers for MAC type X550EM_x.
+*  Does not touch the hardware.
+**/
+s32 ixgbe_init_ops_X550EM_x(struct ixgbe_hw *hw)
+{
+       struct ixgbe_mac_info *mac = &hw->mac;
+       struct ixgbe_link_info *link = &hw->link;
+       s32 ret_val;
+
+       DEBUGFUNC("ixgbe_init_ops_X550EM_x");
+
+       /* Start with generic X550EM init */
+       ret_val = ixgbe_init_ops_X550EM(hw);
+
+       mac->ops.read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550;
+       mac->ops.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550;
+       mac->ops.acquire_swfw_sync = ixgbe_acquire_swfw_sync_X550em;
+       mac->ops.release_swfw_sync = ixgbe_release_swfw_sync_X550em;
+       link->ops.read_link = ixgbe_read_i2c_combined_generic;
+       link->ops.read_link_unlocked = ixgbe_read_i2c_combined_generic_unlocked;
+       link->ops.write_link = ixgbe_write_i2c_combined_generic;
+       link->ops.write_link_unlocked =
+                                     ixgbe_write_i2c_combined_generic_unlocked;
+       link->addr = IXGBE_CS4227;
+
+
+       return ret_val;
+}
+
 /**
  *  ixgbe_dmac_config_X550
  *  @hw: pointer to hardware structure
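(The refactor above leaves three entry points: ixgbe_init_ops_X550EM plus the new per-MAC ixgbe_init_ops_X550EM_x and ixgbe_init_ops_X550EM_a. They are expected to be selected by MAC type in the shared-code dispatcher (ixgbe_api.c); a sketch of that selection, assumed rather than shown in this patch, is:)

static s32 example_init_ops(struct ixgbe_hw *hw)
{
        /* Pick the ops-init routine for the detected MAC type. */
        switch (hw->mac.type) {
        case ixgbe_mac_X550EM_x:
                return ixgbe_init_ops_X550EM_x(hw);
        case ixgbe_mac_X550EM_a:
                return ixgbe_init_ops_X550EM_a(hw);
        default:
                return IXGBE_ERR_DEVICE_NOT_SUPPORTED;
        }
}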
@@ -765,6 +1012,7 @@ s32 ixgbe_dmac_config_tcs_X550(struct ixgbe_hw *hw)
 
        /* Configure DMA coalescing enabled */
        switch (hw->mac.dmac_config.link_speed) {
+       case IXGBE_LINK_SPEED_10_FULL:
        case IXGBE_LINK_SPEED_100_FULL:
                pb_headroom = IXGBE_DMACRXT_100M;
                break;
@@ -864,158 +1112,6 @@ s32 ixgbe_init_eeprom_params_X550(struct ixgbe_hw *hw)
        return IXGBE_SUCCESS;
 }
 
-/**
- * ixgbe_enable_eee_x550 - Enable EEE support
- * @hw: pointer to hardware structure
- */
-STATIC s32 ixgbe_enable_eee_x550(struct ixgbe_hw *hw)
-{
-       u16 autoneg_eee_reg;
-       u32 link_reg;
-       s32 status;
-
-       if (hw->mac.type == ixgbe_mac_X550) {
-               /* Advertise EEE capability */
-               hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_EEE_ADVT,
-                                    IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
-                                    &autoneg_eee_reg);
-
-               autoneg_eee_reg |= (IXGBE_AUTO_NEG_10GBASE_EEE_ADVT |
-                                   IXGBE_AUTO_NEG_1000BASE_EEE_ADVT |
-                                   IXGBE_AUTO_NEG_100BASE_EEE_ADVT);
-
-               hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_EEE_ADVT,
-                                     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
-                                     autoneg_eee_reg);
-               return IXGBE_SUCCESS;
-       }
-
-       switch (hw->device_id) {
-       case IXGBE_DEV_ID_X550EM_X_KR:
-       case IXGBE_DEV_ID_X550EM_A_KR:
-       case IXGBE_DEV_ID_X550EM_A_KR_L:
-               status = hw->mac.ops.read_iosf_sb_reg(hw,
-                                    IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-                                    IXGBE_SB_IOSF_TARGET_KR_PHY, &link_reg);
-               if (status != IXGBE_SUCCESS)
-                       return status;
-
-               link_reg |= IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KR |
-                       IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KX;
-
-               /* Don't advertise FEC capability when EEE enabled. */
-               link_reg &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_FEC;
-
-               status = hw->mac.ops.write_iosf_sb_reg(hw,
-                                     IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-                                     IXGBE_SB_IOSF_TARGET_KR_PHY, link_reg);
-               if (status != IXGBE_SUCCESS)
-                       return status;
-               break;
-       default:
-               break;
-       }
-
-       return IXGBE_SUCCESS;
-}
-
-/**
- * ixgbe_disable_eee_x550 - Disable EEE support
- * @hw: pointer to hardware structure
- */
-STATIC s32 ixgbe_disable_eee_x550(struct ixgbe_hw *hw)
-{
-       u16 autoneg_eee_reg;
-       u32 link_reg;
-       s32 status;
-
-       if (hw->mac.type == ixgbe_mac_X550) {
-               /* Disable advertised EEE capability */
-               hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_EEE_ADVT,
-                                    IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
-                                    &autoneg_eee_reg);
-
-               autoneg_eee_reg &= ~(IXGBE_AUTO_NEG_10GBASE_EEE_ADVT |
-                                    IXGBE_AUTO_NEG_1000BASE_EEE_ADVT |
-                                    IXGBE_AUTO_NEG_100BASE_EEE_ADVT);
-
-               hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_EEE_ADVT,
-                                     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
-                                     autoneg_eee_reg);
-               return IXGBE_SUCCESS;
-       }
-
-       switch (hw->device_id) {
-       case IXGBE_DEV_ID_X550EM_X_KR:
-       case IXGBE_DEV_ID_X550EM_A_KR:
-       case IXGBE_DEV_ID_X550EM_A_KR_L:
-               status = hw->mac.ops.read_iosf_sb_reg(hw,
-                                    IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-                                    IXGBE_SB_IOSF_TARGET_KR_PHY, &link_reg);
-               if (status != IXGBE_SUCCESS)
-                       return status;
-
-               link_reg &= ~(IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KR |
-                             IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KX);
-
-               /* Advertise FEC capability when EEE is disabled. */
-               link_reg |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_FEC;
-
-               status = hw->mac.ops.write_iosf_sb_reg(hw,
-                                     IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-                                     IXGBE_SB_IOSF_TARGET_KR_PHY, link_reg);
-               if (status != IXGBE_SUCCESS)
-                       return status;
-               break;
-       default:
-               break;
-       }
-
-       return IXGBE_SUCCESS;
-}
-
-/**
- *  ixgbe_setup_eee_X550 - Enable/disable EEE support
- *  @hw: pointer to the HW structure
- *  @enable_eee: boolean flag to enable EEE
- *
- *  Enable/disable EEE based on enable_eee flag.
- *  Auto-negotiation must be started after BASE-T EEE bits in PHY register 7.3C
- *  are modified.
- *
- **/
-s32 ixgbe_setup_eee_X550(struct ixgbe_hw *hw, bool enable_eee)
-{
-       s32 status;
-       u32 eeer;
-
-       DEBUGFUNC("ixgbe_setup_eee_X550");
-
-       eeer = IXGBE_READ_REG(hw, IXGBE_EEER);
-       /* Enable or disable EEE per flag */
-       if (enable_eee) {
-               eeer |= (IXGBE_EEER_TX_LPI_EN | IXGBE_EEER_RX_LPI_EN);
-
-               /* Not supported on first revision of X550EM_x. */
-               if ((hw->mac.type == ixgbe_mac_X550EM_x) &&
-                   !(IXGBE_FUSES0_REV_MASK &
-                     IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0))))
-                       return IXGBE_SUCCESS;
-               status = ixgbe_enable_eee_x550(hw);
-               if (status)
-                       return status;
-       } else {
-               eeer &= ~(IXGBE_EEER_TX_LPI_EN | IXGBE_EEER_RX_LPI_EN);
-
-               status = ixgbe_disable_eee_x550(hw);
-               if (status)
-                       return status;
-       }
-       IXGBE_WRITE_REG(hw, IXGBE_EEER, eeer);
-
-       return IXGBE_SUCCESS;
-}
-
 /**
  * ixgbe_set_source_address_pruning_X550 - Enable/Disable source address pruning
  * @hw: pointer to hardware structure
@@ -1102,8 +1198,8 @@ STATIC s32 ixgbe_iosf_wait(struct ixgbe_hw *hw, u32 *ctrl)
 }
 
 /**
- *  ixgbe_write_iosf_sb_reg_x550 - Writes a value to specified register of the IOSF
- *  device
+ *  ixgbe_write_iosf_sb_reg_x550 - Writes a value to specified register
+ *  of the IOSF device
  *  @hw: pointer to hardware structure
  *  @reg_addr: 32 bit PHY register to write
  *  @device_type: 3 bit device type
@@ -1149,12 +1245,11 @@ out:
 }
 
 /**
- *  ixgbe_read_iosf_sb_reg_x550 - Writes a value to specified register of the IOSF
- *  device
+ *  ixgbe_read_iosf_sb_reg_x550 - Reads specified register of the IOSF device
  *  @hw: pointer to hardware structure
  *  @reg_addr: 32 bit PHY register to write
  *  @device_type: 3 bit device type
- *  @phy_data: Pointer to read data from the register
+ *  @data: Pointer to read data from the register
  **/
 s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr,
                           u32 device_type, u32 *data)
@@ -1216,13 +1311,20 @@ s32 ixgbe_get_phy_token(struct ixgbe_hw *hw)
                                              sizeof(token_cmd),
                                              IXGBE_HI_COMMAND_TIMEOUT,
                                              true);
-       if (status)
+       if (status) {
+               DEBUGOUT1("Issuing host interface command failed with Status = %d\n",
+                         status);
                return status;
+       }
        if (token_cmd.hdr.cmd_or_resp.ret_status == FW_PHY_TOKEN_OK)
                return IXGBE_SUCCESS;
-       if (token_cmd.hdr.cmd_or_resp.ret_status != FW_PHY_TOKEN_RETRY)
+       if (token_cmd.hdr.cmd_or_resp.ret_status != FW_PHY_TOKEN_RETRY) {
+               DEBUGOUT1("Host interface command returned 0x%08x, returning IXGBE_ERR_FW_RESP_INVALID\n",
+                         token_cmd.hdr.cmd_or_resp.ret_status);
                return IXGBE_ERR_FW_RESP_INVALID;
+       }
 
+       DEBUGOUT("Returning IXGBE_ERR_TOKEN_RETRY\n");
        return IXGBE_ERR_TOKEN_RETRY;
 }
 
@@ -1278,7 +1380,7 @@ s32 ixgbe_write_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
        write_cmd.port_number = hw->bus.lan_id;
        write_cmd.command_type = FW_INT_PHY_REQ_WRITE;
        write_cmd.address = IXGBE_CPU_TO_BE16(reg_addr);
-       write_cmd.write_data = IXGBE_CPU_TO_LE32(data);
+       write_cmd.write_data = IXGBE_CPU_TO_BE32(data);
 
        status = ixgbe_host_interface_command(hw, (u32 *)&write_cmd,
                                              sizeof(write_cmd),
@@ -1288,8 +1390,7 @@ s32 ixgbe_write_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
 }
 
 /**
- *  ixgbe_read_iosf_sb_reg_x550a - Writes a value to specified register
- *  of the IOSF device.
+ *  ixgbe_read_iosf_sb_reg_x550a - Reads specified register of the IOSF device
  *  @hw: pointer to hardware structure
  *  @reg_addr: 32 bit PHY register to write
  *  @device_type: 3 bit device type
@@ -1318,7 +1419,7 @@ s32 ixgbe_read_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
                                              IXGBE_HI_COMMAND_TIMEOUT, true);
 
        /* Extract the register value from the response. */
-       *data = IXGBE_LE32_TO_CPU(hic.rsp.read_data);
+       *data = IXGBE_BE32_TO_CPU(hic.rsp.read_data);
 
        return status;
 }
@@ -1482,6 +1583,7 @@ enum ixgbe_media_type ixgbe_get_media_type_X550em(struct ixgbe_hw *hw)
        switch (hw->device_id) {
        case IXGBE_DEV_ID_X550EM_X_KR:
        case IXGBE_DEV_ID_X550EM_X_KX4:
+       case IXGBE_DEV_ID_X550EM_X_XFI:
        case IXGBE_DEV_ID_X550EM_A_KR:
        case IXGBE_DEV_ID_X550EM_A_KR_L:
                media_type = ixgbe_media_type_backplane;
@@ -1506,7 +1608,6 @@ enum ixgbe_media_type ixgbe_get_media_type_X550em(struct ixgbe_hw *hw)
        case IXGBE_DEV_ID_X550EM_A_1G_T:
        case IXGBE_DEV_ID_X550EM_A_1G_T_L:
                media_type = ixgbe_media_type_copper;
-               hw->phy.type = ixgbe_phy_m88;
                break;
        default:
                media_type = ixgbe_media_type_unknown;
@@ -1598,17 +1699,63 @@ s32 ixgbe_setup_sfp_modules_X550em(struct ixgbe_hw *hw)
        return IXGBE_SUCCESS;
 }
 
+/**
+*  ixgbe_restart_an_internal_phy_x550em - restart autonegotiation for the
+*  internal PHY
+*  @hw: pointer to hardware structure
+**/
+STATIC s32 ixgbe_restart_an_internal_phy_x550em(struct ixgbe_hw *hw)
+{
+       s32 status;
+       u32 link_ctrl;
+
+       /* Restart auto-negotiation. */
+       status = hw->mac.ops.read_iosf_sb_reg(hw,
+                                      IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+                                      IXGBE_SB_IOSF_TARGET_KR_PHY, &link_ctrl);
+
+       if (status) {
+               DEBUGOUT("Auto-negotiation did not complete\n");
+               return status;
+       }
+
+       link_ctrl |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART;
+       status = hw->mac.ops.write_iosf_sb_reg(hw,
+                                       IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+                                       IXGBE_SB_IOSF_TARGET_KR_PHY, link_ctrl);
+
+       if (hw->mac.type == ixgbe_mac_X550EM_a) {
+               u32 flx_mask_st20;
+
+               /* Indicate to FW that AN restart has been asserted */
+               status = hw->mac.ops.read_iosf_sb_reg(hw,
+                               IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_mask_st20);
+
+               if (status) {
+                       DEBUGOUT("Auto-negotiation did not complete\n");
+                       return status;
+               }
+
+               flx_mask_st20 |= IXGBE_KRM_PMD_FLX_MASK_ST20_FW_AN_RESTART;
+               status = hw->mac.ops.write_iosf_sb_reg(hw,
+                               IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, flx_mask_st20);
+       }
+
+       return status;
+}
+
 /**
  * ixgbe_setup_sgmii - Set up link for sgmii
  * @hw: pointer to hardware structure
  */
 STATIC s32 ixgbe_setup_sgmii(struct ixgbe_hw *hw, ixgbe_link_speed speed,
-                            bool autoneg_wait_to_complete)
+                            bool autoneg_wait)
 {
        struct ixgbe_mac_info *mac = &hw->mac;
-       u32 lval, sval;
+       u32 lval, sval, flx_val;
        s32 rc;
-       UNREFERENCED_2PARAMETER(speed, autoneg_wait_to_complete);
 
        rc = mac->ops.read_iosf_sb_reg(hw,
                                       IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
@@ -1641,12 +1788,100 @@ STATIC s32 ixgbe_setup_sgmii(struct ixgbe_hw *hw, ixgbe_link_speed speed,
        if (rc)
                return rc;
 
-       lval |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART;
+       rc = mac->ops.read_iosf_sb_reg(hw,
+                                   IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                                   IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_val);
+       if (rc)
+               return rc;
+
+       flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK;
+       flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G;
+       flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN;
+       flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN;
+       flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN;
+
+       rc = mac->ops.write_iosf_sb_reg(hw,
+                                   IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                                   IXGBE_SB_IOSF_TARGET_KR_PHY, flx_val);
+       if (rc)
+               return rc;
+
+       rc = ixgbe_restart_an_internal_phy_x550em(hw);
+       if (rc)
+               return rc;
+
+       return hw->phy.ops.setup_link_speed(hw, speed, autoneg_wait);
+}
+
+/**
+ * ixgbe_setup_sgmii_fw - Set up link for sgmii with firmware-controlled PHYs
+ * @hw: pointer to hardware structure
+ */
+STATIC s32 ixgbe_setup_sgmii_fw(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+                               bool autoneg_wait)
+{
+       struct ixgbe_mac_info *mac = &hw->mac;
+       u32 lval, sval, flx_val;
+       s32 rc;
+
+       rc = mac->ops.read_iosf_sb_reg(hw,
+                                      IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+                                      IXGBE_SB_IOSF_TARGET_KR_PHY, &lval);
+       if (rc)
+               return rc;
+
+       lval &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE;
+       lval &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK;
+       lval |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_SGMII_EN;
+       lval |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CLAUSE_37_EN;
+       lval &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G;
        rc = mac->ops.write_iosf_sb_reg(hw,
                                        IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
                                        IXGBE_SB_IOSF_TARGET_KR_PHY, lval);
+       if (rc)
+               return rc;
 
-       return rc;
+       rc = mac->ops.read_iosf_sb_reg(hw,
+                                      IXGBE_KRM_SGMII_CTRL(hw->bus.lan_id),
+                                      IXGBE_SB_IOSF_TARGET_KR_PHY, &sval);
+       if (rc)
+               return rc;
+
+       sval &= ~IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_10_D;
+       sval &= ~IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_100_D;
+       rc = mac->ops.write_iosf_sb_reg(hw,
+                                       IXGBE_KRM_SGMII_CTRL(hw->bus.lan_id),
+                                       IXGBE_SB_IOSF_TARGET_KR_PHY, sval);
+       if (rc)
+               return rc;
+
+       rc = mac->ops.write_iosf_sb_reg(hw,
+                                       IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+                                       IXGBE_SB_IOSF_TARGET_KR_PHY, lval);
+       if (rc)
+               return rc;
+
+       rc = mac->ops.read_iosf_sb_reg(hw,
+                                   IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                                   IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_val);
+       if (rc)
+               return rc;
+
+       flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK;
+       flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN;
+       flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN;
+       flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN;
+       flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN;
+
+       rc = mac->ops.write_iosf_sb_reg(hw,
+                                   IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                                   IXGBE_SB_IOSF_TARGET_KR_PHY, flx_val);
+       if (rc)
+               return rc;
+
+       rc = ixgbe_restart_an_internal_phy_x550em(hw);
+
+       return hw->phy.ops.setup_link_speed(hw, speed, autoneg_wait);
 }
 
 /**
@@ -1670,17 +1905,30 @@ void ixgbe_init_mac_link_ops_X550em(struct ixgbe_hw *hw)
                mac->ops.setup_link = ixgbe_setup_mac_link_multispeed_fiber;
                mac->ops.set_rate_select_speed =
                                        ixgbe_set_soft_rate_select_speed;
+
                if ((hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP_N) ||
                    (hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP))
                        mac->ops.setup_mac_link =
-                               ixgbe_setup_mac_link_sfp_x550a;
+                                               ixgbe_setup_mac_link_sfp_x550a;
                else
                        mac->ops.setup_mac_link =
-                               ixgbe_setup_mac_link_sfp_x550em;
+                                               ixgbe_setup_mac_link_sfp_x550em;
                break;
        case ixgbe_media_type_copper:
-               mac->ops.setup_link = ixgbe_setup_mac_link_t_X550em;
-               mac->ops.check_link = ixgbe_check_link_t_X550em;
+               if (hw->mac.type == ixgbe_mac_X550EM_a) {
+                       if (hw->device_id == IXGBE_DEV_ID_X550EM_A_1G_T ||
+                           hw->device_id == IXGBE_DEV_ID_X550EM_A_1G_T_L) {
+                               mac->ops.setup_link = ixgbe_setup_sgmii_fw;
+                               mac->ops.check_link =
+                                                  ixgbe_check_mac_link_generic;
+                       } else {
+                               mac->ops.setup_link =
+                                                 ixgbe_setup_mac_link_t_X550em;
+                       }
+               } else {
+                       mac->ops.setup_link = ixgbe_setup_mac_link_t_X550em;
+                       mac->ops.check_link = ixgbe_check_link_t_X550em;
+               }
                break;
        case ixgbe_media_type_backplane:
                if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SGMII ||
@@ -1704,6 +1952,13 @@ s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw,
 {
        DEBUGFUNC("ixgbe_get_link_capabilities_X550em");
 
+
+       if (hw->phy.type == ixgbe_phy_fw) {
+               *autoneg = true;
+               *speed = hw->phy.speeds_supported;
+               return 0;
+       }
+
        /* SFP */
        if (hw->phy.media_type == ixgbe_media_type_fiber) {
 
@@ -1727,13 +1982,24 @@ s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw,
                        *speed = IXGBE_LINK_SPEED_10GB_FULL;
        } else {
                switch (hw->phy.type) {
-               case ixgbe_phy_m88:
-                       *speed = IXGBE_LINK_SPEED_100_FULL |
-                                IXGBE_LINK_SPEED_1GB_FULL;
-                       break;
+               case ixgbe_phy_ext_1g_t:
                case ixgbe_phy_sgmii:
                        *speed = IXGBE_LINK_SPEED_1GB_FULL;
                        break;
+               case ixgbe_phy_x550em_kr:
+                       if (hw->mac.type == ixgbe_mac_X550EM_a) {
+                               /* check different backplane modes */
+                               if (hw->phy.nw_mng_if_sel &
+                                          IXGBE_NW_MNG_IF_SEL_PHY_SPEED_2_5G) {
+                                       *speed = IXGBE_LINK_SPEED_2_5GB_FULL;
+                                       break;
+                               } else if (hw->device_id ==
+                                                  IXGBE_DEV_ID_X550EM_A_KR_L) {
+                                       *speed = IXGBE_LINK_SPEED_1GB_FULL;
+                                       break;
+                               }
+                       }
+                       /* fall through */
                default:
                        *speed = IXGBE_LINK_SPEED_10GB_FULL |
                                 IXGBE_LINK_SPEED_1GB_FULL;
@@ -1854,19 +2120,32 @@ STATIC s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw)
        status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc);
 
        /* Enable link status change alarm */
-       status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
-                                     IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &reg);
 
-       if (status != IXGBE_SUCCESS)
-               return status;
+       /* Enable the LASI interrupts on X552 devices to receive notifications
+        * of the link configuration of the external PHY and, in turn, to
+        * configure the internal iXFI link, since iXFI does not support
+        * auto-negotiation. This is not required for X553 devices, whose KR
+        * interface auto-negotiates and serves as the internal link to the
+        * external PHY, so the check below avoids enabling LASI interrupts
+        * on X553 devices.
+        */
+       if (hw->mac.type != ixgbe_mac_X550EM_a) {
+               status = hw->phy.ops.read_reg(hw,
+                                       IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
+                                       IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &reg);
 
-       reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN;
+               if (status != IXGBE_SUCCESS)
+                       return status;
 
-       status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
-                                      IXGBE_MDIO_AUTO_NEG_DEV_TYPE, reg);
+               reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN;
 
-       if (status != IXGBE_SUCCESS)
-               return status;
+               status = hw->phy.ops.write_reg(hw,
+                                       IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
+                                       IXGBE_MDIO_AUTO_NEG_DEV_TYPE, reg);
+
+               if (status != IXGBE_SUCCESS)
+                       return status;
+       }
 
        /* Enable high temperature failure and global fault alarms */
        status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK,
@@ -1935,8 +2214,8 @@ STATIC s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *hw,
        u32 reg_val;
 
        status = hw->mac.ops.read_iosf_sb_reg(hw,
-                     IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-                     IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+                                       IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+                                       IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
        if (status)
                return status;
 
@@ -1952,182 +2231,75 @@ STATIC s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *hw,
        if (speed & IXGBE_LINK_SPEED_1GB_FULL)
                reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KX;
 
-       /* Restart auto-negotiation. */
-       reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART;
        status = hw->mac.ops.write_iosf_sb_reg(hw,
-                      IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-                      IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
-
-       return status;
-}
+                                       IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+                                       IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
 
-/**
- * ixgbe_set_master_slave_mode - Set up PHY for master/slave mode
- * @hw: pointer to hardware structure
- *
- * Must be called while holding the PHY semaphore and token
- */
-STATIC s32 ixgbe_set_master_slave_mode(struct ixgbe_hw *hw)
-{
-       u16 phy_data;
-       s32 rc;
+       if (hw->mac.type == ixgbe_mac_X550EM_a) {
+               /* Set lane mode to KR auto-negotiation */
+               status = hw->mac.ops.read_iosf_sb_reg(hw,
+                                   IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                                   IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
 
-       /* Resolve master/slave mode */
-       rc = ixgbe_read_phy_reg_mdi_22(hw, IXGBE_M88E1500_1000T_CTRL, 0,
-                                      &phy_data);
-       if (rc)
-               return rc;
+               if (status)
+                       return status;
 
-       /* load defaults for future use */
-       if (phy_data & IXGBE_M88E1500_1000T_CTRL_MS_ENABLE) {
-               if (phy_data & IXGBE_M88E1500_1000T_CTRL_MS_VALUE)
-                       hw->phy.original_ms_type = ixgbe_ms_force_master;
-               else
-                       hw->phy.original_ms_type = ixgbe_ms_force_slave;
-       } else {
-               hw->phy.original_ms_type = ixgbe_ms_auto;
-       }
+               reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK;
+               reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN;
+               reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN;
+               reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN;
+               reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN;
 
-       switch (hw->phy.ms_type) {
-       case ixgbe_ms_force_master:
-               phy_data |= IXGBE_M88E1500_1000T_CTRL_MS_ENABLE;
-               phy_data |= IXGBE_M88E1500_1000T_CTRL_MS_VALUE;
-               break;
-       case ixgbe_ms_force_slave:
-               phy_data |= IXGBE_M88E1500_1000T_CTRL_MS_ENABLE;
-               phy_data &= ~IXGBE_M88E1500_1000T_CTRL_MS_VALUE;
-               break;
-       case ixgbe_ms_auto:
-               phy_data &= ~IXGBE_M88E1500_1000T_CTRL_MS_ENABLE;
-               break;
-       default:
-               break;
+               status = hw->mac.ops.write_iosf_sb_reg(hw,
+                                   IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                                   IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
        }
 
-       return ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_1000T_CTRL, 0,
-                                         phy_data);
-}
-
-/**
- * ixgbe_reset_phy_m88_nolock - Reset m88 PHY without locking
- * @hw: pointer to hardware structure
- *
- * Must be called while holding the PHY semaphore and token
- */
-STATIC s32 ixgbe_reset_phy_m88_nolock(struct ixgbe_hw *hw)
-{
-       s32 rc;
-
-       rc = ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_PAGE_ADDR, 0, 1);
-       if (rc)
-               return rc;
-
-       rc = ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_FIBER_CTRL, 0,
-                                       IXGBE_M88E1500_FIBER_CTRL_RESET |
-                                       IXGBE_M88E1500_FIBER_CTRL_DUPLEX_FULL |
-                                       IXGBE_M88E1500_FIBER_CTRL_SPEED_MSB);
-       if (rc)
-               goto res_out;
-
-       rc = ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_PAGE_ADDR, 0, 18);
-       if (rc)
-               goto res_out;
-
-       rc = ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_GEN_CTRL, 0,
-                                       IXGBE_M88E1500_GEN_CTRL_RESET |
-                                       IXGBE_M88E1500_GEN_CTRL_SGMII_COPPER);
-       if (rc)
-               goto res_out;
-
-       rc = ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_PAGE_ADDR, 0, 0);
-       if (rc)
-               goto res_out;
-
-       rc = ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_COPPER_CTRL, 0,
-                                       IXGBE_M88E1500_COPPER_CTRL_RESET |
-                                       IXGBE_M88E1500_COPPER_CTRL_AN_EN |
-                                       IXGBE_M88E1500_COPPER_CTRL_RESTART_AN |
-                                       IXGBE_M88E1500_COPPER_CTRL_FULL_DUPLEX |
-                                       IXGBE_M88E1500_COPPER_CTRL_SPEED_MSB);
-
-res_out:
-       ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_PAGE_ADDR, 0, 0);
-       return rc;
+       return ixgbe_restart_an_internal_phy_x550em(hw);
 }
 
 /**
- * ixgbe_reset_phy_m88 - Reset m88 PHY
+ * ixgbe_reset_phy_fw - Reset firmware-controlled PHYs
  * @hw: pointer to hardware structure
  */
-STATIC s32 ixgbe_reset_phy_m88(struct ixgbe_hw *hw)
+static s32 ixgbe_reset_phy_fw(struct ixgbe_hw *hw)
 {
-       u32 swfw_mask = hw->phy.phy_semaphore_mask;
+       u32 store[FW_PHY_ACT_DATA_COUNT] = { 0 };
        s32 rc;
 
        if (hw->phy.reset_disable || ixgbe_check_reset_blocked(hw))
                return IXGBE_SUCCESS;
 
-       rc = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask);
+       rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_PHY_SW_RESET, &store);
        if (rc)
                return rc;
+       memset(store, 0, sizeof(store));
 
-       rc = ixgbe_reset_phy_m88_nolock(hw);
+       rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_INIT_PHY, &store);
+       if (rc)
+               return rc;
 
-       hw->mac.ops.release_swfw_sync(hw, swfw_mask);
-       return rc;
+       return ixgbe_setup_fw_link(hw);
 }
 
 /**
- * ixgbe_setup_m88 - setup m88 PHY
+ * ixgbe_check_overtemp_fw - Check firmware-controlled PHYs for overtemp
  * @hw: pointer to hardware structure
  */
-STATIC s32 ixgbe_setup_m88(struct ixgbe_hw *hw)
+static s32 ixgbe_check_overtemp_fw(struct ixgbe_hw *hw)
 {
-       u32 swfw_mask = hw->phy.phy_semaphore_mask;
-       struct ixgbe_phy_info *phy = &hw->phy;
-       u16 phy_data;
+       u32 store[FW_PHY_ACT_DATA_COUNT] = { 0 };
        s32 rc;
 
-       if (phy->reset_disable || ixgbe_check_reset_blocked(hw))
-               return IXGBE_SUCCESS;
-
-       rc = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask);
+       rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_GET_LINK_INFO, &store);
        if (rc)
                return rc;
 
-       rc = ixgbe_read_phy_reg_mdi_22(hw, IXGBE_M88E1500_PHY_SPEC_CTRL, 0,
-                                      &phy_data);
-       if (rc)
-               goto rel_out;
-
-       /* Enable downshift and setting it to X6 */
-       phy_data &= ~IXGBE_M88E1500_PSCR_DOWNSHIFT_ENABLE;
-       phy_data |= IXGBE_M88E1500_PSCR_DOWNSHIFT_6X;
-       phy_data |= IXGBE_M88E1500_PSCR_DOWNSHIFT_ENABLE;
-       rc = ixgbe_write_phy_reg_mdi_22(hw,
-                                       IXGBE_M88E1500_PHY_SPEC_CTRL, 0,
-                                       phy_data);
-       if (rc)
-               goto rel_out;
-
-       ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_PAGE_ADDR, 0, 0);
-
-       /* Commit the changes */
-       rc = ixgbe_reset_phy_m88_nolock(hw);
-       if (rc) {
-               DEBUGOUT("Error committing the PHY changes\n");
-               goto rel_out;
+       if (store[0] & FW_PHY_ACT_GET_LINK_INFO_TEMP) {
+               ixgbe_shutdown_fw_phy(hw);
+               return IXGBE_ERR_OVERTEMP;
        }
-
-       rc = ixgbe_set_master_slave_mode(hw);
-
-       hw->mac.ops.release_swfw_sync(hw, swfw_mask);
-       return rc;
-
-rel_out:
-       ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_PAGE_ADDR, 0, 0);
-       hw->mac.ops.release_swfw_sync(hw, swfw_mask);
-       return rc;
+       return IXGBE_SUCCESS;
 }
 
 /**
@@ -2151,7 +2323,7 @@ STATIC s32 ixgbe_read_mng_if_sel_x550em(struct ixgbe_hw *hw)
            hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_MDIO_ACT) {
                hw->phy.addr = (hw->phy.nw_mng_if_sel &
                                IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >>
-                               IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT;
+                              IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT;
        }
 
        return IXGBE_SUCCESS;
@@ -2173,7 +2345,6 @@ s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw)
        DEBUGFUNC("ixgbe_init_phy_ops_X550em");
 
        hw->mac.ops.set_lan_id(hw);
-
        ixgbe_read_mng_if_sel_x550em(hw);
 
        if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) {
@@ -2182,9 +2353,45 @@ s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw)
                phy->ops.identify_sfp = ixgbe_identify_sfp_module_X550em;
        }
 
+       switch (hw->device_id) {
+       case IXGBE_DEV_ID_X550EM_A_1G_T:
+       case IXGBE_DEV_ID_X550EM_A_1G_T_L:
+               phy->ops.read_reg_mdi = ixgbe_read_phy_reg_mdi_22;
+               phy->ops.write_reg_mdi = ixgbe_write_phy_reg_mdi_22;
+               hw->phy.ops.read_reg = ixgbe_read_phy_reg_x550a;
+               hw->phy.ops.write_reg = ixgbe_write_phy_reg_x550a;
+               phy->ops.check_overtemp = ixgbe_check_overtemp_fw;
+               if (hw->bus.lan_id)
+                       hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY1_SM;
+               else
+                       hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY0_SM;
+
+               break;
+       case IXGBE_DEV_ID_X550EM_A_10G_T:
+       case IXGBE_DEV_ID_X550EM_A_SFP:
+               hw->phy.ops.read_reg = ixgbe_read_phy_reg_x550a;
+               hw->phy.ops.write_reg = ixgbe_write_phy_reg_x550a;
+               if (hw->bus.lan_id)
+                       hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY1_SM;
+               else
+                       hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY0_SM;
+               break;
+       case IXGBE_DEV_ID_X550EM_X_SFP:
+               /* set up for CS4227 usage */
+               hw->phy.phy_semaphore_mask = IXGBE_GSSR_SHARED_I2C_SM;
+               break;
+       case IXGBE_DEV_ID_X550EM_X_1G_T:
+               phy->ops.read_reg_mdi = ixgbe_read_phy_reg_mdi_22;
+               phy->ops.write_reg_mdi = ixgbe_write_phy_reg_mdi_22;
+               break;
+       default:
+               break;
+       }
+
        /* Identify the PHY or SFP module */
        ret_val = phy->ops.identify(hw);
-       if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED)
+       if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED ||
+           ret_val == IXGBE_ERR_PHY_ADDR_INVALID)
                return ret_val;
 
        /* Setup function pointers based on detected hardware */
@@ -2204,6 +2411,16 @@ s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw)
                phy->ops.read_reg = ixgbe_read_phy_reg_x550em;
                phy->ops.write_reg = ixgbe_write_phy_reg_x550em;
                break;
+       case ixgbe_phy_ext_1g_t:
+               /* link is managed by FW */
+               phy->ops.setup_link = NULL;
+               break;
+       case ixgbe_phy_x550em_xfi:
+               /* link is managed by HW */
+               phy->ops.setup_link = NULL;
+               phy->ops.read_reg = ixgbe_read_phy_reg_x550em;
+               phy->ops.write_reg = ixgbe_write_phy_reg_x550em;
+               break;
        case ixgbe_phy_x550em_ext_t:
                /* If internal link mode is XFI, then setup iXFI internal link,
                 * else setup KR now.
@@ -2223,11 +2440,9 @@ s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw)
        case ixgbe_phy_sgmii:
                phy->ops.setup_link = NULL;
                break;
-       case ixgbe_phy_m88:
-               phy->ops.setup_link = ixgbe_setup_m88;
-               phy->ops.read_reg_mdi = ixgbe_read_phy_reg_mdi_22;
-               phy->ops.write_reg_mdi = ixgbe_write_phy_reg_mdi_22;
-               phy->ops.reset = ixgbe_reset_phy_m88;
+       case ixgbe_phy_fw:
+               phy->ops.setup_link = ixgbe_setup_fw_link;
+               phy->ops.reset = ixgbe_reset_phy_fw;
                break;
        default:
                break;
@@ -2247,8 +2462,6 @@ STATIC void ixgbe_set_mdio_speed(struct ixgbe_hw *hw)
        case IXGBE_DEV_ID_X550EM_X_10G_T:
        case IXGBE_DEV_ID_X550EM_A_SGMII:
        case IXGBE_DEV_ID_X550EM_A_SGMII_L:
-       case IXGBE_DEV_ID_X550EM_A_1G_T:
-       case IXGBE_DEV_ID_X550EM_A_1G_T_L:
        case IXGBE_DEV_ID_X550EM_A_10G_T:
        case IXGBE_DEV_ID_X550EM_A_SFP:
        case IXGBE_DEV_ID_X550EM_A_QSFP:
@@ -2257,6 +2470,13 @@ STATIC void ixgbe_set_mdio_speed(struct ixgbe_hw *hw)
                hlreg0 &= ~IXGBE_HLREG0_MDCSPD;
                IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
                break;
+       case IXGBE_DEV_ID_X550EM_A_1G_T:
+       case IXGBE_DEV_ID_X550EM_A_1G_T_L:
+               /* Select fast MDIO clock speed for these devices */
+               hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+               hlreg0 |= IXGBE_HLREG0_MDCSPD;
+               IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
+               break;
        default:
                break;
        }
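
Annotation: the MDIO-speed hunk above is a plain read-modify-write of one bit in HLREG0: clear IXGBE_HLREG0_MDCSPD for most X550EM_a devices, set it for the 1G copper parts. A self-contained sketch of that pattern; the bit value is a placeholder, the real macro is defined in ixgbe_type.h.

#include <stdint.h>
#include <stdio.h>

#define HLREG0_MDCSPD  0x00010000u  /* placeholder; real value in ixgbe_type.h */

/* Return the HLREG0 value with the MDIO clock-speed bit set (fast MDC, as
 * selected for the 1G copper devices) or cleared (default speed). */
static uint32_t set_mdio_speed(uint32_t hlreg0, int fast_mdc)
{
        if (fast_mdc)
                hlreg0 |= HLREG0_MDCSPD;
        else
                hlreg0 &= ~HLREG0_MDCSPD;
        return hlreg0;
}

int main(void)
{
        uint32_t reg = 0x12345678u;  /* pretend readout of HLREG0 */

        printf("fast MDC:    0x%08x\n", set_mdio_speed(reg, 1));
        printf("default MDC: 0x%08x\n", set_mdio_speed(reg, 0));
        return 0;
}
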
@@ -2282,9 +2502,10 @@ s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw)
 
        /* Call adapter stop to disable Tx/Rx and clear interrupts */
        status = hw->mac.ops.stop_adapter(hw);
-       if (status != IXGBE_SUCCESS)
+       if (status != IXGBE_SUCCESS) {
+               DEBUGOUT1("Failed to stop adapter, STATUS = %d\n", status);
                return status;
-
+       }
        /* flush pending Tx transactions */
        ixgbe_clear_tx_pending(hw);
 
@@ -2293,14 +2514,23 @@ s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw)
        /* PHY ops must be identified and initialized prior to reset */
        status = hw->phy.ops.init(hw);
 
-       if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
+       if (status)
+               DEBUGOUT1("Failed to initialize PHY ops, STATUS = %d\n",
+                         status);
+
+       if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) {
+               DEBUGOUT("Returning from reset HW since PHY ops init returned IXGBE_ERR_SFP_NOT_SUPPORTED\n");
                return status;
+       }
 
        /* start the external PHY */
        if (hw->phy.type == ixgbe_phy_x550em_ext_t) {
                status = ixgbe_init_ext_t_x550em(hw);
-               if (status)
+               if (status) {
+                       DEBUGOUT1("Failed to start the external PHY, STATUS = %d\n",
+                                 status);
                        return status;
+               }
        }
 
        /* Setup SFP module if there is one present. */
@@ -2313,8 +2543,10 @@ s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw)
                return status;
 
        /* Reset PHY */
-       if (!hw->phy.reset_disable && hw->phy.ops.reset)
-               hw->phy.ops.reset(hw);
+       if (!hw->phy.reset_disable && hw->phy.ops.reset) {
+               if (hw->phy.ops.reset(hw) == IXGBE_ERR_OVERTEMP)
+                       return IXGBE_ERR_OVERTEMP;
+       }
 
 mac_reset_top:
        /* Issue global reset to the MAC.  Needs to be SW reset if link is up.
@@ -2372,6 +2604,9 @@ mac_reset_top:
        if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP)
                ixgbe_setup_mux_ctl(hw);
 
+       if (status != IXGBE_SUCCESS)
+               DEBUGOUT1("Reset HW failed, STATUS = %d\n", status);
+
        return status;
 }
 
@@ -2421,12 +2656,11 @@ s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw)
 /**
  *  ixgbe_setup_kr_x550em - Configure the KR PHY.
  *  @hw: pointer to hardware structure
- *
- *  Configures the integrated KR PHY for X550EM_x.
  **/
 s32 ixgbe_setup_kr_x550em(struct ixgbe_hw *hw)
 {
-       if (hw->mac.type != ixgbe_mac_X550EM_x)
+       /* leave link alone for 2.5G */
+       if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_2_5GB_FULL)
                return IXGBE_SUCCESS;
 
        return ixgbe_setup_kr_speed_x550em(hw, hw->phy.autoneg_advertised);
@@ -2510,6 +2744,55 @@ s32 ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw,
        return ret_val;
 }
 
+/**
+ *  ixgbe_setup_sfi_x550a - Configure the internal PHY for native SFI mode
+ *  @hw: pointer to hardware structure
+ *  @speed: the link speed to force
+ *
+ *  Configures the integrated PHY for native SFI mode. Used to connect the
+ *  internal PHY directly to an SFP cage, without autonegotiation.
+ **/
+STATIC s32 ixgbe_setup_sfi_x550a(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
+{
+       struct ixgbe_mac_info *mac = &hw->mac;
+       s32 status;
+       u32 reg_val;
+
+       /* Disable all AN and force speed to 10G Serial. */
+       status = mac->ops.read_iosf_sb_reg(hw,
+                               IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+       if (status != IXGBE_SUCCESS)
+               return status;
+
+       reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN;
+       reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN;
+       reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN;
+       reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK;
+
+       /* Select forced link speed for internal PHY. */
+       switch (*speed) {
+       case IXGBE_LINK_SPEED_10GB_FULL:
+               reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10G;
+               break;
+       case IXGBE_LINK_SPEED_1GB_FULL:
+               reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G;
+               break;
+       default:
+               /* Other link speeds are not supported by internal PHY. */
+               return IXGBE_ERR_LINK_SETUP;
+       }
+
+       status = mac->ops.write_iosf_sb_reg(hw,
+                               IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+
+       /* Toggle port SW reset by AN reset. */
+       status = ixgbe_restart_an_internal_phy_x550em(hw);
+
+       return status;
+}
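
Annotation: ixgbe_setup_sfi_x550a above clears every autonegotiation enable plus the old speed field in the PMD_FLX_MASK_ST20 word, then forces the requested serial speed. The selection logic is small enough to show standalone; the bit values below are placeholders for the IXGBE_KRM_PMD_FLX_MASK_ST20_* macros and only illustrate the clear-then-set pattern.

#include <stdint.h>
#include <stdio.h>

/* Placeholder bit layout; the real IXGBE_KRM_PMD_FLX_MASK_ST20_* values
 * are defined in ixgbe_type.h. */
#define ST20_AN_EN       0x00000008u
#define ST20_AN37_EN     0x00000004u
#define ST20_SGMII_EN    0x00000002u
#define ST20_SPEED_MASK  0x00000070u
#define ST20_SPEED_10G   0x00000040u
#define ST20_SPEED_1G    0x00000020u

enum link_speed { SPEED_1G, SPEED_10G, SPEED_OTHER };

/* Strip every AN enable plus the old speed field, then force the requested
 * serial speed. Returns -1 for speeds the internal PHY cannot do, mirroring
 * the IXGBE_ERR_LINK_SETUP path above. */
static int force_sfi_speed(uint32_t *reg, enum link_speed speed)
{
        *reg &= ~(ST20_AN_EN | ST20_AN37_EN | ST20_SGMII_EN | ST20_SPEED_MASK);

        switch (speed) {
        case SPEED_10G:
                *reg |= ST20_SPEED_10G;
                return 0;
        case SPEED_1G:
                *reg |= ST20_SPEED_1G;
                return 0;
        default:
                return -1;
        }
}

int main(void)
{
        uint32_t reg = 0xffffffffu;  /* pretend readout of PMD_FLX_MASK_ST20 */

        if (force_sfi_speed(&reg, SPEED_10G) == 0)
                printf("forced 10G: 0x%08x\n", reg);
        return 0;
}

Note that the register write alone does not take effect: the function then kicks the port with ixgbe_restart_an_internal_phy_x550em ("Toggle port SW reset by AN reset"), which applies the new forced speed.
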
+
 /**
  *  ixgbe_setup_mac_link_sfp_x550a - Setup internal PHY for SFP
  *  @hw: pointer to hardware structure
@@ -2517,8 +2800,8 @@ s32 ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw,
 *  Configure the integrated PHY for SFP support.
  **/
 s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw,
-                                  ixgbe_link_speed speed,
-                                  bool autoneg_wait_to_complete)
+                                   ixgbe_link_speed speed,
+                                   bool autoneg_wait_to_complete)
 {
        s32 ret_val;
        u16 reg_phy_ext;
@@ -2540,31 +2823,27 @@ s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw,
                return ret_val;
 
        if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP_N) {
-               /* Configure internal PHY for native SFI */
+               /* Configure internal PHY for native SFI based on module type */
                ret_val = hw->mac.ops.read_iosf_sb_reg(hw,
-                              IXGBE_KRM_AN_CNTL_8(hw->bus.lan_id),
-                              IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_phy_int);
+                                  IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                                  IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_phy_int);
 
                if (ret_val != IXGBE_SUCCESS)
                        return ret_val;
 
-               if (setup_linear) {
-                       reg_phy_int &= ~IXGBE_KRM_AN_CNTL_8_LIMITING;
-                       reg_phy_int |= IXGBE_KRM_AN_CNTL_8_LINEAR;
-               } else {
-                       reg_phy_int |= IXGBE_KRM_AN_CNTL_8_LIMITING;
-                       reg_phy_int &= ~IXGBE_KRM_AN_CNTL_8_LINEAR;
-               }
+               reg_phy_int &= IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_DA;
+               if (!setup_linear)
+                       reg_phy_int |= IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_SR;
 
                ret_val = hw->mac.ops.write_iosf_sb_reg(hw,
-                               IXGBE_KRM_AN_CNTL_8(hw->bus.lan_id),
-                               IXGBE_SB_IOSF_TARGET_KR_PHY, reg_phy_int);
+                                  IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                                  IXGBE_SB_IOSF_TARGET_KR_PHY, reg_phy_int);
 
                if (ret_val != IXGBE_SUCCESS)
                        return ret_val;
 
-               /* Setup XFI/SFI internal link. */
-               ret_val = ixgbe_setup_ixfi_x550em(hw, &speed);
+               /* Setup SFI internal link. */
+               ret_val = ixgbe_setup_sfi_x550a(hw, &speed);
        } else {
                /* Configure internal PHY for KR/KX. */
                ixgbe_setup_kr_speed_x550em(hw, speed);
@@ -2575,9 +2854,9 @@ s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw,
                        return IXGBE_ERR_PHY_ADDR_INVALID;
                }
 
-               /* Get external PHY device id */
-               ret_val = hw->phy.ops.read_reg(hw, IXGBE_CS4227_GLOBAL_ID_MSB,
-                                      IXGBE_MDIO_ZERO_DEV_TYPE, &reg_phy_ext);
+               /* Get external PHY SKU id */
+               ret_val = hw->phy.ops.read_reg(hw, IXGBE_CS4227_EFUSE_PDF_SKU,
+                                       IXGBE_MDIO_ZERO_DEV_TYPE, &reg_phy_ext);
 
                if (ret_val != IXGBE_SUCCESS)
                        return ret_val;
@@ -2585,7 +2864,7 @@ s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw,
                /* When configuring quad port CS4223, the MAC instance is part
                 * of the slice offset.
                 */
-               if (reg_phy_ext == IXGBE_CS4223_PHY_ID)
+               if (reg_phy_ext == IXGBE_CS4223_SKU_ID)
                        slice_offset = (hw->bus.lan_id +
                                        (hw->bus.instance_id << 1)) << 12;
                else
@@ -2593,12 +2872,26 @@ s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw,
 
                /* Configure CS4227/CS4223 LINE side to proper mode. */
                reg_slice = IXGBE_CS4227_LINE_SPARE24_LSB + slice_offset;
+
+               ret_val = hw->phy.ops.read_reg(hw, reg_slice,
+                                       IXGBE_MDIO_ZERO_DEV_TYPE, &reg_phy_ext);
+
+               if (ret_val != IXGBE_SUCCESS)
+                       return ret_val;
+
+               reg_phy_ext &= ~((IXGBE_CS4227_EDC_MODE_CX1 << 1) |
+                                (IXGBE_CS4227_EDC_MODE_SR << 1));
+
                if (setup_linear)
                        reg_phy_ext = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 0x1;
                else
                        reg_phy_ext = (IXGBE_CS4227_EDC_MODE_SR << 1) | 0x1;
                ret_val = hw->phy.ops.write_reg(hw, reg_slice,
-                                       IXGBE_MDIO_ZERO_DEV_TYPE, reg_phy_ext);
+                                        IXGBE_MDIO_ZERO_DEV_TYPE, reg_phy_ext);
+
+               /* Flush previous write with a read */
+               ret_val = hw->phy.ops.read_reg(hw, reg_slice,
+                                       IXGBE_MDIO_ZERO_DEV_TYPE, &reg_phy_ext);
        }
        return ret_val;
 }
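
Annotation: the CS4227/CS4223 block above computes a per-port register slice and then writes an EDC mode word for the line side. Both calculations are easy to check on the host; the constants below are placeholders standing in for IXGBE_CS4227_LINE_SPARE24_LSB and the IXGBE_CS4227_EDC_MODE_* values.

#include <stdio.h>

/* Placeholder constants; the real IXGBE_CS4227_LINE_SPARE24_LSB and
 * IXGBE_CS4227_EDC_MODE_* values live in the base-code headers. */
#define LINE_SPARE24_LSB  0x12B0u
#define EDC_MODE_CX1      0x0002u
#define EDC_MODE_SR       0x0004u

/* Quad-port CS4223 folds the MAC instance into the slice offset; the
 * dual-port CS4227 uses the LAN id alone, as in the hunk above. */
static unsigned int slice_reg(int is_cs4223, unsigned int lan_id,
                              unsigned int instance_id)
{
        unsigned int slice_offset;

        if (is_cs4223)
                slice_offset = (lan_id + (instance_id << 1)) << 12;
        else
                slice_offset = lan_id << 12;

        return LINE_SPARE24_LSB + slice_offset;
}

/* Line-side EDC mode word: CX1 for linear (direct-attach) modules, SR
 * otherwise, with bit 0 set to latch the new mode. */
static unsigned int edc_mode(int setup_linear)
{
        if (setup_linear)
                return (EDC_MODE_CX1 << 1) | 0x1;
        return (EDC_MODE_SR << 1) | 0x1;
}

int main(void)
{
        printf("CS4223, lan 1, instance 1: slice reg 0x%04x\n",
               slice_reg(1, 1, 1));
        printf("SR EDC mode word: 0x%04x\n", edc_mode(0));
        return 0;
}
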
@@ -2611,24 +2904,25 @@ s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw,
  **/
 STATIC s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw)
 {
+       struct ixgbe_mac_info *mac = &hw->mac;
        s32 status;
        u32 reg_val;
 
        /* Disable training protocol FSM. */
-       status = ixgbe_read_iosf_sb_reg_x550(hw,
+       status = mac->ops.read_iosf_sb_reg(hw,
                                IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id),
                                IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
        if (status != IXGBE_SUCCESS)
                return status;
        reg_val |= IXGBE_KRM_RX_TRN_LINKUP_CTRL_CONV_WO_PROTOCOL;
-       status = ixgbe_write_iosf_sb_reg_x550(hw,
+       status = mac->ops.write_iosf_sb_reg(hw,
                                IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id),
                                IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
        if (status != IXGBE_SUCCESS)
                return status;
 
        /* Disable Flex from training TXFFE. */
-       status = ixgbe_read_iosf_sb_reg_x550(hw,
+       status = mac->ops.read_iosf_sb_reg(hw,
                                IXGBE_KRM_DSP_TXFFE_STATE_4(hw->bus.lan_id),
                                IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
        if (status != IXGBE_SUCCESS)
@@ -2636,12 +2930,12 @@ STATIC s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw)
        reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_C0_EN;
        reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CP1_CN1_EN;
        reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CO_ADAPT_EN;
-       status = ixgbe_write_iosf_sb_reg_x550(hw,
+       status = mac->ops.write_iosf_sb_reg(hw,
                                IXGBE_KRM_DSP_TXFFE_STATE_4(hw->bus.lan_id),
                                IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
        if (status != IXGBE_SUCCESS)
                return status;
-       status = ixgbe_read_iosf_sb_reg_x550(hw,
+       status = mac->ops.read_iosf_sb_reg(hw,
                                IXGBE_KRM_DSP_TXFFE_STATE_5(hw->bus.lan_id),
                                IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
        if (status != IXGBE_SUCCESS)
@@ -2649,14 +2943,14 @@ STATIC s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw)
        reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_C0_EN;
        reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CP1_CN1_EN;
        reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CO_ADAPT_EN;
-       status = ixgbe_write_iosf_sb_reg_x550(hw,
+       status = mac->ops.write_iosf_sb_reg(hw,
                                IXGBE_KRM_DSP_TXFFE_STATE_5(hw->bus.lan_id),
                                IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
        if (status != IXGBE_SUCCESS)
                return status;
 
        /* Enable override for coefficients. */
-       status = ixgbe_read_iosf_sb_reg_x550(hw,
+       status = mac->ops.read_iosf_sb_reg(hw,
                                IXGBE_KRM_TX_COEFF_CTRL_1(hw->bus.lan_id),
                                IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
        if (status != IXGBE_SUCCESS)
@@ -2665,7 +2959,7 @@ STATIC s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw)
        reg_val |= IXGBE_KRM_TX_COEFF_CTRL_1_CZERO_EN;
        reg_val |= IXGBE_KRM_TX_COEFF_CTRL_1_CPLUS1_OVRRD_EN;
        reg_val |= IXGBE_KRM_TX_COEFF_CTRL_1_CMINUS1_OVRRD_EN;
-       status = ixgbe_write_iosf_sb_reg_x550(hw,
+       status = mac->ops.write_iosf_sb_reg(hw,
                                IXGBE_KRM_TX_COEFF_CTRL_1(hw->bus.lan_id),
                                IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
        return status;
@@ -2681,11 +2975,16 @@ STATIC s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw)
  **/
 STATIC s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
 {
+       struct ixgbe_mac_info *mac = &hw->mac;
        s32 status;
        u32 reg_val;
 
+       /* iXFI is only supported with X552 */
+       if (mac->type != ixgbe_mac_X550EM_x)
+               return IXGBE_ERR_LINK_SETUP;
+
        /* Disable AN and force speed to 10G Serial. */
-       status = ixgbe_read_iosf_sb_reg_x550(hw,
+       status = mac->ops.read_iosf_sb_reg(hw,
                                        IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
                                        IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
        if (status != IXGBE_SUCCESS)
@@ -2707,7 +3006,7 @@ STATIC s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
                return IXGBE_ERR_LINK_SETUP;
        }
 
-       status = ixgbe_write_iosf_sb_reg_x550(hw,
+       status = mac->ops.write_iosf_sb_reg(hw,
                                        IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
                                        IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
        if (status != IXGBE_SUCCESS)
@@ -2721,15 +3020,7 @@ STATIC s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
        }
 
        /* Toggle port SW reset by AN reset. */
-       status = ixgbe_read_iosf_sb_reg_x550(hw,
-                                       IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-                                       IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
-       if (status != IXGBE_SUCCESS)
-               return status;
-       reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART;
-       status = ixgbe_write_iosf_sb_reg_x550(hw,
-                                       IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-                                       IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+       status = ixgbe_restart_an_internal_phy_x550em(hw);
 
        return status;
 }
@@ -2788,7 +3079,8 @@ s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw)
        if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper)
                return IXGBE_ERR_CONFIG;
 
-       if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) {
+       if (hw->mac.type == ixgbe_mac_X550EM_x &&
+           !(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) {
                /* If link is down, there is no setup necessary so return  */
                status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up);
                if (status != IXGBE_SUCCESS)
@@ -2847,56 +3139,56 @@ s32 ixgbe_setup_phy_loopback_x550em(struct ixgbe_hw *hw)
 
        /* Disable AN and force speed to 10G Serial. */
        status = hw->mac.ops.read_iosf_sb_reg(hw,
-                     IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-                     IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+                                       IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+                                       IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
        if (status != IXGBE_SUCCESS)
                return status;
        reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE;
        reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK;
        reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_10G;
        status = hw->mac.ops.write_iosf_sb_reg(hw,
-                      IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-                      IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+                                       IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+                                       IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
        if (status != IXGBE_SUCCESS)
                return status;
 
        /* Set near-end loopback clocks. */
        status = hw->mac.ops.read_iosf_sb_reg(hw,
-                     IXGBE_KRM_PORT_CAR_GEN_CTRL(hw->bus.lan_id),
-                     IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+                               IXGBE_KRM_PORT_CAR_GEN_CTRL(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
        if (status != IXGBE_SUCCESS)
                return status;
        reg_val |= IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_32B;
        reg_val |= IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_KRPCS;
        status = hw->mac.ops.write_iosf_sb_reg(hw,
-                      IXGBE_KRM_PORT_CAR_GEN_CTRL(hw->bus.lan_id),
-                      IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+                               IXGBE_KRM_PORT_CAR_GEN_CTRL(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
        if (status != IXGBE_SUCCESS)
                return status;
 
        /* Set loopback enable. */
        status = hw->mac.ops.read_iosf_sb_reg(hw,
-                     IXGBE_KRM_PMD_DFX_BURNIN(hw->bus.lan_id),
-                     IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+                               IXGBE_KRM_PMD_DFX_BURNIN(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
        if (status != IXGBE_SUCCESS)
                return status;
        reg_val |= IXGBE_KRM_PMD_DFX_BURNIN_TX_RX_KR_LB_MASK;
        status = hw->mac.ops.write_iosf_sb_reg(hw,
-                      IXGBE_KRM_PMD_DFX_BURNIN(hw->bus.lan_id),
-                      IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+                               IXGBE_KRM_PMD_DFX_BURNIN(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
        if (status != IXGBE_SUCCESS)
                return status;
 
        /* Training bypass. */
        status = hw->mac.ops.read_iosf_sb_reg(hw,
-                     IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id),
-                     IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+                               IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
        if (status != IXGBE_SUCCESS)
                return status;
        reg_val |= IXGBE_KRM_RX_TRN_LINKUP_CTRL_PROTOCOL_BYPASS;
        status = hw->mac.ops.write_iosf_sb_reg(hw,
-                      IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id),
-                      IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+                               IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
 
        return status;
 }
@@ -2910,13 +3202,13 @@ s32 ixgbe_setup_phy_loopback_x550em(struct ixgbe_hw *hw)
  *
  *  Reads a 16 bit word from the EEPROM using the hostif.
  **/
-s32 ixgbe_read_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset,
-                                  u16 *data)
+s32 ixgbe_read_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, u16 *data)
 {
-       s32 status;
+       const u32 mask = IXGBE_GSSR_SW_MNG_SM | IXGBE_GSSR_EEP_SM;
        struct ixgbe_hic_read_shadow_ram buffer;
+       s32 status;
 
-       DEBUGFUNC("ixgbe_read_ee_hostif_data_X550");
+       DEBUGFUNC("ixgbe_read_ee_hostif_X550");
        buffer.hdr.req.cmd = FW_READ_SHADOW_RAM_CMD;
        buffer.hdr.req.buf_lenh = 0;
        buffer.hdr.req.buf_lenl = FW_READ_SHADOW_RAM_LEN;
@@ -2927,42 +3219,18 @@ s32 ixgbe_read_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset,
        /* one word */
        buffer.length = IXGBE_CPU_TO_BE16(sizeof(u16));
 
-       status = ixgbe_host_interface_command(hw, (u32 *)&buffer,
-                                             sizeof(buffer),
-                                             IXGBE_HI_COMMAND_TIMEOUT, false);
-
+       status = hw->mac.ops.acquire_swfw_sync(hw, mask);
        if (status)
                return status;
 
-       *data = (u16)IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG,
-                                         FW_NVM_DATA_OFFSET);
-
-       return 0;
-}
-
-/**
- *  ixgbe_read_ee_hostif_X550 - Read EEPROM word using a host interface command
- *  @hw: pointer to hardware structure
- *  @offset: offset of  word in the EEPROM to read
- *  @data: word read from the EEPROM
- *
- *  Reads a 16 bit word from the EEPROM using the hostif.
- **/
-s32 ixgbe_read_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset,
-                             u16 *data)
-{
-       s32 status = IXGBE_SUCCESS;
-
-       DEBUGFUNC("ixgbe_read_ee_hostif_X550");
-
-       if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
-           IXGBE_SUCCESS) {
-               status = ixgbe_read_ee_hostif_data_X550(hw, offset, data);
-               hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
-       } else {
-               status = IXGBE_ERR_SWFW_SYNC;
+       status = ixgbe_hic_unlocked(hw, (u32 *)&buffer, sizeof(buffer),
+                                   IXGBE_HI_COMMAND_TIMEOUT);
+       if (!status) {
+               *data = (u16)IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG,
+                                                 FW_NVM_DATA_OFFSET);
        }
 
+       hw->mac.ops.release_swfw_sync(hw, mask);
        return status;
 }
 
@@ -2978,6 +3246,7 @@ s32 ixgbe_read_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset,
 s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
                                     u16 offset, u16 words, u16 *data)
 {
+       const u32 mask = IXGBE_GSSR_SW_MNG_SM | IXGBE_GSSR_EEP_SM;
        struct ixgbe_hic_read_shadow_ram buffer;
        u32 current_word = 0;
        u16 words_to_read;
@@ -2987,11 +3256,12 @@ s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
        DEBUGFUNC("ixgbe_read_ee_hostif_buffer_X550");
 
        /* Take semaphore for the entire operation. */
-       status = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+       status = hw->mac.ops.acquire_swfw_sync(hw, mask);
        if (status) {
                DEBUGOUT("EEPROM read buffer - semaphore failed\n");
                return status;
        }
+
        while (words) {
                if (words > FW_MAX_READ_BUFFER_SIZE / 2)
                        words_to_read = FW_MAX_READ_BUFFER_SIZE / 2;
@@ -3007,10 +3277,8 @@ s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
                buffer.address = IXGBE_CPU_TO_BE32((offset + current_word) * 2);
                buffer.length = IXGBE_CPU_TO_BE16(words_to_read * 2);
 
-               status = ixgbe_host_interface_command(hw, (u32 *)&buffer,
-                                                     sizeof(buffer),
-                                                     IXGBE_HI_COMMAND_TIMEOUT,
-                                                     false);
+               status = ixgbe_hic_unlocked(hw, (u32 *)&buffer, sizeof(buffer),
+                                           IXGBE_HI_COMMAND_TIMEOUT);
 
                if (status) {
                        DEBUGOUT("Host interface command failed\n");
@@ -3035,7 +3303,7 @@ s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
        }
 
 out:
-       hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+       hw->mac.ops.release_swfw_sync(hw, mask);
        return status;
 }
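
Annotation: the buffered hostif read above now takes both the SW_MNG and EEP semaphores once for the whole operation and then issues unlocked host interface commands in chunks of at most FW_MAX_READ_BUFFER_SIZE / 2 words, with the byte address derived from the word offset. The sketch below reproduces only that chunking arithmetic; the buffer size is a placeholder value.

#include <stdint.h>
#include <stdio.h>

#define MAX_READ_BUFFER_BYTES 128  /* placeholder for FW_MAX_READ_BUFFER_SIZE */

/* Walk an EEPROM word range the way the buffered hostif read does: at most
 * MAX_READ_BUFFER_BYTES / 2 words per firmware command, byte address equal
 * to twice the current word offset. */
static void plan_hostif_reads(uint16_t offset, uint16_t words)
{
        uint16_t current_word = 0;

        while (words) {
                uint16_t words_to_read = words;

                if (words_to_read > MAX_READ_BUFFER_BYTES / 2)
                        words_to_read = MAX_READ_BUFFER_BYTES / 2;

                printf("cmd: byte address 0x%04x, %u words\n",
                       (unsigned)((offset + current_word) * 2),
                       (unsigned)words_to_read);

                current_word += words_to_read;
                words -= words_to_read;
        }
}

int main(void)
{
        plan_hostif_reads(0x0040, 150);  /* 150 words -> 64 + 64 + 22 */
        return 0;
}
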
 
@@ -3439,6 +3707,7 @@ u32 ixgbe_get_supported_physical_layer_X550em(struct ixgbe_hw *hw)
 
        switch (hw->phy.type) {
        case ixgbe_phy_x550em_kr:
+       case ixgbe_phy_x550em_xfi:
                physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KR |
                                 IXGBE_PHYSICAL_LAYER_1000BASE_KX;
                break;
@@ -3455,6 +3724,20 @@ u32 ixgbe_get_supported_physical_layer_X550em(struct ixgbe_hw *hw)
                if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY)
                        physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T;
                break;
+       case ixgbe_phy_fw:
+               if (hw->phy.speeds_supported & IXGBE_LINK_SPEED_1GB_FULL)
+                       physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T;
+               if (hw->phy.speeds_supported & IXGBE_LINK_SPEED_100_FULL)
+                       physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX;
+               if (hw->phy.speeds_supported & IXGBE_LINK_SPEED_10_FULL)
+                       physical_layer |= IXGBE_PHYSICAL_LAYER_10BASE_T;
+               break;
+       case ixgbe_phy_sgmii:
+               physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_KX;
+               break;
+       case ixgbe_phy_ext_1g_t:
+               physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T;
+               break;
        default:
                break;
        }
@@ -3737,19 +4020,19 @@ s32 ixgbe_setup_fc_X550em(struct ixgbe_hw *hw)
        case IXGBE_DEV_ID_X550EM_A_KR:
        case IXGBE_DEV_ID_X550EM_A_KR_L:
                ret_val = hw->mac.ops.read_iosf_sb_reg(hw,
-                                     IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
-                                     IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+                                       IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+                                       IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
                if (ret_val != IXGBE_SUCCESS)
                        goto out;
                reg_val &= ~(IXGBE_KRM_AN_CNTL_1_SYM_PAUSE |
-                            IXGBE_KRM_AN_CNTL_1_ASM_PAUSE);
+                       IXGBE_KRM_AN_CNTL_1_ASM_PAUSE);
                if (pause)
                        reg_val |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE;
                if (asm_dir)
                        reg_val |= IXGBE_KRM_AN_CNTL_1_ASM_PAUSE;
                ret_val = hw->mac.ops.write_iosf_sb_reg(hw,
-                                      IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
-                                      IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+                                       IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+                                       IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
 
                /* This device does not fully support AN. */
                hw->fc.disable_fc_autoneg = true;
@@ -3763,12 +4046,12 @@ out:
 }
 
 /**
- *  ixgbe_fc_autoneg_x550a - Enable flow control IEEE clause 37
+ *  ixgbe_fc_autoneg_backplane_x550em_a - Enable flow control IEEE clause 37
  *  @hw: pointer to hardware structure
  *
  *  Enable flow control according to IEEE clause 37.
  **/
-void ixgbe_fc_autoneg_x550a(struct ixgbe_hw *hw)
+void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *hw)
 {
        u32 link_s1, lp_an_page_low, an_cntl_1;
        s32 status = IXGBE_ERR_FC_NOT_NEGOTIATED;
@@ -3782,7 +4065,7 @@ void ixgbe_fc_autoneg_x550a(struct ixgbe_hw *hw)
         */
        if (hw->fc.disable_fc_autoneg) {
                ERROR_REPORT1(IXGBE_ERROR_UNSUPPORTED,
-                             "Flow control autoneg is disabled");
+                            "Flow control autoneg is disabled");
                goto out;
        }
 
@@ -3794,12 +4077,13 @@ void ixgbe_fc_autoneg_x550a(struct ixgbe_hw *hw)
 
        /* Check that auto-negotiation has completed */
        status = hw->mac.ops.read_iosf_sb_reg(hw,
-                                     IXGBE_KRM_LINK_S1(hw->bus.lan_id),
-                                     IXGBE_SB_IOSF_TARGET_KR_PHY, &link_s1);
+                                       IXGBE_KRM_LINK_S1(hw->bus.lan_id),
+                                       IXGBE_SB_IOSF_TARGET_KR_PHY, &link_s1);
 
        if (status != IXGBE_SUCCESS ||
            (link_s1 & IXGBE_KRM_LINK_S1_MAC_AN_COMPLETE) == 0) {
                DEBUGOUT("Auto-Negotiation did not complete\n");
+               status = IXGBE_ERR_FC_NOT_NEGOTIATED;
                goto out;
        }
 
@@ -3807,8 +4091,8 @@ void ixgbe_fc_autoneg_x550a(struct ixgbe_hw *hw)
         * local flow control settings accordingly
         */
        status = hw->mac.ops.read_iosf_sb_reg(hw,
-                                     IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
-                                     IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl_1);
+                               IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl_1);
 
        if (status != IXGBE_SUCCESS) {
                DEBUGOUT("Auto-Negotiation did not complete\n");
@@ -3816,8 +4100,8 @@ void ixgbe_fc_autoneg_x550a(struct ixgbe_hw *hw)
        }
 
        status = hw->mac.ops.read_iosf_sb_reg(hw,
-                             IXGBE_KRM_LP_BASE_PAGE_HIGH(hw->bus.lan_id),
-                             IXGBE_SB_IOSF_TARGET_KR_PHY, &lp_an_page_low);
+                               IXGBE_KRM_LP_BASE_PAGE_HIGH(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, &lp_an_page_low);
 
        if (status != IXGBE_SUCCESS) {
                DEBUGOUT("Auto-Negotiation did not complete\n");
@@ -3840,22 +4124,88 @@ out:
 }
 
 /**
- *  ixgbe_setup_fc_x550em - Set up flow control
+ *  ixgbe_fc_autoneg_fiber_x550em_a - passthrough FC settings
+ *  @hw: pointer to hardware structure
+ *
+ **/
+void ixgbe_fc_autoneg_fiber_x550em_a(struct ixgbe_hw *hw)
+{
+       hw->fc.fc_was_autonegged = false;
+       hw->fc.current_mode = hw->fc.requested_mode;
+}
+
+/**
+ *  ixgbe_fc_autoneg_sgmii_x550em_a - Enable flow control IEEE clause 37
+ *  @hw: pointer to hardware structure
+ *
+ *  Enable flow control according to IEEE clause 37.
+ **/
+void ixgbe_fc_autoneg_sgmii_x550em_a(struct ixgbe_hw *hw)
+{
+       s32 status = IXGBE_ERR_FC_NOT_NEGOTIATED;
+       u32 info[FW_PHY_ACT_DATA_COUNT] = { 0 };
+       ixgbe_link_speed speed;
+       bool link_up;
+
+       /* AN should have completed when the cable was plugged in.
+        * Look for reasons to bail out.  Bail out if:
+        * - FC autoneg is disabled, or if
+        * - link is not up.
+        */
+       if (hw->fc.disable_fc_autoneg) {
+               ERROR_REPORT1(IXGBE_ERROR_UNSUPPORTED,
+                            "Flow control autoneg is disabled");
+               goto out;
+       }
+
+       hw->mac.ops.check_link(hw, &speed, &link_up, false);
+       if (!link_up) {
+               ERROR_REPORT1(IXGBE_ERROR_SOFTWARE, "The link is down");
+               goto out;
+       }
+
+       /* Check if auto-negotiation has completed */
+       status = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_GET_LINK_INFO, &info);
+       if (status != IXGBE_SUCCESS ||
+           !(info[0] & FW_PHY_ACT_GET_LINK_INFO_AN_COMPLETE)) {
+               DEBUGOUT("Auto-Negotiation did not complete\n");
+               status = IXGBE_ERR_FC_NOT_NEGOTIATED;
+               goto out;
+       }
+
+       /* Negotiate the flow control */
+       status = ixgbe_negotiate_fc(hw, info[0], info[0],
+                                   FW_PHY_ACT_GET_LINK_INFO_FC_RX,
+                                   FW_PHY_ACT_GET_LINK_INFO_FC_TX,
+                                   FW_PHY_ACT_GET_LINK_INFO_LP_FC_RX,
+                                   FW_PHY_ACT_GET_LINK_INFO_LP_FC_TX);
+
+out:
+       if (status == IXGBE_SUCCESS) {
+               hw->fc.fc_was_autonegged = true;
+       } else {
+               hw->fc.fc_was_autonegged = false;
+               hw->fc.current_mode = hw->fc.requested_mode;
+       }
+}
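
Annotation: ixgbe_fc_autoneg_sgmii_x550em_a above pulls the local and link-partner pause bits out of the GET_LINK_INFO response and hands them to ixgbe_negotiate_fc. The resolution that helper performs is the usual IEEE 802.3 clause-37 pause table; a simplified standalone sketch follows (the requested-mode refinement applied in the real helper, and the exact bit arguments passed above, are left out).

#include <stdio.h>
#include <stdbool.h>

enum fc_mode { FC_NONE, FC_RX_PAUSE, FC_TX_PAUSE, FC_FULL };

/* Clause-37 style pause resolution from the advertised symmetric (SYM) and
 * asymmetric (ASM) pause bits of both link partners. */
static enum fc_mode resolve_fc(bool loc_sym, bool loc_asm,
                               bool lp_sym, bool lp_asm)
{
        if (loc_sym && lp_sym)
                return FC_FULL;
        if (!loc_sym && loc_asm && lp_sym && lp_asm)
                return FC_TX_PAUSE;
        if (loc_sym && loc_asm && !lp_sym && lp_asm)
                return FC_RX_PAUSE;
        return FC_NONE;
}

int main(void)
{
        static const char *names[] = { "none", "rx_pause", "tx_pause", "full" };

        /* Both sides advertise symmetric pause -> full flow control. */
        printf("sym vs sym -> %s\n", names[resolve_fc(true, false, true, false)]);
        /* We only advertise asymmetric, peer does both -> we may send pause. */
        printf("asm vs sym+asm -> %s\n",
               names[resolve_fc(false, true, true, true)]);
        return 0;
}
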
+
+/**
+ *  ixgbe_setup_fc_backplane_x550em_a - Set up flow control
  *  @hw: pointer to hardware structure
  *
  *  Called at init time to set up flow control.
  **/
-s32 ixgbe_setup_fc_x550a(struct ixgbe_hw *hw)
+s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *hw)
 {
        s32 status = IXGBE_SUCCESS;
-       u32 an_cntl, link_ctrl = 0;
+       u32 an_cntl = 0;
 
-       DEBUGFUNC("ixgbe_setup_fc_x550em");
+       DEBUGFUNC("ixgbe_setup_fc_backplane_x550em_a");
 
        /* Validate the requested mode */
        if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) {
                ERROR_REPORT1(IXGBE_ERROR_UNSUPPORTED,
-                     "ixgbe_fc_rx_pause not valid in strict IEEE mode\n");
+                             "ixgbe_fc_rx_pause not valid in strict IEEE mode\n");
                return IXGBE_ERR_INVALID_LINK_SETTINGS;
        }
 
@@ -3867,8 +4217,8 @@ s32 ixgbe_setup_fc_x550a(struct ixgbe_hw *hw)
         * we link at 10G, the 1G advertisement is harmless and vice versa.
         */
        status = hw->mac.ops.read_iosf_sb_reg(hw,
-                                     IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
-                                     IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl);
+                                       IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+                                       IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl);
 
        if (status != IXGBE_SUCCESS) {
                DEBUGOUT("Auto-Negotiation did not complete\n");
@@ -3909,7 +4259,7 @@ s32 ixgbe_setup_fc_x550a(struct ixgbe_hw *hw)
        case ixgbe_fc_full:
                /* Flow control (both Rx and Tx) is enabled by SW override. */
                an_cntl |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE |
-                       IXGBE_KRM_AN_CNTL_1_ASM_PAUSE;
+                          IXGBE_KRM_AN_CNTL_1_ASM_PAUSE;
                break;
        default:
                ERROR_REPORT1(IXGBE_ERROR_ARGUMENT,
@@ -3918,23 +4268,11 @@ s32 ixgbe_setup_fc_x550a(struct ixgbe_hw *hw)
        }
 
        status = hw->mac.ops.write_iosf_sb_reg(hw,
-                                      IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
-                                      IXGBE_SB_IOSF_TARGET_KR_PHY, an_cntl);
+                                       IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+                                       IXGBE_SB_IOSF_TARGET_KR_PHY, an_cntl);
 
        /* Restart auto-negotiation. */
-       status = hw->mac.ops.read_iosf_sb_reg(hw,
-                                     IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-                                     IXGBE_SB_IOSF_TARGET_KR_PHY, &link_ctrl);
-
-       if (status != IXGBE_SUCCESS) {
-               DEBUGOUT("Auto-Negotiation did not complete\n");
-               return status;
-       }
-
-       link_ctrl |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART;
-       status = hw->mac.ops.write_iosf_sb_reg(hw,
-                                      IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-                                      IXGBE_SB_IOSF_TARGET_KR_PHY, link_ctrl);
+       status = ixgbe_restart_an_internal_phy_x550em(hw);
 
        return status;
 }
@@ -4018,22 +4356,34 @@ STATIC s32 ixgbe_acquire_swfw_sync_X550a(struct ixgbe_hw *hw, u32 mask)
                status = IXGBE_SUCCESS;
                if (hmask)
                        status = ixgbe_acquire_swfw_sync_X540(hw, hmask);
-               if (status)
+               if (status) {
+                       DEBUGOUT1("Could not acquire SWFW semaphore, Status = %d\n",
+                                 status);
                        return status;
+               }
                if (!(mask & IXGBE_GSSR_TOKEN_SM))
                        return IXGBE_SUCCESS;
 
                status = ixgbe_get_phy_token(hw);
+               if (status == IXGBE_ERR_TOKEN_RETRY)
+                       DEBUGOUT1("Could not acquire PHY token, Status = %d\n",
+                                 status);
+
                if (status == IXGBE_SUCCESS)
                        return IXGBE_SUCCESS;
 
                if (hmask)
                        ixgbe_release_swfw_sync_X540(hw, hmask);
-               if (status != IXGBE_ERR_TOKEN_RETRY)
+
+               if (status != IXGBE_ERR_TOKEN_RETRY) {
+                       DEBUGOUT1("Unable to retry acquiring the PHY token, Status = %d\n",
+                                 status);
                        return status;
-               msec_delay(FW_PHY_TOKEN_DELAY);
+               }
        }
 
+       DEBUGOUT1("Semaphore acquisition retries failed!: PHY ID = 0x%08X\n",
+                 hw->phy.id);
        return status;
 }
 
@@ -4068,7 +4418,7 @@ STATIC void ixgbe_release_swfw_sync_X550a(struct ixgbe_hw *hw, u32 mask)
  *  instances.
  **/
 s32 ixgbe_read_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
-                            u32 device_type, u16 *phy_data)
+                              u32 device_type, u16 *phy_data)
 {
        s32 status;
        u32 mask = hw->phy.phy_semaphore_mask | IXGBE_GSSR_TOKEN_SM;
@@ -4096,7 +4446,7 @@ s32 ixgbe_read_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
  *  The PHY Token is needed since the MDIO is shared between two MAC instances.
  **/
 s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
-                             u32 device_type, u16 phy_data)
+                               u32 device_type, u16 phy_data)
 {
        s32 status;
        u32 mask = hw->phy.phy_semaphore_mask | IXGBE_GSSR_TOKEN_SM;
@@ -4104,7 +4454,7 @@ s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
        DEBUGFUNC("ixgbe_write_phy_reg_x550a");
 
        if (hw->mac.ops.acquire_swfw_sync(hw, mask) == IXGBE_SUCCESS) {
-               status = ixgbe_write_phy_reg_mdi(hw, reg_addr, device_type,
+               status = hw->phy.ops.write_reg_mdi(hw, reg_addr, device_type,
                                                 phy_data);
                hw->mac.ops.release_swfw_sync(hw, mask);
        } else {
@@ -4169,8 +4519,10 @@ s32 ixgbe_setup_mac_link_t_X550em(struct ixgbe_hw *hw,
        else
                force_speed = IXGBE_LINK_SPEED_1GB_FULL;
 
-       /* If internal link mode is XFI, then setup XFI internal link. */
-       if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) {
+       /* If X552 and internal link mode is XFI, then setup XFI internal link.
+        */
+       if (hw->mac.type == ixgbe_mac_X550EM_x &&
+           !(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) {
                status = ixgbe_setup_ixfi_x550em(hw, &force_speed);
 
                if (status != IXGBE_SUCCESS)
@@ -4193,7 +4545,7 @@ s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
                              bool *link_up, bool link_up_wait_to_complete)
 {
        u32 status;
-       u16 autoneg_status;
+       u16 i, autoneg_status = 0;
 
        if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper)
                return IXGBE_ERR_CONFIG;
@@ -4206,21 +4558,18 @@ s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
                return status;
 
        /* MAC link is up, so check external PHY link.
-        * Read this twice back to back to indicate current status.
+        * X557 PHY. Link status is latching low, and can only be used to detect
+        * link drop, and not the current status of the link without performing
+        * back-to-back reads.
         */
-       status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
-                                     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
-                                     &autoneg_status);
-
-       if (status != IXGBE_SUCCESS)
-               return status;
-
-       status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
-                                     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
-                                     &autoneg_status);
+       for (i = 0; i < 2; i++) {
+               status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
+                                             IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+                                             &autoneg_status);
 
-       if (status != IXGBE_SUCCESS)
-               return status;
+               if (status != IXGBE_SUCCESS)
+                       return status;
+       }
 
        /* If external PHY link is not up, then indicate link not up */
        if (!(autoneg_status & IXGBE_MDIO_AUTO_NEG_LINK_STATUS))
@@ -4294,3 +4643,63 @@ s32 ixgbe_led_off_t_X550em(struct ixgbe_hw *hw, u32 led_idx)
        return IXGBE_SUCCESS;
 }
 
+/**
+ *  ixgbe_set_fw_drv_ver_x550 - Sends driver version to firmware
+ *  @hw: pointer to the HW structure
+ *  @maj: driver version major number
+ *  @min: driver version minor number
+ *  @build: driver version build number
+ *  @sub: driver version sub build number
+ *  @len: length of driver_ver string
+ *  @driver_ver: driver string
+ *
+ *  Sends driver version number to firmware through the manageability
+ *  block.  On success return IXGBE_SUCCESS
+ *  else returns IXGBE_ERR_SWFW_SYNC when encountering an error acquiring
+ *  semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails.
+ **/
+s32 ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min,
+                             u8 build, u8 sub, u16 len, const char *driver_ver)
+{
+       struct ixgbe_hic_drv_info2 fw_cmd;
+       s32 ret_val = IXGBE_SUCCESS;
+       int i;
+
+       DEBUGFUNC("ixgbe_set_fw_drv_ver_x550");
+
+       if ((len == 0) || (driver_ver == NULL) ||
+          (len > sizeof(fw_cmd.driver_string)))
+               return IXGBE_ERR_INVALID_ARGUMENT;
+
+       fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO;
+       fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN + len;
+       fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED;
+       fw_cmd.port_num = (u8)hw->bus.func;
+       fw_cmd.ver_maj = maj;
+       fw_cmd.ver_min = min;
+       fw_cmd.ver_build = build;
+       fw_cmd.ver_sub = sub;
+       fw_cmd.hdr.checksum = 0;
+       memcpy(fw_cmd.driver_string, driver_ver, len);
+       fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
+                               (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
+
+       for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
+               ret_val = ixgbe_host_interface_command(hw, (u32 *)&fw_cmd,
+                                                      sizeof(fw_cmd),
+                                                      IXGBE_HI_COMMAND_TIMEOUT,
+                                                      true);
+               if (ret_val != IXGBE_SUCCESS)
+                       continue;
+
+               if (fw_cmd.hdr.cmd_or_resp.ret_status ==
+                   FW_CEM_RESP_STATUS_SUCCESS)
+                       ret_val = IXGBE_SUCCESS;
+               else
+                       ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+
+               break;
+       }
+
+       return ret_val;
+}
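
Annotation: ixgbe_set_fw_drv_ver_x550 above zeroes the header checksum field, computes a checksum over FW_CEM_HDR_LEN plus the payload, and then retries the host interface command up to FW_CEM_MAX_RETRIES times, treating only FW_CEM_RESP_STATUS_SUCCESS as success. ixgbe_calculate_checksum itself is defined elsewhere in the base code; it is assumed here to be the common "sum to zero" byte checksum, which the standalone sketch below demonstrates.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Two's-complement byte checksum: after storing the result in the buffer,
 * summing every byte (checksum included) yields 0. This mirrors what the
 * base code's ixgbe_calculate_checksum is assumed to do; the real helper is
 * not part of this patch. */
static uint8_t cem_checksum(const uint8_t *buf, uint32_t len)
{
        uint8_t sum = 0;
        uint32_t i;

        for (i = 0; i < len; i++)
                sum += buf[i];
        return (uint8_t)(0 - sum);
}

int main(void)
{
        uint8_t cmd[16];
        uint8_t total = 0;
        uint32_t i;

        memset(cmd, 0, sizeof(cmd));
        memcpy(&cmd[4], "17.02", 5);      /* pretend driver version string */
        cmd[3] = 0;                       /* checksum slot, zeroed first */
        cmd[3] = cem_checksum(cmd, sizeof(cmd));

        for (i = 0; i < sizeof(cmd); i++)
                total += cmd[i];
        printf("checksum byte 0x%02x, total sum %u\n", cmd[3], total);
        return 0;
}
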
index 27d5d02..30ca5df 100644 (file)
@@ -36,49 +36,6 @@ POSSIBILITY OF SUCH DAMAGE.
 
 #include "ixgbe_type.h"
 
-/* More phy definitions */
-#define IXGBE_M88E1500_COPPER_CTRL             0x0/* Page 0 reg */
-#define IXGBE_M88E1500_COPPER_CTRL_RESET       0x8000
-#define IXGBE_M88E1500_COPPER_CTRL_AN_EN       0x1000
-#define IXGBE_M88E1500_COPPER_CTRL_RESTART_AN  0x0200
-#define IXGBE_M88E1500_COPPER_CTRL_FULL_DUPLEX 0x0100
-#define IXGBE_M88E1500_COPPER_CTRL_SPEED_MSB   0x0040
-#define IXGBE_M88E1500_1000T_CTRL              0x09 /* 1000Base-T Ctrl Reg */
-/* 1=Configure PHY as Master 0=Configure PHY as Slave */
-#define IXGBE_M88E1500_1000T_CTRL_MS_VALUE     0x0800
-/* 1=Master/Slave manual config value 0=Automatic Master/Slave config */
-#define IXGBE_M88E1500_1000T_CTRL_MS_ENABLE    0x1000
-#define IXGBE_M88E1500_1000T_STATUS            0x0A /* 1000Base-T Status Reg */
-#define IXGBE_M88E1500_AUTO_COPPER_SGMII       0x2
-#define IXGBE_M88E1500_AUTO_COPPER_BASEX       0x3
-#define IXGBE_M88E1500_STATUS_LINK             0x0004 /* Interface Link Bit */
-#define IXGBE_M88E1500_MAC_CTRL_1              0x10
-#define IXGBE_M88E1500_MAC_CTRL_1_MODE_MASK    0x0380 /* Mode Select */
-#define IXGBE_M88E1500_CFG_REG_1               0x0010
-#define IXGBE_M88E1500_CFG_REG_2               0x0011
-#define IXGBE_M88E1500_CFG_REG_3               0x0007
-#define IXGBE_M88E1500_MODE                    0x0014
-#define IXGBE_M88E1500_PAGE_ADDR               0x16/* Page Offset reg */
-#define IXGBE_M88E1500_FIBER_CTRL              0x0/* Page 1 reg */
-#define IXGBE_M88E1500_FIBER_CTRL_RESET                0x8000
-#define IXGBE_M88E1500_FIBER_CTRL_SPEED_LSB    0x2000
-#define IXGBE_M88E1500_FIBER_CTRL_POWER_DOWN   0x0800
-#define IXGBE_M88E1500_FIBER_CTRL_DUPLEX_FULL  0x0100
-#define IXGBE_M88E1500_FIBER_CTRL_SPEED_MSB    0x0040
-#define IXGBE_M88E1500_EEE_CTRL_1              0x0/* Page 18 reg */
-#define IXGBE_M88E1500_EEE_CTRL_1_MS           0x0001/* EEE Master/Slave */
-#define IXGBE_M88E1500_GEN_CTRL                        0x14/* Page 18 reg */
-#define IXGBE_M88E1500_GEN_CTRL_RESET          0x8000
-#define IXGBE_M88E1500_GEN_CTRL_SGMII_COPPER   0x0001/* Mode bits 0-2 */
-
-/* M88E1500 Specific Registers */
-#define IXGBE_M88E1500_PHY_SPEC_CTRL           0x10 /* PHY Specific Ctrl Reg */
-#define IXGBE_M88E1500_PHY_SPEC_STATUS         0x11 /* PHY Specific Stat Reg */
-
-#define IXGBE_M88E1500_PSCR_DOWNSHIFT_ENABLE   0x0800
-#define IXGBE_M88E1500_PSCR_DOWNSHIFT_MASK     0x7000
-#define IXGBE_M88E1500_PSCR_DOWNSHIFT_6X       0x5000
-
 s32 ixgbe_dmac_config_X550(struct ixgbe_hw *hw);
 s32 ixgbe_dmac_config_tcs_X550(struct ixgbe_hw *hw);
 s32 ixgbe_dmac_update_tcs_X550(struct ixgbe_hw *hw);
@@ -98,12 +55,8 @@ s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
                                     u16 offset, u16 words, u16 *data);
 s32 ixgbe_read_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset,
 u16                            *data);
-s32 ixgbe_read_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset,
-                                  u16 *data);
 s32 ixgbe_write_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset,
                                    u16 data);
-s32 ixgbe_set_eee_X550(struct ixgbe_hw *hw, bool enable_eee);
-s32 ixgbe_setup_eee_X550(struct ixgbe_hw *hw, bool enable_eee);
 void ixgbe_set_source_address_pruning_X550(struct ixgbe_hw *hw, bool enable,
                                           unsigned int pool);
 void ixgbe_set_ethertype_anti_spoofing_X550(struct ixgbe_hw *hw,
@@ -112,6 +65,8 @@ s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr,
                                 u32 device_type, u32 data);
 s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr,
        u32 device_type, u32 *data);
+s32 ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min,
+                             u8 build, u8 ver, u16 len, const char *str);
 s32 ixgbe_get_phy_token(struct ixgbe_hw *);
 s32 ixgbe_put_phy_token(struct ixgbe_hw *);
 s32 ixgbe_write_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
@@ -144,14 +99,18 @@ s32 ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw,
                                    ixgbe_link_speed speed,
                                    bool autoneg_wait_to_complete);
 s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw,
-                                  ixgbe_link_speed speed,
-                                  bool autoneg_wait_to_complete);
+                                   ixgbe_link_speed speed,
+                                   bool autoneg_wait_to_complete);
 s32 ixgbe_read_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
-                            u32 device_type, u16 *phy_data);
+                              u32 device_type, u16 *phy_data);
 s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
-                             u32 device_type, u16 phy_data);
-s32 ixgbe_setup_fc_x550a(struct ixgbe_hw *hw);
-void ixgbe_fc_autoneg_x550a(struct ixgbe_hw *hw);
+                               u32 device_type, u16 phy_data);
+s32 ixgbe_setup_fc_fiber_x550em_a(struct ixgbe_hw *hw);
+s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *hw);
+s32 ixgbe_setup_fc_sgmii_x550em_a(struct ixgbe_hw *hw);
+void ixgbe_fc_autoneg_fiber_x550em_a(struct ixgbe_hw *hw);
+void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *hw);
+void ixgbe_fc_autoneg_sgmii_x550em_a(struct ixgbe_hw *hw);
 s32 ixgbe_handle_lasi_ext_t_x550em(struct ixgbe_hw *hw);
 s32 ixgbe_setup_mac_link_t_X550em(struct ixgbe_hw *hw,
                                  ixgbe_link_speed speed,
index 72963a8..5b625a3 100644 (file)
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -60,6 +60,7 @@
 #include <rte_malloc.h>
 #include <rte_random.h>
 #include <rte_dev.h>
+#include <rte_hash_crc.h>
 
 #include "ixgbe_logs.h"
 #include "base/ixgbe_api.h"
@@ -72,6 +73,8 @@
 #include "base/ixgbe_phy.h"
 #include "ixgbe_regs.h"
 
+#include "rte_pmd_ixgbe.h"
+
 /*
  * High threshold controlling when to start sending XOFF frames. Must be at
  * least 8 bytes less than receive packet buffer size. This value is in units
@@ -163,6 +166,11 @@ enum ixgbevf_xcast_modes {
 
 static int eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev);
 static int eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev);
+static int ixgbe_fdir_filter_init(struct rte_eth_dev *eth_dev);
+static int ixgbe_fdir_filter_uninit(struct rte_eth_dev *eth_dev);
+static int ixgbe_l2_tn_filter_init(struct rte_eth_dev *eth_dev);
+static int ixgbe_l2_tn_filter_uninit(struct rte_eth_dev *eth_dev);
+static int ixgbe_ntuple_filter_uninit(struct rte_eth_dev *eth_dev);
 static int  ixgbe_dev_configure(struct rte_eth_dev *dev);
 static int  ixgbe_dev_start(struct rte_eth_dev *dev);
 static void ixgbe_dev_stop(struct rte_eth_dev *dev);
@@ -191,6 +199,8 @@ static int ixgbe_dev_queue_stats_mapping_set(struct rte_eth_dev *eth_dev,
                                             uint16_t queue_id,
                                             uint8_t stat_idx,
                                             uint8_t is_rx);
+static int ixgbe_fw_version_get(struct rte_eth_dev *dev, char *fw_version,
+                                size_t fw_size);
 static void ixgbe_dev_info_get(struct rte_eth_dev *dev,
                               struct rte_eth_dev_info *dev_info);
 static const uint32_t *ixgbe_dev_supported_ptypes_get(struct rte_eth_dev *dev);
@@ -229,9 +239,11 @@ static int ixgbe_dev_rss_reta_query(struct rte_eth_dev *dev,
                        uint16_t reta_size);
 static void ixgbe_dev_link_status_print(struct rte_eth_dev *dev);
 static int ixgbe_dev_lsc_interrupt_setup(struct rte_eth_dev *dev);
+static int ixgbe_dev_macsec_interrupt_setup(struct rte_eth_dev *dev);
 static int ixgbe_dev_rxq_interrupt_setup(struct rte_eth_dev *dev);
 static int ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev);
-static int ixgbe_dev_interrupt_action(struct rte_eth_dev *dev);
+static int ixgbe_dev_interrupt_action(struct rte_eth_dev *dev,
+                                     struct rte_intr_handle *handle);
 static void ixgbe_dev_interrupt_handler(struct rte_intr_handle *handle,
                void *param);
 static void ixgbe_dev_interrupt_delayed_handler(void *param);
@@ -241,6 +253,7 @@ static void ixgbe_remove_rar(struct rte_eth_dev *dev, uint32_t index);
 static void ixgbe_set_default_mac_addr(struct rte_eth_dev *dev,
                                           struct ether_addr *mac_addr);
 static void ixgbe_dcb_init(struct ixgbe_hw *hw, struct ixgbe_dcb_config *dcb_config);
+static int is_ixgbe_pmd(const char *driver_name);
 
 /* For Virtual Function support */
 static int eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev);
@@ -274,12 +287,6 @@ static void ixgbevf_dev_allmulticast_disable(struct rte_eth_dev *dev);
 static int ixgbe_uc_hash_table_set(struct rte_eth_dev *dev, struct
                ether_addr * mac_addr, uint8_t on);
 static int ixgbe_uc_all_hash_table_set(struct rte_eth_dev *dev, uint8_t on);
-static int  ixgbe_set_pool_rx_mode(struct rte_eth_dev *dev,  uint16_t pool,
-               uint16_t rx_mask, uint8_t on);
-static int ixgbe_set_pool_rx(struct rte_eth_dev *dev, uint16_t pool, uint8_t on);
-static int ixgbe_set_pool_tx(struct rte_eth_dev *dev, uint16_t pool, uint8_t on);
-static int ixgbe_set_pool_vlan_filter(struct rte_eth_dev *dev, uint16_t vlan,
-               uint64_t pool_mask, uint8_t vlan_on);
 static int ixgbe_mirror_rule_set(struct rte_eth_dev *dev,
                struct rte_eth_mirror_conf *mirror_conf,
                uint8_t rule_id, uint8_t on);
@@ -295,8 +302,6 @@ static void ixgbe_configure_msix(struct rte_eth_dev *dev);
 
 static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev,
                uint16_t queue_idx, uint16_t tx_rate);
-static int ixgbe_set_vf_rate_limit(struct rte_eth_dev *dev, uint16_t vf,
-               uint16_t tx_rate, uint64_t q_msk);
 
 static void ixgbevf_add_mac_addr(struct rte_eth_dev *dev,
                                 struct ether_addr *mac_addr,
@@ -304,9 +309,6 @@ static void ixgbevf_add_mac_addr(struct rte_eth_dev *dev,
 static void ixgbevf_remove_mac_addr(struct rte_eth_dev *dev, uint32_t index);
 static void ixgbevf_set_default_mac_addr(struct rte_eth_dev *dev,
                                             struct ether_addr *mac_addr);
-static int ixgbe_syn_filter_set(struct rte_eth_dev *dev,
-                       struct rte_eth_syn_filter *filter,
-                       bool add);
 static int ixgbe_syn_filter_get(struct rte_eth_dev *dev,
                        struct rte_eth_syn_filter *filter);
 static int ixgbe_syn_filter_handle(struct rte_eth_dev *dev,
@@ -316,17 +318,11 @@ static int ixgbe_add_5tuple_filter(struct rte_eth_dev *dev,
                        struct ixgbe_5tuple_filter *filter);
 static void ixgbe_remove_5tuple_filter(struct rte_eth_dev *dev,
                        struct ixgbe_5tuple_filter *filter);
-static int ixgbe_add_del_ntuple_filter(struct rte_eth_dev *dev,
-                       struct rte_eth_ntuple_filter *filter,
-                       bool add);
 static int ixgbe_ntuple_filter_handle(struct rte_eth_dev *dev,
                                enum rte_filter_op filter_op,
                                void *arg);
 static int ixgbe_get_ntuple_filter(struct rte_eth_dev *dev,
                        struct rte_eth_ntuple_filter *filter);
-static int ixgbe_add_del_ethertype_filter(struct rte_eth_dev *dev,
-                       struct rte_eth_ethertype_filter *filter,
-                       bool add);
 static int ixgbe_ethertype_filter_handle(struct rte_eth_dev *dev,
                                enum rte_filter_op filter_op,
                                void *arg);
@@ -387,6 +383,8 @@ static int ixgbe_dev_udp_tunnel_port_add(struct rte_eth_dev *dev,
                                         struct rte_eth_udp_tunnel *udp_tunnel);
 static int ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
                                         struct rte_eth_udp_tunnel *udp_tunnel);
+static int ixgbe_filter_restore(struct rte_eth_dev *dev);
+static void ixgbe_l2_tunnel_conf(struct rte_eth_dev *dev);
 
 /*
  * Define VF Stats MACRO for Non "cleared on read" register
@@ -429,23 +427,80 @@ static int ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
  * The set of PCI devices this driver supports
  */
 static const struct rte_pci_id pci_id_ixgbe_map[] = {
-
-#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) {RTE_PCI_DEVICE(vend, dev)},
-#include "rte_pci_dev_ids.h"
-
-{ .vendor_id = 0, /* sentinel */ },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_BX) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KR) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_SUBDEV_ID_82599_SFP) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_SUBDEV_ID_82599_RNDC) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_SUBDEV_ID_82599_560FLR) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_SUBDEV_ID_82599_ECNA_DP) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_EM) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_QSFP_SF_QP) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_LS) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_SFP) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_10G_T) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_1G_T) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T1) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_KR) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_KR_L) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SFP_N) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SGMII) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SGMII_L) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_10G_T) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_QSFP) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_QSFP_N) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SFP) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_1G_T) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_1G_T_L) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KX4) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KR) },
+#ifdef RTE_NIC_BYPASS
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BYPASS) },
+#endif
+       { .vendor_id = 0, /* sentinel */ },
 };
 
-
 /*
  * The set of PCI devices this driver supports (for 82599 VF)
  */
 static const struct rte_pci_id pci_id_ixgbevf_map[] = {
-
-#define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev) {RTE_PCI_DEVICE(vend, dev)},
-#include "rte_pci_dev_ids.h"
-{ .vendor_id = 0, /* sentinel */ },
-
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_VF) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_VF_HV) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_VF) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_VF_HV) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550_VF_HV) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550_VF) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_VF) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_VF_HV) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_VF) },
+       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_VF_HV) },
+       { .vendor_id = 0, /* sentinel */ },
 };
 
 static const struct rte_eth_desc_lim rx_desc_lim = {
@@ -458,6 +513,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = {
        .nb_max = IXGBE_MAX_RING_DESC,
        .nb_min = IXGBE_MIN_RING_DESC,
        .nb_align = IXGBE_TXD_ALIGN,
+       .nb_seg_max = IXGBE_TX_MAX_SEG,
+       .nb_mtu_seg_max = IXGBE_TX_MAX_SEG,
 };
 
 static const struct eth_dev_ops ixgbe_eth_dev_ops = {
@@ -478,6 +535,7 @@ static const struct eth_dev_ops ixgbe_eth_dev_ops = {
        .xstats_reset         = ixgbe_dev_xstats_reset,
        .xstats_get_names     = ixgbe_dev_xstats_get_names,
        .queue_stats_mapping_set = ixgbe_dev_queue_stats_mapping_set,
+       .fw_version_get       = ixgbe_fw_version_get,
        .dev_infos_get        = ixgbe_dev_info_get,
        .dev_supported_ptypes_get = ixgbe_dev_supported_ptypes_get,
        .mtu_set              = ixgbe_dev_mtu_set,
@@ -509,12 +567,7 @@ static const struct eth_dev_ops ixgbe_eth_dev_ops = {
        .uc_all_hash_table_set  = ixgbe_uc_all_hash_table_set,
        .mirror_rule_set      = ixgbe_mirror_rule_set,
        .mirror_rule_reset    = ixgbe_mirror_rule_reset,
-       .set_vf_rx_mode       = ixgbe_set_pool_rx_mode,
-       .set_vf_rx            = ixgbe_set_pool_rx,
-       .set_vf_tx            = ixgbe_set_pool_tx,
-       .set_vf_vlan_filter   = ixgbe_set_pool_vlan_filter,
        .set_queue_rate_limit = ixgbe_set_queue_rate_limit,
-       .set_vf_rate_limit    = ixgbe_set_vf_rate_limit,
        .reta_update          = ixgbe_dev_rss_reta_update,
        .reta_query           = ixgbe_dev_rss_reta_query,
 #ifdef RTE_NIC_BYPASS
@@ -685,6 +738,51 @@ static const struct rte_ixgbe_xstats_name_off rte_ixgbe_stats_strings[] = {
 #define IXGBE_NB_HW_STATS (sizeof(rte_ixgbe_stats_strings) / \
                           sizeof(rte_ixgbe_stats_strings[0]))
 
+/* MACsec statistics */
+static const struct rte_ixgbe_xstats_name_off rte_ixgbe_macsec_strings[] = {
+       {"out_pkts_untagged", offsetof(struct ixgbe_macsec_stats,
+               out_pkts_untagged)},
+       {"out_pkts_encrypted", offsetof(struct ixgbe_macsec_stats,
+               out_pkts_encrypted)},
+       {"out_pkts_protected", offsetof(struct ixgbe_macsec_stats,
+               out_pkts_protected)},
+       {"out_octets_encrypted", offsetof(struct ixgbe_macsec_stats,
+               out_octets_encrypted)},
+       {"out_octets_protected", offsetof(struct ixgbe_macsec_stats,
+               out_octets_protected)},
+       {"in_pkts_untagged", offsetof(struct ixgbe_macsec_stats,
+               in_pkts_untagged)},
+       {"in_pkts_badtag", offsetof(struct ixgbe_macsec_stats,
+               in_pkts_badtag)},
+       {"in_pkts_nosci", offsetof(struct ixgbe_macsec_stats,
+               in_pkts_nosci)},
+       {"in_pkts_unknownsci", offsetof(struct ixgbe_macsec_stats,
+               in_pkts_unknownsci)},
+       {"in_octets_decrypted", offsetof(struct ixgbe_macsec_stats,
+               in_octets_decrypted)},
+       {"in_octets_validated", offsetof(struct ixgbe_macsec_stats,
+               in_octets_validated)},
+       {"in_pkts_unchecked", offsetof(struct ixgbe_macsec_stats,
+               in_pkts_unchecked)},
+       {"in_pkts_delayed", offsetof(struct ixgbe_macsec_stats,
+               in_pkts_delayed)},
+       {"in_pkts_late", offsetof(struct ixgbe_macsec_stats,
+               in_pkts_late)},
+       {"in_pkts_ok", offsetof(struct ixgbe_macsec_stats,
+               in_pkts_ok)},
+       {"in_pkts_invalid", offsetof(struct ixgbe_macsec_stats,
+               in_pkts_invalid)},
+       {"in_pkts_notvalid", offsetof(struct ixgbe_macsec_stats,
+               in_pkts_notvalid)},
+       {"in_pkts_unusedsa", offsetof(struct ixgbe_macsec_stats,
+               in_pkts_unusedsa)},
+       {"in_pkts_notusingsa", offsetof(struct ixgbe_macsec_stats,
+               in_pkts_notusingsa)},
+};
+
+#define IXGBE_NB_MACSEC_STATS (sizeof(rte_ixgbe_macsec_strings) / \
+                          sizeof(rte_ixgbe_macsec_strings[0]))
+
 /* Per-queue statistics */
 static const struct rte_ixgbe_xstats_name_off rte_ixgbe_rxq_strings[] = {
        {"mbuf_allocation_errors", offsetof(struct ixgbe_hw_stats, rnbc)},
@@ -800,6 +898,8 @@ ixgbe_pf_reset_hw(struct ixgbe_hw *hw)
        IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
        IXGBE_WRITE_FLUSH(hw);
 
+       if (status == IXGBE_ERR_SFP_NOT_PRESENT)
+               status = IXGBE_SUCCESS;
        return status;
 }
 
@@ -1024,7 +1124,8 @@ ixgbe_swfw_lock_reset(struct ixgbe_hw *hw)
 static int
 eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
 {
-       struct rte_pci_device *pci_dev;
+       struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(eth_dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct ixgbe_hw *hw =
                IXGBE_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
        struct ixgbe_vfta *shadow_vfta =
@@ -1044,6 +1145,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
        eth_dev->dev_ops = &ixgbe_eth_dev_ops;
        eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
        eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+       eth_dev->tx_pkt_prepare = &ixgbe_prep_pkts;
 
        /*
         * For secondary processes, we don't initialise any further as primary
@@ -1068,9 +1170,9 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
 
                return 0;
        }
-       pci_dev = eth_dev->pci_dev;
 
        rte_eth_copy_pci_info(eth_dev, pci_dev);
+       eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE;
 
        /* Vendor and Device ID need to be set before init of shared code */
        hw->device_id = pci_dev->id.device_id;
@@ -1137,6 +1239,9 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
                diag = ixgbe_init_hw(hw);
        }
 
+       if (diag == IXGBE_ERR_SFP_NOT_PRESENT)
+               diag = IXGBE_SUCCESS;
+
        if (diag == IXGBE_ERR_EEPROM_VERSION) {
                PMD_INIT_LOG(ERR, "This device is a pre-production adapter/"
                             "LOM.  Please be aware there may be issues associated "
@@ -1213,20 +1318,34 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
                     eth_dev->data->port_id, pci_dev->id.vendor_id,
                     pci_dev->id.device_id);
 
-       rte_intr_callback_register(&pci_dev->intr_handle,
-                                  ixgbe_dev_interrupt_handler,
-                                  (void *)eth_dev);
+       rte_intr_callback_register(intr_handle,
+                                  ixgbe_dev_interrupt_handler, eth_dev);
 
        /* enable uio/vfio intr/eventfd mapping */
-       rte_intr_enable(&pci_dev->intr_handle);
+       rte_intr_enable(intr_handle);
 
        /* enable support intr */
        ixgbe_enable_intr(eth_dev);
 
+       /* initialize filter info */
+       memset(filter_info, 0,
+              sizeof(struct ixgbe_filter_info));
+
        /* initialize 5tuple filter list */
        TAILQ_INIT(&filter_info->fivetuple_list);
-       memset(filter_info->fivetuple_mask, 0,
-              sizeof(uint32_t) * IXGBE_5TUPLE_ARRAY_SIZE);
+
+       /* initialize flow director filter list & hash */
+       ixgbe_fdir_filter_init(eth_dev);
+
+       /* initialize l2 tunnel filter list & hash */
+       ixgbe_l2_tn_filter_init(eth_dev);
+
+       TAILQ_INIT(&filter_ntuple_list);
+       TAILQ_INIT(&filter_ethertype_list);
+       TAILQ_INIT(&filter_syn_list);
+       TAILQ_INIT(&filter_fdir_list);
+       TAILQ_INIT(&filter_l2_tunnel_list);
+       TAILQ_INIT(&ixgbe_flow_list);
 
        return 0;
 }
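
The added init code above seeds a set of sys/queue.h TAILQ lists (ntuple, ethertype, syn, fdir, l2 tunnel, flow) that the new rte_flow plumbing walks and later flushes on uninit. A minimal, self-contained sketch of that list pattern follows; the demo_filter entry type and helper names are hypothetical, not driver symbols:

#include <stdlib.h>
#include <sys/queue.h>

/* hypothetical stand-in for an ixgbe flow/filter list entry */
struct demo_filter {
	TAILQ_ENTRY(demo_filter) entries;
	int rule_id;
};

TAILQ_HEAD(demo_filter_list, demo_filter);
static struct demo_filter_list demo_list = TAILQ_HEAD_INITIALIZER(demo_list);

static int demo_filter_add(int rule_id)
{
	struct demo_filter *f = calloc(1, sizeof(*f));

	if (f == NULL)
		return -1;
	f->rule_id = rule_id;
	TAILQ_INSERT_TAIL(&demo_list, f, entries);
	return 0;
}

static void demo_filterlist_flush(void)
{
	struct demo_filter *f;

	/* drain and free every entry, as the uninit path does */
	while ((f = TAILQ_FIRST(&demo_list)) != NULL) {
		TAILQ_REMOVE(&demo_list, f, entries);
		free(f);
	}
}
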
@@ -1234,7 +1353,8 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
 static int
 eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev)
 {
-       struct rte_pci_device *pci_dev;
+       struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(eth_dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct ixgbe_hw *hw;
 
        PMD_INIT_FUNC_TRACE();
@@ -1243,7 +1363,6 @@ eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev)
                return -EPERM;
 
        hw = IXGBE_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
-       pci_dev = eth_dev->pci_dev;
 
        if (hw->adapter_stopped == 0)
                ixgbe_dev_close(eth_dev);
@@ -1256,9 +1375,9 @@ eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev)
        ixgbe_swfw_lock_reset(hw);
 
        /* disable uio intr before callback unregister */
-       rte_intr_disable(&(pci_dev->intr_handle));
-       rte_intr_callback_unregister(&(pci_dev->intr_handle),
-               ixgbe_dev_interrupt_handler, (void *)eth_dev);
+       rte_intr_disable(intr_handle);
+       rte_intr_callback_unregister(intr_handle,
+                                    ixgbe_dev_interrupt_handler, eth_dev);
 
        /* uninitialize PF if max_vfs not zero */
        ixgbe_pf_host_uninit(eth_dev);
@@ -1269,9 +1388,154 @@ eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev)
        rte_free(eth_dev->data->hash_mac_addrs);
        eth_dev->data->hash_mac_addrs = NULL;
 
+       /* remove all the fdir filters & hash */
+       ixgbe_fdir_filter_uninit(eth_dev);
+
+       /* remove all the L2 tunnel filters & hash */
+       ixgbe_l2_tn_filter_uninit(eth_dev);
+
+       /* Remove all ntuple filters of the device */
+       ixgbe_ntuple_filter_uninit(eth_dev);
+
+       /* clear all the filters list */
+       ixgbe_filterlist_flush();
+
+       return 0;
+}
+
+static int ixgbe_ntuple_filter_uninit(struct rte_eth_dev *eth_dev)
+{
+       struct ixgbe_filter_info *filter_info =
+               IXGBE_DEV_PRIVATE_TO_FILTER_INFO(eth_dev->data->dev_private);
+       struct ixgbe_5tuple_filter *p_5tuple;
+
+       while ((p_5tuple = TAILQ_FIRST(&filter_info->fivetuple_list))) {
+               TAILQ_REMOVE(&filter_info->fivetuple_list,
+                            p_5tuple,
+                            entries);
+               rte_free(p_5tuple);
+       }
+       memset(filter_info->fivetuple_mask, 0,
+              sizeof(uint32_t) * IXGBE_5TUPLE_ARRAY_SIZE);
+
+       return 0;
+}
+
+static int ixgbe_fdir_filter_uninit(struct rte_eth_dev *eth_dev)
+{
+       struct ixgbe_hw_fdir_info *fdir_info =
+               IXGBE_DEV_PRIVATE_TO_FDIR_INFO(eth_dev->data->dev_private);
+       struct ixgbe_fdir_filter *fdir_filter;
+
+       if (fdir_info->hash_map)
+               rte_free(fdir_info->hash_map);
+       if (fdir_info->hash_handle)
+               rte_hash_free(fdir_info->hash_handle);
+
+       while ((fdir_filter = TAILQ_FIRST(&fdir_info->fdir_list))) {
+               TAILQ_REMOVE(&fdir_info->fdir_list,
+                            fdir_filter,
+                            entries);
+               rte_free(fdir_filter);
+       }
+
+       return 0;
+}
+
+static int ixgbe_l2_tn_filter_uninit(struct rte_eth_dev *eth_dev)
+{
+       struct ixgbe_l2_tn_info *l2_tn_info =
+               IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(eth_dev->data->dev_private);
+       struct ixgbe_l2_tn_filter *l2_tn_filter;
+
+       if (l2_tn_info->hash_map)
+               rte_free(l2_tn_info->hash_map);
+       if (l2_tn_info->hash_handle)
+               rte_hash_free(l2_tn_info->hash_handle);
+
+       while ((l2_tn_filter = TAILQ_FIRST(&l2_tn_info->l2_tn_list))) {
+               TAILQ_REMOVE(&l2_tn_info->l2_tn_list,
+                            l2_tn_filter,
+                            entries);
+               rte_free(l2_tn_filter);
+       }
+
+       return 0;
+}
+
+static int ixgbe_fdir_filter_init(struct rte_eth_dev *eth_dev)
+{
+       struct ixgbe_hw_fdir_info *fdir_info =
+               IXGBE_DEV_PRIVATE_TO_FDIR_INFO(eth_dev->data->dev_private);
+       char fdir_hash_name[RTE_HASH_NAMESIZE];
+       struct rte_hash_parameters fdir_hash_params = {
+               .name = fdir_hash_name,
+               .entries = IXGBE_MAX_FDIR_FILTER_NUM,
+               .key_len = sizeof(union ixgbe_atr_input),
+               .hash_func = rte_hash_crc,
+               .hash_func_init_val = 0,
+               .socket_id = rte_socket_id(),
+       };
+
+       TAILQ_INIT(&fdir_info->fdir_list);
+       snprintf(fdir_hash_name, RTE_HASH_NAMESIZE,
+                "fdir_%s", eth_dev->data->name);
+       fdir_info->hash_handle = rte_hash_create(&fdir_hash_params);
+       if (!fdir_info->hash_handle) {
+               PMD_INIT_LOG(ERR, "Failed to create fdir hash table!");
+               return -EINVAL;
+       }
+       fdir_info->hash_map = rte_zmalloc("ixgbe",
+                                         sizeof(struct ixgbe_fdir_filter *) *
+                                         IXGBE_MAX_FDIR_FILTER_NUM,
+                                         0);
+       if (!fdir_info->hash_map) {
+               PMD_INIT_LOG(ERR,
+                            "Failed to allocate memory for fdir hash map!");
+               return -ENOMEM;
+       }
+       fdir_info->mask_added = FALSE;
+
        return 0;
 }
 
+static int ixgbe_l2_tn_filter_init(struct rte_eth_dev *eth_dev)
+{
+       struct ixgbe_l2_tn_info *l2_tn_info =
+               IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(eth_dev->data->dev_private);
+       char l2_tn_hash_name[RTE_HASH_NAMESIZE];
+       struct rte_hash_parameters l2_tn_hash_params = {
+               .name = l2_tn_hash_name,
+               .entries = IXGBE_MAX_L2_TN_FILTER_NUM,
+               .key_len = sizeof(struct ixgbe_l2_tn_key),
+               .hash_func = rte_hash_crc,
+               .hash_func_init_val = 0,
+               .socket_id = rte_socket_id(),
+       };
+
+       TAILQ_INIT(&l2_tn_info->l2_tn_list);
+       snprintf(l2_tn_hash_name, RTE_HASH_NAMESIZE,
+                "l2_tn_%s", eth_dev->data->name);
+       l2_tn_info->hash_handle = rte_hash_create(&l2_tn_hash_params);
+       if (!l2_tn_info->hash_handle) {
+               PMD_INIT_LOG(ERR, "Failed to create L2 TN hash table!");
+               return -EINVAL;
+       }
+       l2_tn_info->hash_map = rte_zmalloc("ixgbe",
+                                  sizeof(struct ixgbe_l2_tn_filter *) *
+                                  IXGBE_MAX_L2_TN_FILTER_NUM,
+                                  0);
+       if (!l2_tn_info->hash_map) {
+               PMD_INIT_LOG(ERR,
+                       "Failed to allocate memory for L2 TN hash map!");
+               return -ENOMEM;
+       }
+       l2_tn_info->e_tag_en = FALSE;
+       l2_tn_info->e_tag_fwd_en = FALSE;
+       l2_tn_info->e_tag_ether_type = DEFAULT_ETAG_ETYPE;
+
+       return 0;
+}
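
ixgbe_fdir_filter_init() and ixgbe_l2_tn_filter_init() above each pair an rte_hash table (hashed with rte_hash_crc) with a flat hash_map array indexed by the slot that rte_hash_add_key() returns. A rough sketch of that add/lookup pattern, using a hypothetical key type and table name and assuming the standard librte_hash calls:

#include <errno.h>
#include <stdint.h>
#include <rte_hash.h>
#include <rte_hash_crc.h>
#include <rte_lcore.h>

#define DEMO_MAX_FILTERS 1024

struct demo_key { uint32_t src_ip; uint32_t dst_ip; }; /* hypothetical key */

static struct rte_hash *demo_tbl;
static void *demo_map[DEMO_MAX_FILTERS]; /* slot index -> filter pointer */

static int demo_tbl_init(void)
{
	struct rte_hash_parameters params = {
		.name = "demo_fdir",
		.entries = DEMO_MAX_FILTERS,
		.key_len = sizeof(struct demo_key),
		.hash_func = rte_hash_crc,
		.hash_func_init_val = 0,
		.socket_id = rte_socket_id(),
	};

	demo_tbl = rte_hash_create(&params);
	return demo_tbl == NULL ? -EINVAL : 0;
}

static int demo_tbl_insert(const struct demo_key *key, void *filter)
{
	int32_t slot = rte_hash_add_key(demo_tbl, key);

	if (slot < 0)
		return slot; /* table full or bad parameters */
	demo_map[slot] = filter; /* same idea as fdir_info->hash_map */
	return 0;
}

static void *demo_tbl_lookup(const struct demo_key *key)
{
	int32_t slot = rte_hash_lookup(demo_tbl, key);

	return slot < 0 ? NULL : demo_map[slot];
}
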
 /*
  * Negotiate mailbox API version with the PF.
  * After reset API version is always set to the basic one (ixgbe_mbox_api_10).
@@ -1322,7 +1586,8 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev)
 {
        int diag;
        uint32_t tc, tcs;
-       struct rte_pci_device *pci_dev;
+       struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(eth_dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct ixgbe_hw *hw =
                IXGBE_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
        struct ixgbe_vfta *shadow_vfta =
@@ -1360,9 +1625,8 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev)
                return 0;
        }
 
-       pci_dev = eth_dev->pci_dev;
-
        rte_eth_copy_pci_info(eth_dev, pci_dev);
+       eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE;
 
        hw->device_id = pci_dev->id.device_id;
        hw->vendor_id = pci_dev->id.vendor_id;
@@ -1454,10 +1718,9 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev)
                return -EIO;
        }
 
-       rte_intr_callback_register(&pci_dev->intr_handle,
-                                  ixgbevf_dev_interrupt_handler,
-                                  (void *)eth_dev);
-       rte_intr_enable(&pci_dev->intr_handle);
+       rte_intr_callback_register(intr_handle,
+                                  ixgbevf_dev_interrupt_handler, eth_dev);
+       rte_intr_enable(intr_handle);
        ixgbevf_intr_enable(hw);
 
        PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x mac.type=%s",
@@ -1472,8 +1735,9 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev)
 static int
 eth_ixgbevf_dev_uninit(struct rte_eth_dev *eth_dev)
 {
+       struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(eth_dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct ixgbe_hw *hw;
-       struct rte_pci_device *pci_dev = eth_dev->pci_dev;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -1495,20 +1759,19 @@ eth_ixgbevf_dev_uninit(struct rte_eth_dev *eth_dev)
        rte_free(eth_dev->data->mac_addrs);
        eth_dev->data->mac_addrs = NULL;
 
-       rte_intr_disable(&pci_dev->intr_handle);
-       rte_intr_callback_unregister(&pci_dev->intr_handle,
-                                    ixgbevf_dev_interrupt_handler,
-                                    (void *)eth_dev);
+       rte_intr_disable(intr_handle);
+       rte_intr_callback_unregister(intr_handle,
+                                    ixgbevf_dev_interrupt_handler, eth_dev);
 
        return 0;
 }
 
 static struct eth_driver rte_ixgbe_pmd = {
        .pci_drv = {
-               .name = "rte_ixgbe_pmd",
                .id_table = pci_id_ixgbe_map,
-               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
-                       RTE_PCI_DRV_DETACHABLE,
+               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+               .probe = rte_eth_dev_pci_probe,
+               .remove = rte_eth_dev_pci_remove,
        },
        .eth_dev_init = eth_ixgbe_dev_init,
        .eth_dev_uninit = eth_ixgbe_dev_uninit,
@@ -1520,43 +1783,16 @@ static struct eth_driver rte_ixgbe_pmd = {
  */
 static struct eth_driver rte_ixgbevf_pmd = {
        .pci_drv = {
-               .name = "rte_ixgbevf_pmd",
                .id_table = pci_id_ixgbevf_map,
-               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_DETACHABLE,
+               .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
+               .probe = rte_eth_dev_pci_probe,
+               .remove = rte_eth_dev_pci_remove,
        },
        .eth_dev_init = eth_ixgbevf_dev_init,
        .eth_dev_uninit = eth_ixgbevf_dev_uninit,
        .dev_private_size = sizeof(struct ixgbe_adapter),
 };
 
-/*
- * Driver initialization routine.
- * Invoked once at EAL init time.
- * Register itself as the [Poll Mode] Driver of PCI IXGBE devices.
- */
-static int
-rte_ixgbe_pmd_init(const char *name __rte_unused, const char *params __rte_unused)
-{
-       PMD_INIT_FUNC_TRACE();
-
-       rte_eth_driver_register(&rte_ixgbe_pmd);
-       return 0;
-}
-
-/*
- * VF Driver initialization routine.
- * Invoked one at EAL init time.
- * Register itself as the [Virtual Poll Mode] Driver of PCI niantic devices.
- */
-static int
-rte_ixgbevf_pmd_init(const char *name __rte_unused, const char *param __rte_unused)
-{
-       PMD_INIT_FUNC_TRACE();
-
-       rte_eth_driver_register(&rte_ixgbevf_pmd);
-       return 0;
-}
-
 static int
 ixgbe_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 {
@@ -1768,6 +2004,7 @@ ixgbe_vlan_hw_strip_disable_all(struct rte_eth_dev *dev)
                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t ctrl;
        uint16_t i;
+       struct ixgbe_rx_queue *rxq;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -1778,9 +2015,10 @@ ixgbe_vlan_hw_strip_disable_all(struct rte_eth_dev *dev)
        } else {
                /* Other 10G NIC, the VLAN strip can be setup per queue in RXDCTL */
                for (i = 0; i < dev->data->nb_rx_queues; i++) {
-                       ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
+                       rxq = dev->data->rx_queues[i];
+                       ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
                        ctrl &= ~IXGBE_RXDCTL_VME;
-                       IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
+                       IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), ctrl);
 
                        /* record those setting for HW strip per queue */
                        ixgbe_vlan_hw_strip_bitmap_set(dev, i, 0);
@@ -1795,6 +2033,7 @@ ixgbe_vlan_hw_strip_enable_all(struct rte_eth_dev *dev)
                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t ctrl;
        uint16_t i;
+       struct ixgbe_rx_queue *rxq;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -1805,9 +2044,10 @@ ixgbe_vlan_hw_strip_enable_all(struct rte_eth_dev *dev)
        } else {
                /* Other 10G NIC, the VLAN strip can be setup per queue in RXDCTL */
                for (i = 0; i < dev->data->nb_rx_queues; i++) {
-                       ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
+                       rxq = dev->data->rx_queues[i];
+                       ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
                        ctrl |= IXGBE_RXDCTL_VME;
-                       IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
+                       IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), ctrl);
 
                        /* record those setting for HW strip per queue */
                        ixgbe_vlan_hw_strip_bitmap_set(dev, i, 1);
@@ -1910,6 +2150,8 @@ ixgbe_vmdq_vlan_hw_filter_enable(struct rte_eth_dev *dev)
 static int
 ixgbe_check_vf_rss_rxq_num(struct rte_eth_dev *dev, uint16_t nb_rx_q)
 {
+       struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev);
+
        switch (nb_rx_q) {
        case 1:
        case 2:
@@ -1923,7 +2165,7 @@ ixgbe_check_vf_rss_rxq_num(struct rte_eth_dev *dev, uint16_t nb_rx_q)
        }
 
        RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q;
-       RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx = dev->pci_dev->max_vfs * nb_rx_q;
+       RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx = pci_dev->max_vfs * nb_rx_q;
 
        return 0;
 }
@@ -1940,6 +2182,8 @@ ixgbe_check_mq_mode(struct rte_eth_dev *dev)
                /* check multi-queue mode */
                switch (dev_conf->rxmode.mq_mode) {
                case ETH_MQ_RX_VMDQ_DCB:
+                       PMD_INIT_LOG(INFO, "ETH_MQ_RX_VMDQ_DCB mode supported in SRIOV");
+                       break;
                case ETH_MQ_RX_VMDQ_DCB_RSS:
                        /* DCB/RSS VMDQ in SRIOV mode, not implement yet */
                        PMD_INIT_LOG(ERR, "SRIOV active,"
@@ -1975,11 +2219,9 @@ ixgbe_check_mq_mode(struct rte_eth_dev *dev)
 
                switch (dev_conf->txmode.mq_mode) {
                case ETH_MQ_TX_VMDQ_DCB:
-                       /* DCB VMDQ in SRIOV mode, not implement yet */
-                       PMD_INIT_LOG(ERR, "SRIOV is active,"
-                                       " unsupported VMDQ mq_mode tx %d.",
-                                       dev_conf->txmode.mq_mode);
-                       return -EINVAL;
+                       PMD_INIT_LOG(INFO, "ETH_MQ_TX_VMDQ_DCB mode supported in SRIOV");
+                       dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_DCB;
+                       break;
                default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */
                        dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY;
                        break;
@@ -2154,7 +2396,8 @@ ixgbe_dev_start(struct rte_eth_dev *dev)
                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct ixgbe_vf_info *vfinfo =
                *IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        uint32_t intr_vector = 0;
        int err, link_up = 0, negotiate = 0;
        uint32_t speed = 0;
@@ -2216,7 +2459,7 @@ ixgbe_dev_start(struct rte_eth_dev *dev)
                                    dev->data->nb_rx_queues * sizeof(int), 0);
                if (intr_handle->intr_vec == NULL) {
                        PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues"
-                                    " intr_vec\n", dev->data->nb_rx_queues);
+                                    " intr_vec", dev->data->nb_rx_queues);
                        return -ENOMEM;
                }
        }
@@ -2234,6 +2477,37 @@ ixgbe_dev_start(struct rte_eth_dev *dev)
                goto error;
        }
 
+       mask = ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK |
+               ETH_VLAN_EXTEND_MASK;
+       ixgbe_vlan_offload_set(dev, mask);
+
+       if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_ONLY) {
+               /* Enable vlan filtering for VMDq */
+               ixgbe_vmdq_vlan_hw_filter_enable(dev);
+       }
+
+       /* Configure DCB hw */
+       ixgbe_configure_dcb(dev);
+
+       if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_NONE) {
+               err = ixgbe_fdir_configure(dev);
+               if (err)
+                       goto error;
+       }
+
+       /* Restore vf rate limit */
+       if (vfinfo != NULL) {
+               for (vf = 0; vf < pci_dev->max_vfs; vf++)
+                       for (idx = 0; idx < IXGBE_MAX_QUEUE_NUM_PER_VF; idx++)
+                               if (vfinfo[vf].tx_rate[idx] != 0)
+                                       rte_pmd_ixgbe_set_vf_rate_limit(
+                                               dev->data->port_id, vf,
+                                               vfinfo[vf].tx_rate[idx],
+                                               1 << idx);
+       }
+
+       ixgbe_restore_statistics_mapping(dev);
+
        err = ixgbe_dev_rxtx_start(dev);
        if (err < 0) {
                PMD_INIT_LOG(ERR, "Unable to start rxtx queues");
@@ -2299,13 +2573,13 @@ skip_link_setup:
                /* check if lsc interrupt is enabled */
                if (dev->data->dev_conf.intr_conf.lsc != 0)
                        ixgbe_dev_lsc_interrupt_setup(dev);
+               ixgbe_dev_macsec_interrupt_setup(dev);
        } else {
                rte_intr_callback_unregister(intr_handle,
-                                            ixgbe_dev_interrupt_handler,
-                                            (void *)dev);
+                                            ixgbe_dev_interrupt_handler, dev);
                if (dev->data->dev_conf.intr_conf.lsc != 0)
                        PMD_INIT_LOG(INFO, "lsc won't enable because of"
-                                    " no intr multiplex\n");
+                                    " no intr multiplex");
        }
 
        /* check if rxq interrupt is enabled */
@@ -2318,36 +2592,8 @@ skip_link_setup:
 
        /* resume enabled intr since hw reset */
        ixgbe_enable_intr(dev);
-
-       mask = ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK |
-               ETH_VLAN_EXTEND_MASK;
-       ixgbe_vlan_offload_set(dev, mask);
-
-       if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_ONLY) {
-               /* Enable vlan filtering for VMDq */
-               ixgbe_vmdq_vlan_hw_filter_enable(dev);
-       }
-
-       /* Configure DCB hw */
-       ixgbe_configure_dcb(dev);
-
-       if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_NONE) {
-               err = ixgbe_fdir_configure(dev);
-               if (err)
-                       goto error;
-       }
-
-       /* Restore vf rate limit */
-       if (vfinfo != NULL) {
-               for (vf = 0; vf < dev->pci_dev->max_vfs; vf++)
-                       for (idx = 0; idx < IXGBE_MAX_QUEUE_NUM_PER_VF; idx++)
-                               if (vfinfo[vf].tx_rate[idx] != 0)
-                                       ixgbe_set_vf_rate_limit(dev, vf,
-                                               vfinfo[vf].tx_rate[idx],
-                                               1 << idx);
-       }
-
-       ixgbe_restore_statistics_mapping(dev);
+       ixgbe_l2_tunnel_conf(dev);
+       ixgbe_filter_restore(dev);
 
        return 0;
 
@@ -2368,10 +2614,8 @@ ixgbe_dev_stop(struct rte_eth_dev *dev)
                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct ixgbe_vf_info *vfinfo =
                *IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
-       struct ixgbe_filter_info *filter_info =
-               IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
-       struct ixgbe_5tuple_filter *p_5tuple, *p_5tuple_next;
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        int vf;
 
        PMD_INIT_FUNC_TRACE();
@@ -2386,8 +2630,7 @@ ixgbe_dev_stop(struct rte_eth_dev *dev)
        /* stop adapter */
        ixgbe_stop_adapter(hw);
 
-       for (vf = 0; vfinfo != NULL &&
-                    vf < dev->pci_dev->max_vfs; vf++)
+       for (vf = 0; vfinfo != NULL && vf < pci_dev->max_vfs; vf++)
                vfinfo[vf].clear_to_send = false;
 
        if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_copper) {
@@ -2408,17 +2651,6 @@ ixgbe_dev_stop(struct rte_eth_dev *dev)
        memset(&link, 0, sizeof(link));
        rte_ixgbe_dev_atomic_write_link_status(dev, &link);
 
-       /* Remove all ntuple filters of the device */
-       for (p_5tuple = TAILQ_FIRST(&filter_info->fivetuple_list);
-            p_5tuple != NULL; p_5tuple = p_5tuple_next) {
-               p_5tuple_next = TAILQ_NEXT(p_5tuple, entries);
-               TAILQ_REMOVE(&filter_info->fivetuple_list,
-                            p_5tuple, entries);
-               rte_free(p_5tuple);
-       }
-       memset(filter_info->fivetuple_mask, 0,
-               sizeof(uint32_t) * IXGBE_5TUPLE_ARRAY_SIZE);
-
        if (!rte_intr_allow_others(intr_handle))
                /* resume to the default handler */
                rte_intr_callback_register(intr_handle,
@@ -2520,6 +2752,7 @@ ixgbe_dev_close(struct rte_eth_dev *dev)
 static void
 ixgbe_read_stats_registers(struct ixgbe_hw *hw,
                           struct ixgbe_hw_stats *hw_stats,
+                          struct ixgbe_macsec_stats *macsec_stats,
                           uint64_t *total_missed_rx, uint64_t *total_qbrc,
                           uint64_t *total_qprc, uint64_t *total_qprdc)
 {
@@ -2527,9 +2760,9 @@ ixgbe_read_stats_registers(struct ixgbe_hw *hw,
        uint32_t delta_gprc = 0;
        unsigned i;
        /* Workaround for RX byte count not including CRC bytes when CRC
-+       * strip is enabled. CRC bytes are removed from counters when crc_strip
+        * strip is enabled. CRC bytes are removed from counters when crc_strip
         * is disabled.
-+       */
+        */
        int crc_strip = (IXGBE_READ_REG(hw, IXGBE_HLREG0) &
                        IXGBE_HLREG0_RXCRCSTRP);
 
@@ -2689,6 +2922,40 @@ ixgbe_read_stats_registers(struct ixgbe_hw *hw,
        /* Flow Director Stats registers */
        hw_stats->fdirmatch += IXGBE_READ_REG(hw, IXGBE_FDIRMATCH);
        hw_stats->fdirmiss += IXGBE_READ_REG(hw, IXGBE_FDIRMISS);
+
+       /* MACsec Stats registers */
+       macsec_stats->out_pkts_untagged += IXGBE_READ_REG(hw, IXGBE_LSECTXUT);
+       macsec_stats->out_pkts_encrypted +=
+               IXGBE_READ_REG(hw, IXGBE_LSECTXPKTE);
+       macsec_stats->out_pkts_protected +=
+               IXGBE_READ_REG(hw, IXGBE_LSECTXPKTP);
+       macsec_stats->out_octets_encrypted +=
+               IXGBE_READ_REG(hw, IXGBE_LSECTXOCTE);
+       macsec_stats->out_octets_protected +=
+               IXGBE_READ_REG(hw, IXGBE_LSECTXOCTP);
+       macsec_stats->in_pkts_untagged += IXGBE_READ_REG(hw, IXGBE_LSECRXUT);
+       macsec_stats->in_pkts_badtag += IXGBE_READ_REG(hw, IXGBE_LSECRXBAD);
+       macsec_stats->in_pkts_nosci += IXGBE_READ_REG(hw, IXGBE_LSECRXNOSCI);
+       macsec_stats->in_pkts_unknownsci +=
+               IXGBE_READ_REG(hw, IXGBE_LSECRXUNSCI);
+       macsec_stats->in_octets_decrypted +=
+               IXGBE_READ_REG(hw, IXGBE_LSECRXOCTD);
+       macsec_stats->in_octets_validated +=
+               IXGBE_READ_REG(hw, IXGBE_LSECRXOCTV);
+       macsec_stats->in_pkts_unchecked += IXGBE_READ_REG(hw, IXGBE_LSECRXUNCH);
+       macsec_stats->in_pkts_delayed += IXGBE_READ_REG(hw, IXGBE_LSECRXDELAY);
+       macsec_stats->in_pkts_late += IXGBE_READ_REG(hw, IXGBE_LSECRXLATE);
+       for (i = 0; i < 2; i++) {
+               macsec_stats->in_pkts_ok +=
+                       IXGBE_READ_REG(hw, IXGBE_LSECRXOK(i));
+               macsec_stats->in_pkts_invalid +=
+                       IXGBE_READ_REG(hw, IXGBE_LSECRXINV(i));
+               macsec_stats->in_pkts_notvalid +=
+                       IXGBE_READ_REG(hw, IXGBE_LSECRXNV(i));
+       }
+       macsec_stats->in_pkts_unusedsa += IXGBE_READ_REG(hw, IXGBE_LSECRXUNSA);
+       macsec_stats->in_pkts_notusingsa +=
+               IXGBE_READ_REG(hw, IXGBE_LSECRXNUSA);
 }
 
 /*
@@ -2701,6 +2968,9 @@ ixgbe_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
                        IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct ixgbe_hw_stats *hw_stats =
                        IXGBE_DEV_PRIVATE_TO_STATS(dev->data->dev_private);
+       struct ixgbe_macsec_stats *macsec_stats =
+                       IXGBE_DEV_PRIVATE_TO_MACSEC_STATS(
+                               dev->data->dev_private);
        uint64_t total_missed_rx, total_qbrc, total_qprc, total_qprdc;
        unsigned i;
 
@@ -2709,8 +2979,8 @@ ixgbe_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
        total_qprc = 0;
        total_qprdc = 0;
 
-       ixgbe_read_stats_registers(hw, hw_stats, &total_missed_rx, &total_qbrc,
-                       &total_qprc, &total_qprdc);
+       ixgbe_read_stats_registers(hw, hw_stats, macsec_stats, &total_missed_rx,
+                       &total_qbrc, &total_qprc, &total_qprdc);
 
        if (stats == NULL)
                return;
@@ -2762,7 +3032,7 @@ ixgbe_dev_stats_reset(struct rte_eth_dev *dev)
 /* This function calculates the number of xstats based on the current config */
 static unsigned
 ixgbe_xstats_calc_num(void) {
-       return IXGBE_NB_HW_STATS +
+       return IXGBE_NB_HW_STATS + IXGBE_NB_MACSEC_STATS +
                (IXGBE_NB_RXQ_PRIO_STATS * IXGBE_NB_RXQ_PRIO_VALUES) +
                (IXGBE_NB_TXQ_PRIO_STATS * IXGBE_NB_TXQ_PRIO_VALUES);
 }
@@ -2789,6 +3059,15 @@ static int ixgbe_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
                        count++;
                }
 
+               /* MACsec Stats */
+               for (i = 0; i < IXGBE_NB_MACSEC_STATS; i++) {
+                       snprintf(xstats_names[count].name,
+                               sizeof(xstats_names[count].name),
+                               "%s",
+                               rte_ixgbe_macsec_strings[i].name);
+                       count++;
+               }
+
                /* RX Priority Stats */
                for (stat = 0; stat < IXGBE_NB_RXQ_PRIO_STATS; stat++) {
                        for (i = 0; i < IXGBE_NB_RXQ_PRIO_VALUES; i++) {
@@ -2838,6 +3117,9 @@ ixgbe_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
                        IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct ixgbe_hw_stats *hw_stats =
                        IXGBE_DEV_PRIVATE_TO_STATS(dev->data->dev_private);
+       struct ixgbe_macsec_stats *macsec_stats =
+                       IXGBE_DEV_PRIVATE_TO_MACSEC_STATS(
+                               dev->data->dev_private);
        uint64_t total_missed_rx, total_qbrc, total_qprc, total_qprdc;
        unsigned i, stat, count = 0;
 
@@ -2851,8 +3133,8 @@ ixgbe_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
        total_qprc = 0;
        total_qprdc = 0;
 
-       ixgbe_read_stats_registers(hw, hw_stats, &total_missed_rx, &total_qbrc,
-                                  &total_qprc, &total_qprdc);
+       ixgbe_read_stats_registers(hw, hw_stats, macsec_stats, &total_missed_rx,
+                       &total_qbrc, &total_qprc, &total_qprdc);
 
        /* If this is a reset xstats is NULL, and we have cleared the
         * registers by reading them.
@@ -2865,6 +3147,15 @@ ixgbe_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
        for (i = 0; i < IXGBE_NB_HW_STATS; i++) {
                xstats[count].value = *(uint64_t *)(((char *)hw_stats) +
                                rte_ixgbe_stats_strings[i].offset);
+               xstats[count].id = count;
+               count++;
+       }
+
+       /* MACsec Stats */
+       for (i = 0; i < IXGBE_NB_MACSEC_STATS; i++) {
+               xstats[count].value = *(uint64_t *)(((char *)macsec_stats) +
+                               rte_ixgbe_macsec_strings[i].offset);
+               xstats[count].id = count;
                count++;
        }
 
@@ -2874,6 +3165,7 @@ ixgbe_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
                        xstats[count].value = *(uint64_t *)(((char *)hw_stats) +
                                        rte_ixgbe_rxq_strings[stat].offset +
                                        (sizeof(uint64_t) * i));
+                       xstats[count].id = count;
                        count++;
                }
        }
@@ -2884,6 +3176,7 @@ ixgbe_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
                        xstats[count].value = *(uint64_t *)(((char *)hw_stats) +
                                        rte_ixgbe_txq_strings[stat].offset +
                                        (sizeof(uint64_t) * i));
+                       xstats[count].id = count;
                        count++;
                }
        }
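
With the MACsec counters folded into the extended statistics above (and every xstats entry now carrying its id), an application reads them through the generic xstats API. A hedged usage sketch, assuming the two-call names/values pattern of rte_eth_xstats_get_names() and rte_eth_xstats_get():

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <rte_ethdev.h>

/* dump every extended statistic of a port, MACsec counters included */
static void demo_dump_xstats(uint8_t port_id)
{
	int n = rte_eth_xstats_get_names(port_id, NULL, 0);
	struct rte_eth_xstat_name *names = NULL;
	struct rte_eth_xstat *vals = NULL;
	int i;

	if (n <= 0)
		return;
	names = calloc(n, sizeof(*names));
	vals = calloc(n, sizeof(*vals));
	if (names == NULL || vals == NULL)
		goto out;
	if (rte_eth_xstats_get_names(port_id, names, n) != n ||
	    rte_eth_xstats_get(port_id, vals, n) != n)
		goto out;
	for (i = 0; i < n; i++)
		printf("%s: %" PRIu64 "\n", names[i].name, vals[i].value);
out:
	free(names);
	free(vals);
}
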
@@ -2895,6 +3188,9 @@ ixgbe_dev_xstats_reset(struct rte_eth_dev *dev)
 {
        struct ixgbe_hw_stats *stats =
                        IXGBE_DEV_PRIVATE_TO_STATS(dev->data->dev_private);
+       struct ixgbe_macsec_stats *macsec_stats =
+                       IXGBE_DEV_PRIVATE_TO_MACSEC_STATS(
+                               dev->data->dev_private);
 
        unsigned count = ixgbe_xstats_calc_num();
 
@@ -2903,6 +3199,7 @@ ixgbe_dev_xstats_reset(struct rte_eth_dev *dev)
 
        /* Reset software totals */
        memset(stats, 0, sizeof(*stats));
+       memset(macsec_stats, 0, sizeof(*macsec_stats));
 }
 
 static void
@@ -2991,12 +3288,35 @@ ixgbevf_dev_stats_reset(struct rte_eth_dev *dev)
        hw_stats->vfgotc = 0;
 }
 
+static int
+ixgbe_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size)
+{
+       struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       u16 eeprom_verh, eeprom_verl;
+       u32 etrack_id;
+       int ret;
+
+       ixgbe_read_eeprom(hw, 0x2e, &eeprom_verh);
+       ixgbe_read_eeprom(hw, 0x2d, &eeprom_verl);
+
+       etrack_id = (eeprom_verh << 16) | eeprom_verl;
+       ret = snprintf(fw_version, fw_size, "0x%08x", etrack_id);
+
+       ret += 1; /* add the size of '\0' */
+       if (fw_size < (u32)ret)
+               return ret;
+       else
+               return 0;
+}
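
The new ixgbe_fw_version_get() above follows the fw_version_get dev-op convention: it returns 0 when the caller's buffer is large enough, otherwise the number of bytes (including the terminating '\0') that would have been needed. A short usage sketch, assuming the generic rte_eth_dev_fw_version_get() wrapper that goes with this dev op:

#include <stdio.h>
#include <rte_ethdev.h>

static void demo_print_fw_version(uint8_t port_id)
{
	char ver[32];
	int ret = rte_eth_dev_fw_version_get(port_id, ver, sizeof(ver));

	if (ret == 0)
		printf("port %u fw: %s\n", port_id, ver);     /* fits */
	else if (ret > 0)
		printf("port %u fw: need %d bytes\n", port_id, ret);
	/* negative values: invalid port or op not supported */
}
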
+
 static void
 ixgbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
+       struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev);
        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
 
+       dev_info->pci_dev = pci_dev;
        dev_info->max_rx_queues = (uint16_t)hw->mac.max_rx_queues;
        dev_info->max_tx_queues = (uint16_t)hw->mac.max_tx_queues;
        if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
@@ -3012,7 +3332,7 @@ ixgbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
        dev_info->max_rx_pktlen = 15872; /* includes CRC, cf MAXFRS register */
        dev_info->max_mac_addrs = hw->mac.num_rar_entries;
        dev_info->max_hash_mac_addrs = IXGBE_VMDQ_NUM_UC_MAC;
-       dev_info->max_vfs = dev->pci_dev->max_vfs;
+       dev_info->max_vfs = pci_dev->max_vfs;
        if (hw->mac.type == ixgbe_mac_82598EB)
                dev_info->max_vmdq_pools = ETH_16_POOLS;
        else
@@ -3033,6 +3353,10 @@ ixgbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
            !RTE_ETH_DEV_SRIOV(dev).active)
                dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO;
 
+       if (hw->mac.type == ixgbe_mac_82599EB ||
+           hw->mac.type == ixgbe_mac_X540)
+               dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_MACSEC_STRIP;
+
        if (hw->mac.type == ixgbe_mac_X550 ||
            hw->mac.type == ixgbe_mac_X550EM_x ||
            hw->mac.type == ixgbe_mac_X550EM_a)
@@ -3046,6 +3370,10 @@ ixgbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                DEV_TX_OFFLOAD_SCTP_CKSUM  |
                DEV_TX_OFFLOAD_TCP_TSO;
 
+       if (hw->mac.type == ixgbe_mac_82599EB ||
+           hw->mac.type == ixgbe_mac_X540)
+               dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
+
        if (hw->mac.type == ixgbe_mac_X550 ||
            hw->mac.type == ixgbe_mac_X550EM_x ||
            hw->mac.type == ixgbe_mac_X550EM_a)
@@ -3126,15 +3454,17 @@ static void
 ixgbevf_dev_info_get(struct rte_eth_dev *dev,
                     struct rte_eth_dev_info *dev_info)
 {
+       struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev);
        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
+       dev_info->pci_dev = pci_dev;
        dev_info->max_rx_queues = (uint16_t)hw->mac.max_rx_queues;
        dev_info->max_tx_queues = (uint16_t)hw->mac.max_tx_queues;
        dev_info->min_rx_bufsize = 1024; /* cf BSIZEPACKET in SRRCTL reg */
-       dev_info->max_rx_pktlen = 15872; /* includes CRC, cf MAXFRS reg */
+       dev_info->max_rx_pktlen = 9728; /* includes CRC, cf MAXFRS reg */
        dev_info->max_mac_addrs = hw->mac.num_rar_entries;
        dev_info->max_hash_mac_addrs = IXGBE_VMDQ_NUM_UC_MAC;
-       dev_info->max_vfs = dev->pci_dev->max_vfs;
+       dev_info->max_vfs = pci_dev->max_vfs;
        if (hw->mac.type == ixgbe_mac_82598EB)
                dev_info->max_vmdq_pools = ETH_16_POOLS;
        else
@@ -3341,6 +3671,28 @@ ixgbe_dev_rxq_interrupt_setup(struct rte_eth_dev *dev)
        return 0;
 }
 
+/**
+ * It clears the interrupt causes and enables the interrupt.
+ * It will be called only once, during NIC initialization.
+ *
+ * @param dev
+ *  Pointer to struct rte_eth_dev.
+ *
+ * @return
+ *  - On success, zero.
+ *  - On failure, a negative value.
+ */
+static int
+ixgbe_dev_macsec_interrupt_setup(struct rte_eth_dev *dev)
+{
+       struct ixgbe_interrupt *intr =
+               IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+
+       intr->mask |= IXGBE_EICR_LINKSEC;
+
+       return 0;
+}
+
 /*
  * It reads ICR and sets flag (IXGBE_EICR_LSC) for the link_update.
  *
@@ -3375,6 +3727,9 @@ ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev)
        if (eicr & IXGBE_EICR_MAILBOX)
                intr->flags |= IXGBE_FLAG_MAILBOX;
 
+       if (eicr & IXGBE_EICR_LINKSEC)
+               intr->flags |= IXGBE_FLAG_MACSEC;
+
        if (hw->mac.type ==  ixgbe_mac_X550EM_x &&
            hw->phy.type == ixgbe_phy_x550em_ext_t &&
            (eicr & IXGBE_EICR_GPI_SDP0_X550EM_x))
@@ -3396,6 +3751,7 @@ ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev)
 static void
 ixgbe_dev_link_status_print(struct rte_eth_dev *dev)
 {
+       struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev);
        struct rte_eth_link link;
 
        memset(&link, 0, sizeof(link));
@@ -3410,11 +3766,11 @@ ixgbe_dev_link_status_print(struct rte_eth_dev *dev)
                PMD_INIT_LOG(INFO, " Port %d: Link Down",
                                (int)(dev->data->port_id));
        }
-       PMD_INIT_LOG(DEBUG, "PCI Address: %04d:%02d:%02d:%d",
-                               dev->pci_dev->addr.domain,
-                               dev->pci_dev->addr.bus,
-                               dev->pci_dev->addr.devid,
-                               dev->pci_dev->addr.function);
+       PMD_INIT_LOG(DEBUG, "PCI Address: " PCI_PRI_FMT,
+                               pci_dev->addr.domain,
+                               pci_dev->addr.bus,
+                               pci_dev->addr.devid,
+                               pci_dev->addr.function);
 }
 
 /*
@@ -3428,13 +3784,13 @@ ixgbe_dev_link_status_print(struct rte_eth_dev *dev)
  *  - On failure, a negative value.
  */
 static int
-ixgbe_dev_interrupt_action(struct rte_eth_dev *dev)
+ixgbe_dev_interrupt_action(struct rte_eth_dev *dev,
+                          struct rte_intr_handle *intr_handle)
 {
        struct ixgbe_interrupt *intr =
                IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
        int64_t timeout;
        struct rte_eth_link link;
-       int intr_enable_delay = false;
        struct ixgbe_hw *hw =
                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -3467,20 +3823,19 @@ ixgbe_dev_interrupt_action(struct rte_eth_dev *dev)
                        timeout = IXGBE_LINK_DOWN_CHECK_TIMEOUT;
 
                ixgbe_dev_link_status_print(dev);
-
-               intr_enable_delay = true;
-       }
-
-       if (intr_enable_delay) {
+               intr->mask_original = intr->mask;
+               /* only disable lsc interrupt */
+               intr->mask &= ~IXGBE_EIMS_LSC;
                if (rte_eal_alarm_set(timeout * 1000,
                                      ixgbe_dev_interrupt_delayed_handler, (void *)dev) < 0)
                        PMD_DRV_LOG(ERR, "Error setting alarm");
-       } else {
-               PMD_DRV_LOG(DEBUG, "enable intr immediately");
-               ixgbe_enable_intr(dev);
-               rte_intr_enable(&(dev->pci_dev->intr_handle));
+               else
+                       intr->mask = intr->mask_original;
        }
 
+       PMD_DRV_LOG(DEBUG, "enable intr immediately");
+       ixgbe_enable_intr(dev);
+       rte_intr_enable(intr_handle);
 
        return 0;
 }
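
The reworked LSC path above masks only the LSC bit, arms an EAL alarm, and lets the delayed handler finish the link check before the mask is restored. A standalone sketch of that defer pattern (the timeout and callback below are made up), assuming the rte_eal_alarm_set()/rte_eal_alarm_cancel() API:

#include <stdio.h>
#include <rte_alarm.h>

#define DEMO_TIMEOUT_US (4 * 1000 * 1000) /* hypothetical 4 second delay */

static void demo_delayed_handler(void *arg)
{
	/* runs in the interrupt thread once the timeout expires */
	printf("deferred work for %p\n", arg);
}

static int demo_defer(void *dev)
{
	/* schedule the delayed handler instead of acting immediately */
	return rte_eal_alarm_set(DEMO_TIMEOUT_US, demo_delayed_handler, dev);
}

static void demo_cancel(void *dev)
{
	/* drop any pending alarm for this device, e.g. on stop/close */
	rte_eal_alarm_cancel(demo_delayed_handler, dev);
}
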
@@ -3503,12 +3858,16 @@ static void
 ixgbe_dev_interrupt_delayed_handler(void *param)
 {
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
+       struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct ixgbe_interrupt *intr =
                IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
        struct ixgbe_hw *hw =
                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t eicr;
 
+       ixgbe_disable_intr(hw);
+
        eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
        if (eicr & IXGBE_EICR_MAILBOX)
                ixgbe_pf_mbx_process(dev);
@@ -3522,12 +3881,22 @@ ixgbe_dev_interrupt_delayed_handler(void *param)
                ixgbe_dev_link_update(dev, 0);
                intr->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
                ixgbe_dev_link_status_print(dev);
-               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC);
+               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
+       }
+
+       if (intr->flags & IXGBE_FLAG_MACSEC) {
+               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_MACSEC,
+                                             NULL);
+               intr->flags &= ~IXGBE_FLAG_MACSEC;
        }
 
+       /* restore original mask */
+       intr->mask = intr->mask_original;
+       intr->mask_original = 0;
+
        PMD_DRV_LOG(DEBUG, "enable intr in delayed handler S[%08x]", eicr);
        ixgbe_enable_intr(dev);
-       rte_intr_enable(&(dev->pci_dev->intr_handle));
+       rte_intr_enable(intr_handle);
 }
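
The delayed handler above now raises RTE_ETH_EVENT_MACSEC through _rte_eth_dev_callback_process(). An application that wants to see those events registers a callback; a hedged sketch, assuming the three-argument callback signature of this DPDK generation:

#include <stdio.h>
#include <rte_ethdev.h>

static void demo_macsec_cb(uint8_t port_id, enum rte_eth_event_type event,
			   void *cb_arg)
{
	(void)cb_arg;
	if (event == RTE_ETH_EVENT_MACSEC)
		printf("port %u: MACsec event\n", port_id);
}

static int demo_register_macsec_cb(uint8_t port_id)
{
	/* invoked from the interrupt thread when the PMD signals the event */
	return rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_MACSEC,
					     demo_macsec_cb, NULL);
}
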
 
 /**
@@ -3543,13 +3912,13 @@ ixgbe_dev_interrupt_delayed_handler(void *param)
  *  void
  */
 static void
-ixgbe_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
+ixgbe_dev_interrupt_handler(struct rte_intr_handle *handle,
                            void *param)
 {
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
 
        ixgbe_dev_interrupt_get_status(dev);
-       ixgbe_dev_interrupt_action(dev);
+       ixgbe_dev_interrupt_action(dev, handle);
 }
 
 static int
@@ -3913,7 +4282,7 @@ ixgbe_dev_rss_reta_update(struct rte_eth_dev *dev,
        if (reta_size != sp_reta_size) {
                PMD_DRV_LOG(ERR, "The size of hash lookup table configured "
                        "(%d) doesn't match the number hardware can supported "
-                       "(%d)\n", reta_size, sp_reta_size);
+                       "(%d)", reta_size, sp_reta_size);
                return -EINVAL;
        }
 
@@ -3960,7 +4329,7 @@ ixgbe_dev_rss_reta_query(struct rte_eth_dev *dev,
        if (reta_size != sp_reta_size) {
                PMD_DRV_LOG(ERR, "The size of hash lookup table configured "
                        "(%d) doesn't match the number hardware can supported "
-                       "(%d)\n", reta_size, sp_reta_size);
+                       "(%d)", reta_size, sp_reta_size);
                return -EINVAL;
        }
 
@@ -4012,25 +4381,72 @@ ixgbe_set_default_mac_addr(struct rte_eth_dev *dev, struct ether_addr *addr)
 }
 
 static int
-ixgbe_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+is_ixgbe_pmd(const char *driver_name)
+{
+       if (!strstr(driver_name, "ixgbe"))
+               return -ENOTSUP;
+
+       if (strstr(driver_name, "ixgbe_vf"))
+               return -ENOTSUP;
+
+       return 0;
+}
+
+int
+rte_pmd_ixgbe_set_vf_mac_addr(uint8_t port, uint16_t vf,
+               struct ether_addr *mac_addr)
 {
-       uint32_t hlreg0;
-       uint32_t maxfrs;
        struct ixgbe_hw *hw;
+       struct ixgbe_vf_info *vfinfo;
+       int rar_entry;
+       uint8_t *new_mac = (uint8_t *)(mac_addr);
+       struct rte_eth_dev *dev;
        struct rte_eth_dev_info dev_info;
-       uint32_t frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
 
-       ixgbe_dev_info_get(dev, &dev_info);
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
 
-       /* check that mtu is within the allowed range */
-       if ((mtu < ETHER_MIN_MTU) || (frame_size > dev_info.max_rx_pktlen))
+       dev = &rte_eth_devices[port];
+       rte_eth_dev_info_get(port, &dev_info);
+
+       if (is_ixgbe_pmd(dev_info.driver_name) != 0)
+               return -ENOTSUP;
+
+       if (vf >= dev_info.max_vfs)
                return -EINVAL;
 
-       /* refuse mtu that requires the support of scattered packets when this
-        * feature has not been enabled before.
-        */
-       if (!dev->data->scattered_rx &&
-           (frame_size + 2 * IXGBE_VLAN_TAG_SIZE >
+       hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       vfinfo = *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private));
+       rar_entry = hw->mac.num_rar_entries - (vf + 1);
+
+       if (is_valid_assigned_ether_addr((struct ether_addr *)new_mac)) {
+               rte_memcpy(vfinfo[vf].vf_mac_addresses, new_mac,
+                               ETHER_ADDR_LEN);
+               return hw->mac.ops.set_rar(hw, rar_entry, new_mac, vf,
+                               IXGBE_RAH_AV);
+       }
+       return -EINVAL;
+}
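
The new rte_pmd_ixgbe_* entry points share the same validation chain: check the port id, confirm via is_ixgbe_pmd() that the port is driven by the ixgbe PF PMD, and bound the VF index by dev_info.max_vfs before touching registers. A hedged usage sketch of the MAC helper above (port, VF index and address are made-up values; the rte_pmd_ixgbe.h header name is an assumption):

    #include <rte_ether.h>
    #include <rte_ethdev.h>
    #include <rte_pmd_ixgbe.h>      /* assumed header for the PMD-specific API */

    /* Assign a locally administered unicast MAC to VF 0 of port 0. */
    static int assign_vf_mac(void)
    {
            struct ether_addr mac = {
                    .addr_bytes = { 0x02, 0x09, 0xc0, 0xde, 0x00, 0x01 },
            };

            /* -ENOTSUP: port not driven by the ixgbe PF PMD;
             * -EINVAL: bad VF index or non-unicast address. */
            return rte_pmd_ixgbe_set_vf_mac_addr(0 /* port */, 0 /* vf */, &mac);
    }
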
+
+static int
+ixgbe_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+       uint32_t hlreg0;
+       uint32_t maxfrs;
+       struct ixgbe_hw *hw;
+       struct rte_eth_dev_info dev_info;
+       uint32_t frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
+
+       ixgbe_dev_info_get(dev, &dev_info);
+
+       /* check that mtu is within the allowed range */
+       if ((mtu < ETHER_MIN_MTU) || (frame_size > dev_info.max_rx_pktlen))
+               return -EINVAL;
+
+       /* refuse mtu that requires the support of scattered packets when this
+        * feature has not been enabled before.
+        */
+       if (!dev->data->scattered_rx &&
+           (frame_size + 2 * IXGBE_VLAN_TAG_SIZE >
             dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM))
                return -EINVAL;
 
@@ -4127,7 +4543,8 @@ ixgbevf_dev_start(struct rte_eth_dev *dev)
        struct ixgbe_hw *hw =
                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t intr_vector = 0;
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
        int err, mask = 0;
 
@@ -4172,7 +4589,7 @@ ixgbevf_dev_start(struct rte_eth_dev *dev)
                                    dev->data->nb_rx_queues * sizeof(int), 0);
                if (intr_handle->intr_vec == NULL) {
                        PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues"
-                                    " intr_vec\n", dev->data->nb_rx_queues);
+                                    " intr_vec", dev->data->nb_rx_queues);
                        return -ENOMEM;
                }
        }
@@ -4190,7 +4607,8 @@ static void
 ixgbevf_dev_stop(struct rte_eth_dev *dev)
 {
        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -4332,14 +4750,14 @@ ixgbevf_vlan_offload_set(struct rte_eth_dev *dev, int mask)
 }
 
 static int
-ixgbe_vmdq_mode_check(struct ixgbe_hw *hw)
+ixgbe_vt_check(struct ixgbe_hw *hw)
 {
        uint32_t reg_val;
 
-       /* we only need to do this if VMDq is enabled */
+       /* check whether Virtualization Technology is enabled */
        reg_val = IXGBE_READ_REG(hw, IXGBE_VT_CTL);
        if (!(reg_val & IXGBE_VT_CTL_VT_ENABLE)) {
-               PMD_INIT_LOG(ERR, "VMDq must be enabled for this setting");
+               PMD_INIT_LOG(ERR, "VT must be enabled for this setting");
                return -1;
        }
 
@@ -4477,22 +4895,274 @@ ixgbe_convert_vm_rx_mask_to_val(uint16_t rx_mask, uint32_t orig_val)
        return new_val;
 }
 
-static int
-ixgbe_set_pool_rx_mode(struct rte_eth_dev *dev, uint16_t pool,
-                              uint16_t rx_mask, uint8_t on)
+
+int
+rte_pmd_ixgbe_set_vf_vlan_anti_spoof(uint8_t port, uint16_t vf, uint8_t on)
+{
+       struct ixgbe_hw *hw;
+       struct ixgbe_mac_info *mac;
+       struct rte_eth_dev *dev;
+       struct rte_eth_dev_info dev_info;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+       rte_eth_dev_info_get(port, &dev_info);
+
+       if (is_ixgbe_pmd(dev_info.driver_name) != 0)
+               return -ENOTSUP;
+
+       if (vf >= dev_info.max_vfs)
+               return -EINVAL;
+
+       if (on > 1)
+               return -EINVAL;
+
+       hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       mac = &hw->mac;
+
+       mac->ops.set_vlan_anti_spoofing(hw, on, vf);
+
+       return 0;
+}
+
+int
+rte_pmd_ixgbe_set_vf_mac_anti_spoof(uint8_t port, uint16_t vf, uint8_t on)
+{
+       struct ixgbe_hw *hw;
+       struct ixgbe_mac_info *mac;
+       struct rte_eth_dev *dev;
+       struct rte_eth_dev_info dev_info;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+       rte_eth_dev_info_get(port, &dev_info);
+
+       if (is_ixgbe_pmd(dev_info.driver_name) != 0)
+               return -ENOTSUP;
+
+       if (vf >= dev_info.max_vfs)
+               return -EINVAL;
+
+       if (on > 1)
+               return -EINVAL;
+
+       hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       mac = &hw->mac;
+       mac->ops.set_mac_anti_spoofing(hw, on, vf);
+
+       return 0;
+}
+
+int
+rte_pmd_ixgbe_set_vf_vlan_insert(uint8_t port, uint16_t vf, uint16_t vlan_id)
+{
+       struct ixgbe_hw *hw;
+       uint32_t ctrl;
+       struct rte_eth_dev *dev;
+       struct rte_eth_dev_info dev_info;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+       rte_eth_dev_info_get(port, &dev_info);
+
+       if (is_ixgbe_pmd(dev_info.driver_name) != 0)
+               return -ENOTSUP;
+
+       if (vf >= dev_info.max_vfs)
+               return -EINVAL;
+
+       if (vlan_id > ETHER_MAX_VLAN_ID)
+               return -EINVAL;
+
+       hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       ctrl = IXGBE_READ_REG(hw, IXGBE_VMVIR(vf));
+       if (vlan_id) {
+               ctrl = vlan_id;
+               ctrl |= IXGBE_VMVIR_VLANA_DEFAULT;
+       } else {
+               ctrl = 0;
+       }
+
+       IXGBE_WRITE_REG(hw, IXGBE_VMVIR(vf), ctrl);
+
+       return 0;
+}
+
+int
+rte_pmd_ixgbe_set_tx_loopback(uint8_t port, uint8_t on)
+{
+       struct ixgbe_hw *hw;
+       uint32_t ctrl;
+       struct rte_eth_dev *dev;
+       struct rte_eth_dev_info dev_info;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+       rte_eth_dev_info_get(port, &dev_info);
+
+       if (is_ixgbe_pmd(dev_info.driver_name) != 0)
+               return -ENOTSUP;
+
+       if (on > 1)
+               return -EINVAL;
+
+       hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       ctrl = IXGBE_READ_REG(hw, IXGBE_PFDTXGSWC);
+       /* enable or disable VMDQ loopback */
+       if (on)
+               ctrl |= IXGBE_PFDTXGSWC_VT_LBEN;
+       else
+               ctrl &= ~IXGBE_PFDTXGSWC_VT_LBEN;
+
+       IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, ctrl);
+
+       return 0;
+}
+
+int
+rte_pmd_ixgbe_set_all_queues_drop_en(uint8_t port, uint8_t on)
+{
+       struct ixgbe_hw *hw;
+       uint32_t reg_value;
+       int i;
+       int num_queues = (int)(IXGBE_QDE_IDX_MASK >> IXGBE_QDE_IDX_SHIFT);
+       struct rte_eth_dev *dev;
+       struct rte_eth_dev_info dev_info;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+       rte_eth_dev_info_get(port, &dev_info);
+
+       if (is_ixgbe_pmd(dev_info.driver_name) != 0)
+               return -ENOTSUP;
+
+       if (on > 1)
+               return -EINVAL;
+
+       hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       for (i = 0; i <= num_queues; i++) {
+               reg_value = IXGBE_QDE_WRITE |
+                               (i << IXGBE_QDE_IDX_SHIFT) |
+                               (on & IXGBE_QDE_ENABLE);
+               IXGBE_WRITE_REG(hw, IXGBE_QDE, reg_value);
+       }
+
+       return 0;
+}
+
+int
+rte_pmd_ixgbe_set_vf_split_drop_en(uint8_t port, uint16_t vf, uint8_t on)
+{
+       struct ixgbe_hw *hw;
+       uint32_t reg_value;
+       struct rte_eth_dev *dev;
+       struct rte_eth_dev_info dev_info;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+       rte_eth_dev_info_get(port, &dev_info);
+
+       if (is_ixgbe_pmd(dev_info.driver_name) != 0)
+               return -ENOTSUP;
+
+       /* only support VF's 0 to 63 */
+       if ((vf >= dev_info.max_vfs) || (vf > 63))
+               return -EINVAL;
+
+       if (on > 1)
+               return -EINVAL;
+
+       hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       reg_value = IXGBE_READ_REG(hw, IXGBE_SRRCTL(vf));
+       if (on)
+               reg_value |= IXGBE_SRRCTL_DROP_EN;
+       else
+               reg_value &= ~IXGBE_SRRCTL_DROP_EN;
+
+       IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(vf), reg_value);
+
+       return 0;
+}
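
rte_pmd_ixgbe_set_all_queues_drop_en() walks every queue index encodable in the QDE register, while rte_pmd_ixgbe_set_vf_split_drop_en() flips the drop bit in a single VF's SRRCTL register. A small usage sketch (port and VF numbers are illustrative; the header name is an assumption):

    #include <stdint.h>
    #include <rte_pmd_ixgbe.h>      /* assumed header for the PMD-specific API */

    /* Illustrative: drop packets on every queue when descriptors run out,
     * and additionally set the split-drop bit for VF 3. */
    static int enable_no_descriptor_drop(uint8_t port)
    {
            int ret;

            ret = rte_pmd_ixgbe_set_all_queues_drop_en(port, 1);
            if (ret < 0)
                    return ret;     /* -ENOTSUP on non-ixgbe ports */
            return rte_pmd_ixgbe_set_vf_split_drop_en(port, 3 /* vf */, 1);
    }
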
+
+int
+rte_pmd_ixgbe_set_vf_vlan_stripq(uint8_t port, uint16_t vf, uint8_t on)
+{
+       struct rte_eth_dev *dev;
+       struct rte_eth_dev_info dev_info;
+       uint16_t queues_per_pool;
+       uint32_t q;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+       rte_eth_dev_info_get(port, &dev_info);
+
+       if (is_ixgbe_pmd(dev_info.driver_name) != 0)
+               return -ENOTSUP;
+
+       if (vf >= dev_info.max_vfs)
+               return -EINVAL;
+
+       if (on > 1)
+               return -EINVAL;
+
+       RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_strip_queue_set, -ENOTSUP);
+
+       /* The PF has 128 queue pairs and in SRIOV configuration
+        * those queues are assigned to VFs, so the RXDCTL
+        * registers operate on queues that belong to VFs.
+        * For example, with SRIOV configured for 31 VFs, the
+        * first 124 queues (0-123) are allocated to the VFs and
+        * only the last 4 queues (124-127) remain with the PF.
+        */
+
+       queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
+
+       for (q = 0; q < queues_per_pool; q++)
+               (*dev->dev_ops->vlan_strip_queue_set)(dev,
+                               q + vf * queues_per_pool, on);
+       return 0;
+}
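
The comment above can be made concrete with a worked example of the same arithmetic: 128 VMDq queues spread over 32 pools gives queues_per_pool = 4, so VF 5 owns queues 20-23, which is exactly the range the loop walks. A minimal sketch (values illustrative):

    #include <stdint.h>

    /* Compute the queue range owned by one VF, mirroring the loop above. */
    static void vf_queue_range(uint16_t vmdq_queue_num, uint16_t max_vmdq_pools,
                               uint16_t vf, uint32_t *first, uint32_t *last)
    {
            uint16_t queues_per_pool = vmdq_queue_num / max_vmdq_pools;

            *first = (uint32_t)vf * queues_per_pool;        /* 5 * 4 = 20 */
            *last = *first + queues_per_pool - 1;           /* 23 */
    }
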
+
+int
+rte_pmd_ixgbe_set_vf_rxmode(uint8_t port, uint16_t vf, uint16_t rx_mask, uint8_t on)
 {
        int val = 0;
+       struct rte_eth_dev *dev;
+       struct rte_eth_dev_info dev_info;
+       struct ixgbe_hw *hw;
+       uint32_t vmolr;
 
-       struct ixgbe_hw *hw =
-               IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       uint32_t vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(pool));
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+       rte_eth_dev_info_get(port, &dev_info);
+
+       if (is_ixgbe_pmd(dev_info.driver_name) != 0)
+               return -ENOTSUP;
+
+       if (vf >= dev_info.max_vfs)
+               return -EINVAL;
+
+       if (on > 1)
+               return -EINVAL;
+
+       hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
 
        if (hw->mac.type == ixgbe_mac_82598EB) {
                PMD_INIT_LOG(ERR, "setting VF receive mode set should be done"
                             " on 82599 hardware and newer");
                return -ENOTSUP;
        }
-       if (ixgbe_vmdq_mode_check(hw) < 0)
+       if (ixgbe_vt_check(hw) < 0)
                return -ENOTSUP;
 
        val = ixgbe_convert_vm_rx_mask_to_val(rx_mask, val);
@@ -4502,34 +5172,47 @@ ixgbe_set_pool_rx_mode(struct rte_eth_dev *dev, uint16_t pool,
        else
                vmolr &= ~val;
 
-       IXGBE_WRITE_REG(hw, IXGBE_VMOLR(pool), vmolr);
+       IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
 
        return 0;
 }
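
rx_mask is the set of ETH_VMDQ_ACCEPT_* bits that ixgbe_convert_vm_rx_mask_to_val() translates into VMOLR fields. A hedged usage sketch accepting untagged, broadcast and multicast traffic on one VF (port and VF numbers are illustrative; the header name is an assumption):

    #include <stdint.h>
    #include <rte_ethdev.h>
    #include <rte_pmd_ixgbe.h>      /* assumed header for the PMD-specific API */

    /* Illustrative: let VF 1 on port 0 accept untagged, broadcast and
     * multicast frames; the bits map to VMOLR fields as shown above. */
    static int open_vf_rx_modes(void)
    {
            uint16_t mask = ETH_VMDQ_ACCEPT_UNTAG |
                            ETH_VMDQ_ACCEPT_BROADCAST |
                            ETH_VMDQ_ACCEPT_MULTICAST;

            return rte_pmd_ixgbe_set_vf_rxmode(0 /* port */, 1 /* vf */,
                                               mask, 1 /* on */);
    }
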
 
-static int
-ixgbe_set_pool_rx(struct rte_eth_dev *dev, uint16_t pool, uint8_t on)
+int
+rte_pmd_ixgbe_set_vf_rx(uint8_t port, uint16_t vf, uint8_t on)
 {
+       struct rte_eth_dev *dev;
+       struct rte_eth_dev_info dev_info;
        uint32_t reg, addr;
        uint32_t val;
        const uint8_t bit1 = 0x1;
+       struct ixgbe_hw *hw;
 
-       struct ixgbe_hw *hw =
-               IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
 
-       if (ixgbe_vmdq_mode_check(hw) < 0)
+       dev = &rte_eth_devices[port];
+       rte_eth_dev_info_get(port, &dev_info);
+
+       if (is_ixgbe_pmd(dev_info.driver_name) != 0)
                return -ENOTSUP;
 
-       if (pool >= ETH_64_POOLS)
+       if (vf >= dev_info.max_vfs)
+               return -EINVAL;
+
+       if (on > 1)
                return -EINVAL;
 
-       /* for pool >= 32, set bit in PFVFRE[1], otherwise PFVFRE[0] */
-       if (pool >= 32) {
+       hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       if (ixgbe_vt_check(hw) < 0)
+               return -ENOTSUP;
+
+       /* for vf >= 32, set bit in PFVFRE[1], otherwise PFVFRE[0] */
+       if (vf >= 32) {
                addr = IXGBE_VFRE(1);
-               val = bit1 << (pool - 32);
+               val = bit1 << (vf - 32);
        } else {
                addr = IXGBE_VFRE(0);
-               val = bit1 << pool;
+               val = bit1 << vf;
        }
 
        reg = IXGBE_READ_REG(hw, addr);
@@ -4544,29 +5227,42 @@ ixgbe_set_pool_rx(struct rte_eth_dev *dev, uint16_t pool, uint8_t on)
        return 0;
 }
 
-static int
-ixgbe_set_pool_tx(struct rte_eth_dev *dev, uint16_t pool, uint8_t on)
+int
+rte_pmd_ixgbe_set_vf_tx(uint8_t port, uint16_t vf, uint8_t on)
 {
+       struct rte_eth_dev *dev;
+       struct rte_eth_dev_info dev_info;
        uint32_t reg, addr;
        uint32_t val;
        const uint8_t bit1 = 0x1;
 
-       struct ixgbe_hw *hw =
-               IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct ixgbe_hw *hw;
 
-       if (ixgbe_vmdq_mode_check(hw) < 0)
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+       rte_eth_dev_info_get(port, &dev_info);
+
+       if (is_ixgbe_pmd(dev_info.driver_name) != 0)
                return -ENOTSUP;
 
-       if (pool >= ETH_64_POOLS)
+       if (vf >= dev_info.max_vfs)
+               return -EINVAL;
+
+       if (on > 1)
                return -EINVAL;
 
-       /* for pool >= 32, set bit in PFVFTE[1], otherwise PFVFTE[0] */
-       if (pool >= 32) {
+       hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       if (ixgbe_vt_check(hw) < 0)
+               return -ENOTSUP;
+
+       /* for vf >= 32, set bit in PFVFTE[1], otherwise PFVFTE[0] */
+       if (vf >= 32) {
                addr = IXGBE_VFTE(1);
-               val = bit1 << (pool - 32);
+               val = bit1 << (vf - 32);
        } else {
                addr = IXGBE_VFTE(0);
-               val = bit1 << pool;
+               val = bit1 << vf;
        }
 
        reg = IXGBE_READ_REG(hw, addr);
@@ -4581,20 +5277,34 @@ ixgbe_set_pool_tx(struct rte_eth_dev *dev, uint16_t pool, uint8_t on)
        return 0;
 }
 
-static int
-ixgbe_set_pool_vlan_filter(struct rte_eth_dev *dev, uint16_t vlan,
-                       uint64_t pool_mask, uint8_t vlan_on)
+int
+rte_pmd_ixgbe_set_vf_vlan_filter(uint8_t port, uint16_t vlan,
+                       uint64_t vf_mask, uint8_t vlan_on)
 {
+       struct rte_eth_dev *dev;
+       struct rte_eth_dev_info dev_info;
        int ret = 0;
-       uint16_t pool_idx;
-       struct ixgbe_hw *hw =
-               IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       uint16_t vf_idx;
+       struct ixgbe_hw *hw;
 
-       if (ixgbe_vmdq_mode_check(hw) < 0)
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       dev = &rte_eth_devices[port];
+       rte_eth_dev_info_get(port, &dev_info);
+
+       if (is_ixgbe_pmd(dev_info.driver_name) != 0)
+               return -ENOTSUP;
+
+       if ((vlan > ETHER_MAX_VLAN_ID) || (vf_mask == 0))
+               return -EINVAL;
+
+       hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       if (ixgbe_vt_check(hw) < 0)
                return -ENOTSUP;
-       for (pool_idx = 0; pool_idx < ETH_64_POOLS; pool_idx++) {
-               if (pool_mask & ((uint64_t)(1ULL << pool_idx))) {
-                       ret = hw->mac.ops.set_vfta(hw, vlan, pool_idx,
+
+       for (vf_idx = 0; vf_idx < 64; vf_idx++) {
+               if (vf_mask & ((uint64_t)(1ULL << vf_idx))) {
+                       ret = hw->mac.ops.set_vfta(hw, vlan, vf_idx,
                                                   vlan_on, false);
                        if (ret < 0)
                                return ret;
@@ -4604,59 +5314,141 @@ ixgbe_set_pool_vlan_filter(struct rte_eth_dev *dev, uint16_t vlan,
        return ret;
 }
 
-#define IXGBE_MRCTL_VPME  0x01 /* Virtual Pool Mirroring. */
-#define IXGBE_MRCTL_UPME  0x02 /* Uplink Port Mirroring. */
-#define IXGBE_MRCTL_DPME  0x04 /* Downlink Port Mirroring. */
-#define IXGBE_MRCTL_VLME  0x08 /* VLAN Mirroring. */
-#define IXGBE_INVALID_MIRROR_TYPE(mirror_type) \
-       ((mirror_type) & ~(uint8_t)(ETH_MIRROR_VIRTUAL_POOL_UP | \
-       ETH_MIRROR_UPLINK_PORT | ETH_MIRROR_DOWNLINK_PORT | ETH_MIRROR_VLAN))
-
-static int
-ixgbe_mirror_rule_set(struct rte_eth_dev *dev,
-                       struct rte_eth_mirror_conf *mirror_conf,
-                       uint8_t rule_id, uint8_t on)
+int rte_pmd_ixgbe_set_vf_rate_limit(uint8_t port, uint16_t vf,
+       uint16_t tx_rate, uint64_t q_msk)
 {
-       uint32_t mr_ctl, vlvf;
-       uint32_t mp_lsb = 0;
-       uint32_t mv_msb = 0;
-       uint32_t mv_lsb = 0;
-       uint32_t mp_msb = 0;
-       uint8_t i = 0;
-       int reg_index = 0;
-       uint64_t vlan_mask = 0;
+       struct rte_eth_dev *dev;
+       struct rte_eth_dev_info dev_info;
+       struct ixgbe_hw *hw;
+       struct ixgbe_vf_info *vfinfo;
+       struct rte_eth_link link;
+       uint8_t  nb_q_per_pool;
+       uint32_t queue_stride;
+       uint32_t queue_idx, idx = 0, vf_idx;
+       uint32_t queue_end;
+       uint16_t total_rate = 0;
+       struct rte_pci_device *pci_dev;
 
-       const uint8_t pool_mask_offset = 32;
-       const uint8_t vlan_mask_offset = 32;
-       const uint8_t dst_pool_offset = 8;
-       const uint8_t rule_mr_offset  = 4;
-       const uint8_t mirror_rule_mask = 0x0F;
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
 
-       struct ixgbe_mirror_info *mr_info =
-                       (IXGBE_DEV_PRIVATE_TO_PFDATA(dev->data->dev_private));
-       struct ixgbe_hw *hw =
-               IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       uint8_t mirror_type = 0;
+       dev = &rte_eth_devices[port];
+       rte_eth_dev_info_get(port, &dev_info);
+       rte_eth_link_get_nowait(port, &link);
 
-       if (ixgbe_vmdq_mode_check(hw) < 0)
+       if (is_ixgbe_pmd(dev_info.driver_name) != 0)
                return -ENOTSUP;
 
-       if (rule_id >= IXGBE_MAX_MIRROR_RULES)
+       if (vf >= dev_info.max_vfs)
                return -EINVAL;
 
-       if (IXGBE_INVALID_MIRROR_TYPE(mirror_conf->rule_type)) {
-               PMD_DRV_LOG(ERR, "unsupported mirror type 0x%x.",
-                       mirror_conf->rule_type);
+       if (tx_rate > link.link_speed)
                return -EINVAL;
-       }
 
-       if (mirror_conf->rule_type & ETH_MIRROR_VLAN) {
-               mirror_type |= IXGBE_MRCTL_VLME;
-               /* Check if vlan id is valid and find conresponding VLAN ID index in VLVF */
-               for (i = 0; i < IXGBE_VLVF_ENTRIES; i++) {
-                       if (mirror_conf->vlan.vlan_mask & (1ULL << i)) {
-                               /* search vlan id related pool vlan filter index */
-                               reg_index = ixgbe_find_vlvf_slot(hw,
+       if (q_msk == 0)
+               return 0;
+
+       pci_dev = IXGBE_DEV_TO_PCI(dev);
+       hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       vfinfo = *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private));
+       nb_q_per_pool = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+       queue_stride = IXGBE_MAX_RX_QUEUE_NUM / RTE_ETH_DEV_SRIOV(dev).active;
+       queue_idx = vf * queue_stride;
+       queue_end = queue_idx + nb_q_per_pool - 1;
+       if (queue_end >= hw->mac.max_tx_queues)
+               return -EINVAL;
+
+       if (vfinfo) {
+               for (vf_idx = 0; vf_idx < pci_dev->max_vfs; vf_idx++) {
+                       if (vf_idx == vf)
+                               continue;
+                       for (idx = 0; idx < RTE_DIM(vfinfo[vf_idx].tx_rate);
+                               idx++)
+                               total_rate += vfinfo[vf_idx].tx_rate[idx];
+               }
+       } else {
+               return -EINVAL;
+       }
+
+       /* Store tx_rate for this vf. */
+       for (idx = 0; idx < nb_q_per_pool; idx++) {
+               if (((uint64_t)0x1 << idx) & q_msk) {
+                       if (vfinfo[vf].tx_rate[idx] != tx_rate)
+                               vfinfo[vf].tx_rate[idx] = tx_rate;
+                       total_rate += tx_rate;
+               }
+       }
+
+       if (total_rate > dev->data->dev_link.link_speed) {
+               /* Reset the stored TX rate of the VF if it would exceed
+                * the link speed.
+                */
+               memset(vfinfo[vf].tx_rate, 0, sizeof(vfinfo[vf].tx_rate));
+               return -EINVAL;
+       }
+
+       /* Set RTTBCNRC of each queue/pool for vf X  */
+       for (; queue_idx <= queue_end; queue_idx++) {
+               if (0x1 & q_msk)
+                       ixgbe_set_queue_rate_limit(dev, queue_idx, tx_rate);
+               q_msk = q_msk >> 1;
+       }
+
+       return 0;
+}
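
The rate-limit helper sums the rates already programmed for all other VFs, adds tx_rate once per queue selected by q_msk, rejects the request if the total would exceed the negotiated link speed, and otherwise programs RTTBCNRC per queue via ixgbe_set_queue_rate_limit(). A hedged usage sketch (values illustrative; the header name is an assumption):

    #include <stdint.h>
    #include <rte_pmd_ixgbe.h>      /* assumed header for the PMD-specific API */

    /* Illustrative: cap VF 2 on port 0 at 1000 Mbps on the first two queues
     * of its pool; bit i of q_msk selects queue i inside the pool. */
    static int cap_vf_tx_rate(void)
    {
            uint64_t q_msk = 0x3;   /* pool queues 0 and 1 */

            return rte_pmd_ixgbe_set_vf_rate_limit(0 /* port */, 2 /* vf */,
                                                   1000 /* Mbps */, q_msk);
    }
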
+
+#define IXGBE_MRCTL_VPME  0x01 /* Virtual Pool Mirroring. */
+#define IXGBE_MRCTL_UPME  0x02 /* Uplink Port Mirroring. */
+#define IXGBE_MRCTL_DPME  0x04 /* Downlink Port Mirroring. */
+#define IXGBE_MRCTL_VLME  0x08 /* VLAN Mirroring. */
+#define IXGBE_INVALID_MIRROR_TYPE(mirror_type) \
+       ((mirror_type) & ~(uint8_t)(ETH_MIRROR_VIRTUAL_POOL_UP | \
+       ETH_MIRROR_UPLINK_PORT | ETH_MIRROR_DOWNLINK_PORT | ETH_MIRROR_VLAN))
+
+static int
+ixgbe_mirror_rule_set(struct rte_eth_dev *dev,
+                       struct rte_eth_mirror_conf *mirror_conf,
+                       uint8_t rule_id, uint8_t on)
+{
+       uint32_t mr_ctl, vlvf;
+       uint32_t mp_lsb = 0;
+       uint32_t mv_msb = 0;
+       uint32_t mv_lsb = 0;
+       uint32_t mp_msb = 0;
+       uint8_t i = 0;
+       int reg_index = 0;
+       uint64_t vlan_mask = 0;
+
+       const uint8_t pool_mask_offset = 32;
+       const uint8_t vlan_mask_offset = 32;
+       const uint8_t dst_pool_offset = 8;
+       const uint8_t rule_mr_offset  = 4;
+       const uint8_t mirror_rule_mask = 0x0F;
+
+       struct ixgbe_mirror_info *mr_info =
+                       (IXGBE_DEV_PRIVATE_TO_PFDATA(dev->data->dev_private));
+       struct ixgbe_hw *hw =
+               IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       uint8_t mirror_type = 0;
+
+       if (ixgbe_vt_check(hw) < 0)
+               return -ENOTSUP;
+
+       if (rule_id >= IXGBE_MAX_MIRROR_RULES)
+               return -EINVAL;
+
+       if (IXGBE_INVALID_MIRROR_TYPE(mirror_conf->rule_type)) {
+               PMD_DRV_LOG(ERR, "unsupported mirror type 0x%x.",
+                       mirror_conf->rule_type);
+               return -EINVAL;
+       }
+
+       if (mirror_conf->rule_type & ETH_MIRROR_VLAN) {
+               mirror_type |= IXGBE_MRCTL_VLME;
+               /* Check if vlan id is valid and find corresponding VLAN ID index in VLVF */
+               for (i = 0; i < IXGBE_VLVF_ENTRIES; i++) {
+                       if (mirror_conf->vlan.vlan_mask & (1ULL << i)) {
+                               /* search vlan id related pool vlan filter index */
+                               reg_index = ixgbe_find_vlvf_slot(hw,
                                                 mirror_conf->vlan.vlan_id[i],
                                                 false);
                                if (reg_index < 0)
@@ -4759,7 +5551,7 @@ ixgbe_mirror_rule_reset(struct rte_eth_dev *dev, uint8_t rule_id)
        struct ixgbe_mirror_info *mr_info =
                (IXGBE_DEV_PRIVATE_TO_PFDATA(dev->data->dev_private));
 
-       if (ixgbe_vmdq_mode_check(hw) < 0)
+       if (ixgbe_vt_check(hw) < 0)
                return -ENOTSUP;
 
        memset(&mr_info->mr_conf[rule_id], 0,
@@ -4782,6 +5574,8 @@ ixgbe_mirror_rule_reset(struct rte_eth_dev *dev, uint8_t rule_id)
 static int
 ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
 {
+       struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        uint32_t mask;
        struct ixgbe_hw *hw =
                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -4791,7 +5585,7 @@ ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
        RTE_SET_USED(queue_id);
        IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask);
 
-       rte_intr_enable(&dev->pci_dev->intr_handle);
+       rte_intr_enable(intr_handle);
 
        return 0;
 }
@@ -4814,6 +5608,8 @@ ixgbevf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
 static int
 ixgbe_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
 {
+       struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        uint32_t mask;
        struct ixgbe_hw *hw =
                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -4833,7 +5629,7 @@ ixgbe_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
                mask &= (1 << (queue_id - 32));
                IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
        }
-       rte_intr_enable(&dev->pci_dev->intr_handle);
+       rte_intr_enable(intr_handle);
 
        return 0;
 }
@@ -4937,7 +5733,8 @@ ixgbe_set_ivar_map(struct ixgbe_hw *hw, int8_t direction,
 static void
 ixgbevf_configure_msix(struct rte_eth_dev *dev)
 {
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct ixgbe_hw *hw =
                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t q_idx;
@@ -4970,7 +5767,8 @@ ixgbevf_configure_msix(struct rte_eth_dev *dev)
 static void
 ixgbe_configure_msix(struct rte_eth_dev *dev)
 {
-       struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
+       struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct ixgbe_hw *hw =
                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t queue_id, base = IXGBE_MISC_VEC_ID;
@@ -5085,61 +5883,6 @@ static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev,
        return 0;
 }
 
-static int ixgbe_set_vf_rate_limit(struct rte_eth_dev *dev, uint16_t vf,
-       uint16_t tx_rate, uint64_t q_msk)
-{
-       struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct ixgbe_vf_info *vfinfo =
-               *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private));
-       uint8_t  nb_q_per_pool = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
-       uint32_t queue_stride =
-               IXGBE_MAX_RX_QUEUE_NUM / RTE_ETH_DEV_SRIOV(dev).active;
-       uint32_t queue_idx = vf * queue_stride, idx = 0, vf_idx;
-       uint32_t queue_end = queue_idx + nb_q_per_pool - 1;
-       uint16_t total_rate = 0;
-
-       if (queue_end >= hw->mac.max_tx_queues)
-               return -EINVAL;
-
-       if (vfinfo != NULL) {
-               for (vf_idx = 0; vf_idx < dev->pci_dev->max_vfs; vf_idx++) {
-                       if (vf_idx == vf)
-                               continue;
-                       for (idx = 0; idx < RTE_DIM(vfinfo[vf_idx].tx_rate);
-                               idx++)
-                               total_rate += vfinfo[vf_idx].tx_rate[idx];
-               }
-       } else
-               return -EINVAL;
-
-       /* Store tx_rate for this vf. */
-       for (idx = 0; idx < nb_q_per_pool; idx++) {
-               if (((uint64_t)0x1 << idx) & q_msk) {
-                       if (vfinfo[vf].tx_rate[idx] != tx_rate)
-                               vfinfo[vf].tx_rate[idx] = tx_rate;
-                       total_rate += tx_rate;
-               }
-       }
-
-       if (total_rate > dev->data->dev_link.link_speed) {
-               /*
-                * Reset stored TX rate of the VF if it causes exceed
-                * link speed.
-                */
-               memset(vfinfo[vf].tx_rate, 0, sizeof(vfinfo[vf].tx_rate));
-               return -EINVAL;
-       }
-
-       /* Set RTTBCNRC of each queue/pool for vf X  */
-       for (; queue_idx <= queue_end; queue_idx++) {
-               if (0x1 & q_msk)
-                       ixgbe_set_queue_rate_limit(dev, queue_idx, tx_rate);
-               q_msk = q_msk >> 1;
-       }
-
-       return 0;
-}
-
 static void
 ixgbevf_add_mac_addr(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
                     __attribute__((unused)) uint32_t index,
@@ -5224,21 +5967,24 @@ ixgbevf_set_default_mac_addr(struct rte_eth_dev *dev, struct ether_addr *addr)
                return -ENOTSUP;\
 } while (0)
 
-static int
+int
 ixgbe_syn_filter_set(struct rte_eth_dev *dev,
                        struct rte_eth_syn_filter *filter,
                        bool add)
 {
        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct ixgbe_filter_info *filter_info =
+               IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
+       uint32_t syn_info;
        uint32_t synqf;
 
        if (filter->queue >= IXGBE_MAX_RX_QUEUE_NUM)
                return -EINVAL;
 
-       synqf = IXGBE_READ_REG(hw, IXGBE_SYNQF);
+       syn_info = filter_info->syn_info;
 
        if (add) {
-               if (synqf & IXGBE_SYN_FILTER_ENABLE)
+               if (syn_info & IXGBE_SYN_FILTER_ENABLE)
                        return -EINVAL;
                synqf = (uint32_t)(((filter->queue << IXGBE_SYN_FILTER_QUEUE_SHIFT) &
                        IXGBE_SYN_FILTER_QUEUE) | IXGBE_SYN_FILTER_ENABLE);
@@ -5248,10 +5994,13 @@ ixgbe_syn_filter_set(struct rte_eth_dev *dev,
                else
                        synqf &= ~IXGBE_SYN_FILTER_SYNQFP;
        } else {
-               if (!(synqf & IXGBE_SYN_FILTER_ENABLE))
+               synqf = IXGBE_READ_REG(hw, IXGBE_SYNQF);
+               if (!(syn_info & IXGBE_SYN_FILTER_ENABLE))
                        return -ENOENT;
                synqf &= ~(IXGBE_SYN_FILTER_QUEUE | IXGBE_SYN_FILTER_ENABLE);
        }
+
+       filter_info->syn_info = synqf;
        IXGBE_WRITE_REG(hw, IXGBE_SYNQF, synqf);
        IXGBE_WRITE_FLUSH(hw);
        return 0;
@@ -5307,7 +6056,7 @@ ixgbe_syn_filter_handle(struct rte_eth_dev *dev,
                                (struct rte_eth_syn_filter *)arg);
                break;
        default:
-               PMD_DRV_LOG(ERR, "unsupported operation %u\n", filter_op);
+               PMD_DRV_LOG(ERR, "unsupported operation %u", filter_op);
                ret = -EINVAL;
                break;
        }
@@ -5329,6 +6078,52 @@ convert_protocol_type(uint8_t protocol_value)
                return IXGBE_FILTER_PROTOCOL_NONE;
 }
 
+/* inject a 5-tuple filter to HW */
+static inline void
+ixgbe_inject_5tuple_filter(struct rte_eth_dev *dev,
+                          struct ixgbe_5tuple_filter *filter)
+{
+       struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       int i;
+       uint32_t ftqf, sdpqf;
+       uint32_t l34timir = 0;
+       uint8_t mask = 0xff;
+
+       i = filter->index;
+
+       sdpqf = (uint32_t)(filter->filter_info.dst_port <<
+                               IXGBE_SDPQF_DSTPORT_SHIFT);
+       sdpqf = sdpqf | (filter->filter_info.src_port & IXGBE_SDPQF_SRCPORT);
+
+       ftqf = (uint32_t)(filter->filter_info.proto &
+               IXGBE_FTQF_PROTOCOL_MASK);
+       ftqf |= (uint32_t)((filter->filter_info.priority &
+               IXGBE_FTQF_PRIORITY_MASK) << IXGBE_FTQF_PRIORITY_SHIFT);
+       if (filter->filter_info.src_ip_mask == 0) /* 0 means compare. */
+               mask &= IXGBE_FTQF_SOURCE_ADDR_MASK;
+       if (filter->filter_info.dst_ip_mask == 0)
+               mask &= IXGBE_FTQF_DEST_ADDR_MASK;
+       if (filter->filter_info.src_port_mask == 0)
+               mask &= IXGBE_FTQF_SOURCE_PORT_MASK;
+       if (filter->filter_info.dst_port_mask == 0)
+               mask &= IXGBE_FTQF_DEST_PORT_MASK;
+       if (filter->filter_info.proto_mask == 0)
+               mask &= IXGBE_FTQF_PROTOCOL_COMP_MASK;
+       ftqf |= mask << IXGBE_FTQF_5TUPLE_MASK_SHIFT;
+       ftqf |= IXGBE_FTQF_POOL_MASK_EN;
+       ftqf |= IXGBE_FTQF_QUEUE_ENABLE;
+
+       IXGBE_WRITE_REG(hw, IXGBE_DAQF(i), filter->filter_info.dst_ip);
+       IXGBE_WRITE_REG(hw, IXGBE_SAQF(i), filter->filter_info.src_ip);
+       IXGBE_WRITE_REG(hw, IXGBE_SDPQF(i), sdpqf);
+       IXGBE_WRITE_REG(hw, IXGBE_FTQF(i), ftqf);
+
+       l34timir |= IXGBE_L34T_IMIR_RESERVE;
+       l34timir |= (uint32_t)(filter->queue <<
+                               IXGBE_L34T_IMIR_QUEUE_SHIFT);
+       IXGBE_WRITE_REG(hw, IXGBE_L34T_IMIR(i), l34timir);
+}
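
ixgbe_inject_5tuple_filter() is the programming half of the 5-tuple path: it turns the cached filter_info into SDPQF/FTQF/L34T_IMIR register writes, where a zero per-field mask in filter_info means the field participates in the comparison. From the application side these filters are installed through the ntuple filter API; a hedged usage sketch (port, queue and UDP port values are illustrative; byte order follows testpmd's handling of this filter type):

    #include <stdint.h>
    #include <netinet/in.h>         /* IPPROTO_UDP */
    #include <rte_byteorder.h>
    #include <rte_ethdev.h>

    /* Illustrative 5-tuple filter: steer UDP destination port 5000 to RX
     * queue 4 of port 0.  In the user-facing struct a full mask means
     * "compare this field"; ntuple_filter_to_5tuple() converts that into
     * the internal 0-means-compare convention noted above. */
    static int add_udp_5tuple_filter(void)
    {
            struct rte_eth_ntuple_filter f = {
                    .flags = RTE_5TUPLE_FLAGS,
                    .dst_port = rte_cpu_to_be_16(5000),
                    .dst_port_mask = UINT16_MAX,
                    .proto = IPPROTO_UDP,
                    .proto_mask = UINT8_MAX,
                    .priority = 1,          /* 1..7 for 5-tuple filters */
                    .queue = 4,
            };

            return rte_eth_dev_filter_ctrl(0 /* port */, RTE_ETH_FILTER_NTUPLE,
                                           RTE_ETH_FILTER_ADD, &f);
    }
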
+
 /*
  * add a 5tuple filter
  *
@@ -5346,13 +6141,9 @@ static int
 ixgbe_add_5tuple_filter(struct rte_eth_dev *dev,
                        struct ixgbe_5tuple_filter *filter)
 {
-       struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct ixgbe_filter_info *filter_info =
                IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
        int i, idx, shift;
-       uint32_t ftqf, sdpqf;
-       uint32_t l34timir = 0;
-       uint8_t mask = 0xff;
 
        /*
         * look for an unused 5tuple filter index,
@@ -5375,37 +6166,8 @@ ixgbe_add_5tuple_filter(struct rte_eth_dev *dev,
                return -ENOSYS;
        }
 
-       sdpqf = (uint32_t)(filter->filter_info.dst_port <<
-                               IXGBE_SDPQF_DSTPORT_SHIFT);
-       sdpqf = sdpqf | (filter->filter_info.src_port & IXGBE_SDPQF_SRCPORT);
-
-       ftqf = (uint32_t)(filter->filter_info.proto &
-               IXGBE_FTQF_PROTOCOL_MASK);
-       ftqf |= (uint32_t)((filter->filter_info.priority &
-               IXGBE_FTQF_PRIORITY_MASK) << IXGBE_FTQF_PRIORITY_SHIFT);
-       if (filter->filter_info.src_ip_mask == 0) /* 0 means compare. */
-               mask &= IXGBE_FTQF_SOURCE_ADDR_MASK;
-       if (filter->filter_info.dst_ip_mask == 0)
-               mask &= IXGBE_FTQF_DEST_ADDR_MASK;
-       if (filter->filter_info.src_port_mask == 0)
-               mask &= IXGBE_FTQF_SOURCE_PORT_MASK;
-       if (filter->filter_info.dst_port_mask == 0)
-               mask &= IXGBE_FTQF_DEST_PORT_MASK;
-       if (filter->filter_info.proto_mask == 0)
-               mask &= IXGBE_FTQF_PROTOCOL_COMP_MASK;
-       ftqf |= mask << IXGBE_FTQF_5TUPLE_MASK_SHIFT;
-       ftqf |= IXGBE_FTQF_POOL_MASK_EN;
-       ftqf |= IXGBE_FTQF_QUEUE_ENABLE;
-
-       IXGBE_WRITE_REG(hw, IXGBE_DAQF(i), filter->filter_info.dst_ip);
-       IXGBE_WRITE_REG(hw, IXGBE_SAQF(i), filter->filter_info.src_ip);
-       IXGBE_WRITE_REG(hw, IXGBE_SDPQF(i), sdpqf);
-       IXGBE_WRITE_REG(hw, IXGBE_FTQF(i), ftqf);
+       ixgbe_inject_5tuple_filter(dev, filter);
 
-       l34timir |= IXGBE_L34T_IMIR_RESERVE;
-       l34timir |= (uint32_t)(filter->queue <<
-                               IXGBE_L34T_IMIR_QUEUE_SHIFT);
-       IXGBE_WRITE_REG(hw, IXGBE_L34T_IMIR(i), l34timir);
        return 0;
 }
 
@@ -5584,7 +6346,7 @@ ntuple_filter_to_5tuple(struct rte_eth_ntuple_filter *filter,
  *    - On success, zero.
  *    - On failure, a negative value.
  */
-static int
+int
 ixgbe_add_del_ntuple_filter(struct rte_eth_dev *dev,
                        struct rte_eth_ntuple_filter *ntuple_filter,
                        bool add)
@@ -5729,48 +6491,7 @@ ixgbe_ntuple_filter_handle(struct rte_eth_dev *dev,
        return ret;
 }
 
-static inline int
-ixgbe_ethertype_filter_lookup(struct ixgbe_filter_info *filter_info,
-                       uint16_t ethertype)
-{
-       int i;
-
-       for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) {
-               if (filter_info->ethertype_filters[i] == ethertype &&
-                   (filter_info->ethertype_mask & (1 << i)))
-                       return i;
-       }
-       return -1;
-}
-
-static inline int
-ixgbe_ethertype_filter_insert(struct ixgbe_filter_info *filter_info,
-                       uint16_t ethertype)
-{
-       int i;
-
-       for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) {
-               if (!(filter_info->ethertype_mask & (1 << i))) {
-                       filter_info->ethertype_mask |= 1 << i;
-                       filter_info->ethertype_filters[i] = ethertype;
-                       return i;
-               }
-       }
-       return -1;
-}
-
-static inline int
-ixgbe_ethertype_filter_remove(struct ixgbe_filter_info *filter_info,
-                       uint8_t idx)
-{
-       if (idx >= IXGBE_MAX_ETQF_FILTERS)
-               return -1;
-       filter_info->ethertype_mask &= ~(1 << idx);
-       filter_info->ethertype_filters[idx] = 0;
-       return idx;
-}
-
-static int
+int
 ixgbe_add_del_ethertype_filter(struct rte_eth_dev *dev,
                        struct rte_eth_ethertype_filter *filter,
                        bool add)
@@ -5781,20 +6502,17 @@ ixgbe_add_del_ethertype_filter(struct rte_eth_dev *dev,
        uint32_t etqf = 0;
        uint32_t etqs = 0;
        int ret;
+       struct ixgbe_ethertype_filter ethertype_filter;
 
        if (filter->queue >= IXGBE_MAX_RX_QUEUE_NUM)
                return -EINVAL;
-#define TREX_PATCH
-#ifndef TREX_PATCH
-    // no real reason to block this.
-    // We configure rules using FDIR and ethertype that point to same queue, so there are no race condition issues.
+
        if (filter->ether_type == ETHER_TYPE_IPv4 ||
                filter->ether_type == ETHER_TYPE_IPv6) {
                PMD_DRV_LOG(ERR, "unsupported ether_type(0x%04x) in"
                        " ethertype filter.", filter->ether_type);
                return -EINVAL;
        }
-#endif
 
        if (filter->flags & RTE_ETHTYPE_FLAGS_MAC) {
                PMD_DRV_LOG(ERR, "mac compare is unsupported.");
@@ -5818,21 +6536,26 @@ ixgbe_add_del_ethertype_filter(struct rte_eth_dev *dev,
        }
 
        if (add) {
-               ret = ixgbe_ethertype_filter_insert(filter_info,
-                       filter->ether_type);
-               if (ret < 0) {
-                       PMD_DRV_LOG(ERR, "ethertype filters are full.");
-                       return -ENOSYS;
-               }
                etqf = IXGBE_ETQF_FILTER_EN;
                etqf |= (uint32_t)filter->ether_type;
                etqs |= (uint32_t)((filter->queue <<
                                    IXGBE_ETQS_RX_QUEUE_SHIFT) &
                                    IXGBE_ETQS_RX_QUEUE);
                etqs |= IXGBE_ETQS_QUEUE_EN;
-       } else {
-               ret = ixgbe_ethertype_filter_remove(filter_info, (uint8_t)ret);
-               if (ret < 0)
+
+               ethertype_filter.ethertype = filter->ether_type;
+               ethertype_filter.etqf = etqf;
+               ethertype_filter.etqs = etqs;
+               ethertype_filter.conf = FALSE;
+               ret = ixgbe_ethertype_filter_insert(filter_info,
+                                                   &ethertype_filter);
+               if (ret < 0) {
+                       PMD_DRV_LOG(ERR, "ethertype filters are full.");
+                       return -ENOSPC;
+               }
+       } else {
+               ret = ixgbe_ethertype_filter_remove(filter_info, (uint8_t)ret);
+               if (ret < 0)
                        return -ENOSYS;
        }
        IXGBE_WRITE_REG(hw, IXGBE_ETQF(ret), etqf);
@@ -5925,7 +6648,7 @@ ixgbe_dev_filter_ctrl(struct rte_eth_dev *dev,
                     enum rte_filter_op filter_op,
                     void *arg)
 {
-       int ret = -EINVAL;
+       int ret = 0;
 
        switch (filter_type) {
        case RTE_ETH_FILTER_NTUPLE:
@@ -5943,9 +6666,15 @@ ixgbe_dev_filter_ctrl(struct rte_eth_dev *dev,
        case RTE_ETH_FILTER_L2_TUNNEL:
                ret = ixgbe_dev_l2_tunnel_filter_handle(dev, filter_op, arg);
                break;
+       case RTE_ETH_FILTER_GENERIC:
+               if (filter_op != RTE_ETH_FILTER_GET)
+                       return -EINVAL;
+               *(const void **)arg = &ixgbe_flow_ops;
+               break;
        default:
                PMD_DRV_LOG(WARNING, "Filter type (%d) not supported",
                                                        filter_type);
+               ret = -EINVAL;
                break;
        }
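
The new RTE_ETH_FILTER_GENERIC case is how the rte_flow layer discovers the driver's flow ops table (ixgbe_flow_ops): the ethdev layer issues a GET through filter_ctrl and then dispatches rte_flow_validate()/create()/destroy() through the returned ops. A minimal sketch mirroring what the ethdev flow layer does internally:

    #include <rte_ethdev.h>
    #include <rte_flow_driver.h>

    /* Retrieve the PMD's rte_flow ops through the generic-filter hook. */
    static const struct rte_flow_ops *get_flow_ops(uint8_t port)
    {
            const struct rte_flow_ops *ops = NULL;

            if (rte_eth_dev_filter_ctrl(port, RTE_ETH_FILTER_GENERIC,
                                        RTE_ETH_FILTER_GET, &ops) < 0)
                    return NULL;    /* PMD has no rte_flow support */
            return ops;
    }
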
 
@@ -6593,12 +7322,15 @@ ixgbe_dev_l2_tunnel_eth_type_conf(struct rte_eth_dev *dev,
 {
        int ret = 0;
        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct ixgbe_l2_tn_info *l2_tn_info =
+               IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private);
 
        if (l2_tunnel == NULL)
                return -EINVAL;
 
        switch (l2_tunnel->l2_tunnel_type) {
        case RTE_L2_TUNNEL_TYPE_E_TAG:
+               l2_tn_info->e_tag_ether_type = l2_tunnel->ether_type;
                ret = ixgbe_update_e_tag_eth_type(hw, l2_tunnel->ether_type);
                break;
        default:
@@ -6637,9 +7369,12 @@ ixgbe_dev_l2_tunnel_enable(struct rte_eth_dev *dev,
 {
        int ret = 0;
        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct ixgbe_l2_tn_info *l2_tn_info =
+               IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private);
 
        switch (l2_tunnel_type) {
        case RTE_L2_TUNNEL_TYPE_E_TAG:
+               l2_tn_info->e_tag_en = TRUE;
                ret = ixgbe_e_tag_enable(hw);
                break;
        default:
@@ -6678,9 +7413,12 @@ ixgbe_dev_l2_tunnel_disable(struct rte_eth_dev *dev,
 {
        int ret = 0;
        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct ixgbe_l2_tn_info *l2_tn_info =
+               IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private);
 
        switch (l2_tunnel_type) {
        case RTE_L2_TUNNEL_TYPE_E_TAG:
+               l2_tn_info->e_tag_en = FALSE;
                ret = ixgbe_e_tag_disable(hw);
                break;
        default:
@@ -6769,12 +7507,108 @@ ixgbe_e_tag_filter_add(struct rte_eth_dev *dev,
        return -EINVAL;
 }
 
+static inline struct ixgbe_l2_tn_filter *
+ixgbe_l2_tn_filter_lookup(struct ixgbe_l2_tn_info *l2_tn_info,
+                         struct ixgbe_l2_tn_key *key)
+{
+       int ret;
+
+       ret = rte_hash_lookup(l2_tn_info->hash_handle, (const void *)key);
+       if (ret < 0)
+               return NULL;
+
+       return l2_tn_info->hash_map[ret];
+}
+
+static inline int
+ixgbe_insert_l2_tn_filter(struct ixgbe_l2_tn_info *l2_tn_info,
+                         struct ixgbe_l2_tn_filter *l2_tn_filter)
+{
+       int ret;
+
+       ret = rte_hash_add_key(l2_tn_info->hash_handle,
+                              &l2_tn_filter->key);
+
+       if (ret < 0) {
+               PMD_DRV_LOG(ERR,
+                           "Failed to insert L2 tunnel filter"
+                           " to hash table %d!",
+                           ret);
+               return ret;
+       }
+
+       l2_tn_info->hash_map[ret] = l2_tn_filter;
+
+       TAILQ_INSERT_TAIL(&l2_tn_info->l2_tn_list, l2_tn_filter, entries);
+
+       return 0;
+}
+
+static inline int
+ixgbe_remove_l2_tn_filter(struct ixgbe_l2_tn_info *l2_tn_info,
+                         struct ixgbe_l2_tn_key *key)
+{
+       int ret;
+       struct ixgbe_l2_tn_filter *l2_tn_filter;
+
+       ret = rte_hash_del_key(l2_tn_info->hash_handle, key);
+
+       if (ret < 0) {
+               PMD_DRV_LOG(ERR,
+                           "No such L2 tunnel filter to delete %d!",
+                           ret);
+               return ret;
+       }
+
+       l2_tn_filter = l2_tn_info->hash_map[ret];
+       l2_tn_info->hash_map[ret] = NULL;
+
+       TAILQ_REMOVE(&l2_tn_info->l2_tn_list, l2_tn_filter, entries);
+       rte_free(l2_tn_filter);
+
+       return 0;
+}
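
The L2 tunnel filters are now tracked in an rte_hash keyed by (tunnel type, tunnel id), with hash_map[] mapping the slot index returned by rte_hash_add_key() back to the filter node so later lookups and the restore path can reach it directly. A minimal sketch of setting up such a table (name, size and socket are assumptions, and the ixgbe_l2_tn_key definition is taken from ixgbe_ethdev.h; the driver's real parameters live in its init path, outside this hunk):

    #include <rte_hash.h>
    #include <rte_jhash.h>

    static struct rte_hash *create_l2_tn_hash(void)
    {
            struct rte_hash_parameters params = {
                    .name = "l2_tn_example",
                    .entries = 1024,                           /* max filters */
                    .key_len = sizeof(struct ixgbe_l2_tn_key), /* type + id */
                    .hash_func = rte_jhash,
                    .hash_func_init_val = 0,
                    .socket_id = 0,
            };

            /* rte_hash_add_key() returns a slot index; the driver mirrors it
             * in hash_map[] so a lookup can go straight to the filter node. */
            return rte_hash_create(&params);
    }
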
+
 /* Add l2 tunnel filter */
-static int
+int
 ixgbe_dev_l2_tunnel_filter_add(struct rte_eth_dev *dev,
-                              struct rte_eth_l2_tunnel_conf *l2_tunnel)
+                              struct rte_eth_l2_tunnel_conf *l2_tunnel,
+                              bool restore)
 {
-       int ret = 0;
+       int ret;
+       struct ixgbe_l2_tn_info *l2_tn_info =
+               IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private);
+       struct ixgbe_l2_tn_key key;
+       struct ixgbe_l2_tn_filter *node;
+
+       if (!restore) {
+               key.l2_tn_type = l2_tunnel->l2_tunnel_type;
+               key.tn_id = l2_tunnel->tunnel_id;
+
+               node = ixgbe_l2_tn_filter_lookup(l2_tn_info, &key);
+
+               if (node) {
+                       PMD_DRV_LOG(ERR,
+                                   "The L2 tunnel filter already exists!");
+                       return -EINVAL;
+               }
+
+               node = rte_zmalloc("ixgbe_l2_tn",
+                                  sizeof(struct ixgbe_l2_tn_filter),
+                                  0);
+               if (!node)
+                       return -ENOMEM;
+
+               (void)rte_memcpy(&node->key,
+                                &key,
+                                sizeof(struct ixgbe_l2_tn_key));
+               node->pool = l2_tunnel->pool;
+               ret = ixgbe_insert_l2_tn_filter(l2_tn_info, node);
+               if (ret < 0) {
+                       rte_free(node);
+                       return ret;
+               }
+       }
 
        switch (l2_tunnel->l2_tunnel_type) {
        case RTE_L2_TUNNEL_TYPE_E_TAG:
@@ -6786,15 +7620,27 @@ ixgbe_dev_l2_tunnel_filter_add(struct rte_eth_dev *dev,
                break;
        }
 
+       if ((!restore) && (ret < 0))
+               (void)ixgbe_remove_l2_tn_filter(l2_tn_info, &key);
+
        return ret;
 }
 
 /* Delete l2 tunnel filter */
-static int
+int
 ixgbe_dev_l2_tunnel_filter_del(struct rte_eth_dev *dev,
                               struct rte_eth_l2_tunnel_conf *l2_tunnel)
 {
-       int ret = 0;
+       int ret;
+       struct ixgbe_l2_tn_info *l2_tn_info =
+               IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private);
+       struct ixgbe_l2_tn_key key;
+
+       key.l2_tn_type = l2_tunnel->l2_tunnel_type;
+       key.tn_id = l2_tunnel->tunnel_id;
+       ret = ixgbe_remove_l2_tn_filter(l2_tn_info, &key);
+       if (ret < 0)
+               return ret;
 
        switch (l2_tunnel->l2_tunnel_type) {
        case RTE_L2_TUNNEL_TYPE_E_TAG:
@@ -6820,7 +7666,7 @@ ixgbe_dev_l2_tunnel_filter_handle(struct rte_eth_dev *dev,
                                  enum rte_filter_op filter_op,
                                  void *arg)
 {
-       int ret = 0;
+       int ret;
 
        if (filter_op == RTE_ETH_FILTER_NOP)
                return 0;
@@ -6835,7 +7681,8 @@ ixgbe_dev_l2_tunnel_filter_handle(struct rte_eth_dev *dev,
        case RTE_ETH_FILTER_ADD:
                ret = ixgbe_dev_l2_tunnel_filter_add
                        (dev,
-                        (struct rte_eth_l2_tunnel_conf *)arg);
+                        (struct rte_eth_l2_tunnel_conf *)arg,
+                        FALSE);
                break;
        case RTE_ETH_FILTER_DELETE:
                ret = ixgbe_dev_l2_tunnel_filter_del
@@ -6878,10 +7725,13 @@ ixgbe_dev_l2_tunnel_forwarding_enable
        (struct rte_eth_dev *dev,
         enum rte_eth_tunnel_type l2_tunnel_type)
 {
+       struct ixgbe_l2_tn_info *l2_tn_info =
+               IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private);
        int ret = 0;
 
        switch (l2_tunnel_type) {
        case RTE_L2_TUNNEL_TYPE_E_TAG:
+               l2_tn_info->e_tag_fwd_en = TRUE;
                ret = ixgbe_e_tag_forwarding_en_dis(dev, 1);
                break;
        default:
@@ -6899,10 +7749,13 @@ ixgbe_dev_l2_tunnel_forwarding_disable
        (struct rte_eth_dev *dev,
         enum rte_eth_tunnel_type l2_tunnel_type)
 {
+       struct ixgbe_l2_tn_info *l2_tn_info =
+               IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private);
        int ret = 0;
 
        switch (l2_tunnel_type) {
        case RTE_L2_TUNNEL_TYPE_E_TAG:
+               l2_tn_info->e_tag_fwd_en = FALSE;
                ret = ixgbe_e_tag_forwarding_en_dis(dev, 0);
                break;
        default:
@@ -6919,15 +7772,16 @@ ixgbe_e_tag_insertion_en_dis(struct rte_eth_dev *dev,
                             struct rte_eth_l2_tunnel_conf *l2_tunnel,
                             bool en)
 {
+       struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev);
        int ret = 0;
        uint32_t vmtir, vmvir;
        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
-       if (l2_tunnel->vf_id >= dev->pci_dev->max_vfs) {
+       if (l2_tunnel->vf_id >= pci_dev->max_vfs) {
                PMD_DRV_LOG(ERR,
                            "VF id %u should be less than %u",
                            l2_tunnel->vf_id,
-                           dev->pci_dev->max_vfs);
+                           pci_dev->max_vfs);
                return -EINVAL;
        }
 
@@ -7240,51 +8094,12 @@ ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
        return ret;
 }
 
-/* ixgbevf_update_xcast_mode - Update Multicast mode
- * @hw: pointer to the HW structure
- * @netdev: pointer to net device structure
- * @xcast_mode: new multicast mode
- *
- * Updates the Multicast Mode of VF.
- */
-static int ixgbevf_update_xcast_mode(struct ixgbe_hw *hw,
-                                    int xcast_mode)
-{
-       struct ixgbe_mbx_info *mbx = &hw->mbx;
-       u32 msgbuf[2];
-       s32 err;
-
-       switch (hw->api_version) {
-       case ixgbe_mbox_api_12:
-               break;
-       default:
-               return -EOPNOTSUPP;
-       }
-
-       msgbuf[0] = IXGBE_VF_UPDATE_XCAST_MODE;
-       msgbuf[1] = xcast_mode;
-
-       err = mbx->ops.write_posted(hw, msgbuf, 2, 0);
-       if (err)
-               return err;
-
-       err = mbx->ops.read_posted(hw, msgbuf, 2, 0);
-       if (err)
-               return err;
-
-       msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
-       if (msgbuf[0] == (IXGBE_VF_UPDATE_XCAST_MODE | IXGBE_VT_MSGTYPE_NACK))
-               return -EPERM;
-
-       return 0;
-}
-
 static void
 ixgbevf_dev_allmulticast_enable(struct rte_eth_dev *dev)
 {
        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
-       ixgbevf_update_xcast_mode(hw, IXGBEVF_XCAST_MODE_ALLMULTI);
+       hw->mac.ops.update_xcast_mode(hw, IXGBEVF_XCAST_MODE_ALLMULTI);
 }
 
 static void
@@ -7292,7 +8107,7 @@ ixgbevf_dev_allmulticast_disable(struct rte_eth_dev *dev)
 {
        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
-       ixgbevf_update_xcast_mode(hw, IXGBEVF_XCAST_MODE_NONE);
+       hw->mac.ops.update_xcast_mode(hw, IXGBEVF_XCAST_MODE_NONE);
 }
 
 static void ixgbevf_mbx_process(struct rte_eth_dev *dev)
@@ -7305,7 +8120,7 @@ static void ixgbevf_mbx_process(struct rte_eth_dev *dev)
 
        /* PF reset VF event */
        if (in_msg == IXGBE_PF_CONTROL_MSG)
-               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET);
+               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, NULL);
 }
 
 static int
@@ -7356,17 +8171,529 @@ ixgbevf_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
        ixgbevf_dev_interrupt_action(dev);
 }
 
-static struct rte_driver rte_ixgbe_driver = {
-       .type = PMD_PDEV,
-       .init = rte_ixgbe_pmd_init,
-};
+/**
+ *  ixgbe_disable_sec_tx_path_generic - Stops the transmit data path
+ *  @hw: pointer to hardware structure
+ *
+ *  Stops the transmit data path and waits for the HW to internally empty
+ *  the Tx security block
+ **/
+int ixgbe_disable_sec_tx_path_generic(struct ixgbe_hw *hw)
+{
+#define IXGBE_MAX_SECTX_POLL 40
 
-static struct rte_driver rte_ixgbevf_driver = {
-       .type = PMD_PDEV,
-       .init = rte_ixgbevf_pmd_init,
-};
+       int i;
+       int sectxreg;
+
+       sectxreg = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL);
+       sectxreg |= IXGBE_SECTXCTRL_TX_DIS;
+       IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, sectxreg);
+       for (i = 0; i < IXGBE_MAX_SECTX_POLL; i++) {
+               sectxreg = IXGBE_READ_REG(hw, IXGBE_SECTXSTAT);
+               if (sectxreg & IXGBE_SECTXSTAT_SECTX_RDY)
+                       break;
+               /* Use interrupt-safe sleep just in case */
+               usec_delay(1000);
+       }
+
+       /* For informational purposes only */
+       if (i >= IXGBE_MAX_SECTX_POLL)
+               PMD_DRV_LOG(DEBUG, "Tx unit being enabled before security "
+                        "path fully disabled.  Continuing with init.");
+
+       return IXGBE_SUCCESS;
+}
+
+/**
+ *  ixgbe_enable_sec_tx_path_generic - Enables the transmit data path
+ *  @hw: pointer to hardware structure
+ *
+ *  Enables the transmit data path.
+ **/
+int ixgbe_enable_sec_tx_path_generic(struct ixgbe_hw *hw)
+{
+       uint32_t sectxreg;
+
+       sectxreg = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL);
+       sectxreg &= ~IXGBE_SECTXCTRL_TX_DIS;
+       IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, sectxreg);
+       IXGBE_WRITE_FLUSH(hw);
+
+       return IXGBE_SUCCESS;
+}
+
+int
+rte_pmd_ixgbe_macsec_enable(uint8_t port, uint8_t en, uint8_t rp)
+{
+       struct ixgbe_hw *hw;
+       struct rte_eth_dev *dev;
+       struct rte_eth_dev_info dev_info;
+       uint32_t ctrl;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       rte_eth_dev_info_get(port, &dev_info);
+       if (is_ixgbe_pmd(dev_info.driver_name) != 0)
+               return -ENOTSUP;
+
+       dev = &rte_eth_devices[port];
+       hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       /* Stop the data paths */
+       if (ixgbe_disable_sec_rx_path(hw) != IXGBE_SUCCESS)
+               return -ENOTSUP;
+       /*
+        * Workaround:
+        * No Tx equivalent of ixgbe_disable_sec_rx_path is
+        * implemented in the base code, and the base code must
+        * not be modified in DPDK, so call the hand-written
+        * helper directly for now.
+        * The hardware support has been checked by
+        * ixgbe_disable_sec_rx_path().
+        */
+       ixgbe_disable_sec_tx_path_generic(hw);
+
+       /* Enable Ethernet CRC (required by MACsec offload) */
+       ctrl = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+       ctrl |= IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_RXCRCSTRP;
+       IXGBE_WRITE_REG(hw, IXGBE_HLREG0, ctrl);
+
+       /* Enable the TX and RX crypto engines */
+       ctrl = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL);
+       ctrl &= ~IXGBE_SECTXCTRL_SECTX_DIS;
+       IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, ctrl);
+
+       ctrl = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+       ctrl &= ~IXGBE_SECRXCTRL_SECRX_DIS;
+       IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, ctrl);
+
+       ctrl = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
+       ctrl &= ~IXGBE_SECTX_MINSECIFG_MASK;
+       ctrl |= 0x3;
+       IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, ctrl);
+
+       /* Enable SA lookup */
+       ctrl = IXGBE_READ_REG(hw, IXGBE_LSECTXCTRL);
+       ctrl &= ~IXGBE_LSECTXCTRL_EN_MASK;
+       ctrl |= en ? IXGBE_LSECTXCTRL_AUTH_ENCRYPT :
+                    IXGBE_LSECTXCTRL_AUTH;
+       ctrl |= IXGBE_LSECTXCTRL_AISCI;
+       ctrl &= ~IXGBE_LSECTXCTRL_PNTHRSH_MASK;
+       ctrl |= IXGBE_MACSEC_PNTHRSH & IXGBE_LSECTXCTRL_PNTHRSH_MASK;
+       IXGBE_WRITE_REG(hw, IXGBE_LSECTXCTRL, ctrl);
+
+       ctrl = IXGBE_READ_REG(hw, IXGBE_LSECRXCTRL);
+       ctrl &= ~IXGBE_LSECRXCTRL_EN_MASK;
+       ctrl |= IXGBE_LSECRXCTRL_STRICT << IXGBE_LSECRXCTRL_EN_SHIFT;
+       ctrl &= ~IXGBE_LSECRXCTRL_PLSH;
+       if (rp)
+               ctrl |= IXGBE_LSECRXCTRL_RP;
+       else
+               ctrl &= ~IXGBE_LSECRXCTRL_RP;
+       IXGBE_WRITE_REG(hw, IXGBE_LSECRXCTRL, ctrl);
+
+       /* Start the data paths */
+       ixgbe_enable_sec_rx_path(hw);
+       /*
+        * Workaround:
+        * No Tx equivalent of ixgbe_enable_sec_rx_path is
+        * implemented in the base code, and the base code
+        * must not be modified in DPDK, so just call the
+        * hand-written one directly for now.
+        */
+       ixgbe_enable_sec_tx_path_generic(hw);
+
+       return 0;
+}
+
+int
+rte_pmd_ixgbe_macsec_disable(uint8_t port)
+{
+       struct ixgbe_hw *hw;
+       struct rte_eth_dev *dev;
+       struct rte_eth_dev_info dev_info;
+       uint32_t ctrl;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       rte_eth_dev_info_get(port, &dev_info);
+       if (is_ixgbe_pmd(dev_info.driver_name) != 0)
+               return -ENOTSUP;
+
+       dev = &rte_eth_devices[port];
+       hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       /* Stop the data paths */
+       if (ixgbe_disable_sec_rx_path(hw) != IXGBE_SUCCESS)
+               return -ENOTSUP;
+       /*
+        * Workaround:
+        * No Tx equivalent of ixgbe_disable_sec_rx_path is
+        * implemented in the base code, and the base code
+        * must not be modified in DPDK, so just call the
+        * hand-written one directly for now.
+        * The hardware support has been checked by
+        * ixgbe_disable_sec_rx_path().
+        */
+       ixgbe_disable_sec_tx_path_generic(hw);
+
+       /* Disable the TX and RX crypto engines */
+       ctrl = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL);
+       ctrl |= IXGBE_SECTXCTRL_SECTX_DIS;
+       IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, ctrl);
+
+       ctrl = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+       ctrl |= IXGBE_SECRXCTRL_SECRX_DIS;
+       IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, ctrl);
+
+       /* Disable SA lookup */
+       ctrl = IXGBE_READ_REG(hw, IXGBE_LSECTXCTRL);
+       ctrl &= ~IXGBE_LSECTXCTRL_EN_MASK;
+       ctrl |= IXGBE_LSECTXCTRL_DISABLE;
+       IXGBE_WRITE_REG(hw, IXGBE_LSECTXCTRL, ctrl);
+
+       ctrl = IXGBE_READ_REG(hw, IXGBE_LSECRXCTRL);
+       ctrl &= ~IXGBE_LSECRXCTRL_EN_MASK;
+       ctrl |= IXGBE_LSECRXCTRL_DISABLE << IXGBE_LSECRXCTRL_EN_SHIFT;
+       IXGBE_WRITE_REG(hw, IXGBE_LSECRXCTRL, ctrl);
+
+       /* Start the data paths */
+       ixgbe_enable_sec_rx_path(hw);
+       /*
+        * Workaround:
+        * No Tx equivalent of ixgbe_enable_sec_rx_path is
+        * implemented in the base code, and the base code
+        * must not be modified in DPDK, so just call the
+        * hand-written one directly for now.
+        */
+       ixgbe_enable_sec_tx_path_generic(hw);
+
+       return 0;
+}
+
+int
+rte_pmd_ixgbe_macsec_config_txsc(uint8_t port, uint8_t *mac)
+{
+       struct ixgbe_hw *hw;
+       struct rte_eth_dev *dev;
+       struct rte_eth_dev_info dev_info;
+       uint32_t ctrl;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       rte_eth_dev_info_get(port, &dev_info);
+       if (is_ixgbe_pmd(dev_info.driver_name) != 0)
+               return -ENOTSUP;
+
+       dev = &rte_eth_devices[port];
+       hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       ctrl = mac[0] | (mac[1] << 8) | (mac[2] << 16) | (mac[3] << 24);
+       IXGBE_WRITE_REG(hw, IXGBE_LSECTXSCL, ctrl);
+
+       ctrl = mac[4] | (mac[5] << 8);
+       IXGBE_WRITE_REG(hw, IXGBE_LSECTXSCH, ctrl);
+
+       return 0;
+}
+
+int
+rte_pmd_ixgbe_macsec_config_rxsc(uint8_t port, uint8_t *mac, uint16_t pi)
+{
+       struct ixgbe_hw *hw;
+       struct rte_eth_dev *dev;
+       struct rte_eth_dev_info dev_info;
+       uint32_t ctrl;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       rte_eth_dev_info_get(port, &dev_info);
+       if (is_ixgbe_pmd(dev_info.driver_name) != 0)
+               return -ENOTSUP;
+
+       dev = &rte_eth_devices[port];
+       hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       ctrl = mac[0] | (mac[1] << 8) | (mac[2] << 16) | (mac[3] << 24);
+       IXGBE_WRITE_REG(hw, IXGBE_LSECRXSCL, ctrl);
+
+       pi = rte_cpu_to_be_16(pi);
+       ctrl = mac[4] | (mac[5] << 8) | (pi << 16);
+       IXGBE_WRITE_REG(hw, IXGBE_LSECRXSCH, ctrl);
+
+       return 0;
+}
+
+int
+rte_pmd_ixgbe_macsec_select_txsa(uint8_t port, uint8_t idx, uint8_t an,
+                                uint32_t pn, uint8_t *key)
+{
+       struct ixgbe_hw *hw;
+       struct rte_eth_dev *dev;
+       struct rte_eth_dev_info dev_info;
+       uint32_t ctrl, i;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       rte_eth_dev_info_get(port, &dev_info);
+       if (is_ixgbe_pmd(dev_info.driver_name) != 0)
+               return -ENOTSUP;
+
+       dev = &rte_eth_devices[port];
+       hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       if (idx != 0 && idx != 1)
+               return -EINVAL;
+
+       if (an >= 4)
+               return -EINVAL;
+
+       hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       /* Set the PN and key */
+       pn = rte_cpu_to_be_32(pn);
+       if (idx == 0) {
+               IXGBE_WRITE_REG(hw, IXGBE_LSECTXPN0, pn);
+
+               for (i = 0; i < 4; i++) {
+                       ctrl = (key[i * 4 + 0] <<  0) |
+                              (key[i * 4 + 1] <<  8) |
+                              (key[i * 4 + 2] << 16) |
+                              (key[i * 4 + 3] << 24);
+                       IXGBE_WRITE_REG(hw, IXGBE_LSECTXKEY0(i), ctrl);
+               }
+       } else {
+               IXGBE_WRITE_REG(hw, IXGBE_LSECTXPN1, pn);
+
+               for (i = 0; i < 4; i++) {
+                       ctrl = (key[i * 4 + 0] <<  0) |
+                              (key[i * 4 + 1] <<  8) |
+                              (key[i * 4 + 2] << 16) |
+                              (key[i * 4 + 3] << 24);
+                       IXGBE_WRITE_REG(hw, IXGBE_LSECTXKEY1(i), ctrl);
+               }
+       }
+
+       /* Set AN and select the SA */
+       ctrl = (an << idx * 2) | (idx << 4);
+       IXGBE_WRITE_REG(hw, IXGBE_LSECTXSA, ctrl);
+
+       return 0;
+}
+
+int
+rte_pmd_ixgbe_macsec_select_rxsa(uint8_t port, uint8_t idx, uint8_t an,
+                                uint32_t pn, uint8_t *key)
+{
+       struct ixgbe_hw *hw;
+       struct rte_eth_dev *dev;
+       struct rte_eth_dev_info dev_info;
+       uint32_t ctrl, i;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+       rte_eth_dev_info_get(port, &dev_info);
+       if (is_ixgbe_pmd(dev_info.driver_name) != 0)
+               return -ENOTSUP;
+
+       dev = &rte_eth_devices[port];
+       hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       if (idx != 0 && idx != 1)
+               return -EINVAL;
+
+       if (an >= 4)
+               return -EINVAL;
+
+       /* Set the PN */
+       pn = rte_cpu_to_be_32(pn);
+       IXGBE_WRITE_REG(hw, IXGBE_LSECRXPN(idx), pn);
+
+       /* Set the key */
+       for (i = 0; i < 4; i++) {
+               ctrl = (key[i * 4 + 0] <<  0) |
+                      (key[i * 4 + 1] <<  8) |
+                      (key[i * 4 + 2] << 16) |
+                      (key[i * 4 + 3] << 24);
+               IXGBE_WRITE_REG(hw, IXGBE_LSECRXKEY(idx, i), ctrl);
+       }
+
+       /* Set the AN and validate the SA */
+       ctrl = an | (1 << 2);
+       IXGBE_WRITE_REG(hw, IXGBE_LSECRXSA(idx), ctrl);
+
+       return 0;
+}
+
+/* restore n-tuple filter */
+static inline void
+ixgbe_ntuple_filter_restore(struct rte_eth_dev *dev)
+{
+       struct ixgbe_filter_info *filter_info =
+               IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
+       struct ixgbe_5tuple_filter *node;
+
+       TAILQ_FOREACH(node, &filter_info->fivetuple_list, entries) {
+               ixgbe_inject_5tuple_filter(dev, node);
+       }
+}
+
+/* restore ethernet type filter */
+static inline void
+ixgbe_ethertype_filter_restore(struct rte_eth_dev *dev)
+{
+       struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct ixgbe_filter_info *filter_info =
+               IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
+       int i;
+
+       for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) {
+               if (filter_info->ethertype_mask & (1 << i)) {
+                       IXGBE_WRITE_REG(hw, IXGBE_ETQF(i),
+                                       filter_info->ethertype_filters[i].etqf);
+                       IXGBE_WRITE_REG(hw, IXGBE_ETQS(i),
+                                       filter_info->ethertype_filters[i].etqs);
+                       IXGBE_WRITE_FLUSH(hw);
+               }
+       }
+}
+
+/* restore SYN filter */
+static inline void
+ixgbe_syn_filter_restore(struct rte_eth_dev *dev)
+{
+       struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct ixgbe_filter_info *filter_info =
+               IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
+       uint32_t synqf;
+
+       synqf = filter_info->syn_info;
+
+       if (synqf & IXGBE_SYN_FILTER_ENABLE) {
+               IXGBE_WRITE_REG(hw, IXGBE_SYNQF, synqf);
+               IXGBE_WRITE_FLUSH(hw);
+       }
+}
+
+/* restore L2 tunnel filter */
+static inline void
+ixgbe_l2_tn_filter_restore(struct rte_eth_dev *dev)
+{
+       struct ixgbe_l2_tn_info *l2_tn_info =
+               IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private);
+       struct ixgbe_l2_tn_filter *node;
+       struct rte_eth_l2_tunnel_conf l2_tn_conf;
+
+       TAILQ_FOREACH(node, &l2_tn_info->l2_tn_list, entries) {
+               l2_tn_conf.l2_tunnel_type = node->key.l2_tn_type;
+               l2_tn_conf.tunnel_id      = node->key.tn_id;
+               l2_tn_conf.pool           = node->pool;
+               (void)ixgbe_dev_l2_tunnel_filter_add(dev, &l2_tn_conf, TRUE);
+       }
+}
+
+static int
+ixgbe_filter_restore(struct rte_eth_dev *dev)
+{
+       ixgbe_ntuple_filter_restore(dev);
+       ixgbe_ethertype_filter_restore(dev);
+       ixgbe_syn_filter_restore(dev);
+       ixgbe_fdir_filter_restore(dev);
+       ixgbe_l2_tn_filter_restore(dev);
+
+       return 0;
+}
+
+static void
+ixgbe_l2_tunnel_conf(struct rte_eth_dev *dev)
+{
+       struct ixgbe_l2_tn_info *l2_tn_info =
+               IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private);
+       struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       if (l2_tn_info->e_tag_en)
+               (void)ixgbe_e_tag_enable(hw);
+
+       if (l2_tn_info->e_tag_fwd_en)
+               (void)ixgbe_e_tag_forwarding_en_dis(dev, 1);
+
+       (void)ixgbe_update_e_tag_eth_type(hw, l2_tn_info->e_tag_ether_type);
+}
+
+/* remove all the n-tuple filters */
+void
+ixgbe_clear_all_ntuple_filter(struct rte_eth_dev *dev)
+{
+       struct ixgbe_filter_info *filter_info =
+               IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
+       struct ixgbe_5tuple_filter *p_5tuple;
+
+       while ((p_5tuple = TAILQ_FIRST(&filter_info->fivetuple_list)))
+               ixgbe_remove_5tuple_filter(dev, p_5tuple);
+}
+
+/* remove all the ether type filters */
+void
+ixgbe_clear_all_ethertype_filter(struct rte_eth_dev *dev)
+{
+       struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct ixgbe_filter_info *filter_info =
+               IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
+       int i;
+
+       for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) {
+               if (filter_info->ethertype_mask & (1 << i) &&
+                   !filter_info->ethertype_filters[i].conf) {
+                       (void)ixgbe_ethertype_filter_remove(filter_info,
+                                                           (uint8_t)i);
+                       IXGBE_WRITE_REG(hw, IXGBE_ETQF(i), 0);
+                       IXGBE_WRITE_REG(hw, IXGBE_ETQS(i), 0);
+                       IXGBE_WRITE_FLUSH(hw);
+               }
+       }
+}
+
+/* remove the SYN filter */
+void
+ixgbe_clear_syn_filter(struct rte_eth_dev *dev)
+{
+       struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct ixgbe_filter_info *filter_info =
+               IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
+
+       if (filter_info->syn_info & IXGBE_SYN_FILTER_ENABLE) {
+               filter_info->syn_info = 0;
+
+               IXGBE_WRITE_REG(hw, IXGBE_SYNQF, 0);
+               IXGBE_WRITE_FLUSH(hw);
+       }
+}
+
+/* remove all the L2 tunnel filters */
+int
+ixgbe_clear_all_l2_tn_filter(struct rte_eth_dev *dev)
+{
+       struct ixgbe_l2_tn_info *l2_tn_info =
+               IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private);
+       struct ixgbe_l2_tn_filter *l2_tn_filter;
+       struct rte_eth_l2_tunnel_conf l2_tn_conf;
+       int ret = 0;
+
+       while ((l2_tn_filter = TAILQ_FIRST(&l2_tn_info->l2_tn_list))) {
+               l2_tn_conf.l2_tunnel_type = l2_tn_filter->key.l2_tn_type;
+               l2_tn_conf.tunnel_id      = l2_tn_filter->key.tn_id;
+               l2_tn_conf.pool           = l2_tn_filter->pool;
+               ret = ixgbe_dev_l2_tunnel_filter_del(dev, &l2_tn_conf);
+               if (ret < 0)
+                       return ret;
+       }
+
+       return 0;
+}
 
-PMD_REGISTER_DRIVER(rte_ixgbe_driver, ixgbe);
-DRIVER_REGISTER_PCI_TABLE(ixgbe, pci_id_ixgbe_map);
-PMD_REGISTER_DRIVER(rte_ixgbevf_driver, ixgbevf);
-DRIVER_REGISTER_PCI_TABLE(ixgbevf, pci_id_ixgbevf_map);
+RTE_PMD_REGISTER_PCI(net_ixgbe, rte_ixgbe_pmd.pci_drv);
+RTE_PMD_REGISTER_PCI_TABLE(net_ixgbe, pci_id_ixgbe_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_ixgbe, "* igb_uio | uio_pci_generic | vfio");
+RTE_PMD_REGISTER_PCI(net_ixgbe_vf, rte_ixgbevf_pmd.pci_drv);
+RTE_PMD_REGISTER_PCI_TABLE(net_ixgbe_vf, pci_id_ixgbevf_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_ixgbe_vf, "* igb_uio | vfio");
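
Note on the new MACsec API above: the rte_pmd_ixgbe_macsec_* calls are ixgbe-specific entry points meant to be driven from the application once the port is configured. A minimal usage sketch follows, assuming the declarations come from rte_pmd_ixgbe.h as the includes in this patch suggest; the MAC addresses, port identifier, key and error handling below are illustrative only and not taken from this patch.

    static int
    macsec_setup_sketch(uint8_t port)
    {
        uint8_t local_mac[6]  = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
        uint8_t remote_mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x66 };
        uint8_t key[16] = { 0 };          /* 128-bit SAK, placeholder */
        int ret;

        /* encrypt (en = 1) with replay protection (rp = 1) */
        ret = rte_pmd_ixgbe_macsec_enable(port, 1, 1);
        if (ret != 0)
            return ret;

        /* program the TX and RX secure channels */
        rte_pmd_ixgbe_macsec_config_txsc(port, local_mac);
        rte_pmd_ixgbe_macsec_config_rxsc(port, remote_mac, 1 /* PI */);

        /* SA index 0, association number 0, initial packet number 1 */
        ret = rte_pmd_ixgbe_macsec_select_txsa(port, 0, 0, 1, key);
        if (ret == 0)
            ret = rte_pmd_ixgbe_macsec_select_rxsa(port, 0, 0, 1, key);
        return ret;
    }
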
index 4ff6338..680d5d9 100644 (file)
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
 #include "base/ixgbe_dcb_82598.h"
 #include "ixgbe_bypass.h"
 #include <rte_time.h>
+#include <rte_hash.h>
 
 /* need update link, bit flag */
 #define IXGBE_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0)
 #define IXGBE_FLAG_MAILBOX          (uint32_t)(1 << 1)
 #define IXGBE_FLAG_PHY_INTERRUPT    (uint32_t)(1 << 2)
+#define IXGBE_FLAG_MACSEC           (uint32_t)(1 << 3)
 
 /*
  * Defines that were not part of ixgbe_type.h as they are not used by the
 #define IXGBE_MISC_VEC_ID               RTE_INTR_VEC_ZERO_OFFSET
 #define IXGBE_RX_VEC_START              RTE_INTR_VEC_RXTX_OFFSET
 
+#define IXGBE_SECTX_MINSECIFG_MASK      0x0000000F
+
+#define IXGBE_MACSEC_PNTHRSH            0xFFFFFE00
+
+#define IXGBE_MAX_FDIR_FILTER_NUM       (1024 * 32)
+#define IXGBE_MAX_L2_TN_FILTER_NUM      128
+
 /*
  * Information about the fdir mode.
  */
-
 struct ixgbe_hw_fdir_mask {
        uint16_t vlan_tci_mask;
        uint32_t src_ipv4_mask;
@@ -148,6 +156,28 @@ struct ixgbe_hw_fdir_mask {
        uint8_t  tunnel_type_mask;
 };
 
+struct ixgbe_fdir_filter {
+       TAILQ_ENTRY(ixgbe_fdir_filter) entries;
+       union ixgbe_atr_input ixgbe_fdir; /* key of fdir filter*/
+       uint32_t fdirflags; /* drop or forward */
+       uint32_t fdirhash; /* hash value for fdir */
+       uint8_t queue; /* assigned rx queue */
+};
+
+/* list of fdir filters */
+TAILQ_HEAD(ixgbe_fdir_filter_list, ixgbe_fdir_filter);
+
+struct ixgbe_fdir_rule {
+       struct ixgbe_hw_fdir_mask mask;
+       union ixgbe_atr_input ixgbe_fdir; /* key of fdir filter*/
+       bool b_spec; /* If TRUE, ixgbe_fdir, fdirflags, queue have meaning. */
+       bool b_mask; /* If TRUE, mask has meaning. */
+       enum rte_fdir_mode mode; /* IP, MAC VLAN, Tunnel */
+       uint32_t fdirflags; /* drop or forward */
+       uint32_t soft_id; /* a unique value for this rule */
+       uint8_t queue; /* assigned rx queue */
+};
+
 struct ixgbe_hw_fdir_info {
        struct ixgbe_hw_fdir_mask mask;
        uint8_t     flex_bytes_offset;
@@ -159,12 +189,19 @@ struct ixgbe_hw_fdir_info {
        uint64_t    remove;
        uint64_t    f_add;
        uint64_t    f_remove;
+       struct ixgbe_fdir_filter_list fdir_list; /* filter list*/
+       /* store the pointers of the filters, index is the hash value. */
+       struct ixgbe_fdir_filter **hash_map;
+       struct rte_hash *hash_handle; /* cuckoo hash handler */
+       bool mask_added; /* If already got mask from consistent filter */
 };
 
 /* structure for interrupt relative data */
 struct ixgbe_interrupt {
        uint32_t flags;
        uint32_t mask;
+       /* to save the original mask during the delayed handler */
+       uint32_t mask_original;
 };
 
 struct ixgbe_stat_mapping_registers {
@@ -252,16 +289,131 @@ struct ixgbe_5tuple_filter {
        (RTE_ALIGN(IXGBE_MAX_FTQF_FILTERS, (sizeof(uint32_t) * NBBY)) / \
         (sizeof(uint32_t) * NBBY))
 
+struct ixgbe_ethertype_filter {
+       uint16_t ethertype;
+       uint32_t etqf;
+       uint32_t etqs;
+       /**
+        * If this filter is added by configuration,
+        * it should not be removed.
+        */
+       bool     conf;
+};
+
 /*
  * Structure to store filters' info.
  */
 struct ixgbe_filter_info {
        uint8_t ethertype_mask;  /* Bit mask for every used ethertype filter */
        /* store used ethertype filters*/
-       uint16_t ethertype_filters[IXGBE_MAX_ETQF_FILTERS];
+       struct ixgbe_ethertype_filter ethertype_filters[IXGBE_MAX_ETQF_FILTERS];
        /* Bit mask for every used 5tuple filter */
        uint32_t fivetuple_mask[IXGBE_5TUPLE_ARRAY_SIZE];
        struct ixgbe_5tuple_filter_list fivetuple_list;
+       /* store the SYN filter info */
+       uint32_t syn_info;
+};
+
+struct ixgbe_l2_tn_key {
+       enum rte_eth_tunnel_type          l2_tn_type;
+       uint32_t                          tn_id;
+};
+
+struct ixgbe_l2_tn_filter {
+       TAILQ_ENTRY(ixgbe_l2_tn_filter)    entries;
+       struct ixgbe_l2_tn_key             key;
+       uint32_t                           pool;
+};
+
+TAILQ_HEAD(ixgbe_l2_tn_filter_list, ixgbe_l2_tn_filter);
+
+struct ixgbe_l2_tn_info {
+       struct ixgbe_l2_tn_filter_list      l2_tn_list;
+       struct ixgbe_l2_tn_filter         **hash_map;
+       struct rte_hash                    *hash_handle;
+       bool e_tag_en; /* e-tag enabled */
+       bool e_tag_fwd_en; /* e-tag based forwarding enabled */
+       uint16_t e_tag_ether_type; /* ether type for e-tag */
+};
+
+struct rte_flow {
+       enum rte_filter_type filter_type;
+       void *rule;
+};
+/* ntuple filter list structure */
+struct ixgbe_ntuple_filter_ele {
+       TAILQ_ENTRY(ixgbe_ntuple_filter_ele) entries;
+       struct rte_eth_ntuple_filter filter_info;
+};
+/* ethertype filter list structure */
+struct ixgbe_ethertype_filter_ele {
+       TAILQ_ENTRY(ixgbe_ethertype_filter_ele) entries;
+       struct rte_eth_ethertype_filter filter_info;
+};
+/* syn filter list structure */
+struct ixgbe_eth_syn_filter_ele {
+       TAILQ_ENTRY(ixgbe_eth_syn_filter_ele) entries;
+       struct rte_eth_syn_filter filter_info;
+};
+/* fdir filter list structure */
+struct ixgbe_fdir_rule_ele {
+       TAILQ_ENTRY(ixgbe_fdir_rule_ele) entries;
+       struct ixgbe_fdir_rule filter_info;
+};
+/* l2_tunnel filter list structure */
+struct ixgbe_eth_l2_tunnel_conf_ele {
+       TAILQ_ENTRY(ixgbe_eth_l2_tunnel_conf_ele) entries;
+       struct rte_eth_l2_tunnel_conf filter_info;
+};
+/* ixgbe_flow memory list structure */
+struct ixgbe_flow_mem {
+       TAILQ_ENTRY(ixgbe_flow_mem) entries;
+       struct rte_flow *flow;
+};
+
+TAILQ_HEAD(ixgbe_ntuple_filter_list, ixgbe_ntuple_filter_ele);
+struct ixgbe_ntuple_filter_list filter_ntuple_list;
+TAILQ_HEAD(ixgbe_ethertype_filter_list, ixgbe_ethertype_filter_ele);
+struct ixgbe_ethertype_filter_list filter_ethertype_list;
+TAILQ_HEAD(ixgbe_syn_filter_list, ixgbe_eth_syn_filter_ele);
+struct ixgbe_syn_filter_list filter_syn_list;
+TAILQ_HEAD(ixgbe_fdir_rule_filter_list, ixgbe_fdir_rule_ele);
+struct ixgbe_fdir_rule_filter_list filter_fdir_list;
+TAILQ_HEAD(ixgbe_l2_tunnel_filter_list, ixgbe_eth_l2_tunnel_conf_ele);
+struct ixgbe_l2_tunnel_filter_list filter_l2_tunnel_list;
+TAILQ_HEAD(ixgbe_flow_mem_list, ixgbe_flow_mem);
+struct ixgbe_flow_mem_list ixgbe_flow_list;
+
+/*
+ * Statistics counters collected by the MACsec
+ */
+struct ixgbe_macsec_stats {
+       /* TX port statistics */
+       uint64_t out_pkts_untagged;
+       uint64_t out_pkts_encrypted;
+       uint64_t out_pkts_protected;
+       uint64_t out_octets_encrypted;
+       uint64_t out_octets_protected;
+
+       /* RX port statistics */
+       uint64_t in_pkts_untagged;
+       uint64_t in_pkts_badtag;
+       uint64_t in_pkts_nosci;
+       uint64_t in_pkts_unknownsci;
+       uint64_t in_octets_decrypted;
+       uint64_t in_octets_validated;
+
+       /* RX SC statistics */
+       uint64_t in_pkts_unchecked;
+       uint64_t in_pkts_delayed;
+       uint64_t in_pkts_late;
+
+       /* RX SA statistics */
+       uint64_t in_pkts_ok;
+       uint64_t in_pkts_invalid;
+       uint64_t in_pkts_notvalid;
+       uint64_t in_pkts_unusedsa;
+       uint64_t in_pkts_notusingsa;
 };
 
 /*
@@ -270,6 +422,7 @@ struct ixgbe_filter_info {
 struct ixgbe_adapter {
        struct ixgbe_hw             hw;
        struct ixgbe_hw_stats       stats;
+       struct ixgbe_macsec_stats   macsec_stats;
        struct ixgbe_hw_fdir_info   fdir;
        struct ixgbe_interrupt      intr;
        struct ixgbe_stat_mapping_registers stat_mappings;
@@ -283,6 +436,7 @@ struct ixgbe_adapter {
        struct ixgbe_bypass_info    bps;
 #endif /* RTE_NIC_BYPASS */
        struct ixgbe_filter_info    filter;
+       struct ixgbe_l2_tn_info     l2_tn;
 
        bool rx_bulk_alloc_allowed;
        bool rx_vec_allowed;
@@ -291,12 +445,18 @@ struct ixgbe_adapter {
        struct rte_timecounter      tx_tstamp_tc;
 };
 
+#define IXGBE_DEV_TO_PCI(eth_dev) \
+       RTE_DEV_TO_PCI((eth_dev)->device)
+
 #define IXGBE_DEV_PRIVATE_TO_HW(adapter)\
        (&((struct ixgbe_adapter *)adapter)->hw)
 
 #define IXGBE_DEV_PRIVATE_TO_STATS(adapter) \
        (&((struct ixgbe_adapter *)adapter)->stats)
 
+#define IXGBE_DEV_PRIVATE_TO_MACSEC_STATS(adapter) \
+       (&((struct ixgbe_adapter *)adapter)->macsec_stats)
+
 #define IXGBE_DEV_PRIVATE_TO_INTR(adapter) \
        (&((struct ixgbe_adapter *)adapter)->intr)
 
@@ -327,6 +487,9 @@ struct ixgbe_adapter {
 #define IXGBE_DEV_PRIVATE_TO_FILTER_INFO(adapter) \
        (&((struct ixgbe_adapter *)adapter)->filter)
 
+#define IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(adapter) \
+       (&((struct ixgbe_adapter *)adapter)->l2_tn)
+
 /*
  * RX/TX function prototypes
  */
@@ -396,6 +559,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
                uint16_t nb_pkts);
 
+uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+               uint16_t nb_pkts);
+
 int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
                              struct rte_eth_rss_conf *rss_conf);
 
@@ -412,10 +578,31 @@ uint32_t ixgbe_rssrk_reg_get(enum ixgbe_mac_type mac_type, uint8_t i);
 
 bool ixgbe_rss_update_sp(enum ixgbe_mac_type mac_type);
 
+int ixgbe_add_del_ntuple_filter(struct rte_eth_dev *dev,
+                       struct rte_eth_ntuple_filter *filter,
+                       bool add);
+int ixgbe_add_del_ethertype_filter(struct rte_eth_dev *dev,
+                       struct rte_eth_ethertype_filter *filter,
+                       bool add);
+int ixgbe_syn_filter_set(struct rte_eth_dev *dev,
+                       struct rte_eth_syn_filter *filter,
+                       bool add);
+int
+ixgbe_dev_l2_tunnel_filter_add(struct rte_eth_dev *dev,
+                              struct rte_eth_l2_tunnel_conf *l2_tunnel,
+                              bool restore);
+int
+ixgbe_dev_l2_tunnel_filter_del(struct rte_eth_dev *dev,
+                              struct rte_eth_l2_tunnel_conf *l2_tunnel);
+void ixgbe_filterlist_flush(void);
 /*
  * Flow director function prototypes
  */
 int ixgbe_fdir_configure(struct rte_eth_dev *dev);
+int ixgbe_fdir_set_input_mask(struct rte_eth_dev *dev);
+int ixgbe_fdir_filter_program(struct rte_eth_dev *dev,
+                             struct ixgbe_fdir_rule *rule,
+                             bool del, bool update);
 
 void ixgbe_configure_dcb(struct rte_eth_dev *dev);
 
@@ -442,4 +629,69 @@ uint32_t ixgbe_convert_vm_rx_mask_to_val(uint16_t rx_mask, uint32_t orig_val);
 
 int ixgbe_fdir_ctrl_func(struct rte_eth_dev *dev,
                        enum rte_filter_op filter_op, void *arg);
+void ixgbe_fdir_filter_restore(struct rte_eth_dev *dev);
+int ixgbe_clear_all_fdir_filter(struct rte_eth_dev *dev);
+
+extern const struct rte_flow_ops ixgbe_flow_ops;
+
+void ixgbe_clear_all_ethertype_filter(struct rte_eth_dev *dev);
+void ixgbe_clear_all_ntuple_filter(struct rte_eth_dev *dev);
+void ixgbe_clear_syn_filter(struct rte_eth_dev *dev);
+int ixgbe_clear_all_l2_tn_filter(struct rte_eth_dev *dev);
+
+int ixgbe_disable_sec_tx_path_generic(struct ixgbe_hw *hw);
+
+int ixgbe_enable_sec_tx_path_generic(struct ixgbe_hw *hw);
+
+static inline int
+ixgbe_ethertype_filter_lookup(struct ixgbe_filter_info *filter_info,
+                             uint16_t ethertype)
+{
+       int i;
+
+       for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) {
+               if (filter_info->ethertype_filters[i].ethertype == ethertype &&
+                   (filter_info->ethertype_mask & (1 << i)))
+                       return i;
+       }
+       return -1;
+}
+
+static inline int
+ixgbe_ethertype_filter_insert(struct ixgbe_filter_info *filter_info,
+                             struct ixgbe_ethertype_filter *ethertype_filter)
+{
+       int i;
+
+       for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) {
+               if (!(filter_info->ethertype_mask & (1 << i))) {
+                       filter_info->ethertype_mask |= 1 << i;
+                       filter_info->ethertype_filters[i].ethertype =
+                               ethertype_filter->ethertype;
+                       filter_info->ethertype_filters[i].etqf =
+                               ethertype_filter->etqf;
+                       filter_info->ethertype_filters[i].etqs =
+                               ethertype_filter->etqs;
+                       filter_info->ethertype_filters[i].conf =
+                               ethertype_filter->conf;
+                       return i;
+               }
+       }
+       return -1;
+}
+
+static inline int
+ixgbe_ethertype_filter_remove(struct ixgbe_filter_info *filter_info,
+                             uint8_t idx)
+{
+       if (idx >= IXGBE_MAX_ETQF_FILTERS)
+               return -1;
+       filter_info->ethertype_mask &= ~(1 << idx);
+       filter_info->ethertype_filters[idx].ethertype = 0;
+       filter_info->ethertype_filters[idx].etqf = 0;
+       filter_info->ethertype_filters[idx].etqs = 0;
+       filter_info->ethertype_filters[idx].conf = FALSE;
+       return idx;
+}
+
 #endif /* _IXGBE_ETHDEV_H_ */
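
The inline ethertype helpers above only maintain the driver's software shadow of the ETQF table; the caller that gets a slot back from ixgbe_ethertype_filter_insert() is expected to mirror it into hardware, exactly as ixgbe_ethertype_filter_restore() replays it. A sketch of that add path follows, assuming it runs in PMD context where hw and filter_info are already resolved from dev_private; the function name is illustrative and the ETQF/ETQS bit macros are the ones from the base ixgbe_type.h.

    static int
    ethertype_filter_add_sketch(struct ixgbe_hw *hw,
                                struct ixgbe_filter_info *filter_info,
                                uint16_t ethertype, uint16_t queue)
    {
        struct ixgbe_ethertype_filter new_filter;
        int idx;

        /* one ETQF slot per ethertype: refuse duplicates */
        if (ixgbe_ethertype_filter_lookup(filter_info, ethertype) >= 0)
            return -EEXIST;

        new_filter.ethertype = ethertype;
        new_filter.etqf = IXGBE_ETQF_FILTER_EN | ethertype;
        new_filter.etqs = ((uint32_t)queue << IXGBE_ETQS_RX_QUEUE_SHIFT) |
                          IXGBE_ETQS_QUEUE_EN;
        new_filter.conf = FALSE;   /* removable later, unlike static config */

        idx = ixgbe_ethertype_filter_insert(filter_info, &new_filter);
        if (idx < 0)
            return -ENOSPC;        /* all IXGBE_MAX_ETQF_FILTERS slots used */

        /* mirror the chosen slot into hardware */
        IXGBE_WRITE_REG(hw, IXGBE_ETQF(idx), new_filter.etqf);
        IXGBE_WRITE_REG(hw, IXGBE_ETQS(idx), new_filter.etqs);
        IXGBE_WRITE_FLUSH(hw);
        return 0;
    }
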
index c38ac97..3b9d60c 100644 (file)
@@ -43,6 +43,7 @@
 #include <rte_pci.h>
 #include <rte_ether.h>
 #include <rte_ethdev.h>
+#include <rte_malloc.h>
 
 #include "ixgbe_logs.h"
 #include "base/ixgbe_api.h"
 static int fdir_erase_filter_82599(struct ixgbe_hw *hw, uint32_t fdirhash);
 static int fdir_set_input_mask(struct rte_eth_dev *dev,
                               const struct rte_eth_fdir_masks *input_mask);
-static int fdir_set_input_mask_82599(struct rte_eth_dev *dev,
-               const struct rte_eth_fdir_masks *input_mask);
-static int fdir_set_input_mask_x550(struct rte_eth_dev *dev,
-                                   const struct rte_eth_fdir_masks *input_mask);
+static int fdir_set_input_mask_82599(struct rte_eth_dev *dev);
+static int fdir_set_input_mask_x550(struct rte_eth_dev *dev);
 static int ixgbe_set_fdir_flex_conf(struct rte_eth_dev *dev,
                const struct rte_eth_fdir_flex_conf *conf, uint32_t *fdirctrl);
 static int fdir_enable_82599(struct ixgbe_hw *hw, uint32_t fdirctrl);
@@ -248,13 +247,8 @@ configure_fdir_flags(const struct rte_fdir_conf *conf, uint32_t *fdirctrl)
                return -EINVAL;
        };
 
-#define TREX_PATCH
-#ifdef TREX_PATCH
-       *fdirctrl |= (conf->flexbytes_offset << IXGBE_FDIRCTRL_FLEX_SHIFT);
-#else
        *fdirctrl |= (IXGBE_DEFAULT_FLEXBYTES_OFFSET / sizeof(uint16_t)) <<
                     IXGBE_FDIRCTRL_FLEX_SHIFT;
-#endif
 
        if (conf->mode >= RTE_FDIR_MODE_PERFECT &&
            conf->mode <= RTE_FDIR_MODE_PERFECT_TUNNEL) {
@@ -299,8 +293,7 @@ reverse_fdir_bitmasks(uint16_t hi_dword, uint16_t lo_dword)
  * but makes use of the rte_fdir_masks structure to see which bits to set.
  */
 static int
-fdir_set_input_mask_82599(struct rte_eth_dev *dev,
-               const struct rte_eth_fdir_masks *input_mask)
+fdir_set_input_mask_82599(struct rte_eth_dev *dev)
 {
        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct ixgbe_hw_fdir_info *info =
@@ -312,8 +305,6 @@ fdir_set_input_mask_82599(struct rte_eth_dev *dev,
        uint32_t fdirm = IXGBE_FDIRM_POOL | IXGBE_FDIRM_DIPv6 | IXGBE_FDIRM_FLEX;
        uint32_t fdirtcpm;  /* TCP source and destination port masks. */
        uint32_t fdiripv6m; /* IPv6 source and destination masks. */
-       uint16_t dst_ipv6m = 0;
-       uint16_t src_ipv6m = 0;
        volatile uint32_t *reg;
 
        PMD_INIT_FUNC_TRACE();
@@ -324,31 +315,30 @@ fdir_set_input_mask_82599(struct rte_eth_dev *dev,
         * a VLAN of 0 is unspecified, so mask that out as well.  L4type
         * cannot be masked out in this implementation.
         */
-       if (input_mask->dst_port_mask == 0 && input_mask->src_port_mask == 0)
+       if (info->mask.dst_port_mask == 0 && info->mask.src_port_mask == 0)
                /* use the L4 protocol mask for raw IPv4/IPv6 traffic */
                fdirm |= IXGBE_FDIRM_L4P;
 
-       if (input_mask->vlan_tci_mask == rte_cpu_to_be_16(0x0FFF))
+       if (info->mask.vlan_tci_mask == rte_cpu_to_be_16(0x0FFF))
                /* mask VLAN Priority */
                fdirm |= IXGBE_FDIRM_VLANP;
-       else if (input_mask->vlan_tci_mask == rte_cpu_to_be_16(0xE000))
+       else if (info->mask.vlan_tci_mask == rte_cpu_to_be_16(0xE000))
                /* mask VLAN ID */
                fdirm |= IXGBE_FDIRM_VLANID;
-       else if (input_mask->vlan_tci_mask == 0)
+       else if (info->mask.vlan_tci_mask == 0)
                /* mask VLAN ID and Priority */
                fdirm |= IXGBE_FDIRM_VLANID | IXGBE_FDIRM_VLANP;
-       else if (input_mask->vlan_tci_mask != rte_cpu_to_be_16(0xEFFF)) {
+       else if (info->mask.vlan_tci_mask != rte_cpu_to_be_16(0xEFFF)) {
                PMD_INIT_LOG(ERR, "invalid vlan_tci_mask");
                return -EINVAL;
        }
-       info->mask.vlan_tci_mask = input_mask->vlan_tci_mask;
 
        IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm);
 
        /* store the TCP/UDP port masks, bit reversed from port layout */
        fdirtcpm = reverse_fdir_bitmasks(
-                       rte_be_to_cpu_16(input_mask->dst_port_mask),
-                       rte_be_to_cpu_16(input_mask->src_port_mask));
+                       rte_be_to_cpu_16(info->mask.dst_port_mask),
+                       rte_be_to_cpu_16(info->mask.src_port_mask));
 
        /* write all the same so that UDP, TCP and SCTP use the same mask
         * (little-endian)
@@ -356,30 +346,23 @@ fdir_set_input_mask_82599(struct rte_eth_dev *dev,
        IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, ~fdirtcpm);
        IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM, ~fdirtcpm);
        IXGBE_WRITE_REG(hw, IXGBE_FDIRSCTPM, ~fdirtcpm);
-       info->mask.src_port_mask = input_mask->src_port_mask;
-       info->mask.dst_port_mask = input_mask->dst_port_mask;
 
        /* Store source and destination IPv4 masks (big-endian),
         * can not use IXGBE_WRITE_REG.
         */
        reg = IXGBE_PCI_REG_ADDR(hw, IXGBE_FDIRSIP4M);
-       *reg = ~(input_mask->ipv4_mask.src_ip);
+       *reg = ~(info->mask.src_ipv4_mask);
        reg = IXGBE_PCI_REG_ADDR(hw, IXGBE_FDIRDIP4M);
-       *reg = ~(input_mask->ipv4_mask.dst_ip);
-       info->mask.src_ipv4_mask = input_mask->ipv4_mask.src_ip;
-       info->mask.dst_ipv4_mask = input_mask->ipv4_mask.dst_ip;
+       *reg = ~(info->mask.dst_ipv4_mask);
 
        if (dev->data->dev_conf.fdir_conf.mode == RTE_FDIR_MODE_SIGNATURE) {
                /*
                 * Store source and destination IPv6 masks (bit reversed)
                 */
-               IPV6_ADDR_TO_MASK(input_mask->ipv6_mask.src_ip, src_ipv6m);
-               IPV6_ADDR_TO_MASK(input_mask->ipv6_mask.dst_ip, dst_ipv6m);
-               fdiripv6m = (dst_ipv6m << 16) | src_ipv6m;
+               fdiripv6m = (info->mask.dst_ipv6_mask << 16) |
+                           info->mask.src_ipv6_mask;
 
                IXGBE_WRITE_REG(hw, IXGBE_FDIRIP6M, ~fdiripv6m);
-               info->mask.src_ipv6_mask = src_ipv6m;
-               info->mask.dst_ipv6_mask = dst_ipv6m;
        }
 
        return IXGBE_SUCCESS;
@@ -390,8 +373,7 @@ fdir_set_input_mask_82599(struct rte_eth_dev *dev,
  * but makes use of the rte_fdir_masks structure to see which bits to set.
  */
 static int
-fdir_set_input_mask_x550(struct rte_eth_dev *dev,
-                        const struct rte_eth_fdir_masks *input_mask)
+fdir_set_input_mask_x550(struct rte_eth_dev *dev)
 {
        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct ixgbe_hw_fdir_info *info =
@@ -414,20 +396,19 @@ fdir_set_input_mask_x550(struct rte_eth_dev *dev,
        /* some bits must be set for mac vlan or tunnel mode */
        fdirm |= IXGBE_FDIRM_L4P | IXGBE_FDIRM_L3P;
 
-       if (input_mask->vlan_tci_mask == rte_cpu_to_be_16(0x0FFF))
+       if (info->mask.vlan_tci_mask == rte_cpu_to_be_16(0x0FFF))
                /* mask VLAN Priority */
                fdirm |= IXGBE_FDIRM_VLANP;
-       else if (input_mask->vlan_tci_mask == rte_cpu_to_be_16(0xE000))
+       else if (info->mask.vlan_tci_mask == rte_cpu_to_be_16(0xE000))
                /* mask VLAN ID */
                fdirm |= IXGBE_FDIRM_VLANID;
-       else if (input_mask->vlan_tci_mask == 0)
+       else if (info->mask.vlan_tci_mask == 0)
                /* mask VLAN ID and Priority */
                fdirm |= IXGBE_FDIRM_VLANID | IXGBE_FDIRM_VLANP;
-       else if (input_mask->vlan_tci_mask != rte_cpu_to_be_16(0xEFFF)) {
+       else if (info->mask.vlan_tci_mask != rte_cpu_to_be_16(0xEFFF)) {
                PMD_INIT_LOG(ERR, "invalid vlan_tci_mask");
                return -EINVAL;
        }
-       info->mask.vlan_tci_mask = input_mask->vlan_tci_mask;
 
        IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm);
 
@@ -437,13 +418,12 @@ fdir_set_input_mask_x550(struct rte_eth_dev *dev,
                fdiripv6m |= IXGBE_FDIRIP6M_TUNNEL_TYPE |
                                IXGBE_FDIRIP6M_TNI_VNI;
 
-       mac_mask = input_mask->mac_addr_byte_mask;
-       fdiripv6m |= (mac_mask << IXGBE_FDIRIP6M_INNER_MAC_SHIFT)
-                       & IXGBE_FDIRIP6M_INNER_MAC;
-       info->mask.mac_addr_byte_mask = input_mask->mac_addr_byte_mask;
-
        if (mode == RTE_FDIR_MODE_PERFECT_TUNNEL) {
-               switch (input_mask->tunnel_type_mask) {
+               mac_mask = info->mask.mac_addr_byte_mask;
+               fdiripv6m |= (mac_mask << IXGBE_FDIRIP6M_INNER_MAC_SHIFT)
+                               & IXGBE_FDIRIP6M_INNER_MAC;
+
+               switch (info->mask.tunnel_type_mask) {
                case 0:
                        /* Mask tunnel type */
                        fdiripv6m |= IXGBE_FDIRIP6M_TUNNEL_TYPE;
@@ -454,10 +434,8 @@ fdir_set_input_mask_x550(struct rte_eth_dev *dev,
                        PMD_INIT_LOG(ERR, "invalid tunnel_type_mask");
                        return -EINVAL;
                }
-               info->mask.tunnel_type_mask =
-                       input_mask->tunnel_type_mask;
 
-               switch (rte_be_to_cpu_32(input_mask->tunnel_id_mask)) {
+               switch (rte_be_to_cpu_32(info->mask.tunnel_id_mask)) {
                case 0x0:
                        /* Mask vxlan id */
                        fdiripv6m |= IXGBE_FDIRIP6M_TNI_VNI;
@@ -471,8 +449,6 @@ fdir_set_input_mask_x550(struct rte_eth_dev *dev,
                        PMD_INIT_LOG(ERR, "invalid tunnel_id_mask");
                        return -EINVAL;
                }
-               info->mask.tunnel_id_mask =
-                       input_mask->tunnel_id_mask;
        }
 
        IXGBE_WRITE_REG(hw, IXGBE_FDIRIP6M, fdiripv6m);
@@ -486,22 +462,90 @@ fdir_set_input_mask_x550(struct rte_eth_dev *dev,
 }
 
 static int
-fdir_set_input_mask(struct rte_eth_dev *dev,
-                   const struct rte_eth_fdir_masks *input_mask)
+ixgbe_fdir_store_input_mask_82599(struct rte_eth_dev *dev,
+                                 const struct rte_eth_fdir_masks *input_mask)
+{
+       struct ixgbe_hw_fdir_info *info =
+               IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private);
+       uint16_t dst_ipv6m = 0;
+       uint16_t src_ipv6m = 0;
+
+       memset(&info->mask, 0, sizeof(struct ixgbe_hw_fdir_mask));
+       info->mask.vlan_tci_mask = input_mask->vlan_tci_mask;
+       info->mask.src_port_mask = input_mask->src_port_mask;
+       info->mask.dst_port_mask = input_mask->dst_port_mask;
+       info->mask.src_ipv4_mask = input_mask->ipv4_mask.src_ip;
+       info->mask.dst_ipv4_mask = input_mask->ipv4_mask.dst_ip;
+       IPV6_ADDR_TO_MASK(input_mask->ipv6_mask.src_ip, src_ipv6m);
+       IPV6_ADDR_TO_MASK(input_mask->ipv6_mask.dst_ip, dst_ipv6m);
+       info->mask.src_ipv6_mask = src_ipv6m;
+       info->mask.dst_ipv6_mask = dst_ipv6m;
+
+       return IXGBE_SUCCESS;
+}
+
+static int
+ixgbe_fdir_store_input_mask_x550(struct rte_eth_dev *dev,
+                                const struct rte_eth_fdir_masks *input_mask)
+{
+       struct ixgbe_hw_fdir_info *info =
+               IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private);
+
+       memset(&info->mask, 0, sizeof(struct ixgbe_hw_fdir_mask));
+       info->mask.vlan_tci_mask = input_mask->vlan_tci_mask;
+       info->mask.mac_addr_byte_mask = input_mask->mac_addr_byte_mask;
+       info->mask.tunnel_type_mask = input_mask->tunnel_type_mask;
+       info->mask.tunnel_id_mask = input_mask->tunnel_id_mask;
+
+       return IXGBE_SUCCESS;
+}
+
+static int
+ixgbe_fdir_store_input_mask(struct rte_eth_dev *dev,
+                           const struct rte_eth_fdir_masks *input_mask)
+{
+       enum rte_fdir_mode mode = dev->data->dev_conf.fdir_conf.mode;
+
+       if (mode >= RTE_FDIR_MODE_SIGNATURE &&
+           mode <= RTE_FDIR_MODE_PERFECT)
+               return ixgbe_fdir_store_input_mask_82599(dev, input_mask);
+       else if (mode >= RTE_FDIR_MODE_PERFECT_MAC_VLAN &&
+                mode <= RTE_FDIR_MODE_PERFECT_TUNNEL)
+               return ixgbe_fdir_store_input_mask_x550(dev, input_mask);
+
+       PMD_DRV_LOG(ERR, "Not supported fdir mode - %d!", mode);
+       return -ENOTSUP;
+}
+
+int
+ixgbe_fdir_set_input_mask(struct rte_eth_dev *dev)
 {
        enum rte_fdir_mode mode = dev->data->dev_conf.fdir_conf.mode;
 
        if (mode >= RTE_FDIR_MODE_SIGNATURE &&
            mode <= RTE_FDIR_MODE_PERFECT)
-               return fdir_set_input_mask_82599(dev, input_mask);
+               return fdir_set_input_mask_82599(dev);
        else if (mode >= RTE_FDIR_MODE_PERFECT_MAC_VLAN &&
                 mode <= RTE_FDIR_MODE_PERFECT_TUNNEL)
-               return fdir_set_input_mask_x550(dev, input_mask);
+               return fdir_set_input_mask_x550(dev);
 
        PMD_DRV_LOG(ERR, "Not supported fdir mode - %d!", mode);
        return -ENOTSUP;
 }
 
+static int
+fdir_set_input_mask(struct rte_eth_dev *dev,
+                   const struct rte_eth_fdir_masks *input_mask)
+{
+       int ret;
+
+       ret = ixgbe_fdir_store_input_mask(dev, input_mask);
+       if (ret)
+               return ret;
+
+       return ixgbe_fdir_set_input_mask(dev);
+}
+
 /*
  * ixgbe_check_fdir_flex_conf -check if the flex payload and mask configuration
  * arguments are valid
@@ -520,7 +564,7 @@ ixgbe_set_fdir_flex_conf(struct rte_eth_dev *dev,
        uint16_t i;
 
        fdirm = IXGBE_READ_REG(hw, IXGBE_FDIRM);
-#ifndef TREX_PATCH
+
        if (conf == NULL) {
                PMD_DRV_LOG(ERR, "NULL pointer.");
                return -EINVAL;
@@ -561,11 +605,6 @@ ixgbe_set_fdir_flex_conf(struct rte_eth_dev *dev,
                        return -EINVAL;
                }
        }
-#else
-        fdirm &= ~IXGBE_FDIRM_FLEX;
-        flexbytes = 1;
-        // fdirctrl gets flex_bytes_offset in configure_fdir_flags
-#endif
        IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm);
        info->mask.flex_bytes_mask = flexbytes ? UINT16_MAX : 0;
        info->flex_bytes_offset = (uint8_t)((*fdirctrl &
@@ -597,9 +636,6 @@ ixgbe_fdir_configure(struct rte_eth_dev *dev)
            hw->mac.type != ixgbe_mac_X550EM_x &&
            hw->mac.type != ixgbe_mac_X550EM_a &&
            mode != RTE_FDIR_MODE_SIGNATURE &&
-#ifdef TREX_PATCH
-           mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN &&
-#endif
            mode != RTE_FDIR_MODE_PERFECT)
                return -ENOSYS;
 
@@ -1088,31 +1124,110 @@ fdir_erase_filter_82599(struct ixgbe_hw *hw, uint32_t fdirhash)
 
 }
 
-/*
- * ixgbe_add_del_fdir_filter - add or remove a flow diretor filter.
- * @dev: pointer to the structure rte_eth_dev
- * @fdir_filter: fdir filter entry
- * @del: 1 - delete, 0 - add
- * @update: 1 - update
- */
+static inline struct ixgbe_fdir_filter *
+ixgbe_fdir_filter_lookup(struct ixgbe_hw_fdir_info *fdir_info,
+                        union ixgbe_atr_input *key)
+{
+       int ret;
+
+       ret = rte_hash_lookup(fdir_info->hash_handle, (const void *)key);
+       if (ret < 0)
+               return NULL;
+
+       return fdir_info->hash_map[ret];
+}
+
+static inline int
+ixgbe_insert_fdir_filter(struct ixgbe_hw_fdir_info *fdir_info,
+                        struct ixgbe_fdir_filter *fdir_filter)
+{
+       int ret;
+
+       ret = rte_hash_add_key(fdir_info->hash_handle,
+                              &fdir_filter->ixgbe_fdir);
+
+       if (ret < 0) {
+               PMD_DRV_LOG(ERR,
+                           "Failed to insert fdir filter to hash table %d!",
+                           ret);
+               return ret;
+       }
+
+       fdir_info->hash_map[ret] = fdir_filter;
+
+       TAILQ_INSERT_TAIL(&fdir_info->fdir_list, fdir_filter, entries);
+
+       return 0;
+}
+
+static inline int
+ixgbe_remove_fdir_filter(struct ixgbe_hw_fdir_info *fdir_info,
+                        union ixgbe_atr_input *key)
+{
+       int ret;
+       struct ixgbe_fdir_filter *fdir_filter;
+
+       ret = rte_hash_del_key(fdir_info->hash_handle, key);
+
+       if (ret < 0) {
+               PMD_DRV_LOG(ERR, "No such fdir filter to delete %d!", ret);
+               return ret;
+       }
+
+       fdir_filter = fdir_info->hash_map[ret];
+       fdir_info->hash_map[ret] = NULL;
+
+       TAILQ_REMOVE(&fdir_info->fdir_list, fdir_filter, entries);
+       rte_free(fdir_filter);
+
+       return 0;
+}
+
 static int
-ixgbe_add_del_fdir_filter(struct rte_eth_dev *dev,
-                         const struct rte_eth_fdir_filter *fdir_filter,
+ixgbe_interpret_fdir_filter(struct rte_eth_dev *dev,
+                           const struct rte_eth_fdir_filter *fdir_filter,
+                           struct ixgbe_fdir_rule *rule)
+{
+       enum rte_fdir_mode fdir_mode = dev->data->dev_conf.fdir_conf.mode;
+       int err;
+
+       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+
+       err = ixgbe_fdir_filter_to_atr_input(fdir_filter,
+                                            &rule->ixgbe_fdir,
+                                            fdir_mode);
+       if (err)
+               return err;
+
+       rule->mode = fdir_mode;
+       if (fdir_filter->action.behavior == RTE_ETH_FDIR_REJECT)
+               rule->fdirflags = IXGBE_FDIRCMD_DROP;
+       rule->queue = fdir_filter->action.rx_queue;
+       rule->soft_id = fdir_filter->soft_id;
+
+       return 0;
+}
+
+int
+ixgbe_fdir_filter_program(struct rte_eth_dev *dev,
+                         struct ixgbe_fdir_rule *rule,
                          bool del,
                          bool update)
 {
        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t fdircmd_flags;
        uint32_t fdirhash;
-       union ixgbe_atr_input input;
        uint8_t queue;
        bool is_perfect = FALSE;
        int err;
        struct ixgbe_hw_fdir_info *info =
                IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private);
        enum rte_fdir_mode fdir_mode = dev->data->dev_conf.fdir_conf.mode;
+       struct ixgbe_fdir_filter *node;
+       bool add_node = FALSE;
 
-       if (fdir_mode == RTE_FDIR_MODE_NONE)
+       if (fdir_mode == RTE_FDIR_MODE_NONE ||
+           fdir_mode != rule->mode)
                return -ENOTSUP;
 
        /*
@@ -1125,7 +1240,7 @@ ixgbe_add_del_fdir_filter(struct rte_eth_dev *dev,
            (hw->mac.type == ixgbe_mac_X550 ||
             hw->mac.type == ixgbe_mac_X550EM_x ||
             hw->mac.type == ixgbe_mac_X550EM_a) &&
-           (fdir_filter->input.flow_type ==
+           (rule->ixgbe_fdir.formatted.flow_type ==
             RTE_ETH_FLOW_NONFRAG_IPV4_OTHER) &&
            (info->mask.src_port_mask != 0 ||
             info->mask.dst_port_mask != 0)) {
@@ -1139,31 +1254,26 @@ ixgbe_add_del_fdir_filter(struct rte_eth_dev *dev,
            fdir_mode <= RTE_FDIR_MODE_PERFECT_TUNNEL)
                is_perfect = TRUE;
 
-       memset(&input, 0, sizeof(input));
-
-       err = ixgbe_fdir_filter_to_atr_input(fdir_filter, &input,
-                                            fdir_mode);
-       if (err)
-               return err;
-
        if (is_perfect) {
-#ifndef TREX_PATCH
-        // No reason not to use IPV6 in perfect filters. It is working.
-               if (input.formatted.flow_type & IXGBE_ATR_L4TYPE_IPV6_MASK) {
+               if (rule->ixgbe_fdir.formatted.flow_type &
+                   IXGBE_ATR_L4TYPE_IPV6_MASK) {
                        PMD_DRV_LOG(ERR, "IPv6 is not supported in"
                                    " perfect mode!");
                        return -ENOTSUP;
                }
-#endif
-               fdirhash = atr_compute_perfect_hash_82599(&input,
+               fdirhash = atr_compute_perfect_hash_82599(&rule->ixgbe_fdir,
                                                          dev->data->dev_conf.fdir_conf.pballoc);
-               fdirhash |= fdir_filter->soft_id <<
+               fdirhash |= rule->soft_id <<
                        IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
        } else
-               fdirhash = atr_compute_sig_hash_82599(&input,
+               fdirhash = atr_compute_sig_hash_82599(&rule->ixgbe_fdir,
                                                      dev->data->dev_conf.fdir_conf.pballoc);
 
        if (del) {
+               err = ixgbe_remove_fdir_filter(info, &rule->ixgbe_fdir);
+               if (err < 0)
+                       return err;
+
                err = fdir_erase_filter_82599(hw, fdirhash);
                if (err < 0)
                        PMD_DRV_LOG(ERR, "Fail to delete FDIR filter!");
@@ -1173,7 +1283,7 @@ ixgbe_add_del_fdir_filter(struct rte_eth_dev *dev,
        }
        /* add or update an fdir filter*/
        fdircmd_flags = (update) ? IXGBE_FDIRCMD_FILTER_UPDATE : 0;
-       if (fdir_filter->action.behavior == RTE_ETH_FDIR_REJECT) {
+       if (rule->fdirflags & IXGBE_FDIRCMD_DROP) {
                if (is_perfect) {
                        queue = dev->data->dev_conf.fdir_conf.drop_queue;
                        fdircmd_flags |= IXGBE_FDIRCMD_DROP;
@@ -1182,28 +1292,86 @@ ixgbe_add_del_fdir_filter(struct rte_eth_dev *dev,
                                    " signature mode.");
                        return -EINVAL;
                }
-       } else if (fdir_filter->action.behavior == RTE_ETH_FDIR_ACCEPT &&
-                  fdir_filter->action.rx_queue < IXGBE_MAX_RX_QUEUE_NUM)
-               queue = (uint8_t)fdir_filter->action.rx_queue;
+       } else if (rule->queue < IXGBE_MAX_RX_QUEUE_NUM)
+               queue = (uint8_t)rule->queue;
        else
                return -EINVAL;
 
+       node = ixgbe_fdir_filter_lookup(info, &rule->ixgbe_fdir);
+       if (node) {
+               if (update) {
+                       node->fdirflags = fdircmd_flags;
+                       node->fdirhash = fdirhash;
+                       node->queue = queue;
+               } else {
+                       PMD_DRV_LOG(ERR, "Conflict with existing fdir filter!");
+                       return -EINVAL;
+               }
+       } else {
+               add_node = TRUE;
+               node = rte_zmalloc("ixgbe_fdir",
+                                  sizeof(struct ixgbe_fdir_filter),
+                                  0);
+               if (!node)
+                       return -ENOMEM;
+               (void)rte_memcpy(&node->ixgbe_fdir,
+                                &rule->ixgbe_fdir,
+                                sizeof(union ixgbe_atr_input));
+               node->fdirflags = fdircmd_flags;
+               node->fdirhash = fdirhash;
+               node->queue = queue;
+
+               err = ixgbe_insert_fdir_filter(info, node);
+               if (err < 0) {
+                       rte_free(node);
+                       return err;
+               }
+       }
+
        if (is_perfect) {
-               err = fdir_write_perfect_filter_82599(hw, &input, queue,
-                                                     fdircmd_flags, fdirhash,
-                                                     fdir_mode);
+               err = fdir_write_perfect_filter_82599(hw, &rule->ixgbe_fdir,
+                                                     queue, fdircmd_flags,
+                                                     fdirhash, fdir_mode);
        } else {
-               err = fdir_add_signature_filter_82599(hw, &input, queue,
-                                                     fdircmd_flags, fdirhash);
+               err = fdir_add_signature_filter_82599(hw, &rule->ixgbe_fdir,
+                                                     queue, fdircmd_flags,
+                                                     fdirhash);
        }
-       if (err < 0)
+       if (err < 0) {
                PMD_DRV_LOG(ERR, "Fail to add FDIR filter!");
-       else
+
+               if (add_node)
+                       (void)ixgbe_remove_fdir_filter(info, &rule->ixgbe_fdir);
+       } else {
                PMD_DRV_LOG(DEBUG, "Success to add FDIR filter");
+       }
 
        return err;
 }
 
+/* ixgbe_add_del_fdir_filter - add or remove a flow director filter.
+ * @dev: pointer to the structure rte_eth_dev
+ * @fdir_filter: fdir filter entry
+ * @del: 1 - delete, 0 - add
+ * @update: 1 - update
+ */
+static int
+ixgbe_add_del_fdir_filter(struct rte_eth_dev *dev,
+                         const struct rte_eth_fdir_filter *fdir_filter,
+                         bool del,
+                         bool update)
+{
+       struct ixgbe_fdir_rule rule;
+       int err;
+
+       err = ixgbe_interpret_fdir_filter(dev, fdir_filter, &rule);
+
+       if (err)
+               return err;
+
+       return ixgbe_fdir_filter_program(dev, &rule, del, update);
+}
+
 static int
 ixgbe_fdir_flush(struct rte_eth_dev *dev)
 {
@@ -1394,3 +1562,66 @@ ixgbe_fdir_ctrl_func(struct rte_eth_dev *dev,
        }
        return ret;
 }
+
+/* restore flow director filter */
+void
+ixgbe_fdir_filter_restore(struct rte_eth_dev *dev)
+{
+       struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct ixgbe_hw_fdir_info *fdir_info =
+               IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private);
+       struct ixgbe_fdir_filter *node;
+       bool is_perfect = FALSE;
+       enum rte_fdir_mode fdir_mode = dev->data->dev_conf.fdir_conf.mode;
+
+       if (fdir_mode >= RTE_FDIR_MODE_PERFECT &&
+           fdir_mode <= RTE_FDIR_MODE_PERFECT_TUNNEL)
+               is_perfect = TRUE;
+
+       if (is_perfect) {
+               TAILQ_FOREACH(node, &fdir_info->fdir_list, entries) {
+                       (void)fdir_write_perfect_filter_82599(hw,
+                                                             &node->ixgbe_fdir,
+                                                             node->queue,
+                                                             node->fdirflags,
+                                                             node->fdirhash,
+                                                             fdir_mode);
+               }
+       } else {
+               TAILQ_FOREACH(node, &fdir_info->fdir_list, entries) {
+                       (void)fdir_add_signature_filter_82599(hw,
+                                                             &node->ixgbe_fdir,
+                                                             node->queue,
+                                                             node->fdirflags,
+                                                             node->fdirhash);
+               }
+       }
+}
+
+/* remove all the flow director filters */
+int
+ixgbe_clear_all_fdir_filter(struct rte_eth_dev *dev)
+{
+       struct ixgbe_hw_fdir_info *fdir_info =
+               IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private);
+       struct ixgbe_fdir_filter *fdir_filter;
+       struct ixgbe_fdir_filter *filter_flag;
+       int ret = 0;
+
+       /* flush flow director */
+       rte_hash_reset(fdir_info->hash_handle);
+       memset(fdir_info->hash_map, 0,
+              sizeof(struct ixgbe_fdir_filter *) * IXGBE_MAX_FDIR_FILTER_NUM);
+       filter_flag = TAILQ_FIRST(&fdir_info->fdir_list);
+       while ((fdir_filter = TAILQ_FIRST(&fdir_info->fdir_list))) {
+               TAILQ_REMOVE(&fdir_info->fdir_list,
+                            fdir_filter,
+                            entries);
+               rte_free(fdir_filter);
+       }
+
+       if (filter_flag != NULL)
+               ret = ixgbe_fdir_flush(dev);
+
+       return ret;
+}
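
The fdir lookup helpers above (ixgbe_fdir_filter_lookup/insert/remove) assume fdir_info->hash_handle and hash_map were created at device init time; that setup lives in ixgbe_ethdev.c and is not part of this hunk. For reference, a sketch of it under the same assumptions — a cuckoo hash keyed on union ixgbe_atr_input with a CRC hash function and IXGBE_MAX_FDIR_FILTER_NUM entries — is shown below; the table name and error codes are illustrative.

    #include <rte_hash.h>
    #include <rte_hash_crc.h>
    #include <rte_malloc.h>

    static int
    fdir_table_init_sketch(struct ixgbe_hw_fdir_info *fdir_info, int socket_id)
    {
        struct rte_hash_parameters params = {
            .name = "fdir_hash_sketch",
            .entries = IXGBE_MAX_FDIR_FILTER_NUM,
            .key_len = sizeof(union ixgbe_atr_input),
            .hash_func = rte_hash_crc,
            .hash_func_init_val = 0,
            .socket_id = socket_id,
        };

        TAILQ_INIT(&fdir_info->fdir_list);

        /* the slot index returned by rte_hash_add_key() indexes hash_map[] */
        fdir_info->hash_handle = rte_hash_create(&params);
        if (fdir_info->hash_handle == NULL)
            return -EINVAL;

        fdir_info->hash_map = rte_zmalloc("ixgbe_fdir_sketch",
                sizeof(struct ixgbe_fdir_filter *) *
                IXGBE_MAX_FDIR_FILTER_NUM, 0);
        if (fdir_info->hash_map == NULL) {
            rte_hash_free(fdir_info->hash_handle);
            return -ENOMEM;
        }
        return 0;
    }
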
diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_flow.c b/src/dpdk/drivers/net/ixgbe/ixgbe_flow.c
new file mode 100644 (file)
index 0000000..82aceed
--- /dev/null
@@ -0,0 +1,2878 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/queue.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <netinet/in.h>
+#include <rte_byteorder.h>
+#include <rte_common.h>
+#include <rte_cycles.h>
+
+#include <rte_interrupts.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_pci.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_alarm.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_atomic.h>
+#include <rte_malloc.h>
+#include <rte_random.h>
+#include <rte_dev.h>
+#include <rte_hash_crc.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+
+#include "ixgbe_logs.h"
+#include "base/ixgbe_api.h"
+#include "base/ixgbe_vf.h"
+#include "base/ixgbe_common.h"
+#include "ixgbe_ethdev.h"
+#include "ixgbe_bypass.h"
+#include "ixgbe_rxtx.h"
+#include "base/ixgbe_type.h"
+#include "base/ixgbe_phy.h"
+#include "rte_pmd_ixgbe.h"
+
+static int ixgbe_flow_flush(struct rte_eth_dev *dev,
+               struct rte_flow_error *error);
+static int
+cons_parse_ntuple_filter(const struct rte_flow_attr *attr,
+                                       const struct rte_flow_item pattern[],
+                                       const struct rte_flow_action actions[],
+                                       struct rte_eth_ntuple_filter *filter,
+                                       struct rte_flow_error *error);
+static int
+ixgbe_parse_ntuple_filter(const struct rte_flow_attr *attr,
+                                       const struct rte_flow_item pattern[],
+                                       const struct rte_flow_action actions[],
+                                       struct rte_eth_ntuple_filter *filter,
+                                       struct rte_flow_error *error);
+static int
+cons_parse_ethertype_filter(const struct rte_flow_attr *attr,
+                           const struct rte_flow_item *pattern,
+                           const struct rte_flow_action *actions,
+                           struct rte_eth_ethertype_filter *filter,
+                           struct rte_flow_error *error);
+static int
+ixgbe_parse_ethertype_filter(const struct rte_flow_attr *attr,
+                               const struct rte_flow_item pattern[],
+                               const struct rte_flow_action actions[],
+                               struct rte_eth_ethertype_filter *filter,
+                               struct rte_flow_error *error);
+static int
+cons_parse_syn_filter(const struct rte_flow_attr *attr,
+               const struct rte_flow_item pattern[],
+               const struct rte_flow_action actions[],
+               struct rte_eth_syn_filter *filter,
+               struct rte_flow_error *error);
+static int
+ixgbe_parse_syn_filter(const struct rte_flow_attr *attr,
+                               const struct rte_flow_item pattern[],
+                               const struct rte_flow_action actions[],
+                               struct rte_eth_syn_filter *filter,
+                               struct rte_flow_error *error);
+static int
+cons_parse_l2_tn_filter(const struct rte_flow_attr *attr,
+               const struct rte_flow_item pattern[],
+               const struct rte_flow_action actions[],
+               struct rte_eth_l2_tunnel_conf *filter,
+               struct rte_flow_error *error);
+static int
+ixgbe_validate_l2_tn_filter(struct rte_eth_dev *dev,
+                       const struct rte_flow_attr *attr,
+                       const struct rte_flow_item pattern[],
+                       const struct rte_flow_action actions[],
+                       struct rte_eth_l2_tunnel_conf *rule,
+                       struct rte_flow_error *error);
+static int
+ixgbe_validate_fdir_filter(struct rte_eth_dev *dev,
+                       const struct rte_flow_attr *attr,
+                       const struct rte_flow_item pattern[],
+                       const struct rte_flow_action actions[],
+                       struct ixgbe_fdir_rule *rule,
+                       struct rte_flow_error *error);
+static int
+ixgbe_parse_fdir_filter_normal(const struct rte_flow_attr *attr,
+               const struct rte_flow_item pattern[],
+               const struct rte_flow_action actions[],
+               struct ixgbe_fdir_rule *rule,
+               struct rte_flow_error *error);
+static int
+ixgbe_parse_fdir_filter_tunnel(const struct rte_flow_attr *attr,
+               const struct rte_flow_item pattern[],
+               const struct rte_flow_action actions[],
+               struct ixgbe_fdir_rule *rule,
+               struct rte_flow_error *error);
+static int
+ixgbe_parse_fdir_filter(const struct rte_flow_attr *attr,
+               const struct rte_flow_item pattern[],
+               const struct rte_flow_action actions[],
+               struct ixgbe_fdir_rule *rule,
+               struct rte_flow_error *error);
+static int
+ixgbe_flow_validate(__rte_unused struct rte_eth_dev *dev,
+               const struct rte_flow_attr *attr,
+               const struct rte_flow_item pattern[],
+               const struct rte_flow_action actions[],
+               struct rte_flow_error *error);
+static struct rte_flow *ixgbe_flow_create(struct rte_eth_dev *dev,
+               const struct rte_flow_attr *attr,
+               const struct rte_flow_item pattern[],
+               const struct rte_flow_action actions[],
+               struct rte_flow_error *error);
+static int ixgbe_flow_destroy(struct rte_eth_dev *dev,
+               struct rte_flow *flow,
+               struct rte_flow_error *error);
+
+const struct rte_flow_ops ixgbe_flow_ops = {
+       ixgbe_flow_validate,
+       ixgbe_flow_create,
+       ixgbe_flow_destroy,
+       ixgbe_flow_flush,
+       NULL,
+};
+
+#define IXGBE_MIN_N_TUPLE_PRIO 1
+#define IXGBE_MAX_N_TUPLE_PRIO 7
+#define NEXT_ITEM_OF_PATTERN(item, pattern, index)                     \
+       do {                                                            \
+               item = pattern + index;                                 \
+               while (item->type == RTE_FLOW_ITEM_TYPE_VOID) {         \
+                       index++;                                        \
+                       item = pattern + index;                         \
+               }                                                       \
+       } while (0)
+
+#define NEXT_ITEM_OF_ACTION(act, actions, index)                        \
+       do {                                                            \
+               act = actions + index;                                  \
+               while (act->type == RTE_FLOW_ACTION_TYPE_VOID) {        \
+                       index++;                                        \
+                       act = actions + index;                          \
+               }                                                       \
+       } while (0)
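+
+/*
+ * Illustrative note: both helpers simply advance "index" past any VOID
+ * entries. For example, given a hypothetical pattern { VOID, ETH, END }
+ * and index starting at 0, NEXT_ITEM_OF_PATTERN(item, pattern, index)
+ * leaves index == 1 and item pointing at the ETH entry.
+ */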
+
+/**
+ * Please be aware there is an assumption for all the parsers:
+ * rte_flow_item uses big endian, while rte_flow_attr and
+ * rte_flow_action use CPU (host) order.
+ * Because the pattern is used to describe packets,
+ * the packets normally use network order.
+ */
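+
+/*
+ * Illustrative example of that convention (hypothetical values): an item
+ * spec carries network order while attribute fields stay in CPU order,
+ * e.g.
+ *
+ *   tcp_spec.hdr.dst_port = rte_cpu_to_be_16(80);
+ *   attr.priority = 1;
+ */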
+
+/**
+ * Parse the rule to see if it is an n-tuple rule,
+ * and extract the n-tuple filter info along the way.
+ * pattern:
+ * The first not void item can be ETH or IPV4.
+ * The second not void item must be IPV4 if the first one is ETH.
+ * The third not void item must be UDP or TCP.
+ * The next not void item must be END.
+ * action:
+ * The first not void action should be QUEUE.
+ * The next not void action should be END.
+ * pattern example:
+ * ITEM                Spec                    Mask
+ * ETH         NULL                    NULL
+ * IPV4                src_addr 192.168.1.20   0xFFFFFFFF
+ *             dst_addr 192.167.3.50   0xFFFFFFFF
+ *             next_proto_id   17      0xFF
+ * UDP/TCP     src_port        80      0xFFFF
+ *             dst_port        80      0xFFFF
+ * END
+ * Other members in mask and spec should be set to 0x00.
+ * item->last should be NULL.
+ */
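+/*
+ * Illustrative sketch (not part of the driver): one possible way an
+ * application could build a pattern/action list with the layout described
+ * above. All variable names and values are hypothetical.
+ *
+ *   struct rte_flow_item_ipv4 ip_spec = { .hdr = {
+ *           .src_addr = rte_cpu_to_be_32(IPv4(192, 168, 1, 20)),
+ *           .dst_addr = rte_cpu_to_be_32(IPv4(192, 167, 3, 50)),
+ *           .next_proto_id = 17 } };
+ *   struct rte_flow_item_ipv4 ip_mask = { .hdr = {
+ *           .src_addr = UINT32_MAX, .dst_addr = UINT32_MAX,
+ *           .next_proto_id = 0xFF } };
+ *   struct rte_flow_item_udp udp_spec = { .hdr = {
+ *           .src_port = rte_cpu_to_be_16(80),
+ *           .dst_port = rte_cpu_to_be_16(80) } };
+ *   struct rte_flow_item_udp udp_mask = { .hdr = {
+ *           .src_port = UINT16_MAX, .dst_port = UINT16_MAX } };
+ *   struct rte_flow_action_queue queue = { .index = 1 };
+ *
+ *   struct rte_flow_item pattern[] = {
+ *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
+ *           { .type = RTE_FLOW_ITEM_TYPE_IPV4,
+ *             .spec = &ip_spec, .mask = &ip_mask },
+ *           { .type = RTE_FLOW_ITEM_TYPE_UDP,
+ *             .spec = &udp_spec, .mask = &udp_mask },
+ *           { .type = RTE_FLOW_ITEM_TYPE_END },
+ *   };
+ *   struct rte_flow_action actions[] = {
+ *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
+ *           { .type = RTE_FLOW_ACTION_TYPE_END },
+ *   };
+ */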
+static int
+cons_parse_ntuple_filter(const struct rte_flow_attr *attr,
+                        const struct rte_flow_item pattern[],
+                        const struct rte_flow_action actions[],
+                        struct rte_eth_ntuple_filter *filter,
+                        struct rte_flow_error *error)
+{
+       const struct rte_flow_item *item;
+       const struct rte_flow_action *act;
+       const struct rte_flow_item_ipv4 *ipv4_spec;
+       const struct rte_flow_item_ipv4 *ipv4_mask;
+       const struct rte_flow_item_tcp *tcp_spec;
+       const struct rte_flow_item_tcp *tcp_mask;
+       const struct rte_flow_item_udp *udp_spec;
+       const struct rte_flow_item_udp *udp_mask;
+       uint32_t index;
+
+       if (!pattern) {
+               rte_flow_error_set(error,
+                       EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_NUM,
+                       NULL, "NULL pattern.");
+               return -rte_errno;
+       }
+
+       if (!actions) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ACTION_NUM,
+                                  NULL, "NULL action.");
+               return -rte_errno;
+       }
+       if (!attr) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ATTR,
+                                  NULL, "NULL attribute.");
+               return -rte_errno;
+       }
+
+       /* parse pattern */
+       index = 0;
+
+       /* the first not void item can be MAC or IPv4 */
+       NEXT_ITEM_OF_PATTERN(item, pattern, index);
+
+       if (item->type != RTE_FLOW_ITEM_TYPE_ETH &&
+           item->type != RTE_FLOW_ITEM_TYPE_IPV4) {
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item, "Not supported by ntuple filter");
+               return -rte_errno;
+       }
+       /* Skip Ethernet */
+       if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
+               /* Not supported last point for range */
+               if (item->last) {
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                               item, "Not supported last point for range");
+                       return -rte_errno;
+               }
+               /* if the first item is MAC, the content should be NULL */
+               if (item->spec || item->mask) {
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by ntuple filter");
+                       return -rte_errno;
+               }
+               /* check if the next not void item is IPv4 */
+               index++;
+               NEXT_ITEM_OF_PATTERN(item, pattern, index);
+               if (item->type != RTE_FLOW_ITEM_TYPE_IPV4) {
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by ntuple filter");
+                       return -rte_errno;
+               }
+       }
+
+       /* get the IPv4 info */
+       if (!item->spec || !item->mask) {
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item, "Invalid ntuple mask");
+               return -rte_errno;
+       }
+       /* Not supported last point for range */
+       if (item->last) {
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                       item, "Not supported last point for range");
+               return -rte_errno;
+       }
+
+       ipv4_mask = (const struct rte_flow_item_ipv4 *)item->mask;
+       /**
+        * Only support src & dst addresses, protocol,
+        * others should be masked.
+        */
+       if (ipv4_mask->hdr.version_ihl ||
+           ipv4_mask->hdr.type_of_service ||
+           ipv4_mask->hdr.total_length ||
+           ipv4_mask->hdr.packet_id ||
+           ipv4_mask->hdr.fragment_offset ||
+           ipv4_mask->hdr.time_to_live ||
+           ipv4_mask->hdr.hdr_checksum) {
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item, "Not supported by ntuple filter");
+               return -rte_errno;
+       }
+
+       filter->dst_ip_mask = ipv4_mask->hdr.dst_addr;
+       filter->src_ip_mask = ipv4_mask->hdr.src_addr;
+       filter->proto_mask  = ipv4_mask->hdr.next_proto_id;
+
+       ipv4_spec = (const struct rte_flow_item_ipv4 *)item->spec;
+       filter->dst_ip = ipv4_spec->hdr.dst_addr;
+       filter->src_ip = ipv4_spec->hdr.src_addr;
+       filter->proto  = ipv4_spec->hdr.next_proto_id;
+
+       /* check if the next not void item is TCP or UDP */
+       index++;
+       NEXT_ITEM_OF_PATTERN(item, pattern, index);
+       if (item->type != RTE_FLOW_ITEM_TYPE_TCP &&
+           item->type != RTE_FLOW_ITEM_TYPE_UDP) {
+               memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item, "Not supported by ntuple filter");
+               return -rte_errno;
+       }
+
+       /* get the TCP/UDP info */
+       if (!item->spec || !item->mask) {
+               memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item, "Invalid ntuple mask");
+               return -rte_errno;
+       }
+
+       /* Not supported last point for range */
+       if (item->last) {
+               memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                       item, "Not supported last point for range");
+               return -rte_errno;
+       }
+
+       if (item->type == RTE_FLOW_ITEM_TYPE_TCP) {
+               tcp_mask = (const struct rte_flow_item_tcp *)item->mask;
+
+               /**
+                * Only support src & dst ports, tcp flags,
+                * others should be masked.
+                */
+               if (tcp_mask->hdr.sent_seq ||
+                   tcp_mask->hdr.recv_ack ||
+                   tcp_mask->hdr.data_off ||
+                   tcp_mask->hdr.rx_win ||
+                   tcp_mask->hdr.cksum ||
+                   tcp_mask->hdr.tcp_urp) {
+                       memset(filter, 0,
+                               sizeof(struct rte_eth_ntuple_filter));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by ntuple filter");
+                       return -rte_errno;
+               }
+
+               filter->dst_port_mask  = tcp_mask->hdr.dst_port;
+               filter->src_port_mask  = tcp_mask->hdr.src_port;
+               if (tcp_mask->hdr.tcp_flags == 0xFF) {
+                       filter->flags |= RTE_NTUPLE_FLAGS_TCP_FLAG;
+               } else if (!tcp_mask->hdr.tcp_flags) {
+                       filter->flags &= ~RTE_NTUPLE_FLAGS_TCP_FLAG;
+               } else {
+                       memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by ntuple filter");
+                       return -rte_errno;
+               }
+
+               tcp_spec = (const struct rte_flow_item_tcp *)item->spec;
+               filter->dst_port  = tcp_spec->hdr.dst_port;
+               filter->src_port  = tcp_spec->hdr.src_port;
+               filter->tcp_flags = tcp_spec->hdr.tcp_flags;
+       } else {
+               udp_mask = (const struct rte_flow_item_udp *)item->mask;
+
+               /**
+                * Only support src & dst ports,
+                * others should be masked.
+                */
+               if (udp_mask->hdr.dgram_len ||
+                   udp_mask->hdr.dgram_cksum) {
+                       memset(filter, 0,
+                               sizeof(struct rte_eth_ntuple_filter));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by ntuple filter");
+                       return -rte_errno;
+               }
+
+               filter->dst_port_mask = udp_mask->hdr.dst_port;
+               filter->src_port_mask = udp_mask->hdr.src_port;
+
+               udp_spec = (const struct rte_flow_item_udp *)item->spec;
+               filter->dst_port = udp_spec->hdr.dst_port;
+               filter->src_port = udp_spec->hdr.src_port;
+       }
+
+       /* check if the next not void item is END */
+       index++;
+       NEXT_ITEM_OF_PATTERN(item, pattern, index);
+       if (item->type != RTE_FLOW_ITEM_TYPE_END) {
+               memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item, "Not supported by ntuple filter");
+               return -rte_errno;
+       }
+
+       /* parse action */
+       index = 0;
+
+       /**
+        * n-tuple only supports forwarding,
+        * check if the first not void action is QUEUE.
+        */
+       NEXT_ITEM_OF_ACTION(act, actions, index);
+       if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE) {
+               memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ACTION,
+                       act, "Not supported action.");
+               return -rte_errno;
+       }
+       filter->queue =
+               ((const struct rte_flow_action_queue *)act->conf)->index;
+
+       /* check if the next not void item is END */
+       index++;
+       NEXT_ITEM_OF_ACTION(act, actions, index);
+       if (act->type != RTE_FLOW_ACTION_TYPE_END) {
+               memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ACTION,
+                       act, "Not supported action.");
+               return -rte_errno;
+       }
+
+       /* parse attr */
+       /* must be input direction */
+       if (!attr->ingress) {
+               memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+                                  attr, "Only support ingress.");
+               return -rte_errno;
+       }
+
+       /* not supported */
+       if (attr->egress) {
+               memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+                                  attr, "Not support egress.");
+               return -rte_errno;
+       }
+
+       if (attr->priority > 0xFFFF) {
+               memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+                                  attr, "Error priority.");
+               return -rte_errno;
+       }
+       filter->priority = (uint16_t)attr->priority;
+       if (attr->priority < IXGBE_MIN_N_TUPLE_PRIO ||
+           attr->priority > IXGBE_MAX_N_TUPLE_PRIO)
+               filter->priority = 1;
+
+       return 0;
+}
+
+/* a specific function for ixgbe because the flags are specific */
+static int
+ixgbe_parse_ntuple_filter(const struct rte_flow_attr *attr,
+                         const struct rte_flow_item pattern[],
+                         const struct rte_flow_action actions[],
+                         struct rte_eth_ntuple_filter *filter,
+                         struct rte_flow_error *error)
+{
+       int ret;
+
+       ret = cons_parse_ntuple_filter(attr, pattern, actions, filter, error);
+
+       if (ret)
+               return ret;
+
+       /* Ixgbe doesn't support tcp flags. */
+       if (filter->flags & RTE_NTUPLE_FLAGS_TCP_FLAG) {
+               memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                  NULL, "Not supported by ntuple filter");
+               return -rte_errno;
+       }
+
+       /* Ixgbe doesn't support many priorities. */
+       if (filter->priority < IXGBE_MIN_N_TUPLE_PRIO ||
+           filter->priority > IXGBE_MAX_N_TUPLE_PRIO) {
+               memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       NULL, "Priority not supported by ntuple filter");
+               return -rte_errno;
+       }
+
+       if (filter->queue >= IXGBE_MAX_RX_QUEUE_NUM ||
+               filter->priority > IXGBE_5TUPLE_MAX_PRI ||
+               filter->priority < IXGBE_5TUPLE_MIN_PRI)
+               return -rte_errno;
+
+       /* fixed value for ixgbe */
+       filter->flags = RTE_5TUPLE_FLAGS;
+       return 0;
+}
+
+/**
+ * Parse the rule to see if it is an ethertype rule,
+ * and extract the ethertype filter info along the way.
+ * pattern:
+ * The first not void item can be ETH.
+ * The next not void item must be END.
+ * action:
+ * The first not void action should be QUEUE.
+ * The next not void action should be END.
+ * pattern example:
+ * ITEM                Spec                    Mask
+ * ETH         type    0x0807          0xFFFF
+ * END
+ * Other members in mask and spec should be set to 0x00.
+ * item->last should be NULL.
+ */
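+/*
+ * Illustrative sketch (not part of the driver): an ETH item matching a
+ * single ethertype, as described above. Names and values are hypothetical.
+ *
+ *   struct rte_flow_item_eth eth_spec = { .type = rte_cpu_to_be_16(0x0807) };
+ *   struct rte_flow_item_eth eth_mask = { .type = 0xFFFF };
+ *   struct rte_flow_item pattern[] = {
+ *           { .type = RTE_FLOW_ITEM_TYPE_ETH,
+ *             .spec = &eth_spec, .mask = &eth_mask },
+ *           { .type = RTE_FLOW_ITEM_TYPE_END },
+ *   };
+ *   struct rte_flow_action_queue queue = { .index = 1 };
+ *   struct rte_flow_action actions[] = {
+ *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
+ *           { .type = RTE_FLOW_ACTION_TYPE_END },
+ *   };
+ */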
+static int
+cons_parse_ethertype_filter(const struct rte_flow_attr *attr,
+                           const struct rte_flow_item *pattern,
+                           const struct rte_flow_action *actions,
+                           struct rte_eth_ethertype_filter *filter,
+                           struct rte_flow_error *error)
+{
+       const struct rte_flow_item *item;
+       const struct rte_flow_action *act;
+       const struct rte_flow_item_eth *eth_spec;
+       const struct rte_flow_item_eth *eth_mask;
+       const struct rte_flow_action_queue *act_q;
+       uint32_t index;
+
+       if (!pattern) {
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM_NUM,
+                               NULL, "NULL pattern.");
+               return -rte_errno;
+       }
+
+       if (!actions) {
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ACTION_NUM,
+                               NULL, "NULL action.");
+               return -rte_errno;
+       }
+
+       if (!attr) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ATTR,
+                                  NULL, "NULL attribute.");
+               return -rte_errno;
+       }
+
+       /* Parse pattern */
+       index = 0;
+
+       /* The first non-void item should be MAC. */
+       item = pattern + index;
+       while (item->type == RTE_FLOW_ITEM_TYPE_VOID) {
+               index++;
+               item = pattern + index;
+       }
+       if (item->type != RTE_FLOW_ITEM_TYPE_ETH) {
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item, "Not supported by ethertype filter");
+               return -rte_errno;
+       }
+
+       /* Not supported last point for range */
+       if (item->last) {
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                       item, "Not supported last point for range");
+               return -rte_errno;
+       }
+
+       /* Get the MAC info. */
+       if (!item->spec || !item->mask) {
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by ethertype filter");
+               return -rte_errno;
+       }
+
+       eth_spec = (const struct rte_flow_item_eth *)item->spec;
+       eth_mask = (const struct rte_flow_item_eth *)item->mask;
+
+       /* Mask bits of source MAC address must be full of 0.
+        * Mask bits of destination MAC address must be full
+        * of 1 or full of 0.
+        */
+       if (!is_zero_ether_addr(&eth_mask->src) ||
+           (!is_zero_ether_addr(&eth_mask->dst) &&
+            !is_broadcast_ether_addr(&eth_mask->dst))) {
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Invalid ether address mask");
+               return -rte_errno;
+       }
+
+       if ((eth_mask->type & UINT16_MAX) != UINT16_MAX) {
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Invalid ethertype mask");
+               return -rte_errno;
+       }
+
+       /* If mask bits of destination MAC address
+        * are full of 1, set RTE_ETHTYPE_FLAGS_MAC.
+        */
+       if (is_broadcast_ether_addr(&eth_mask->dst)) {
+               filter->mac_addr = eth_spec->dst;
+               filter->flags |= RTE_ETHTYPE_FLAGS_MAC;
+       } else {
+               filter->flags &= ~RTE_ETHTYPE_FLAGS_MAC;
+       }
+       filter->ether_type = rte_be_to_cpu_16(eth_spec->type);
+
+       /* Check if the next non-void item is END. */
+       index++;
+       item = pattern + index;
+       while (item->type == RTE_FLOW_ITEM_TYPE_VOID) {
+               index++;
+               item = pattern + index;
+       }
+       if (item->type != RTE_FLOW_ITEM_TYPE_END) {
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by ethertype filter.");
+               return -rte_errno;
+       }
+
+       /* Parse action */
+
+       index = 0;
+       /* Check if the first non-void action is QUEUE or DROP. */
+       act = actions + index;
+       while (act->type == RTE_FLOW_ACTION_TYPE_VOID) {
+               index++;
+               act = actions + index;
+       }
+       if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE &&
+           act->type != RTE_FLOW_ACTION_TYPE_DROP) {
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ACTION,
+                               act, "Not supported action.");
+               return -rte_errno;
+       }
+
+       if (act->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
+               act_q = (const struct rte_flow_action_queue *)act->conf;
+               filter->queue = act_q->index;
+       } else {
+               filter->flags |= RTE_ETHTYPE_FLAGS_DROP;
+       }
+
+       /* Check if the next non-void item is END */
+       index++;
+       act = actions + index;
+       while (act->type == RTE_FLOW_ACTION_TYPE_VOID) {
+               index++;
+               act = actions + index;
+       }
+       if (act->type != RTE_FLOW_ACTION_TYPE_END) {
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ACTION,
+                               act, "Not supported action.");
+               return -rte_errno;
+       }
+
+       /* Parse attr */
+       /* Must be input direction */
+       if (!attr->ingress) {
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+                               attr, "Only support ingress.");
+               return -rte_errno;
+       }
+
+       /* Not supported */
+       if (attr->egress) {
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+                               attr, "Not support egress.");
+               return -rte_errno;
+       }
+
+       /* Not supported */
+       if (attr->priority) {
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+                               attr, "Not support priority.");
+               return -rte_errno;
+       }
+
+       /* Not supported */
+       if (attr->group) {
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+                               attr, "Not support group.");
+               return -rte_errno;
+       }
+
+       return 0;
+}
+
+static int
+ixgbe_parse_ethertype_filter(const struct rte_flow_attr *attr,
+                            const struct rte_flow_item pattern[],
+                            const struct rte_flow_action actions[],
+                            struct rte_eth_ethertype_filter *filter,
+                            struct rte_flow_error *error)
+{
+       int ret;
+
+       ret = cons_parse_ethertype_filter(attr, pattern,
+                                       actions, filter, error);
+
+       if (ret)
+               return ret;
+
+       /* Ixgbe doesn't support MAC address. */
+       if (filter->flags & RTE_ETHTYPE_FLAGS_MAC) {
+               memset(filter, 0, sizeof(struct rte_eth_ethertype_filter));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       NULL, "Not supported by ethertype filter");
+               return -rte_errno;
+       }
+
+       if (filter->queue >= IXGBE_MAX_RX_QUEUE_NUM) {
+               memset(filter, 0, sizeof(struct rte_eth_ethertype_filter));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       NULL, "queue index much too big");
+               return -rte_errno;
+       }
+
+       if (filter->ether_type == ETHER_TYPE_IPv4 ||
+               filter->ether_type == ETHER_TYPE_IPv6) {
+               memset(filter, 0, sizeof(struct rte_eth_ethertype_filter));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       NULL, "IPv4/IPv6 not supported by ethertype filter");
+               return -rte_errno;
+       }
+
+       if (filter->flags & RTE_ETHTYPE_FLAGS_MAC) {
+               memset(filter, 0, sizeof(struct rte_eth_ethertype_filter));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       NULL, "mac compare is unsupported");
+               return -rte_errno;
+       }
+
+       if (filter->flags & RTE_ETHTYPE_FLAGS_DROP) {
+               memset(filter, 0, sizeof(struct rte_eth_ethertype_filter));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       NULL, "drop option is unsupported");
+               return -rte_errno;
+       }
+
+       return 0;
+}
+
+/**
+ * Parse the rule to see if it is a TCP SYN rule,
+ * and extract the TCP SYN filter info along the way.
+ * pattern:
+ * The first not void item must be ETH.
+ * The second not void item must be IPV4 or IPV6.
+ * The third not void item must be TCP.
+ * The next not void item must be END.
+ * action:
+ * The first not void action should be QUEUE.
+ * The next not void action should be END.
+ * pattern example:
+ * ITEM                Spec                    Mask
+ * ETH         NULL                    NULL
+ * IPV4/IPV6   NULL                    NULL
+ * TCP         tcp_flags       0x02    0xFF
+ * END
+ * Other members in mask and spec should be set to 0x00.
+ * item->last should be NULL.
+ */
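+/*
+ * Illustrative sketch (not part of the driver): a SYN rule built as the
+ * parser below accepts it. Note the code checks that the tcp_flags mask
+ * equals TCP_SYN_FLAG, so the mask uses that value here. The queue index
+ * is hypothetical.
+ *
+ *   struct rte_flow_item_tcp tcp_spec = { .hdr = { .tcp_flags = TCP_SYN_FLAG } };
+ *   struct rte_flow_item_tcp tcp_mask = { .hdr = { .tcp_flags = TCP_SYN_FLAG } };
+ *   struct rte_flow_item pattern[] = {
+ *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
+ *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
+ *           { .type = RTE_FLOW_ITEM_TYPE_TCP,
+ *             .spec = &tcp_spec, .mask = &tcp_mask },
+ *           { .type = RTE_FLOW_ITEM_TYPE_END },
+ *   };
+ *   struct rte_flow_action_queue queue = { .index = 3 };
+ *   struct rte_flow_action actions[] = {
+ *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
+ *           { .type = RTE_FLOW_ACTION_TYPE_END },
+ *   };
+ */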
+static int
+cons_parse_syn_filter(const struct rte_flow_attr *attr,
+                               const struct rte_flow_item pattern[],
+                               const struct rte_flow_action actions[],
+                               struct rte_eth_syn_filter *filter,
+                               struct rte_flow_error *error)
+{
+       const struct rte_flow_item *item;
+       const struct rte_flow_action *act;
+       const struct rte_flow_item_tcp *tcp_spec;
+       const struct rte_flow_item_tcp *tcp_mask;
+       const struct rte_flow_action_queue *act_q;
+       uint32_t index;
+
+       if (!pattern) {
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM_NUM,
+                               NULL, "NULL pattern.");
+               return -rte_errno;
+       }
+
+       if (!actions) {
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ACTION_NUM,
+                               NULL, "NULL action.");
+               return -rte_errno;
+       }
+
+       if (!attr) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ATTR,
+                                  NULL, "NULL attribute.");
+               return -rte_errno;
+       }
+
+       /* parse pattern */
+       index = 0;
+
+       /* the first not void item should be MAC or IPv4 or IPv6 or TCP */
+       NEXT_ITEM_OF_PATTERN(item, pattern, index);
+       if (item->type != RTE_FLOW_ITEM_TYPE_ETH &&
+           item->type != RTE_FLOW_ITEM_TYPE_IPV4 &&
+           item->type != RTE_FLOW_ITEM_TYPE_IPV6 &&
+           item->type != RTE_FLOW_ITEM_TYPE_TCP) {
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by syn filter");
+               return -rte_errno;
+       }
+       /* Not supported last point for range */
+       if (item->last) {
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                       item, "Not supported last point for range");
+               return -rte_errno;
+       }
+
+       /* Skip Ethernet */
+       if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
+               /* if the item is MAC, the content should be NULL */
+               if (item->spec || item->mask) {
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Invalid SYN address mask");
+                       return -rte_errno;
+               }
+
+               /* check if the next not void item is IPv4 or IPv6 */
+               index++;
+               NEXT_ITEM_OF_PATTERN(item, pattern, index);
+               if (item->type != RTE_FLOW_ITEM_TYPE_IPV4 &&
+                   item->type != RTE_FLOW_ITEM_TYPE_IPV6) {
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by syn filter");
+                       return -rte_errno;
+               }
+       }
+
+       /* Skip IP */
+       if (item->type == RTE_FLOW_ITEM_TYPE_IPV4 ||
+           item->type == RTE_FLOW_ITEM_TYPE_IPV6) {
+               /* if the item is IP, the content should be NULL */
+               if (item->spec || item->mask) {
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Invalid SYN mask");
+                       return -rte_errno;
+               }
+
+               /* check if the next not void item is TCP */
+               index++;
+               NEXT_ITEM_OF_PATTERN(item, pattern, index);
+               if (item->type != RTE_FLOW_ITEM_TYPE_TCP) {
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by syn filter");
+                       return -rte_errno;
+               }
+       }
+
+       /* Get the TCP info. Only support SYN. */
+       if (!item->spec || !item->mask) {
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Invalid SYN mask");
+               return -rte_errno;
+       }
+       /* Not supported last point for range */
+       if (item->last) {
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                       item, "Not supported last point for range");
+               return -rte_errno;
+       }
+
+       tcp_spec = (const struct rte_flow_item_tcp *)item->spec;
+       tcp_mask = (const struct rte_flow_item_tcp *)item->mask;
+       if (!(tcp_spec->hdr.tcp_flags & TCP_SYN_FLAG) ||
+           tcp_mask->hdr.src_port ||
+           tcp_mask->hdr.dst_port ||
+           tcp_mask->hdr.sent_seq ||
+           tcp_mask->hdr.recv_ack ||
+           tcp_mask->hdr.data_off ||
+           tcp_mask->hdr.tcp_flags != TCP_SYN_FLAG ||
+           tcp_mask->hdr.rx_win ||
+           tcp_mask->hdr.cksum ||
+           tcp_mask->hdr.tcp_urp) {
+               memset(filter, 0, sizeof(struct rte_eth_syn_filter));
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by syn filter");
+               return -rte_errno;
+       }
+
+       /* check if the next not void item is END */
+       index++;
+       NEXT_ITEM_OF_PATTERN(item, pattern, index);
+       if (item->type != RTE_FLOW_ITEM_TYPE_END) {
+               memset(filter, 0, sizeof(struct rte_eth_syn_filter));
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by syn filter");
+               return -rte_errno;
+       }
+
+       /* parse action */
+       index = 0;
+
+       /* check if the first not void action is QUEUE. */
+       NEXT_ITEM_OF_ACTION(act, actions, index);
+       if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE) {
+               memset(filter, 0, sizeof(struct rte_eth_syn_filter));
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ACTION,
+                               act, "Not supported action.");
+               return -rte_errno;
+       }
+
+       act_q = (const struct rte_flow_action_queue *)act->conf;
+       filter->queue = act_q->index;
+       if (filter->queue >= IXGBE_MAX_RX_QUEUE_NUM) {
+               memset(filter, 0, sizeof(struct rte_eth_syn_filter));
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ACTION,
+                               act, "Not supported action.");
+               return -rte_errno;
+       }
+
+       /* check if the next not void item is END */
+       index++;
+       NEXT_ITEM_OF_ACTION(act, actions, index);
+       if (act->type != RTE_FLOW_ACTION_TYPE_END) {
+               memset(filter, 0, sizeof(struct rte_eth_syn_filter));
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ACTION,
+                               act, "Not supported action.");
+               return -rte_errno;
+       }
+
+       /* parse attr */
+       /* must be input direction */
+       if (!attr->ingress) {
+               memset(filter, 0, sizeof(struct rte_eth_syn_filter));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+                       attr, "Only support ingress.");
+               return -rte_errno;
+       }
+
+       /* not supported */
+       if (attr->egress) {
+               memset(filter, 0, sizeof(struct rte_eth_syn_filter));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+                       attr, "Not support egress.");
+               return -rte_errno;
+       }
+
+       /* Support 2 priorities, the lowest or highest. */
+       if (!attr->priority) {
+               filter->hig_pri = 0;
+       } else if (attr->priority == (uint32_t)~0U) {
+               filter->hig_pri = 1;
+       } else {
+               memset(filter, 0, sizeof(struct rte_eth_syn_filter));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+                       attr, "Not support priority.");
+               return -rte_errno;
+       }
+
+       return 0;
+}
+
+static int
+ixgbe_parse_syn_filter(const struct rte_flow_attr *attr,
+                            const struct rte_flow_item pattern[],
+                            const struct rte_flow_action actions[],
+                            struct rte_eth_syn_filter *filter,
+                            struct rte_flow_error *error)
+{
+       int ret;
+
+       ret = cons_parse_syn_filter(attr, pattern,
+                                       actions, filter, error);
+
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+/**
+ * Parse the rule to see if it is an L2 tunnel rule,
+ * and extract the L2 tunnel filter info along the way.
+ * Only support E-tag now.
+ * pattern:
+ * The first not void item can be E_TAG.
+ * The next not void item must be END.
+ * action:
+ * The first not void action should be QUEUE.
+ * The next not void action should be END.
+ * pattern example:
+ * ITEM                Spec                    Mask
+ * E_TAG       grp             0x1     0x3
+ *             e_cid_base      0x309   0xFFF
+ * END
+ * Other members in mask and spec should be set to 0x00.
+ * item->last should be NULL.
+ */
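+/*
+ * Illustrative sketch (not part of the driver): an E-tag item carrying
+ * grp 0x1 and e_cid_base 0x309, assuming grp occupies the upper 2 of the
+ * 14 used bits of rsvd_grp_ecid_b. The pool index is hypothetical.
+ *
+ *   struct rte_flow_item_e_tag e_tag_spec = {
+ *           .rsvd_grp_ecid_b = rte_cpu_to_be_16((0x1 << 12) | 0x309) };
+ *   struct rte_flow_item_e_tag e_tag_mask = {
+ *           .rsvd_grp_ecid_b = rte_cpu_to_be_16(0x3FFF) };
+ *   struct rte_flow_item pattern[] = {
+ *           { .type = RTE_FLOW_ITEM_TYPE_E_TAG,
+ *             .spec = &e_tag_spec, .mask = &e_tag_mask },
+ *           { .type = RTE_FLOW_ITEM_TYPE_END },
+ *   };
+ *   struct rte_flow_action_queue pool = { .index = 0 };
+ *   struct rte_flow_action actions[] = {
+ *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &pool },
+ *           { .type = RTE_FLOW_ACTION_TYPE_END },
+ *   };
+ */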
+static int
+cons_parse_l2_tn_filter(const struct rte_flow_attr *attr,
+                       const struct rte_flow_item pattern[],
+                       const struct rte_flow_action actions[],
+                       struct rte_eth_l2_tunnel_conf *filter,
+                       struct rte_flow_error *error)
+{
+       const struct rte_flow_item *item;
+       const struct rte_flow_item_e_tag *e_tag_spec;
+       const struct rte_flow_item_e_tag *e_tag_mask;
+       const struct rte_flow_action *act;
+       const struct rte_flow_action_queue *act_q;
+       uint32_t index;
+
+       if (!pattern) {
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM_NUM,
+                       NULL, "NULL pattern.");
+               return -rte_errno;
+       }
+
+       if (!actions) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ACTION_NUM,
+                                  NULL, "NULL action.");
+               return -rte_errno;
+       }
+
+       if (!attr) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ATTR,
+                                  NULL, "NULL attribute.");
+               return -rte_errno;
+       }
+       /* parse pattern */
+       index = 0;
+
+       /* The first not void item should be e-tag. */
+       NEXT_ITEM_OF_PATTERN(item, pattern, index);
+       if (item->type != RTE_FLOW_ITEM_TYPE_E_TAG) {
+               memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item, "Not supported by L2 tunnel filter");
+               return -rte_errno;
+       }
+
+       if (!item->spec || !item->mask) {
+               memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf));
+               rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
+                       item, "Not supported by L2 tunnel filter");
+               return -rte_errno;
+       }
+
+       /* Not supported last point for range */
+       if (item->last) {
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                       item, "Not supported last point for range");
+               return -rte_errno;
+       }
+
+       e_tag_spec = (const struct rte_flow_item_e_tag *)item->spec;
+       e_tag_mask = (const struct rte_flow_item_e_tag *)item->mask;
+
+       /* Only care about GRP and E cid base. */
+       if (e_tag_mask->epcp_edei_in_ecid_b ||
+           e_tag_mask->in_ecid_e ||
+           e_tag_mask->ecid_e ||
+           e_tag_mask->rsvd_grp_ecid_b != rte_cpu_to_be_16(0x3FFF)) {
+               memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item, "Not supported by L2 tunnel filter");
+               return -rte_errno;
+       }
+
+       filter->l2_tunnel_type = RTE_L2_TUNNEL_TYPE_E_TAG;
+       /**
+        * grp and e_cid_base are bit fields and only use 14 bits.
+        * e-tag id is taken as little endian by HW.
+        */
+       filter->tunnel_id = rte_be_to_cpu_16(e_tag_spec->rsvd_grp_ecid_b);
+
+       /* check if the next not void item is END */
+       index++;
+       NEXT_ITEM_OF_PATTERN(item, pattern, index);
+       if (item->type != RTE_FLOW_ITEM_TYPE_END) {
+               memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item, "Not supported by L2 tunnel filter");
+               return -rte_errno;
+       }
+
+       /* parse attr */
+       /* must be input direction */
+       if (!attr->ingress) {
+               memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+                       attr, "Only support ingress.");
+               return -rte_errno;
+       }
+
+       /* not supported */
+       if (attr->egress) {
+               memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+                       attr, "Not support egress.");
+               return -rte_errno;
+       }
+
+       /* not supported */
+       if (attr->priority) {
+               memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+                       attr, "Not support priority.");
+               return -rte_errno;
+       }
+
+       /* parse action */
+       index = 0;
+
+       /* check if the first not void action is QUEUE. */
+       NEXT_ITEM_OF_ACTION(act, actions, index);
+       if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE) {
+               memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ACTION,
+                       act, "Not supported action.");
+               return -rte_errno;
+       }
+
+       act_q = (const struct rte_flow_action_queue *)act->conf;
+       filter->pool = act_q->index;
+
+       /* check if the next not void item is END */
+       index++;
+       NEXT_ITEM_OF_ACTION(act, actions, index);
+       if (act->type != RTE_FLOW_ACTION_TYPE_END) {
+               memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ACTION,
+                       act, "Not supported action.");
+               return -rte_errno;
+       }
+
+       return 0;
+}
+
+static int
+ixgbe_validate_l2_tn_filter(struct rte_eth_dev *dev,
+                       const struct rte_flow_attr *attr,
+                       const struct rte_flow_item pattern[],
+                       const struct rte_flow_action actions[],
+                       struct rte_eth_l2_tunnel_conf *l2_tn_filter,
+                       struct rte_flow_error *error)
+{
+       int ret = 0;
+       struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       ret = cons_parse_l2_tn_filter(attr, pattern,
+                               actions, l2_tn_filter, error);
+
+       if (hw->mac.type != ixgbe_mac_X550 &&
+               hw->mac.type != ixgbe_mac_X550EM_x &&
+               hw->mac.type != ixgbe_mac_X550EM_a) {
+               memset(l2_tn_filter, 0, sizeof(struct rte_eth_l2_tunnel_conf));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       NULL, "Not supported by L2 tunnel filter");
+               return -rte_errno;
+       }
+
+       return ret;
+}
+
+/* Parse to get the attr and action info of a flow director rule. */
+static int
+ixgbe_parse_fdir_act_attr(const struct rte_flow_attr *attr,
+                         const struct rte_flow_action actions[],
+                         struct ixgbe_fdir_rule *rule,
+                         struct rte_flow_error *error)
+{
+       const struct rte_flow_action *act;
+       const struct rte_flow_action_queue *act_q;
+       const struct rte_flow_action_mark *mark;
+       uint32_t index;
+
+       /* parse attr */
+       /* must be input direction */
+       if (!attr->ingress) {
+               memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+                       attr, "Only support ingress.");
+               return -rte_errno;
+       }
+
+       /* not supported */
+       if (attr->egress) {
+               memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+                       attr, "Not support egress.");
+               return -rte_errno;
+       }
+
+       /* not supported */
+       if (attr->priority) {
+               memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+                       attr, "Not support priority.");
+               return -rte_errno;
+       }
+
+       /* parse action */
+       index = 0;
+
+       /* check if the first not void action is QUEUE or DROP. */
+       NEXT_ITEM_OF_ACTION(act, actions, index);
+       if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE &&
+           act->type != RTE_FLOW_ACTION_TYPE_DROP) {
+               memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ACTION,
+                       act, "Not supported action.");
+               return -rte_errno;
+       }
+
+       if (act->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
+               act_q = (const struct rte_flow_action_queue *)act->conf;
+               rule->queue = act_q->index;
+       } else { /* drop */
+               rule->fdirflags = IXGBE_FDIRCMD_DROP;
+       }
+
+       /* check if the next not void item is MARK */
+       index++;
+       NEXT_ITEM_OF_ACTION(act, actions, index);
+       if ((act->type != RTE_FLOW_ACTION_TYPE_MARK) &&
+               (act->type != RTE_FLOW_ACTION_TYPE_END)) {
+               memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ACTION,
+                       act, "Not supported action.");
+               return -rte_errno;
+       }
+
+       rule->soft_id = 0;
+
+       if (act->type == RTE_FLOW_ACTION_TYPE_MARK) {
+               mark = (const struct rte_flow_action_mark *)act->conf;
+               rule->soft_id = mark->id;
+               index++;
+               NEXT_ITEM_OF_ACTION(act, actions, index);
+       }
+
+       /* check if the next not void item is END */
+       if (act->type != RTE_FLOW_ACTION_TYPE_END) {
+               memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ACTION,
+                       act, "Not supported action.");
+               return -rte_errno;
+       }
+
+       return 0;
+}
+
+/**
+ * Parse the rule to see if it is an IP or MAC VLAN flow director rule,
+ * and extract the flow director filter info along the way.
+ * UDP/TCP/SCTP PATTERN:
+ * The first not void item can be ETH or IPV4.
+ * The second not void item must be IPV4 if the first one is ETH.
+ * The third not void item must be UDP or TCP or SCTP.
+ * The next not void item must be END.
+ * MAC VLAN PATTERN:
+ * The first not void item must be ETH.
+ * The second not void item must be MAC VLAN.
+ * The next not void item must be END.
+ * ACTION:
+ * The first not void action should be QUEUE or DROP.
+ * The second not void optional action should be MARK,
+ * mark_id is a uint32_t number.
+ * The next not void action should be END.
+ * UDP/TCP/SCTP pattern example:
+ * ITEM                Spec                    Mask
+ * ETH         NULL                    NULL
+ * IPV4                src_addr 192.168.1.20   0xFFFFFFFF
+ *             dst_addr 192.167.3.50   0xFFFFFFFF
+ * UDP/TCP/SCTP        src_port        80      0xFFFF
+ *             dst_port        80      0xFFFF
+ * END
+ * MAC VLAN pattern example:
+ * ITEM                Spec                    Mask
+ * ETH         dst_addr
+ *             {0xAC, 0x7B, 0xA1,      {0xFF, 0xFF, 0xFF,
+ *             0x2C, 0x6D, 0x36}       0xFF, 0xFF, 0xFF}
+ * MAC VLAN    tci     0x2016          0xFFFF
+ *             tpid    0x8100          0xFFFF
+ * END
+ * Other members in mask and spec should be set to 0x00.
+ * Item->last should be NULL.
+ */
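+/*
+ * Illustrative sketch (not part of the driver): the action ordering the
+ * flow director parsers accept, i.e. QUEUE (or DROP), an optional MARK,
+ * then END; the pattern items would be built as in the n-tuple sketch
+ * earlier in this file. The queue index and mark id are hypothetical.
+ *
+ *   struct rte_flow_action_queue queue = { .index = 1 };
+ *   struct rte_flow_action_mark mark = { .id = 0x1234 };
+ *   struct rte_flow_action actions[] = {
+ *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
+ *           { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
+ *           { .type = RTE_FLOW_ACTION_TYPE_END },
+ *   };
+ */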
+static int
+ixgbe_parse_fdir_filter_normal(const struct rte_flow_attr *attr,
+                              const struct rte_flow_item pattern[],
+                              const struct rte_flow_action actions[],
+                              struct ixgbe_fdir_rule *rule,
+                              struct rte_flow_error *error)
+{
+       const struct rte_flow_item *item;
+       const struct rte_flow_item_eth *eth_spec;
+       const struct rte_flow_item_eth *eth_mask;
+       const struct rte_flow_item_ipv4 *ipv4_spec;
+       const struct rte_flow_item_ipv4 *ipv4_mask;
+       const struct rte_flow_item_tcp *tcp_spec;
+       const struct rte_flow_item_tcp *tcp_mask;
+       const struct rte_flow_item_udp *udp_spec;
+       const struct rte_flow_item_udp *udp_mask;
+       const struct rte_flow_item_sctp *sctp_spec;
+       const struct rte_flow_item_sctp *sctp_mask;
+       const struct rte_flow_item_vlan *vlan_spec;
+       const struct rte_flow_item_vlan *vlan_mask;
+
+       uint32_t index, j;
+
+       if (!pattern) {
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM_NUM,
+                       NULL, "NULL pattern.");
+               return -rte_errno;
+       }
+
+       if (!actions) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ACTION_NUM,
+                                  NULL, "NULL action.");
+               return -rte_errno;
+       }
+
+       if (!attr) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ATTR,
+                                  NULL, "NULL attribute.");
+               return -rte_errno;
+       }
+
+       /**
+        * Some fields may not be provided. Set spec to 0 and mask to default
+        * value, so we need not do anything later for the fields that are not
+        * provided.
+        */
+       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+       memset(&rule->mask, 0xFF, sizeof(struct ixgbe_hw_fdir_mask));
+       rule->mask.vlan_tci_mask = 0;
+
+       /* parse pattern */
+       index = 0;
+
+       /**
+        * The first not void item should be
+        * MAC or IPv4 or TCP or UDP or SCTP.
+        */
+       NEXT_ITEM_OF_PATTERN(item, pattern, index);
+       if (item->type != RTE_FLOW_ITEM_TYPE_ETH &&
+           item->type != RTE_FLOW_ITEM_TYPE_IPV4 &&
+           item->type != RTE_FLOW_ITEM_TYPE_TCP &&
+           item->type != RTE_FLOW_ITEM_TYPE_UDP &&
+           item->type != RTE_FLOW_ITEM_TYPE_SCTP) {
+               memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item, "Not supported by fdir filter");
+               return -rte_errno;
+       }
+
+       rule->mode = RTE_FDIR_MODE_PERFECT;
+
+       /*Not supported last point for range*/
+       if (item->last) {
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                       item, "Not supported last point for range");
+               return -rte_errno;
+       }
+
+       /* Get the MAC info. */
+       if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
+               /**
+                * Only support vlan and dst MAC address,
+                * others should be masked.
+                */
+               if (item->spec && !item->mask) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+
+               if (item->spec) {
+                       rule->b_spec = TRUE;
+                       eth_spec = (const struct rte_flow_item_eth *)item->spec;
+
+                       /* Get the dst MAC. */
+                       for (j = 0; j < ETHER_ADDR_LEN; j++) {
+                               rule->ixgbe_fdir.formatted.inner_mac[j] =
+                                       eth_spec->dst.addr_bytes[j];
+                       }
+               }
+
+
+               if (item->mask) {
+                       /* If an Ethernet mask is given, it means MAC VLAN mode. */
+                       rule->mode = RTE_FDIR_MODE_PERFECT_MAC_VLAN;
+
+                       rule->b_mask = TRUE;
+                       eth_mask = (const struct rte_flow_item_eth *)item->mask;
+
+                       /* Ether type should be masked. */
+                       if (eth_mask->type) {
+                               memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                               rte_flow_error_set(error, EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ITEM,
+                                       item, "Not supported by fdir filter");
+                               return -rte_errno;
+                       }
+
+                       /**
+                        * The src MAC address mask must be all zeroes
+                        * (src is ignored), and per-byte masking of the
+                        * dst MAC address is not supported (every mask
+                        * byte must be 0xFF).
+                        */
+                       for (j = 0; j < ETHER_ADDR_LEN; j++) {
+                               if (eth_mask->src.addr_bytes[j] ||
+                                       eth_mask->dst.addr_bytes[j] != 0xFF) {
+                                       memset(rule, 0,
+                                       sizeof(struct ixgbe_fdir_rule));
+                                       rte_flow_error_set(error, EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ITEM,
+                                       item, "Not supported by fdir filter");
+                                       return -rte_errno;
+                               }
+                       }
+
+                       /* When there is no VLAN item, use the 0xEFFF mask
+                        * (match PCP and VID, ignore the DEI bit).
+                        */
+                       rule->mask.vlan_tci_mask = rte_cpu_to_be_16(0xEFFF);
+               }
+               /**
+                * If both spec and mask are NULL,
+                * it means don't care about ETH.
+                * Do nothing.
+                */
+
+               /**
+                * Check if the next not void item is vlan or ipv4.
+                * IPv6 is not supported.
+                */
+               index++;
+               NEXT_ITEM_OF_PATTERN(item, pattern, index);
+               if (rule->mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
+                       if (item->type != RTE_FLOW_ITEM_TYPE_VLAN) {
+                               memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                               rte_flow_error_set(error, EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ITEM,
+                                       item, "Not supported by fdir filter");
+                               return -rte_errno;
+                       }
+               } else {
+                       if (item->type != RTE_FLOW_ITEM_TYPE_IPV4) {
+                               memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                               rte_flow_error_set(error, EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ITEM,
+                                       item, "Not supported by fdir filter");
+                               return -rte_errno;
+                       }
+               }
+       }
+
+       if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
+               if (!(item->spec && item->mask)) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+
+               /*Not supported last point for range*/
+               if (item->last) {
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                               item, "Not supported last point for range");
+                       return -rte_errno;
+               }
+
+               vlan_spec = (const struct rte_flow_item_vlan *)item->spec;
+               vlan_mask = (const struct rte_flow_item_vlan *)item->mask;
+
+               if (vlan_spec->tpid != rte_cpu_to_be_16(ETHER_TYPE_VLAN)) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+
+               rule->ixgbe_fdir.formatted.vlan_id = vlan_spec->tci;
+
+               if (vlan_mask->tpid != (uint16_t)~0U) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               rule->mask.vlan_tci_mask = vlan_mask->tci;
+               /* More than one VLAN tag is not supported. */
+
+               /**
+                * Check if the next not void item is not vlan.
+                */
+               index++;
+               NEXT_ITEM_OF_PATTERN(item, pattern, index);
+               if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               } else if (item->type != RTE_FLOW_ITEM_TYPE_END) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+       }
+
+       /* Get the IP info. */
+       if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) {
+               /**
+                * Set the flow type even if there's no content
+                * as we must have a flow type.
+                */
+               rule->ixgbe_fdir.formatted.flow_type =
+                       IXGBE_ATR_FLOW_TYPE_IPV4;
+               /*Not supported last point for range*/
+               if (item->last) {
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                               item, "Not supported last point for range");
+                       return -rte_errno;
+               }
+               /**
+                * Only care about src & dst addresses,
+                * others should be masked.
+                */
+               if (!item->mask) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               rule->b_mask = TRUE;
+               ipv4_mask =
+                       (const struct rte_flow_item_ipv4 *)item->mask;
+               if (ipv4_mask->hdr.version_ihl ||
+                   ipv4_mask->hdr.type_of_service ||
+                   ipv4_mask->hdr.total_length ||
+                   ipv4_mask->hdr.packet_id ||
+                   ipv4_mask->hdr.fragment_offset ||
+                   ipv4_mask->hdr.time_to_live ||
+                   ipv4_mask->hdr.next_proto_id ||
+                   ipv4_mask->hdr.hdr_checksum) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               rule->mask.dst_ipv4_mask = ipv4_mask->hdr.dst_addr;
+               rule->mask.src_ipv4_mask = ipv4_mask->hdr.src_addr;
+
+               if (item->spec) {
+                       rule->b_spec = TRUE;
+                       ipv4_spec =
+                               (const struct rte_flow_item_ipv4 *)item->spec;
+                       rule->ixgbe_fdir.formatted.dst_ip[0] =
+                               ipv4_spec->hdr.dst_addr;
+                       rule->ixgbe_fdir.formatted.src_ip[0] =
+                               ipv4_spec->hdr.src_addr;
+               }
+
+               /**
+                * Check if the next not void item is
+                * TCP or UDP or SCTP or END.
+                */
+               index++;
+               NEXT_ITEM_OF_PATTERN(item, pattern, index);
+               if (item->type != RTE_FLOW_ITEM_TYPE_TCP &&
+                   item->type != RTE_FLOW_ITEM_TYPE_UDP &&
+                   item->type != RTE_FLOW_ITEM_TYPE_SCTP &&
+                   item->type != RTE_FLOW_ITEM_TYPE_END) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+       }
+
+       /* Get the TCP info. */
+       if (item->type == RTE_FLOW_ITEM_TYPE_TCP) {
+               /**
+                * Set the flow type even if there's no content
+                * as we must have a flow type.
+                */
+               rule->ixgbe_fdir.formatted.flow_type =
+                       IXGBE_ATR_FLOW_TYPE_TCPV4;
+               /*Not supported last point for range*/
+               if (item->last) {
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                               item, "Not supported last point for range");
+                       return -rte_errno;
+               }
+               /**
+                * Only care about src & dst ports,
+                * others should be masked.
+                */
+               if (!item->mask) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               rule->b_mask = TRUE;
+               tcp_mask = (const struct rte_flow_item_tcp *)item->mask;
+               if (tcp_mask->hdr.sent_seq ||
+                   tcp_mask->hdr.recv_ack ||
+                   tcp_mask->hdr.data_off ||
+                   tcp_mask->hdr.tcp_flags ||
+                   tcp_mask->hdr.rx_win ||
+                   tcp_mask->hdr.cksum ||
+                   tcp_mask->hdr.tcp_urp) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               rule->mask.src_port_mask = tcp_mask->hdr.src_port;
+               rule->mask.dst_port_mask = tcp_mask->hdr.dst_port;
+
+               if (item->spec) {
+                       rule->b_spec = TRUE;
+                       tcp_spec = (const struct rte_flow_item_tcp *)item->spec;
+                       rule->ixgbe_fdir.formatted.src_port =
+                               tcp_spec->hdr.src_port;
+                       rule->ixgbe_fdir.formatted.dst_port =
+                               tcp_spec->hdr.dst_port;
+               }
+       }
+
+       /* Get the UDP info */
+       if (item->type == RTE_FLOW_ITEM_TYPE_UDP) {
+               /**
+                * Set the flow type even if there's no content
+                * as we must have a flow type.
+                */
+               rule->ixgbe_fdir.formatted.flow_type =
+                       IXGBE_ATR_FLOW_TYPE_UDPV4;
+               /*Not supported last point for range*/
+               if (item->last) {
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                               item, "Not supported last point for range");
+                       return -rte_errno;
+               }
+               /**
+                * Only care about src & dst ports,
+                * others should be masked.
+                */
+               if (!item->mask) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               rule->b_mask = TRUE;
+               udp_mask = (const struct rte_flow_item_udp *)item->mask;
+               if (udp_mask->hdr.dgram_len ||
+                   udp_mask->hdr.dgram_cksum) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               rule->mask.src_port_mask = udp_mask->hdr.src_port;
+               rule->mask.dst_port_mask = udp_mask->hdr.dst_port;
+
+               if (item->spec) {
+                       rule->b_spec = TRUE;
+                       udp_spec = (const struct rte_flow_item_udp *)item->spec;
+                       rule->ixgbe_fdir.formatted.src_port =
+                               udp_spec->hdr.src_port;
+                       rule->ixgbe_fdir.formatted.dst_port =
+                               udp_spec->hdr.dst_port;
+               }
+       }
+
+       /* Get the SCTP info */
+       if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) {
+               /**
+                * Set the flow type even if there's no content
+                * as we must have a flow type.
+                */
+               rule->ixgbe_fdir.formatted.flow_type =
+                       IXGBE_ATR_FLOW_TYPE_SCTPV4;
+               /*Not supported last point for range*/
+               if (item->last) {
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                               item, "Not supported last point for range");
+                       return -rte_errno;
+               }
+               /**
+                * Only care about src & dst ports,
+                * others should be masked.
+                */
+               if (!item->mask) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               rule->b_mask = TRUE;
+               sctp_mask =
+                       (const struct rte_flow_item_sctp *)item->mask;
+               if (sctp_mask->hdr.tag ||
+                   sctp_mask->hdr.cksum) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               rule->mask.src_port_mask = sctp_mask->hdr.src_port;
+               rule->mask.dst_port_mask = sctp_mask->hdr.dst_port;
+
+               if (item->spec) {
+                       rule->b_spec = TRUE;
+                       sctp_spec =
+                               (const struct rte_flow_item_sctp *)item->spec;
+                       rule->ixgbe_fdir.formatted.src_port =
+                               sctp_spec->hdr.src_port;
+                       rule->ixgbe_fdir.formatted.dst_port =
+                               sctp_spec->hdr.dst_port;
+               }
+       }
+
+       if (item->type != RTE_FLOW_ITEM_TYPE_END) {
+               /* check if the next not void item is END */
+               index++;
+               NEXT_ITEM_OF_PATTERN(item, pattern, index);
+               if (item->type != RTE_FLOW_ITEM_TYPE_END) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+       }
+
+       return ixgbe_parse_fdir_act_attr(attr, actions, rule, error);
+}
+
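+/*
+ * Illustrative sketch, not part of the upstream driver: an application-side
+ * rte_flow rule built to satisfy the UDP pattern documented above (ETH with
+ * no spec/mask, IPv4 src/dst fully masked, UDP ports fully masked, QUEUE
+ * plus MARK actions).  The helper name and every concrete value (port id,
+ * addresses, ports, queue index, mark id) are arbitrary examples.
+ */
+#if 0  /* example only, never compiled into the driver */
+#include <rte_ethdev.h>
+#include <rte_flow.h>
+#include <rte_ip.h>
+
+static int
+example_fdir_udp_rule(uint8_t port_id)
+{
+       struct rte_flow_attr attr = { .ingress = 1 };
+       struct rte_flow_item_ipv4 ip_spec = {
+               .hdr.src_addr = rte_cpu_to_be_32(IPv4(192, 168, 1, 20)),
+               .hdr.dst_addr = rte_cpu_to_be_32(IPv4(192, 167, 3, 50)),
+       };
+       struct rte_flow_item_ipv4 ip_mask = {
+               .hdr.src_addr = rte_cpu_to_be_32(0xFFFFFFFF),
+               .hdr.dst_addr = rte_cpu_to_be_32(0xFFFFFFFF),
+       };
+       struct rte_flow_item_udp udp_spec = {
+               .hdr.src_port = rte_cpu_to_be_16(80),
+               .hdr.dst_port = rte_cpu_to_be_16(80),
+       };
+       struct rte_flow_item_udp udp_mask = {
+               .hdr.src_port = rte_cpu_to_be_16(0xFFFF),
+               .hdr.dst_port = rte_cpu_to_be_16(0xFFFF),
+       };
+       struct rte_flow_item pattern[] = {
+               { .type = RTE_FLOW_ITEM_TYPE_ETH },
+               { .type = RTE_FLOW_ITEM_TYPE_IPV4,
+                 .spec = &ip_spec, .mask = &ip_mask },
+               { .type = RTE_FLOW_ITEM_TYPE_UDP,
+                 .spec = &udp_spec, .mask = &udp_mask },
+               { .type = RTE_FLOW_ITEM_TYPE_END },
+       };
+       struct rte_flow_action_queue queue = { .index = 1 };
+       struct rte_flow_action_mark mark = { .id = 0x1234 };
+       struct rte_flow_action actions[] = {
+               { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
+               { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
+               { .type = RTE_FLOW_ACTION_TYPE_END },
+       };
+       struct rte_flow_error err;
+
+       /* Format check only; maps to ixgbe_flow_validate() further below. */
+       return rte_flow_validate(port_id, &attr, pattern, actions, &err);
+}
+#endif
+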
+#define NVGRE_PROTOCOL 0x6558
+
+/**
+ * Parse the rule to see if it is a VxLAN or NVGRE flow director rule,
+ * and collect the flow director filter info along the way.
+ * VxLAN PATTERN:
+ * The first not void item must be ETH.
+ * The second not void item must be IPV4/ IPV6.
+ * The third not void item must be UDP.
+ * The fourth not void item must be VXLAN.
+ * The next not void item must be END.
+ * NVGRE PATTERN:
+ * The first not void item must be ETH.
+ * The second not void item must be IPV4/ IPV6.
+ * The third not void item must be NVGRE.
+ * The next not void item must be END.
+ * ACTION:
+ * The first not void action should be QUEUE or DROP.
+ * The second not void optional action should be MARK,
+ * mark_id is a uint32_t number.
+ * The next not void action should be END.
+ * VxLAN pattern example:
+ * ITEM                Spec                    Mask
+ * ETH         NULL                    NULL
+ * IPV4/IPV6   NULL                    NULL
+ * UDP         NULL                    NULL
+ * VxLAN       vni{0x00, 0x32, 0x54}   {0xFF, 0xFF, 0xFF}
+ * END
+ * NVGRE pattern example:
+ * ITEM                Spec                    Mask
+ * ETH         NULL                    NULL
+ * IPV4/IPV6   NULL                    NULL
+ * NVGRE       protocol        0x6558  0xFFFF
+ *             tni{0x00, 0x32, 0x54}   {0xFF, 0xFF, 0xFF}
+ * END
+ * Other members in mask and spec should be set to 0x00.
+ * Item->last should be NULL.
+ */
+static int
+ixgbe_parse_fdir_filter_tunnel(const struct rte_flow_attr *attr,
+                              const struct rte_flow_item pattern[],
+                              const struct rte_flow_action actions[],
+                              struct ixgbe_fdir_rule *rule,
+                              struct rte_flow_error *error)
+{
+       const struct rte_flow_item *item;
+       const struct rte_flow_item_vxlan *vxlan_spec;
+       const struct rte_flow_item_vxlan *vxlan_mask;
+       const struct rte_flow_item_nvgre *nvgre_spec;
+       const struct rte_flow_item_nvgre *nvgre_mask;
+       const struct rte_flow_item_eth *eth_spec;
+       const struct rte_flow_item_eth *eth_mask;
+       const struct rte_flow_item_vlan *vlan_spec;
+       const struct rte_flow_item_vlan *vlan_mask;
+       uint32_t index, j;
+
+       if (!pattern) {
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM_NUM,
+                                  NULL, "NULL pattern.");
+               return -rte_errno;
+       }
+
+       if (!actions) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ACTION_NUM,
+                                  NULL, "NULL action.");
+               return -rte_errno;
+       }
+
+       if (!attr) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ATTR,
+                                  NULL, "NULL attribute.");
+               return -rte_errno;
+       }
+
+       /**
+        * Some fields may not be provided. Set spec to 0 and mask to default
+        * value. So, we need not do anything for the not provided fields later.
+        */
+       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+       memset(&rule->mask, 0xFF, sizeof(struct ixgbe_hw_fdir_mask));
+       rule->mask.vlan_tci_mask = 0;
+
+       /* parse pattern */
+       index = 0;
+
+       /**
+        * The first not void item should be
+        * MAC or IPv4 or IPv6 or UDP or VxLAN.
+        */
+       NEXT_ITEM_OF_PATTERN(item, pattern, index);
+       if (item->type != RTE_FLOW_ITEM_TYPE_ETH &&
+           item->type != RTE_FLOW_ITEM_TYPE_IPV4 &&
+           item->type != RTE_FLOW_ITEM_TYPE_IPV6 &&
+           item->type != RTE_FLOW_ITEM_TYPE_UDP &&
+           item->type != RTE_FLOW_ITEM_TYPE_VXLAN &&
+           item->type != RTE_FLOW_ITEM_TYPE_NVGRE) {
+               memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item, "Not supported by fdir filter");
+               return -rte_errno;
+       }
+
+       rule->mode = RTE_FDIR_MODE_PERFECT_TUNNEL;
+
+       /* Skip MAC. */
+       if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
+               /* Only used to describe the protocol stack. */
+               if (item->spec || item->mask) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               /*Not supported last point for range*/
+               if (item->last) {
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                               item, "Not supported last point for range");
+                       return -rte_errno;
+               }
+
+               /* Check if the next not void item is IPv4 or IPv6. */
+               index++;
+               NEXT_ITEM_OF_PATTERN(item, pattern, index);
+               if (item->type != RTE_FLOW_ITEM_TYPE_IPV4 &&
+                   item->type != RTE_FLOW_ITEM_TYPE_IPV6) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+       }
+
+       /* Skip IP. */
+       if (item->type == RTE_FLOW_ITEM_TYPE_IPV4 ||
+           item->type == RTE_FLOW_ITEM_TYPE_IPV6) {
+               /* Only used to describe the protocol stack. */
+               if (item->spec || item->mask) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               /*Not supported last point for range*/
+               if (item->last) {
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                               item, "Not supported last point for range");
+                       return -rte_errno;
+               }
+
+               /* Check if the next not void item is UDP or NVGRE. */
+               index++;
+               NEXT_ITEM_OF_PATTERN(item, pattern, index);
+               if (item->type != RTE_FLOW_ITEM_TYPE_UDP &&
+                   item->type != RTE_FLOW_ITEM_TYPE_NVGRE) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+       }
+
+       /* Skip UDP. */
+       if (item->type == RTE_FLOW_ITEM_TYPE_UDP) {
+               /* Only used to describe the protocol stack. */
+               if (item->spec || item->mask) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               /*Not supported last point for range*/
+               if (item->last) {
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                               item, "Not supported last point for range");
+                       return -rte_errno;
+               }
+
+               /* Check if the next not void item is VxLAN. */
+               index++;
+               NEXT_ITEM_OF_PATTERN(item, pattern, index);
+               if (item->type != RTE_FLOW_ITEM_TYPE_VXLAN) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+       }
+
+       /* Get the VxLAN info */
+       if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
+               rule->ixgbe_fdir.formatted.tunnel_type =
+                       RTE_FDIR_TUNNEL_TYPE_VXLAN;
+
+               /* Only care about VNI, others should be masked. */
+               if (!item->mask) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               /*Not supported last point for range*/
+               if (item->last) {
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                               item, "Not supported last point for range");
+                       return -rte_errno;
+               }
+               rule->b_mask = TRUE;
+
+               /* Tunnel type is always meaningful. */
+               rule->mask.tunnel_type_mask = 1;
+
+               vxlan_mask =
+                       (const struct rte_flow_item_vxlan *)item->mask;
+               if (vxlan_mask->flags) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               /* VNI must be fully masked or not masked at all. */
+               if ((vxlan_mask->vni[0] || vxlan_mask->vni[1] ||
+                       vxlan_mask->vni[2]) &&
+                       ((vxlan_mask->vni[0] != 0xFF) ||
+                       (vxlan_mask->vni[1] != 0xFF) ||
+                               (vxlan_mask->vni[2] != 0xFF))) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+
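+               /* The VNI is a 24-bit field: copy its three bytes and
+                * shift them into place within the 32-bit tunnel id,
+                * mirroring the NVGRE TNI handling below.
+                */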
+               rte_memcpy(&rule->mask.tunnel_id_mask, vxlan_mask->vni,
+                       RTE_DIM(vxlan_mask->vni));
+               rule->mask.tunnel_id_mask <<= 8;
+
+               if (item->spec) {
+                       rule->b_spec = TRUE;
+                       vxlan_spec = (const struct rte_flow_item_vxlan *)
+                                       item->spec;
+                       rte_memcpy(&rule->ixgbe_fdir.formatted.tni_vni,
+                               vxlan_spec->vni, RTE_DIM(vxlan_spec->vni));
+                       rule->ixgbe_fdir.formatted.tni_vni <<= 8;
+               }
+       }
+
+       /* Get the NVGRE info */
+       if (item->type == RTE_FLOW_ITEM_TYPE_NVGRE) {
+               rule->ixgbe_fdir.formatted.tunnel_type =
+                       RTE_FDIR_TUNNEL_TYPE_NVGRE;
+
+               /**
+                * Only care about c_k_s_rsvd0_ver, protocol and TNI;
+                * others should be masked.
+                */
+               if (!item->mask) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               /*Not supported last point for range*/
+               if (item->last) {
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                               item, "Not supported last point for range");
+                       return -rte_errno;
+               }
+               rule->b_mask = TRUE;
+
+               /* Tunnel type is always meaningful. */
+               rule->mask.tunnel_type_mask = 1;
+
+               nvgre_mask =
+                       (const struct rte_flow_item_nvgre *)item->mask;
+               if (nvgre_mask->flow_id) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               if (nvgre_mask->c_k_s_rsvd0_ver !=
+                       rte_cpu_to_be_16(0x3000) ||
+                   nvgre_mask->protocol != 0xFFFF) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               /* TNI must be totally masked or not. */
+               if (nvgre_mask->tni[0] &&
+                   ((nvgre_mask->tni[0] != 0xFF) ||
+                   (nvgre_mask->tni[1] != 0xFF) ||
+                   (nvgre_mask->tni[2] != 0xFF))) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               /* TNI is a 24-bit field */
+               rte_memcpy(&rule->mask.tunnel_id_mask, nvgre_mask->tni,
+                       RTE_DIM(nvgre_mask->tni));
+               rule->mask.tunnel_id_mask <<= 8;
+
+               if (item->spec) {
+                       rule->b_spec = TRUE;
+                       nvgre_spec =
+                               (const struct rte_flow_item_nvgre *)item->spec;
+                       if (nvgre_spec->c_k_s_rsvd0_ver !=
+                           rte_cpu_to_be_16(0x2000) ||
+                           nvgre_spec->protocol !=
+                           rte_cpu_to_be_16(NVGRE_PROTOCOL)) {
+                               memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                               rte_flow_error_set(error, EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ITEM,
+                                       item, "Not supported by fdir filter");
+                               return -rte_errno;
+                       }
+                       /* TNI is a 24-bit field */
+                       rte_memcpy(&rule->ixgbe_fdir.formatted.tni_vni,
+                       nvgre_spec->tni, RTE_DIM(nvgre_spec->tni));
+                       rule->ixgbe_fdir.formatted.tni_vni <<= 8;
+               }
+       }
+
+       /* Check that the next not void item is ETH (the inner MAC). */
+       index++;
+       NEXT_ITEM_OF_PATTERN(item, pattern, index);
+       if (item->type != RTE_FLOW_ITEM_TYPE_ETH) {
+               memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item, "Not supported by fdir filter");
+               return -rte_errno;
+       }
+
+       /**
+        * Only support vlan and dst MAC address,
+        * others should be masked.
+        */
+
+       if (!item->mask) {
+               memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item, "Not supported by fdir filter");
+               return -rte_errno;
+       }
+       /*Not supported last point for range*/
+       if (item->last) {
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                       item, "Not supported last point for range");
+               return -rte_errno;
+       }
+       rule->b_mask = TRUE;
+       eth_mask = (const struct rte_flow_item_eth *)item->mask;
+
+       /* Ether type should be masked. */
+       if (eth_mask->type) {
+               memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item, "Not supported by fdir filter");
+               return -rte_errno;
+       }
+
+       /* src MAC address should be masked. */
+       for (j = 0; j < ETHER_ADDR_LEN; j++) {
+               if (eth_mask->src.addr_bytes[j]) {
+                       memset(rule, 0,
+                              sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+       }
+       rule->mask.mac_addr_byte_mask = 0;
+       for (j = 0; j < ETHER_ADDR_LEN; j++) {
+               /* It's a per byte mask. */
+               if (eth_mask->dst.addr_bytes[j] == 0xFF) {
+                       rule->mask.mac_addr_byte_mask |= 0x1 << j;
+               } else if (eth_mask->dst.addr_bytes[j]) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+       }
+
+       /* When there is no VLAN item, use the 0xEFFF mask
+        * (match PCP and VID, ignore the DEI bit).
+        */
+       rule->mask.vlan_tci_mask = rte_cpu_to_be_16(0xEFFF);
+
+       if (item->spec) {
+               rule->b_spec = TRUE;
+               eth_spec = (const struct rte_flow_item_eth *)item->spec;
+
+               /* Get the dst MAC. */
+               for (j = 0; j < ETHER_ADDR_LEN; j++) {
+                       rule->ixgbe_fdir.formatted.inner_mac[j] =
+                               eth_spec->dst.addr_bytes[j];
+               }
+       }
+
+       /**
+        * Check if the next not void item is vlan or ipv4.
+        * IPv6 is not supported.
+        */
+       index++;
+       NEXT_ITEM_OF_PATTERN(item, pattern, index);
+       if ((item->type != RTE_FLOW_ITEM_TYPE_VLAN) &&
+               (item->type != RTE_FLOW_ITEM_TYPE_IPV4)) {
+               memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item, "Not supported by fdir filter");
+               return -rte_errno;
+       }
+       /*Not supported last point for range*/
+       if (item->last) {
+               rte_flow_error_set(error, EINVAL,
+                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                       item, "Not supported last point for range");
+               return -rte_errno;
+       }
+
+       if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
+               if (!(item->spec && item->mask)) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+
+               vlan_spec = (const struct rte_flow_item_vlan *)item->spec;
+               vlan_mask = (const struct rte_flow_item_vlan *)item->mask;
+
+               if (vlan_spec->tpid != rte_cpu_to_be_16(ETHER_TYPE_VLAN)) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+
+               rule->ixgbe_fdir.formatted.vlan_id = vlan_spec->tci;
+
+               if (vlan_mask->tpid != (uint16_t)~0U) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               rule->mask.vlan_tci_mask = vlan_mask->tci;
+               /* More than one VLAN tag is not supported. */
+
+               /**
+                * Check if the next not void item is not vlan.
+                */
+               index++;
+               NEXT_ITEM_OF_PATTERN(item, pattern, index);
+               if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               } else if (item->type != RTE_FLOW_ITEM_TYPE_END) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+               /* check if the next not void item is END */
+               index++;
+               NEXT_ITEM_OF_PATTERN(item, pattern, index);
+               if (item->type != RTE_FLOW_ITEM_TYPE_END) {
+                       memset(rule, 0, sizeof(struct ixgbe_fdir_rule));
+                       rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by fdir filter");
+                       return -rte_errno;
+               }
+       }
+
+       /**
+        * If there is no VLAN item, it means we don't care about the VLAN.
+        * Do nothing.
+        */
+
+       return ixgbe_parse_fdir_act_attr(attr, actions, rule, error);
+}
+
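+/*
+ * Illustrative sketch, not part of the upstream driver: an application-side
+ * VxLAN rule shaped the way the tunnel parser above accepts it -- outer
+ * ETH/IPv4/UDP with no spec/mask, VXLAN with a fully masked VNI, an inner
+ * ETH whose dst MAC is fully masked, and one VLAN tag.  It assumes the port
+ * was configured with fdir_conf.mode = RTE_FDIR_MODE_PERFECT_TUNNEL; the
+ * helper name and all concrete values (VNI, MAC, TCI, queue) are arbitrary.
+ */
+#if 0  /* example only, never compiled into the driver */
+#include <rte_ethdev.h>
+#include <rte_flow.h>
+
+static int
+example_fdir_vxlan_rule(uint8_t port_id)
+{
+       struct rte_flow_attr attr = { .ingress = 1 };
+       struct rte_flow_item_vxlan vxlan_spec = {
+               .vni = { 0x00, 0x32, 0x54 },
+       };
+       struct rte_flow_item_vxlan vxlan_mask = {
+               .vni = { 0xFF, 0xFF, 0xFF },
+       };
+       struct rte_flow_item_eth inner_eth_spec = {
+               .dst.addr_bytes = { 0xAC, 0x7B, 0xA1, 0x2C, 0x6D, 0x36 },
+       };
+       struct rte_flow_item_eth inner_eth_mask = {
+               .dst.addr_bytes = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF },
+       };
+       struct rte_flow_item_vlan vlan_spec = {
+               .tpid = rte_cpu_to_be_16(ETHER_TYPE_VLAN),
+               .tci = rte_cpu_to_be_16(0x2016),
+       };
+       struct rte_flow_item_vlan vlan_mask = {
+               .tpid = 0xFFFF,
+               .tci = rte_cpu_to_be_16(0xEFFF),
+       };
+       /* Zero padding makes the trailing entries read as END items. */
+       struct rte_flow_item pattern[8] = {
+               { .type = RTE_FLOW_ITEM_TYPE_ETH },
+               { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
+               { .type = RTE_FLOW_ITEM_TYPE_UDP },
+               { .type = RTE_FLOW_ITEM_TYPE_VXLAN,
+                 .spec = &vxlan_spec, .mask = &vxlan_mask },
+               { .type = RTE_FLOW_ITEM_TYPE_ETH,
+                 .spec = &inner_eth_spec, .mask = &inner_eth_mask },
+               { .type = RTE_FLOW_ITEM_TYPE_VLAN,
+                 .spec = &vlan_spec, .mask = &vlan_mask },
+               { .type = RTE_FLOW_ITEM_TYPE_END },
+       };
+       struct rte_flow_action_queue queue = { .index = 1 };
+       struct rte_flow_action actions[] = {
+               { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
+               { .type = RTE_FLOW_ACTION_TYPE_END },
+       };
+       struct rte_flow_error err;
+
+       return rte_flow_validate(port_id, &attr, pattern, actions, &err);
+}
+#endif
+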
+static int
+ixgbe_validate_fdir_filter(struct rte_eth_dev *dev,
+                       const struct rte_flow_attr *attr,
+                       const struct rte_flow_item pattern[],
+                       const struct rte_flow_action actions[],
+                       struct ixgbe_fdir_rule *rule,
+                       struct rte_flow_error *error)
+{
+       int ret;
+
+       enum rte_fdir_mode fdir_mode = dev->data->dev_conf.fdir_conf.mode;
+
+       ret = ixgbe_parse_fdir_filter(attr, pattern, actions,
+                                     rule, error);
+       if (ret)
+               return ret;
+
+       if (fdir_mode == RTE_FDIR_MODE_NONE ||
+           fdir_mode != rule->mode)
+               return -ENOTSUP;
+
+       return ret;
+}
+
+static int
+ixgbe_parse_fdir_filter(const struct rte_flow_attr *attr,
+                       const struct rte_flow_item pattern[],
+                       const struct rte_flow_action actions[],
+                       struct ixgbe_fdir_rule *rule,
+                       struct rte_flow_error *error)
+{
+       int ret;
+
+       ret = ixgbe_parse_fdir_filter_normal(attr, pattern,
+                                       actions, rule, error);
+
+       if (!ret)
+               return 0;
+
+       ret = ixgbe_parse_fdir_filter_tunnel(attr, pattern,
+                                       actions, rule, error);
+
+       return ret;
+}
+
+void
+ixgbe_filterlist_flush(void)
+{
+       struct ixgbe_ntuple_filter_ele *ntuple_filter_ptr;
+       struct ixgbe_ethertype_filter_ele *ethertype_filter_ptr;
+       struct ixgbe_eth_syn_filter_ele *syn_filter_ptr;
+       struct ixgbe_eth_l2_tunnel_conf_ele *l2_tn_filter_ptr;
+       struct ixgbe_fdir_rule_ele *fdir_rule_ptr;
+       struct ixgbe_flow_mem *ixgbe_flow_mem_ptr;
+
+       while ((ntuple_filter_ptr = TAILQ_FIRST(&filter_ntuple_list))) {
+               TAILQ_REMOVE(&filter_ntuple_list,
+                                ntuple_filter_ptr,
+                                entries);
+               rte_free(ntuple_filter_ptr);
+       }
+
+       while ((ethertype_filter_ptr = TAILQ_FIRST(&filter_ethertype_list))) {
+               TAILQ_REMOVE(&filter_ethertype_list,
+                                ethertype_filter_ptr,
+                                entries);
+               rte_free(ethertype_filter_ptr);
+       }
+
+       while ((syn_filter_ptr = TAILQ_FIRST(&filter_syn_list))) {
+               TAILQ_REMOVE(&filter_syn_list,
+                                syn_filter_ptr,
+                                entries);
+               rte_free(syn_filter_ptr);
+       }
+
+       while ((l2_tn_filter_ptr = TAILQ_FIRST(&filter_l2_tunnel_list))) {
+               TAILQ_REMOVE(&filter_l2_tunnel_list,
+                                l2_tn_filter_ptr,
+                                entries);
+               rte_free(l2_tn_filter_ptr);
+       }
+
+       while ((fdir_rule_ptr = TAILQ_FIRST(&filter_fdir_list))) {
+               TAILQ_REMOVE(&filter_fdir_list,
+                                fdir_rule_ptr,
+                                entries);
+               rte_free(fdir_rule_ptr);
+       }
+
+       while ((ixgbe_flow_mem_ptr = TAILQ_FIRST(&ixgbe_flow_list))) {
+               TAILQ_REMOVE(&ixgbe_flow_list,
+                                ixgbe_flow_mem_ptr,
+                                entries);
+               rte_free(ixgbe_flow_mem_ptr->flow);
+               rte_free(ixgbe_flow_mem_ptr);
+       }
+}
+
+/**
+ * Create a flow rule.
+ * Theoretically one rule can match more than one kind of filter.
+ * We will let it use the filter which it hits first.
+ * So, the sequence of the parsers below matters.
+ */
+static struct rte_flow *
+ixgbe_flow_create(struct rte_eth_dev *dev,
+                 const struct rte_flow_attr *attr,
+                 const struct rte_flow_item pattern[],
+                 const struct rte_flow_action actions[],
+                 struct rte_flow_error *error)
+{
+       int ret;
+       struct rte_eth_ntuple_filter ntuple_filter;
+       struct rte_eth_ethertype_filter ethertype_filter;
+       struct rte_eth_syn_filter syn_filter;
+       struct ixgbe_fdir_rule fdir_rule;
+       struct rte_eth_l2_tunnel_conf l2_tn_filter;
+       struct ixgbe_hw_fdir_info *fdir_info =
+               IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private);
+       struct rte_flow *flow = NULL;
+       struct ixgbe_ntuple_filter_ele *ntuple_filter_ptr;
+       struct ixgbe_ethertype_filter_ele *ethertype_filter_ptr;
+       struct ixgbe_eth_syn_filter_ele *syn_filter_ptr;
+       struct ixgbe_eth_l2_tunnel_conf_ele *l2_tn_filter_ptr;
+       struct ixgbe_fdir_rule_ele *fdir_rule_ptr;
+       struct ixgbe_flow_mem *ixgbe_flow_mem_ptr;
+
+       flow = rte_zmalloc("ixgbe_rte_flow", sizeof(struct rte_flow), 0);
+       if (!flow) {
+               PMD_DRV_LOG(ERR, "failed to allocate memory");
+               return NULL;
+       }
+       ixgbe_flow_mem_ptr = rte_zmalloc("ixgbe_flow_mem",
+                       sizeof(struct ixgbe_flow_mem), 0);
+       if (!ixgbe_flow_mem_ptr) {
+               PMD_DRV_LOG(ERR, "failed to allocate memory");
+               rte_free(flow);
+               return NULL;
+       }
+       ixgbe_flow_mem_ptr->flow = flow;
+       TAILQ_INSERT_TAIL(&ixgbe_flow_list,
+                               ixgbe_flow_mem_ptr, entries);
+
+       memset(&ntuple_filter, 0, sizeof(struct rte_eth_ntuple_filter));
+       ret = ixgbe_parse_ntuple_filter(attr, pattern,
+                       actions, &ntuple_filter, error);
+       if (!ret) {
+               ret = ixgbe_add_del_ntuple_filter(dev, &ntuple_filter, TRUE);
+               if (!ret) {
+                       ntuple_filter_ptr = rte_zmalloc("ixgbe_ntuple_filter",
+                               sizeof(struct ixgbe_ntuple_filter_ele), 0);
+                       (void)rte_memcpy(&ntuple_filter_ptr->filter_info,
+                               &ntuple_filter,
+                               sizeof(struct rte_eth_ntuple_filter));
+                       TAILQ_INSERT_TAIL(&filter_ntuple_list,
+                               ntuple_filter_ptr, entries);
+                       flow->rule = ntuple_filter_ptr;
+                       flow->filter_type = RTE_ETH_FILTER_NTUPLE;
+                       return flow;
+               }
+               goto out;
+       }
+
+       memset(&ethertype_filter, 0, sizeof(struct rte_eth_ethertype_filter));
+       ret = ixgbe_parse_ethertype_filter(attr, pattern,
+                               actions, &ethertype_filter, error);
+       if (!ret) {
+               ret = ixgbe_add_del_ethertype_filter(dev,
+                               &ethertype_filter, TRUE);
+               if (!ret) {
+                       ethertype_filter_ptr = rte_zmalloc(
+                               "ixgbe_ethertype_filter",
+                               sizeof(struct ixgbe_ethertype_filter_ele), 0);
+                       (void)rte_memcpy(&ethertype_filter_ptr->filter_info,
+                               &ethertype_filter,
+                               sizeof(struct rte_eth_ethertype_filter));
+                       TAILQ_INSERT_TAIL(&filter_ethertype_list,
+                               ethertype_filter_ptr, entries);
+                       flow->rule = ethertype_filter_ptr;
+                       flow->filter_type = RTE_ETH_FILTER_ETHERTYPE;
+                       return flow;
+               }
+               goto out;
+       }
+
+       memset(&syn_filter, 0, sizeof(struct rte_eth_syn_filter));
+       ret = cons_parse_syn_filter(attr, pattern, actions, &syn_filter, error);
+       if (!ret) {
+               ret = ixgbe_syn_filter_set(dev, &syn_filter, TRUE);
+               if (!ret) {
+                       syn_filter_ptr = rte_zmalloc("ixgbe_syn_filter",
+                               sizeof(struct ixgbe_eth_syn_filter_ele), 0);
+                       (void)rte_memcpy(&syn_filter_ptr->filter_info,
+                               &syn_filter,
+                               sizeof(struct rte_eth_syn_filter));
+                       TAILQ_INSERT_TAIL(&filter_syn_list,
+                               syn_filter_ptr,
+                               entries);
+                       flow->rule = syn_filter_ptr;
+                       flow->filter_type = RTE_ETH_FILTER_SYN;
+                       return flow;
+               }
+               goto out;
+       }
+
+       memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule));
+       ret = ixgbe_parse_fdir_filter(attr, pattern,
+                               actions, &fdir_rule, error);
+       if (!ret) {
+               /* A mask cannot be deleted. */
+               if (fdir_rule.b_mask) {
+                       if (!fdir_info->mask_added) {
+                               /* It's the first time the mask is set. */
+                               rte_memcpy(&fdir_info->mask,
+                                       &fdir_rule.mask,
+                                       sizeof(struct ixgbe_hw_fdir_mask));
+                               ret = ixgbe_fdir_set_input_mask(dev);
+                               if (ret)
+                                       goto out;
+
+                               fdir_info->mask_added = TRUE;
+                       } else {
+                               /**
+                                * Only support one global mask,
+                                * all the masks should be the same.
+                                */
+                               ret = memcmp(&fdir_info->mask,
+                                       &fdir_rule.mask,
+                                       sizeof(struct ixgbe_hw_fdir_mask));
+                               if (ret)
+                                       goto out;
+                       }
+               }
+
+               if (fdir_rule.b_spec) {
+                       ret = ixgbe_fdir_filter_program(dev, &fdir_rule,
+                                       FALSE, FALSE);
+                       if (!ret) {
+                               fdir_rule_ptr = rte_zmalloc("ixgbe_fdir_filter",
+                                       sizeof(struct ixgbe_fdir_rule_ele), 0);
+                               (void)rte_memcpy(&fdir_rule_ptr->filter_info,
+                                       &fdir_rule,
+                                       sizeof(struct ixgbe_fdir_rule));
+                               TAILQ_INSERT_TAIL(&filter_fdir_list,
+                                       fdir_rule_ptr, entries);
+                               flow->rule = fdir_rule_ptr;
+                               flow->filter_type = RTE_ETH_FILTER_FDIR;
+
+                               return flow;
+                       }
+
+                       if (ret)
+                               goto out;
+               }
+
+               goto out;
+       }
+
+       memset(&l2_tn_filter, 0, sizeof(struct rte_eth_l2_tunnel_conf));
+       ret = cons_parse_l2_tn_filter(attr, pattern,
+                                       actions, &l2_tn_filter, error);
+       if (!ret) {
+               ret = ixgbe_dev_l2_tunnel_filter_add(dev, &l2_tn_filter, FALSE);
+               if (!ret) {
+                       l2_tn_filter_ptr = rte_zmalloc("ixgbe_l2_tn_filter",
+                               sizeof(struct ixgbe_eth_l2_tunnel_conf_ele), 0);
+                       (void)rte_memcpy(&l2_tn_filter_ptr->filter_info,
+                               &l2_tn_filter,
+                               sizeof(struct rte_eth_l2_tunnel_conf));
+                       TAILQ_INSERT_TAIL(&filter_l2_tunnel_list,
+                               l2_tn_filter_ptr, entries);
+                       flow->rule = l2_tn_filter_ptr;
+                       flow->filter_type = RTE_ETH_FILTER_L2_TUNNEL;
+                       return flow;
+               }
+       }
+
+out:
+       TAILQ_REMOVE(&ixgbe_flow_list,
+               ixgbe_flow_mem_ptr, entries);
+       rte_free(ixgbe_flow_mem_ptr);
+       rte_free(flow);
+       return NULL;
+}
+
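+/*
+ * Illustrative sketch, not part of the upstream driver: how an application
+ * drives the ops defined here through the generic rte_flow API.  The
+ * attr/pattern/actions arrays are assumed to be filled in as in the
+ * examples above; the helper name and error handling are illustrative only.
+ */
+#if 0  /* example only, never compiled into the driver */
+#include <stdio.h>
+#include <rte_ethdev.h>
+#include <rte_flow.h>
+
+static struct rte_flow *
+example_install_rule(uint8_t port_id,
+                    const struct rte_flow_attr *attr,
+                    const struct rte_flow_item pattern[],
+                    const struct rte_flow_action actions[])
+{
+       struct rte_flow_error err = { 0 };
+       struct rte_flow *flow;
+
+       /* Format check first; this ends up in ixgbe_flow_validate(). */
+       if (rte_flow_validate(port_id, attr, pattern, actions, &err) != 0) {
+               printf("rule rejected: %s\n",
+                      err.message ? err.message : "unknown reason");
+               return NULL;
+       }
+
+       /*
+        * Program the rule; ixgbe_flow_create() tries the ntuple, ethertype,
+        * SYN, flow director and L2 tunnel parsers in that order.
+        */
+       flow = rte_flow_create(port_id, attr, pattern, actions, &err);
+       if (flow == NULL)
+               printf("rule not programmed: %s\n",
+                      err.message ? err.message : "unknown reason");
+       return flow;
+}
+#endif
+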
+/**
+ * Check if the flow rule is supported by ixgbe.
+ * It only checks the format. It doesn't guarantee that the rule can be
+ * programmed into the HW, because there may not be enough room for the rule.
+ */
+static int
+ixgbe_flow_validate(struct rte_eth_dev *dev,
+               const struct rte_flow_attr *attr,
+               const struct rte_flow_item pattern[],
+               const struct rte_flow_action actions[],
+               struct rte_flow_error *error)
+{
+       struct rte_eth_ntuple_filter ntuple_filter;
+       struct rte_eth_ethertype_filter ethertype_filter;
+       struct rte_eth_syn_filter syn_filter;
+       struct rte_eth_l2_tunnel_conf l2_tn_filter;
+       struct ixgbe_fdir_rule fdir_rule;
+       int ret;
+
+       memset(&ntuple_filter, 0, sizeof(struct rte_eth_ntuple_filter));
+       ret = ixgbe_parse_ntuple_filter(attr, pattern,
+                               actions, &ntuple_filter, error);
+       if (!ret)
+               return 0;
+
+       memset(&ethertype_filter, 0, sizeof(struct rte_eth_ethertype_filter));
+       ret = ixgbe_parse_ethertype_filter(attr, pattern,
+                               actions, &ethertype_filter, error);
+       if (!ret)
+               return 0;
+
+       memset(&syn_filter, 0, sizeof(struct rte_eth_syn_filter));
+       ret = ixgbe_parse_syn_filter(attr, pattern,
+                               actions, &syn_filter, error);
+       if (!ret)
+               return 0;
+
+       memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule));
+       ret = ixgbe_validate_fdir_filter(dev, attr, pattern,
+                               actions, &fdir_rule, error);
+       if (!ret)
+               return 0;
+
+       memset(&l2_tn_filter, 0, sizeof(struct rte_eth_l2_tunnel_conf));
+       ret = ixgbe_validate_l2_tn_filter(dev, attr, pattern,
+                               actions, &l2_tn_filter, error);
+
+       return ret;
+}
+
+/* Destroy a flow rule on ixgbe. */
+static int
+ixgbe_flow_destroy(struct rte_eth_dev *dev,
+               struct rte_flow *flow,
+               struct rte_flow_error *error)
+{
+       int ret;
+       struct rte_flow *pmd_flow = flow;
+       enum rte_filter_type filter_type = pmd_flow->filter_type;
+       struct rte_eth_ntuple_filter ntuple_filter;
+       struct rte_eth_ethertype_filter ethertype_filter;
+       struct rte_eth_syn_filter syn_filter;
+       struct ixgbe_fdir_rule fdir_rule;
+       struct rte_eth_l2_tunnel_conf l2_tn_filter;
+       struct ixgbe_ntuple_filter_ele *ntuple_filter_ptr;
+       struct ixgbe_ethertype_filter_ele *ethertype_filter_ptr;
+       struct ixgbe_eth_syn_filter_ele *syn_filter_ptr;
+       struct ixgbe_eth_l2_tunnel_conf_ele *l2_tn_filter_ptr;
+       struct ixgbe_fdir_rule_ele *fdir_rule_ptr;
+       struct ixgbe_flow_mem *ixgbe_flow_mem_ptr;
+
+       switch (filter_type) {
+       case RTE_ETH_FILTER_NTUPLE:
+               ntuple_filter_ptr = (struct ixgbe_ntuple_filter_ele *)
+                                       pmd_flow->rule;
+               (void)rte_memcpy(&ntuple_filter,
+                       &ntuple_filter_ptr->filter_info,
+                       sizeof(struct rte_eth_ntuple_filter));
+               ret = ixgbe_add_del_ntuple_filter(dev, &ntuple_filter, FALSE);
+               if (!ret) {
+                       TAILQ_REMOVE(&filter_ntuple_list,
+                       ntuple_filter_ptr, entries);
+                       rte_free(ntuple_filter_ptr);
+               }
+               break;
+       case RTE_ETH_FILTER_ETHERTYPE:
+               ethertype_filter_ptr = (struct ixgbe_ethertype_filter_ele *)
+                                       pmd_flow->rule;
+               (void)rte_memcpy(&ethertype_filter,
+                       &ethertype_filter_ptr->filter_info,
+                       sizeof(struct rte_eth_ethertype_filter));
+               ret = ixgbe_add_del_ethertype_filter(dev,
+                               &ethertype_filter, FALSE);
+               if (!ret) {
+                       TAILQ_REMOVE(&filter_ethertype_list,
+                               ethertype_filter_ptr, entries);
+                       rte_free(ethertype_filter_ptr);
+               }
+               break;
+       case RTE_ETH_FILTER_SYN:
+               syn_filter_ptr = (struct ixgbe_eth_syn_filter_ele *)
+                               pmd_flow->rule;
+               (void)rte_memcpy(&syn_filter,
+                       &syn_filter_ptr->filter_info,
+                       sizeof(struct rte_eth_syn_filter));
+               ret = ixgbe_syn_filter_set(dev, &syn_filter, FALSE);
+               if (!ret) {
+                       TAILQ_REMOVE(&filter_syn_list,
+                               syn_filter_ptr, entries);
+                       rte_free(syn_filter_ptr);
+               }
+               break;
+       case RTE_ETH_FILTER_FDIR:
+               fdir_rule_ptr = (struct ixgbe_fdir_rule_ele *)pmd_flow->rule;
+               (void)rte_memcpy(&fdir_rule,
+                       &fdir_rule_ptr->filter_info,
+                       sizeof(struct ixgbe_fdir_rule));
+               ret = ixgbe_fdir_filter_program(dev, &fdir_rule, TRUE, FALSE);
+               if (!ret) {
+                       TAILQ_REMOVE(&filter_fdir_list,
+                               fdir_rule_ptr, entries);
+                       rte_free(fdir_rule_ptr);
+               }
+               break;
+       case RTE_ETH_FILTER_L2_TUNNEL:
+               l2_tn_filter_ptr = (struct ixgbe_eth_l2_tunnel_conf_ele *)
+                               pmd_flow->rule;
+               (void)rte_memcpy(&l2_tn_filter, &l2_tn_filter_ptr->filter_info,
+                       sizeof(struct rte_eth_l2_tunnel_conf));
+               ret = ixgbe_dev_l2_tunnel_filter_del(dev, &l2_tn_filter);
+               if (!ret) {
+                       TAILQ_REMOVE(&filter_l2_tunnel_list,
+                               l2_tn_filter_ptr, entries);
+                       rte_free(l2_tn_filter_ptr);
+               }
+               break;
+       default:
+               PMD_DRV_LOG(WARNING, "Filter type (%d) not supported",
+                           filter_type);
+               ret = -EINVAL;
+               break;
+       }
+
+       if (ret) {
+               rte_flow_error_set(error, EINVAL,
+                               RTE_FLOW_ERROR_TYPE_HANDLE,
+                               NULL, "Failed to destroy flow");
+               return ret;
+       }
+
+       TAILQ_FOREACH(ixgbe_flow_mem_ptr, &ixgbe_flow_list, entries) {
+               if (ixgbe_flow_mem_ptr->flow == pmd_flow) {
+                       TAILQ_REMOVE(&ixgbe_flow_list,
+                               ixgbe_flow_mem_ptr, entries);
+                       rte_free(ixgbe_flow_mem_ptr);
+               }
+       }
+       rte_free(flow);
+
+       return ret;
+}
+
+/*  Destroy all flow rules associated with a port on ixgbe. */
+static int
+ixgbe_flow_flush(struct rte_eth_dev *dev,
+               struct rte_flow_error *error)
+{
+       int ret = 0;
+
+       ixgbe_clear_all_ntuple_filter(dev);
+       ixgbe_clear_all_ethertype_filter(dev);
+       ixgbe_clear_syn_filter(dev);
+
+       ret = ixgbe_clear_all_fdir_filter(dev);
+       if (ret < 0) {
+               rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
+                                       NULL, "Failed to flush rule");
+               return ret;
+       }
+
+       ret = ixgbe_clear_all_l2_tn_filter(dev);
+       if (ret < 0) {
+               rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
+                                       NULL, "Failed to flush rule");
+               return ret;
+       }
+
+       ixgbe_filterlist_flush();
+
+       return 0;
+}
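
The flow ops above are reached through the generic rte_flow API introduced in DPDK 17.02. As a rough usage sketch (not taken from this commit; the match values, queue index and helper name are illustrative), an application could validate and install a TCP steering rule that the ntuple parser above would be asked to handle:

#include <rte_ip.h>
#include <rte_byteorder.h>
#include <rte_flow.h>

static int
example_install_tcp_rule(uint8_t port_id, uint16_t rx_queue)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item_ipv4 ip_spec = {
		.hdr = { .dst_addr = rte_cpu_to_be_32(IPv4(192, 168, 0, 1)) },
	};
	struct rte_flow_item_ipv4 ip_mask = {
		.hdr = { .dst_addr = rte_cpu_to_be_32(0xffffffff) },
	};
	struct rte_flow_action_queue queue = { .index = rx_queue };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4,
		  .spec = &ip_spec, .mask = &ip_mask },
		{ .type = RTE_FLOW_ITEM_TYPE_TCP },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error err;
	struct rte_flow *flow;

	/* ixgbe_flow_validate(): format check only, no HW programming. */
	if (rte_flow_validate(port_id, &attr, pattern, actions, &err) != 0)
		return -1;

	/* ixgbe_flow_create(): parse again and program the filter. */
	flow = rte_flow_create(port_id, &attr, pattern, actions, &err);
	if (flow == NULL)
		return -1;

	/* ixgbe_flow_destroy(): remove the filter and free its list entry. */
	return rte_flow_destroy(port_id, flow, &err);
}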
index 56393ff..4715045 100644 (file)
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -51,6 +51,7 @@
 
 #include "base/ixgbe_common.h"
 #include "ixgbe_ethdev.h"
+#include "rte_pmd_ixgbe.h"
 
 #define IXGBE_MAX_VFTA     (128)
 #define IXGBE_VF_MSG_SIZE_DEFAULT 1
@@ -60,7 +61,9 @@
 static inline uint16_t
 dev_num_vf(struct rte_eth_dev *eth_dev)
 {
-       return eth_dev->pci_dev->max_vfs;
+       struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(eth_dev);
+
+       return pci_dev->max_vfs;
 }
 
 static inline
@@ -175,6 +178,7 @@ ixgbe_add_tx_flow_control_drop_filter(struct rte_eth_dev *eth_dev)
                IXGBE_DEV_PRIVATE_TO_FILTER_INFO(eth_dev->data->dev_private);
        uint16_t vf_num;
        int i;
+       struct ixgbe_ethertype_filter ethertype_filter;
 
        if (!hw->mac.ops.set_ethertype_anti_spoofing) {
                RTE_LOG(INFO, PMD, "ether type anti-spoofing is not"
@@ -182,16 +186,23 @@ ixgbe_add_tx_flow_control_drop_filter(struct rte_eth_dev *eth_dev)
                return;
        }
 
-       /* occupy an entity of ether type filter */
-       for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) {
-               if (!(filter_info->ethertype_mask & (1 << i))) {
-                       filter_info->ethertype_mask |= 1 << i;
-                       filter_info->ethertype_filters[i] =
-                               IXGBE_ETHERTYPE_FLOW_CTRL;
-                       break;
-               }
+       i = ixgbe_ethertype_filter_lookup(filter_info,
+                                         IXGBE_ETHERTYPE_FLOW_CTRL);
+       if (i >= 0) {
+               RTE_LOG(ERR, PMD, "An ether type filter"
+                       " entity for flow control already exists!\n");
+               return;
        }
-       if (i == IXGBE_MAX_ETQF_FILTERS) {
+
+       ethertype_filter.ethertype = IXGBE_ETHERTYPE_FLOW_CTRL;
+       ethertype_filter.etqf = IXGBE_ETQF_FILTER_EN |
+                               IXGBE_ETQF_TX_ANTISPOOF |
+                               IXGBE_ETHERTYPE_FLOW_CTRL;
+       ethertype_filter.etqs = 0;
+       ethertype_filter.conf = TRUE;
+       i = ixgbe_ethertype_filter_insert(filter_info,
+                                         &ethertype_filter);
+       if (i < 0) {
                RTE_LOG(ERR, PMD, "Cannot find an unused ether type filter"
                        " entity for flow control.\n");
                return;
@@ -660,6 +671,7 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct ixgbe_vf_info *vfinfo =
                *IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
+       struct rte_pmd_ixgbe_mb_event_param cb_param;
 
        retval = ixgbe_read_mbx(hw, msgbuf, mbx_size, vf);
        if (retval) {
@@ -674,27 +686,54 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
        /* flush the ack before we write any messages back */
        IXGBE_WRITE_FLUSH(hw);
 
+       /**
+        * Initialise the structure sent to the user application;
+        * the response from the user is returned in the retval field.
+        */
+       cb_param.retval = RTE_PMD_IXGBE_MB_EVENT_PROCEED;
+       cb_param.vfid = vf;
+       cb_param.msg_type = msgbuf[0] & 0xFFFF;
+       cb_param.msg = (void *)msgbuf;
+
        /* perform VF reset */
        if (msgbuf[0] == IXGBE_VF_RESET) {
                int ret = ixgbe_vf_reset(dev, vf, msgbuf);
 
                vfinfo[vf].clear_to_send = true;
+
+               /* notify application about VF reset */
+               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_VF_MBOX, &cb_param);
                return ret;
        }
 
+       /**
+        * Ask the user application whether we are allowed to perform
+        * those functions. If cb_param.retval is
+        * RTE_PMD_IXGBE_MB_EVENT_PROCEED, proceed as usual;
+        * if RTE_PMD_IXGBE_MB_EVENT_NOOP_ACK, do nothing and send ACK to VF;
+        * if RTE_PMD_IXGBE_MB_EVENT_NOOP_NACK, do nothing and send NAK to VF.
+        */
+       _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_VF_MBOX, &cb_param);
+
+       retval = cb_param.retval;
+
        /* check & process VF to PF mailbox message */
        switch ((msgbuf[0] & 0xFFFF)) {
        case IXGBE_VF_SET_MAC_ADDR:
-               retval = ixgbe_vf_set_mac_addr(dev, vf, msgbuf);
+               if (retval == RTE_PMD_IXGBE_MB_EVENT_PROCEED)
+                       retval = ixgbe_vf_set_mac_addr(dev, vf, msgbuf);
                break;
        case IXGBE_VF_SET_MULTICAST:
-               retval = ixgbe_vf_set_multicast(dev, vf, msgbuf);
+               if (retval == RTE_PMD_IXGBE_MB_EVENT_PROCEED)
+                       retval = ixgbe_vf_set_multicast(dev, vf, msgbuf);
                break;
        case IXGBE_VF_SET_LPE:
-               retval = ixgbe_set_vf_lpe(dev, vf, msgbuf);
+               if (retval == RTE_PMD_IXGBE_MB_EVENT_PROCEED)
+                       retval = ixgbe_set_vf_lpe(dev, vf, msgbuf);
                break;
        case IXGBE_VF_SET_VLAN:
-               retval = ixgbe_vf_set_vlan(dev, vf, msgbuf);
+               if (retval == RTE_PMD_IXGBE_MB_EVENT_PROCEED)
+                       retval = ixgbe_vf_set_vlan(dev, vf, msgbuf);
                break;
        case IXGBE_VF_API_NEGOTIATE:
                retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf);
@@ -704,7 +743,8 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf)
                msg_size = IXGBE_VF_GET_QUEUE_MSG_SIZE;
                break;
        case IXGBE_VF_UPDATE_XCAST_MODE:
-               retval = ixgbe_set_vf_mc_promisc(dev, vf, msgbuf);
+               if (retval == RTE_PMD_IXGBE_MB_EVENT_PROCEED)
+                       retval = ixgbe_set_vf_mc_promisc(dev, vf, msgbuf);
                break;
        default:
                PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x", (unsigned)msgbuf[0]);
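
The cb_param handshake added above implies that the PF application registers a callback for RTE_ETH_EVENT_VF_MBOX and fills in retval. A minimal sketch, assuming the DPDK 17.02 callback prototype; the policy shown (vetoing VF MAC address changes) and the use of IXGBE_VF_SET_MAC_ADDR from the driver's ixgbe_mbx.h are only examples:

#include <rte_ethdev.h>
#include "rte_pmd_ixgbe.h"

/* Example policy: let every request proceed except VF MAC address changes. */
static void
example_vf_mbox_cb(uint8_t port_id, enum rte_eth_event_type event, void *param)
{
	struct rte_pmd_ixgbe_mb_event_param *p = param;

	(void)port_id;
	if (event != RTE_ETH_EVENT_VF_MBOX)
		return;

	if (p->msg_type == IXGBE_VF_SET_MAC_ADDR)
		p->retval = RTE_PMD_IXGBE_MB_EVENT_NOOP_NACK; /* skip, NAK */
	else
		p->retval = RTE_PMD_IXGBE_MB_EVENT_PROCEED;   /* as usual */
}

/* Registered once per port, e.g. after rte_eth_dev_configure():
 *	rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_VF_MBOX,
 *				      example_vf_mbox_cb, NULL);
 */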
index c7457a6..2aa4820 100644 (file)
@@ -41,7 +41,7 @@ struct reg_info {
        uint32_t count;
        uint32_t stride;
        const char *name;
-} reg_info;
+};
 
 static const struct reg_info ixgbe_regs_general[] = {
        {IXGBE_CTRL, 1, 1, "IXGBE_CTRL"},
@@ -56,10 +56,10 @@ static const struct reg_info ixgbe_regs_general[] = {
 };
 
 static const struct reg_info ixgbevf_regs_general[] = {
-       {IXGBE_CTRL, 1, 1, "IXGBE_CTRL"},
-       {IXGBE_STATUS, 1, 1, "IXGBE_STATUS"},
+       {IXGBE_VFCTRL, 1, 1, "IXGBE_VFCTRL"},
+       {IXGBE_VFSTATUS, 1, 1, "IXGBE_VFSTATUS"},
        {IXGBE_VFLINKS, 1, 1, "IXGBE_VFLINKS"},
-       {IXGBE_FRTIMER, 1, 1, "IXGBE_FRTIMER"},
+       {IXGBE_VFFRTIMER, 1, 1, "IXGBE_VFFRTIMER"},
        {IXGBE_VFMAILBOX, 1, 1, "IXGBE_VFMAILBOX"},
        {IXGBE_VFMBMEM, 16, 4, "IXGBE_VFMBMEM"},
        {IXGBE_VFRXMEMWRAP, 1, 1, "IXGBE_VFRXMEMWRAP"},
@@ -145,17 +145,17 @@ static const struct reg_info ixgbe_regs_rxdma[] = {
 };
 
 static const struct reg_info ixgbevf_regs_rxdma[] = {
-       {IXGBE_RDBAL(0), 8, 0x40, "IXGBE_RDBAL"},
-       {IXGBE_RDBAH(0), 8, 0x40, "IXGBE_RDBAH"},
-       {IXGBE_RDLEN(0), 8, 0x40, "IXGBE_RDLEN"},
-       {IXGBE_RDH(0), 8, 0x40, "IXGBE_RDH"},
-       {IXGBE_RDT(0), 8, 0x40, "IXGBE_RDT"},
-       {IXGBE_RXDCTL(0), 8, 0x40, "IXGBE_RXDCTL"},
-       {IXGBE_SRRCTL(0), 8, 0x40, "IXGBE_SRRCTL"},
+       {IXGBE_VFRDBAL(0), 8, 0x40, "IXGBE_VFRDBAL"},
+       {IXGBE_VFRDBAH(0), 8, 0x40, "IXGBE_VFRDBAH"},
+       {IXGBE_VFRDLEN(0), 8, 0x40, "IXGBE_VFRDLEN"},
+       {IXGBE_VFRDH(0), 8, 0x40, "IXGBE_VFRDH"},
+       {IXGBE_VFRDT(0), 8, 0x40, "IXGBE_VFRDT"},
+       {IXGBE_VFRXDCTL(0), 8, 0x40, "IXGBE_VFRXDCTL"},
+       {IXGBE_VFSRRCTL(0), 8, 0x40, "IXGBE_VFSRRCTL"},
        {IXGBE_VFPSRTYPE, 1, 1, "IXGBE_VFPSRTYPE"},
        {IXGBE_VFRSCCTL(0), 8, 0x40, "IXGBE_VFRSCCTL"},
-       {IXGBE_PVFDCA_RXCTRL(0), 8, 0x40, "IXGBE_PVFDCA_RXCTRL"},
-       {IXGBE_PVFDCA_TXCTRL(0), 8, 0x40, "IXGBE_PVFDCA_TXCTRL"},
+       {IXGBE_VFDCA_RXCTRL(0), 8, 0x40, "IXGBE_VFDCA_RXCTRL"},
+       {IXGBE_VFDCA_TXCTRL(0), 8, 0x40, "IXGBE_VFDCA_TXCTRL"},
        {0, 0, 0, ""}
 };
 
@@ -193,14 +193,14 @@ static struct reg_info ixgbe_regs_tx[] = {
 };
 
 static const struct reg_info ixgbevf_regs_tx[] = {
-       {IXGBE_TDBAL(0), 4, 0x40, "IXGBE_TDBAL"},
-       {IXGBE_TDBAH(0), 4, 0x40, "IXGBE_TDBAH"},
-       {IXGBE_TDLEN(0), 4, 0x40, "IXGBE_TDLEN"},
-       {IXGBE_TDH(0), 4, 0x40, "IXGBE_TDH"},
-       {IXGBE_TDT(0), 4, 0x40, "IXGBE_TDT"},
-       {IXGBE_TXDCTL(0), 4, 0x40, "IXGBE_TXDCTL"},
-       {IXGBE_TDWBAL(0), 4, 0x40, "IXGBE_TDWBAL"},
-       {IXGBE_TDWBAH(0), 4, 0x40, "IXGBE_TDWBAH"},
+       {IXGBE_VFTDBAL(0), 4, 0x40, "IXGBE_VFTDBAL"},
+       {IXGBE_VFTDBAH(0), 4, 0x40, "IXGBE_VFTDBAH"},
+       {IXGBE_VFTDLEN(0), 4, 0x40, "IXGBE_VFTDLEN"},
+       {IXGBE_VFTDH(0), 4, 0x40, "IXGBE_VFTDH"},
+       {IXGBE_VFTDT(0), 4, 0x40, "IXGBE_VFTDT"},
+       {IXGBE_VFTXDCTL(0), 4, 0x40, "IXGBE_VFTXDCTL"},
+       {IXGBE_VFTDWBAL(0), 4, 0x40, "IXGBE_VFTDWBAL"},
+       {IXGBE_VFTDWBAH(0), 4, 0x40, "IXGBE_VFTDWBAH"},
        {0, 0, 0, ""}
 };
 
index a018e92..36f1c02 100644 (file)
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   Copyright 2014 6WIND S.A.
  *   All rights reserved.
  *
@@ -58,7 +58,6 @@
 #include <rte_lcore.h>
 #include <rte_atomic.h>
 #include <rte_branch_prediction.h>
-#include <rte_ring.h>
 #include <rte_mempool.h>
 #include <rte_malloc.h>
 #include <rte_mbuf.h>
@@ -71,6 +70,7 @@
 #include <rte_string_fns.h>
 #include <rte_errno.h>
 #include <rte_ip.h>
+#include <rte_net.h>
 
 #include "ixgbe_logs.h"
 #include "base/ixgbe_api.h"
                PKT_TX_IP_CKSUM |                \
                PKT_TX_L4_MASK |                 \
                PKT_TX_TCP_SEG |                 \
+               PKT_TX_MACSEC |                  \
                PKT_TX_OUTER_IP_CKSUM)
 
+#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
+               (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
+
 #if 1
 #define RTE_PMD_USE_PREFETCH
 #endif
@@ -322,7 +326,7 @@ tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
        /* update tail pointer */
        rte_wmb();
-       IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
+       IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
 
        return nb_pkts;
 }
@@ -520,6 +524,8 @@ tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
                cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
        if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
                cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
+       if (ol_flags & PKT_TX_MACSEC)
+               cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
        return cmdtype;
 }
 
@@ -898,12 +904,63 @@ end_of_tx:
        PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
                   (unsigned) txq->port_id, (unsigned) txq->queue_id,
                   (unsigned) tx_id, (unsigned) nb_tx);
-       IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
+       IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
        txq->tx_tail = tx_id;
 
        return nb_tx;
 }
 
+/*********************************************************************
+ *
+ *  TX prep functions
+ *
+ **********************************************************************/
+uint16_t
+ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+       int i, ret;
+       uint64_t ol_flags;
+       struct rte_mbuf *m;
+       struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
+
+       for (i = 0; i < nb_pkts; i++) {
+               m = tx_pkts[i];
+               ol_flags = m->ol_flags;
+
+               /**
+                * Check if packet meets requirements for number of segments
+                *
+                * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
+                *       non-TSO
+                */
+
+               if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
+                       rte_errno = -EINVAL;
+                       return i;
+               }
+
+               if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
+                       rte_errno = -ENOTSUP;
+                       return i;
+               }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+               ret = rte_validate_tx_offload(m);
+               if (ret != 0) {
+                       rte_errno = ret;
+                       return i;
+               }
+#endif
+               ret = rte_net_intel_cksum_prepare(m);
+               if (ret != 0) {
+                       rte_errno = ret;
+                       return i;
+               }
+       }
+
+       return i;
+}
+
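/* Illustrative usage sketch, not part of this diff: with tx_pkt_prepare in
 * place, an application is expected to run its burst through the new
 * rte_eth_tx_prepare() helper (added in DPDK 17.02) before transmitting.
 * Port and queue identifiers are placeholders.
 */
static inline uint16_t
example_tx(uint8_t port_id, uint16_t queue_id,
	   struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	/* Invokes ixgbe_prep_pkts(): fixes up checksum contexts and rejects
	 * packets with unsupported ol_flags or too many segments; on early
	 * return, rte_errno reports why pkts[nb_prep] was refused.
	 */
	uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id,
					      pkts, nb_pkts);

	return rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
}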
 /*********************************************************************
  *
  *  RX functions
@@ -1345,7 +1402,9 @@ rx_desc_error_to_pkt_flags(uint32_t rx_status)
         * Bit 30: L4I, L4I integrity error
         */
        static uint64_t error_to_pkt_flags_map[4] = {
-               0,  PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
+               PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
+               PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
+               PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
                PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
        };
        pkt_flags = error_to_pkt_flags_map[(rx_status >>
@@ -1580,7 +1639,8 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 
                /* update tail pointer */
                rte_wmb();
-               IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
+               IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
+                                           cur_free_trigger);
        }
 
        if (rxq->rx_tail >= rxq->nb_rx_desc)
@@ -1984,8 +2044,8 @@ next_desc:
 
                        if (!ixgbe_rx_alloc_bufs(rxq, false)) {
                                rte_wmb();
-                               IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
-                                                   next_rdt);
+                               IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
+                                                           next_rdt);
                                nb_hold -= rxq->rx_free_thresh;
                        } else {
                                PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
@@ -2156,7 +2216,7 @@ next_desc:
                           rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
 
                rte_wmb();
-               IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
+               IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
                nb_hold = 0;
        }
 
@@ -2281,6 +2341,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
        if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
                        && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
                PMD_INIT_LOG(DEBUG, "Using simple tx code path");
+               dev->tx_pkt_prepare = NULL;
 #ifdef RTE_IXGBE_INC_VECTOR
                if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
                                (rte_eal_process_type() != RTE_PROC_PRIMARY ||
@@ -2301,6 +2362,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
                                (unsigned long)txq->tx_rs_thresh,
                                (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
                dev->tx_pkt_burst = ixgbe_xmit_pkts;
+               dev->tx_pkt_prepare = ixgbe_prep_pkts;
        }
 }
 
@@ -2584,7 +2646,6 @@ check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
         *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
         *   rxq->rx_free_thresh < rxq->nb_rx_desc
         *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
-        *   rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST)
         * Scattered packets are not supported.  This should be checked
         * outside of this function.
         */
@@ -2606,15 +2667,6 @@ check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
                             "rxq->rx_free_thresh=%d",
                             rxq->nb_rx_desc, rxq->rx_free_thresh);
                ret = -EINVAL;
-       } else if (!(rxq->nb_rx_desc <
-              (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
-               PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
-                            "rxq->nb_rx_desc=%d, "
-                            "IXGBE_MAX_RING_DESC=%d, "
-                            "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
-                            rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
-                            RTE_PMD_IXGBE_RX_MAX_BURST);
-               ret = -EINVAL;
        }
 
        return ret;
@@ -2631,12 +2683,7 @@ ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
        /*
         * By default, the Rx queue setup function allocates enough memory for
         * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
-        * extra memory at the end of the descriptor ring to be zero'd out. A
-        * pre-condition for using the Rx burst bulk alloc function is that the
-        * number of descriptors is less than or equal to
-        * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
-        * constraints here to see if we need to zero out memory after the end
-        * of the H/W descriptor ring.
+        * extra memory at the end of the descriptor ring to be zero'd out.
         */
        if (adapter->rx_bulk_alloc_allowed)
                /* zero out extra memory */
@@ -3312,15 +3359,16 @@ ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
 
 /**
  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
- * @hw: pointer to hardware structure
+ * @dev: pointer to eth_dev structure
  * @dcb_config: pointer to ixgbe_dcb_config structure
  */
 static void
-ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw,
+ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
                       struct ixgbe_dcb_config *dcb_config)
 {
        uint32_t reg;
        uint32_t q;
+       struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
        PMD_INIT_FUNC_TRACE();
        if (hw->mac.type != ixgbe_mac_82598EB) {
@@ -3339,10 +3387,17 @@ ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw,
                        reg |= IXGBE_MTQC_VT_ENA;
                IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
 
-               /* Disable drop for all queues */
-               for (q = 0; q < 128; q++)
-                       IXGBE_WRITE_REG(hw, IXGBE_QDE,
-                               (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
+               if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
+                       /* Disable drop for all queues in VMDQ mode*/
+                       for (q = 0; q < 128; q++)
+                               IXGBE_WRITE_REG(hw, IXGBE_QDE,
+                                               (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
+               } else {
+                       /* Enable drop for all queues in SRIOV mode */
+                       for (q = 0; q < 128; q++)
+                               IXGBE_WRITE_REG(hw, IXGBE_QDE,
+                                               (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT) | IXGBE_QDE_ENABLE));
+               }
 
                /* Enable the Tx desc arbiter */
                reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
@@ -3377,7 +3432,7 @@ ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
                        vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
 
        /*Configure general DCB TX parameters*/
-       ixgbe_dcb_tx_hw_config(hw, dcb_config);
+       ixgbe_dcb_tx_hw_config(dev, dcb_config);
 }
 
 static void
@@ -3660,7 +3715,7 @@ ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
                /*get DCB TX configuration parameters from rte_eth_conf*/
                ixgbe_dcb_tx_config(dev, dcb_config);
                /*Configure general DCB TX parameters*/
-               ixgbe_dcb_tx_hw_config(hw, dcb_config);
+               ixgbe_dcb_tx_hw_config(dev, dcb_config);
                break;
        default:
                PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
@@ -3809,7 +3864,7 @@ void ixgbe_configure_dcb(struct rte_eth_dev *dev)
            (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
                return;
 
-       if (dev->data->nb_rx_queues != ETH_DCB_NUM_QUEUES)
+       if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
                return;
 
        /** Configure DCB hardware **/
@@ -4081,12 +4136,13 @@ ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
                case ETH_MQ_RX_VMDQ_RSS:
                        ixgbe_config_vf_rss(dev);
                        break;
-
-               /* FIXME if support DCB/RSS together with VMDq & SRIOV */
                case ETH_MQ_RX_VMDQ_DCB:
+                       ixgbe_vmdq_dcb_configure(dev);
+                       break;
+               /* FIXME if support DCB/RSS together with VMDq & SRIOV */
                case ETH_MQ_RX_VMDQ_DCB_RSS:
                        PMD_INIT_LOG(ERR,
-                               "Could not support DCB with VMDq & SRIOV");
+                               "Could not support DCB/RSS with VMDq & SRIOV");
                        return -1;
                default:
                        ixgbe_config_vf_default(dev);
@@ -4913,8 +4969,7 @@ ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
                        rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
                } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
                if (!poll_ms)
-            // TREX_PATCH - changed log level from ERR to DEBUG
-                       PMD_INIT_LOG(DEBUG, "Could not disable Rx Queue %d",
+                       PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
                                     rx_queue_id);
 
                rte_delay_us(RTE_IXGBE_WAIT_100_US);
index 2608b36..739fd19 100644 (file)
@@ -67,7 +67,7 @@
 #define RTE_IXGBE_MAX_RX_BURST          RTE_IXGBE_RXQ_REARM_THRESH
 #endif
 
-#define RX_RING_SZ ((IXGBE_MAX_RING_DESC + RTE_IXGBE_DESCS_PER_LOOP - 1) * \
+#define RX_RING_SZ ((IXGBE_MAX_RING_DESC + RTE_PMD_IXGBE_RX_MAX_BURST) * \
                    sizeof(union ixgbe_adv_rx_desc))
 
 #ifdef RTE_PMD_PACKET_PREFETCH
@@ -80,6 +80,8 @@
 #define RTE_IXGBE_WAIT_100_US               100
 #define RTE_IXGBE_VMTXSW_REGISTER_COUNT     2
 
+#define IXGBE_TX_MAX_SEG                    40
+
 #define IXGBE_PACKET_TYPE_MASK_82599        0X7F
 #define IXGBE_PACKET_TYPE_MASK_X550         0X10FF
 #define IXGBE_PACKET_TYPE_MASK_TUNNEL       0XFF
index 62b8201..a3473b9 100644 (file)
@@ -204,8 +204,20 @@ _ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
                return;
 
        /* free all mbufs that are valid in the ring */
-       for (i = rxq->rx_tail; i != rxq->rxrearm_start; i = (i + 1) & mask)
-               rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
+       if (rxq->rxrearm_nb == 0) {
+               for (i = 0; i < rxq->nb_rx_desc; i++) {
+                       if (rxq->sw_ring[i].mbuf != NULL)
+                               rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
+               }
+       } else {
+               for (i = rxq->rx_tail;
+                    i != rxq->rxrearm_start;
+                    i = (i + 1) & mask) {
+                       if (rxq->sw_ring[i].mbuf != NULL)
+                               rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
+               }
+       }
+
        rxq->rxrearm_nb = rxq->nb_rx_desc;
 
        /* set all entries to NULL */
@@ -309,12 +321,8 @@ ixgbe_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
        if (fconf->mode != RTE_FDIR_MODE_NONE)
                return -1;
 
-       /*
-        * - no csum error report support
-        * - no header split support
-        */
-       if (rxmode->hw_ip_checksum == 1 ||
-           rxmode->header_split == 1)
+       /* no header split support */
+       if (rxmode->header_split == 1)
                return -1;
 
        return 0;
index 64a329e..f96cc85 100644 (file)
@@ -556,5 +556,11 @@ ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq)
 int __attribute__((cold))
 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
 {
+       struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
+
+       /* no csum error report support */
+       if (rxmode->hw_ip_checksum == 1)
+               return -1;
+
        return ixgbe_rx_vec_dev_conf_condition_check_default(dev);
 }
index 1c4fd7c..abbf284 100644 (file)
@@ -145,7 +145,7 @@ static inline void
 desc_to_olflags_v(__m128i descs[4], uint8_t vlan_flags,
        struct rte_mbuf **rx_pkts)
 {
-       __m128i ptype0, ptype1, vtag0, vtag1;
+       __m128i ptype0, ptype1, vtag0, vtag1, csum;
        union {
                uint16_t e[4];
                uint64_t dword;
@@ -156,24 +156,45 @@ desc_to_olflags_v(__m128i descs[4], uint8_t vlan_flags,
                        0x0000, 0x0000, 0x0000, 0x0000,
                        0x000F, 0x000F, 0x000F, 0x000F);
 
+       /* mask the lower byte of ol_flags */
+       const __m128i ol_flags_msk = _mm_set_epi16(
+                       0x0000, 0x0000, 0x0000, 0x0000,
+                       0x00FF, 0x00FF, 0x00FF, 0x00FF);
+
        /* map rss type to rss hash flag */
        const __m128i rss_flags = _mm_set_epi8(PKT_RX_FDIR, 0, 0, 0,
                        0, 0, 0, PKT_RX_RSS_HASH,
                        PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH, 0,
                        PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, 0);
 
-       /* mask everything except vlan present bit */
-       const __m128i vlan_msk = _mm_set_epi16(
-                       0x0000, 0x0000,
-                       0x0000, 0x0000,
-                       IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP,
-                       IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP);
-       /* map vlan present (0x8) to ol_flags */
-       const __m128i vlan_map = _mm_set_epi8(
+       /* mask everything except vlan present and l4/ip csum error */
+       const __m128i vlan_csum_msk = _mm_set_epi16(
+               (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16,
+               (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16,
+               (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16,
+               (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16,
+               IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP,
+               IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP);
+       /* map vlan present (0x8), IPE (0x2), L4E (0x1) to ol_flags */
+       const __m128i vlan_csum_map_lo = _mm_set_epi8(
                0, 0, 0, 0,
-               0, 0, 0, vlan_flags,
+               vlan_flags | PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
+               vlan_flags | PKT_RX_IP_CKSUM_BAD,
+               vlan_flags | PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
+               vlan_flags | PKT_RX_IP_CKSUM_GOOD,
                0, 0, 0, 0,
-               0, 0, 0, 0);
+               PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
+               PKT_RX_IP_CKSUM_BAD,
+               PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
+               PKT_RX_IP_CKSUM_GOOD);
+
+       const __m128i vlan_csum_map_hi = _mm_set_epi8(
+               0, 0, 0, 0,
+               0, PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
+               PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t),
+               0, 0, 0, 0,
+               0, PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
+               PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t));
 
        ptype0 = _mm_unpacklo_epi16(descs[0], descs[1]);
        ptype1 = _mm_unpacklo_epi16(descs[2], descs[3]);
@@ -185,8 +206,26 @@ desc_to_olflags_v(__m128i descs[4], uint8_t vlan_flags,
        ptype0 = _mm_shuffle_epi8(rss_flags, ptype0);
 
        vtag1 = _mm_unpacklo_epi32(vtag0, vtag1);
-       vtag1 = _mm_and_si128(vtag1, vlan_msk);
-       vtag1 = _mm_shuffle_epi8(vlan_map, vtag1);
+       vtag1 = _mm_and_si128(vtag1, vlan_csum_msk);
+
+       /* csum bits are the most significant ones; to use shuffle we need to
+        * shift them down. This changes the mask from 0xc000 to 0x0003.
+        */
+       csum = _mm_srli_epi16(vtag1, 14);
+
+       /* now OR the checksum flags, held in the most significant 64 bits,
+        * into the vlan present flags.
+        */
+       csum = _mm_srli_si128(csum, 8);
+       vtag1 = _mm_or_si128(csum, vtag1);
+
+       /* convert VP, IPE, L4E to ol_flags */
+       vtag0 = _mm_shuffle_epi8(vlan_csum_map_hi, vtag1);
+       vtag0 = _mm_slli_epi16(vtag0, sizeof(uint8_t));
+
+       vtag1 = _mm_shuffle_epi8(vlan_csum_map_lo, vtag1);
+       vtag1 = _mm_and_si128(vtag1, ol_flags_msk);
+       vtag1 = _mm_or_si128(vtag0, vtag1);
 
        vtag1 = _mm_or_si128(ptype0, vtag1);
        vol.dword = _mm_cvtsi128_si64(vtag1);
@@ -210,7 +249,6 @@ desc_to_olflags_v(__m128i descs[4], uint8_t vlan_flags,
  * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
  *   numbers of DD bit
  * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
- * - don't support ol_flags for rss and csum err
  */
 static inline uint16_t
 _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
@@ -243,7 +281,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
         */
        rxdp = rxq->rx_ring + rxq->rx_tail;
 
-       _mm_prefetch((const void *)rxdp, _MM_HINT_T0);
+       rte_prefetch0(rxdp);
 
        /* See if we need to rearm the RX queue - gives the prefetch a bit
         * of time to act
@@ -305,6 +343,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                /* Read desc statuses backwards to avoid race condition */
                /* A.1 load 4 pkts desc */
                descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
+               rte_compiler_barrier();
 
                /* B.2 copy 2 mbuf point into rx_pkts  */
                _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);
@@ -313,8 +352,10 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]);
 
                descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
+               rte_compiler_barrier();
                /* B.1 load 2 mbuf point */
                descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
+               rte_compiler_barrier();
                descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
 
                /* B.2 copy 2 mbuf point into rx_pkts  */
@@ -425,7 +466,6 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
  * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
  *   numbers of DD bit
  * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
- * - don't support ol_flags for rss and csum err
  */
 uint16_t
 ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -438,7 +478,6 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
  * vPMD receive routine that reassembles scattered packets
  *
  * Notice:
- * - don't support ol_flags for rss and csum err
  * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
  * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
  *   numbers of DD bit
diff --git a/src/dpdk/drivers/net/ixgbe/rte_pmd_ixgbe.h b/src/dpdk/drivers/net/ixgbe/rte_pmd_ixgbe.h
new file mode 100644 (file)
index 0000000..4d7b507
--- /dev/null
@@ -0,0 +1,412 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) 2016 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file rte_pmd_ixgbe.h
+ * ixgbe PMD specific functions.
+ *
+ **/
+
+#ifndef _PMD_IXGBE_H_
+#define _PMD_IXGBE_H_
+
+#include <rte_ethdev.h>
+
+/**
+ * Set the VF MAC address.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param vf
+ *   VF id.
+ * @param mac_addr
+ *   VF MAC address.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if *vf* or *mac_addr* is invalid.
+ */
+int rte_pmd_ixgbe_set_vf_mac_addr(uint8_t port, uint16_t vf,
+               struct ether_addr *mac_addr);
+
+/**
+ * Enable/Disable VF VLAN anti spoofing.
+ *
+ * @param port
+ *    The port identifier of the Ethernet device.
+ * @param vf
+ *    VF on which to set VLAN anti spoofing.
+ * @param on
+ *    1 - Enable VFs VLAN anti spoofing.
+ *    0 - Disable VFs VLAN anti spoofing.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_pmd_ixgbe_set_vf_vlan_anti_spoof(uint8_t port, uint16_t vf, uint8_t on);
+
+/**
+ * Enable/Disable VF MAC anti spoofing.
+ *
+ * @param port
+ *    The port identifier of the Ethernet device.
+ * @param vf
+ *    VF on which to set MAC anti spoofing.
+ * @param on
+ *    1 - Enable VFs MAC anti spoofing.
+ *    0 - Disable VFs MAC anti spoofing.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_pmd_ixgbe_set_vf_mac_anti_spoof(uint8_t port, uint16_t vf, uint8_t on);
+
+/**
+ * Enable/Disable vf vlan insert
+ *
+ * @param port
+ *    The port identifier of the Ethernet device.
+ * @param vf
+ *    ID specifying VF.
+ * @param vlan_id
+ *    0 - Disable VF's vlan insert.
+ *    n - Enable; n is inserted as the vlan id.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_pmd_ixgbe_set_vf_vlan_insert(uint8_t port, uint16_t vf,
+               uint16_t vlan_id);
+
+/**
+ * Enable/Disable tx loopback
+ *
+ * @param port
+ *    The port identifier of the Ethernet device.
+ * @param on
+ *    1 - Enable tx loopback.
+ *    0 - Disable tx loopback.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_pmd_ixgbe_set_tx_loopback(uint8_t port, uint8_t on);
+
+/**
+ * set all queues drop enable bit
+ *
+ * @param port
+ *    The port identifier of the Ethernet device.
+ * @param on
+ *    1 - set the queue drop enable bit for all pools.
+ *    0 - reset the queue drop enable bit for all pools.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_pmd_ixgbe_set_all_queues_drop_en(uint8_t port, uint8_t on);
+
+/**
+ * set drop enable bit in the VF split rx control register
+ *
+ * @param port
+ *    The port identifier of the Ethernet device.
+ * @param vf
+ *    ID specifying VF.
+ * @param on
+ *    1 - set the drop enable bit in the split rx control register.
+ *    0 - reset the drop enable bit in the split rx control register.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+
+int rte_pmd_ixgbe_set_vf_split_drop_en(uint8_t port, uint16_t vf, uint8_t on);
+
+/**
+ * Enable/Disable vf vlan strip for all queues in a pool
+ *
+ * @param port
+ *    The port identifier of the Ethernet device.
+ * @param vf
+ *    ID specifying VF.
+ * @param on
+ *    1 - Enable VF's vlan strip on RX queues.
+ *    0 - Disable VF's vlan strip on RX queues.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support this feature.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int
+rte_pmd_ixgbe_set_vf_vlan_stripq(uint8_t port, uint16_t vf, uint8_t on);
+
+/**
+ * Enable MACsec offload.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param en
+ *    1 - Enable encryption (encrypt and add integrity signature).
+ *    0 - Disable encryption (only add integrity signature).
+ * @param rp
+ *    1 - Enable replay protection.
+ *    0 - Disable replay protection.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-ENOTSUP) if hardware doesn't support this feature.
+ */
+int rte_pmd_ixgbe_macsec_enable(uint8_t port, uint8_t en, uint8_t rp);
+
+/**
+ * Disable MACsec offload.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-ENOTSUP) if hardware doesn't support this feature.
+ */
+int rte_pmd_ixgbe_macsec_disable(uint8_t port);
+
+/**
+ * Configure Tx SC (Secure Connection).
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param mac
+ *   The MAC address on the local side.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-ENOTSUP) if hardware doesn't support this feature.
+ */
+int rte_pmd_ixgbe_macsec_config_txsc(uint8_t port, uint8_t *mac);
+
+/**
+ * Configure Rx SC (Secure Connection).
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param mac
+ *   The MAC address on the remote side.
+ * @param pi
+ *   The PI (port identifier) on the remote side.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-ENOTSUP) if hardware doesn't support this feature.
+ */
+int rte_pmd_ixgbe_macsec_config_rxsc(uint8_t port, uint8_t *mac, uint16_t pi);
+
+/**
+ * Enable Tx SA (Secure Association).
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param idx
+ *   The SA to be enabled (0 or 1).
+ * @param an
+ *   The association number on the local side.
+ * @param pn
+ *   The packet number on the local side.
+ * @param key
+ *   The key on the local side.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-ENOTSUP) if hardware doesn't support this feature.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_pmd_ixgbe_macsec_select_txsa(uint8_t port, uint8_t idx, uint8_t an,
+               uint32_t pn, uint8_t *key);
+
+/**
+ * Enable Rx SA (Secure Association).
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param idx
+ *   The SA to be enabled (0 or 1)
+ * @param an
+ *   The association number on the remote side.
+ * @param pn
+ *   The packet number on the remote side.
+ * @param key
+ *   The key on the remote side.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-ENOTSUP) if hardware doesn't support this feature.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_pmd_ixgbe_macsec_select_rxsa(uint8_t port, uint8_t idx, uint8_t an,
+               uint32_t pn, uint8_t *key);
+
+/**
+* Set RX L2 Filtering mode of a VF of an Ethernet device.
+*
+* @param port
+*   The port identifier of the Ethernet device.
+* @param vf
+*   VF id.
+* @param rx_mask
+*    The RX mode mask, which is one or more of accepting Untagged Packets,
+*    packets that match the PFUTA table, Broadcast and Multicast Promiscuous.
+*    ETH_VMDQ_ACCEPT_UNTAG, ETH_VMDQ_ACCEPT_HASH_UC,
+*    ETH_VMDQ_ACCEPT_BROADCAST and ETH_VMDQ_ACCEPT_MULTICAST will be used
+*    in rx_mode.
+* @param on
+*    1 - Enable a VF RX mode.
+*    0 - Disable a VF RX mode.
+* @return
+*   - (0) if successful.
+*   - (-ENOTSUP) if hardware doesn't support.
+*   - (-ENODEV) if *port_id* invalid.
+*   - (-EINVAL) if bad parameter.
+*/
+int
+rte_pmd_ixgbe_set_vf_rxmode(uint8_t port, uint16_t vf, uint16_t rx_mask, uint8_t on);
+
+/**
+* Enable or disable a VF traffic receive of an Ethernet device.
+*
+* @param port
+*   The port identifier of the Ethernet device.
+* @param vf
+*   VF id.
+* @param on
+*    1 - Enable a VF traffic receive.
+*    0 - Disable a VF traffic receive.
+* @return
+*   - (0) if successful.
+*   - (-ENOTSUP) if hardware doesn't support.
+*   - (-ENODEV) if *port_id* invalid.
+*   - (-EINVAL) if bad parameter.
+*/
+int
+rte_pmd_ixgbe_set_vf_rx(uint8_t port, uint16_t vf, uint8_t on);
+
+/**
+* Enable or disable a VF traffic transmit of the Ethernet device.
+*
+* @param port
+*   The port identifier of the Ethernet device.
+* @param vf
+*   VF id.
+* @param on
+*    1 - Enable a VF traffic transmit.
+*    0 - Disable a VF traffic transmit.
+* @return
+*   - (0) if successful.
+*   - (-ENODEV) if *port_id* invalid.
+*   - (-ENOTSUP) if hardware doesn't support.
+*   - (-EINVAL) if bad parameter.
+*/
+int
+rte_pmd_ixgbe_set_vf_tx(uint8_t port, uint16_t vf, uint8_t on);
+
+/**
+* Enable/Disable hardware VF VLAN filtering by an Ethernet device of
+* received VLAN packets tagged with a given VLAN Tag Identifier.
+*
+* @param port
+*   The port identifier of the Ethernet device.
+* @param vlan
+*   The VLAN Tag Identifier whose filtering must be enabled or disabled.
+* @param vf_mask
+*    Bitmap listing which VFs participate in the VLAN filtering.
+* @param vlan_on
+*    1 - Enable VFs VLAN filtering.
+*    0 - Disable VFs VLAN filtering.
+* @return
+*   - (0) if successful.
+*   - (-ENOTSUP) if hardware doesn't support.
+*   - (-ENODEV) if *port_id* invalid.
+*   - (-EINVAL) if bad parameter.
+*/
+int
+rte_pmd_ixgbe_set_vf_vlan_filter(uint8_t port, uint16_t vlan, uint64_t vf_mask, uint8_t vlan_on);
+
+/**
+ * Set the rate limitation for a vf on an Ethernet device.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param vf
+ *   VF id.
+ * @param tx_rate
+ *   The tx rate allocated from the total link speed for this VF id.
+ * @param q_msk
+ *   The mask of queues for which the rate should be set.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support this feature.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_pmd_ixgbe_set_vf_rate_limit(uint8_t port, uint16_t vf, uint16_t tx_rate, uint64_t q_msk);
+
+/**
+ * Response sent back to ixgbe driver from user app after callback
+ */
+enum rte_pmd_ixgbe_mb_event_rsp {
+       RTE_PMD_IXGBE_MB_EVENT_NOOP_ACK,  /**< skip mbox request and ACK */
+       RTE_PMD_IXGBE_MB_EVENT_NOOP_NACK, /**< skip mbox request and NACK */
+       RTE_PMD_IXGBE_MB_EVENT_PROCEED,  /**< proceed with mbox request  */
+       RTE_PMD_IXGBE_MB_EVENT_MAX       /**< max value of this enum */
+};
+
+/**
+ * Data sent to the user application when the callback is executed.
+ */
+struct rte_pmd_ixgbe_mb_event_param {
+       uint16_t vfid;     /**< Virtual Function number */
+       uint16_t msg_type; /**< VF to PF message type, defined in ixgbe_mbx.h */
+       uint16_t retval;   /**< return value */
+       void *msg;         /**< pointer to message */
+};
+#endif /* _PMD_IXGBE_H_ */
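
As a usage sketch for the per-VF knobs declared above (port and VF numbers are placeholders and error handling is omitted), a PF application could harden a VF like this:

#include "rte_pmd_ixgbe.h"

static void
example_harden_vf(uint8_t port_id, uint16_t vf)
{
	/* Drop traffic with a spoofed MAC or VLAN coming from this VF. */
	rte_pmd_ixgbe_set_vf_mac_anti_spoof(port_id, vf, 1);
	rte_pmd_ixgbe_set_vf_vlan_anti_spoof(port_id, vf, 1);

	/* Strip VLAN tags on the VF's RX queues. */
	rte_pmd_ixgbe_set_vf_vlan_stripq(port_id, vf, 1);

	/* Set the queue drop enable bit for all pools. */
	rte_pmd_ixgbe_set_all_queues_drop_en(port_id, 1);
}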
index 304c846..79efaaa 100644 (file)
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <infiniband/verbs.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_ether.h>
 #include <rte_ethdev.h>
@@ -87,7 +87,7 @@
 #include <rte_alarm.h>
 #include <rte_memory.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 /* Generated configuration header. */
@@ -2961,19 +2961,25 @@ rxq_cq_to_pkt_type(uint32_t flags)
        if (flags & IBV_EXP_CQ_RX_TUNNEL_PACKET)
                pkt_type =
                        TRANSPOSE(flags,
-                                 IBV_EXP_CQ_RX_OUTER_IPV4_PACKET, RTE_PTYPE_L3_IPV4) |
+                                 IBV_EXP_CQ_RX_OUTER_IPV4_PACKET,
+                                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN) |
                        TRANSPOSE(flags,
-                                 IBV_EXP_CQ_RX_OUTER_IPV6_PACKET, RTE_PTYPE_L3_IPV6) |
+                                 IBV_EXP_CQ_RX_OUTER_IPV6_PACKET,
+                                 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN) |
                        TRANSPOSE(flags,
-                                 IBV_EXP_CQ_RX_IPV4_PACKET, RTE_PTYPE_INNER_L3_IPV4) |
+                                 IBV_EXP_CQ_RX_IPV4_PACKET,
+                                 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN) |
                        TRANSPOSE(flags,
-                                 IBV_EXP_CQ_RX_IPV6_PACKET, RTE_PTYPE_INNER_L3_IPV6);
+                                 IBV_EXP_CQ_RX_IPV6_PACKET,
+                                 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN);
        else
                pkt_type =
                        TRANSPOSE(flags,
-                                 IBV_EXP_CQ_RX_IPV4_PACKET, RTE_PTYPE_L3_IPV4) |
+                                 IBV_EXP_CQ_RX_IPV4_PACKET,
+                                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN) |
                        TRANSPOSE(flags,
-                                 IBV_EXP_CQ_RX_IPV6_PACKET, RTE_PTYPE_L3_IPV6);
+                                 IBV_EXP_CQ_RX_IPV6_PACKET,
+                                 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN);
        return pkt_type;
 }
 
@@ -2995,25 +3001,20 @@ rxq_cq_to_ol_flags(const struct rxq *rxq, uint32_t flags)
 
        if (rxq->csum)
                ol_flags |=
-                       TRANSPOSE(~flags,
+                       TRANSPOSE(flags,
                                  IBV_EXP_CQ_RX_IP_CSUM_OK,
-                                 PKT_RX_IP_CKSUM_BAD) |
-                       TRANSPOSE(~flags,
+                                 PKT_RX_IP_CKSUM_GOOD) |
+                       TRANSPOSE(flags,
                                  IBV_EXP_CQ_RX_TCP_UDP_CSUM_OK,
-                                 PKT_RX_L4_CKSUM_BAD);
-       /*
-        * PKT_RX_IP_CKSUM_BAD and PKT_RX_L4_CKSUM_BAD are used in place
-        * of PKT_RX_EIP_CKSUM_BAD because the latter is not functional
-        * (its value is 0).
-        */
+                                 PKT_RX_L4_CKSUM_GOOD);
        if ((flags & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
                ol_flags |=
-                       TRANSPOSE(~flags,
+                       TRANSPOSE(flags,
                                  IBV_EXP_CQ_RX_OUTER_IP_CSUM_OK,
-                                 PKT_RX_IP_CKSUM_BAD) |
-                       TRANSPOSE(~flags,
+                                 PKT_RX_IP_CKSUM_GOOD) |
+                       TRANSPOSE(flags,
                                  IBV_EXP_CQ_RX_OUTER_TCP_UDP_CSUM_OK,
-                                 PKT_RX_L4_CKSUM_BAD);
+                                 PKT_RX_L4_CKSUM_GOOD);
        return ol_flags;
 }
 
@@ -4426,6 +4427,8 @@ mlx4_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
        unsigned int max;
        char ifname[IF_NAMESIZE];
 
+       info->pci_dev = RTE_DEV_TO_PCI(dev->device);
+
        if (priv == NULL)
                return;
        priv_lock(priv);
@@ -4826,7 +4829,7 @@ end:
 }
 
 /**
- * DPDK callback to retrieve physical link information (unlocked version).
+ * DPDK callback to retrieve physical link information.
  *
  * @param dev
  *   Pointer to Ethernet device structure.
@@ -4834,9 +4837,9 @@ end:
  *   Wait for request completion (ignored).
  */
 static int
-mlx4_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete)
+mlx4_link_update(struct rte_eth_dev *dev, int wait_to_complete)
 {
-       struct priv *priv = mlx4_get_priv(dev);
+       const struct priv *priv = mlx4_get_priv(dev);
        struct ethtool_cmd edata = {
                .cmd = ETHTOOL_GSET
        };
@@ -4844,6 +4847,8 @@ mlx4_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete)
        struct rte_eth_link dev_link;
        int link_speed = 0;
 
+       /* priv_lock() is not taken to allow concurrent calls. */
+
        if (priv == NULL)
                return -EINVAL;
        (void)wait_to_complete;
@@ -4878,28 +4883,6 @@ mlx4_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete)
        return -1;
 }
 
-/**
- * DPDK callback to retrieve physical link information.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param wait_to_complete
- *   Wait for request completion (ignored).
- */
-static int
-mlx4_link_update(struct rte_eth_dev *dev, int wait_to_complete)
-{
-       struct priv *priv = mlx4_get_priv(dev);
-       int ret;
-
-       if (priv == NULL)
-               return -EINVAL;
-       priv_lock(priv);
-       ret = mlx4_link_update_unlocked(dev, wait_to_complete);
-       priv_unlock(priv);
-       return ret;
-}
-
 /**
  * DPDK callback to change the MTU.
  *
@@ -5416,7 +5399,7 @@ priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev)
                struct rte_eth_link *link = &dev->data->dev_link;
 
                priv->pending_alarm = 0;
-               mlx4_link_update_unlocked(dev, 0);
+               mlx4_link_update(dev, 0);
                if (((link->link_speed == 0) && link->link_status) ||
                    ((link->link_speed != 0) && !link->link_status)) {
                        /* Inconsistent status, check again later. */
@@ -5448,7 +5431,7 @@ mlx4_dev_link_status_handler(void *arg)
        ret = priv_dev_link_status_handler(priv, dev);
        priv_unlock(priv);
        if (ret)
-               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC);
+               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
 }
 
 /**
@@ -5471,7 +5454,7 @@ mlx4_dev_interrupt_handler(struct rte_intr_handle *intr_handle, void *cb_arg)
        ret = priv_dev_link_status_handler(priv, dev);
        priv_unlock(priv);
        if (ret)
-               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC);
+               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
 }
 
 /**
@@ -5544,7 +5527,7 @@ static struct eth_driver mlx4_driver;
  *   0 on success, negative errno value on failure.
  */
 static int
-mlx4_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
+mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 {
        struct ibv_device **list;
        struct ibv_device *ibv_dev;
@@ -5803,7 +5786,7 @@ mlx4_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 
                        snprintf(name, sizeof(name), "%s port %u",
                                 ibv_get_device_name(ibv_dev), port);
-                       eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_PCI);
+                       eth_dev = rte_eth_dev_allocate(name);
                }
                if (eth_dev == NULL) {
                        ERROR("can not allocate rte ethdev");
@@ -5839,11 +5822,9 @@ mlx4_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                        eth_dev->rx_pkt_burst = mlx4_rx_burst_secondary_setup;
                } else {
                        eth_dev->data->dev_private = priv;
-                       eth_dev->data->rx_mbuf_alloc_failed = 0;
-                       eth_dev->data->mtu = ETHER_MTU;
                        eth_dev->data->mac_addrs = priv->mac;
                }
-               eth_dev->pci_dev = pci_dev;
+               eth_dev->device = &pci_dev->device;
 
                rte_eth_copy_pci_info(eth_dev, pci_dev);
 
@@ -5851,7 +5832,6 @@ mlx4_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 
                priv->dev = eth_dev;
                eth_dev->dev_ops = &mlx4_dev_ops;
-               TAILQ_INIT(&eth_dev->link_intr_cbs);
 
                /* Bring Ethernet device up. */
                DEBUG("forcing Ethernet interface up");
@@ -5911,9 +5891,11 @@ static const struct rte_pci_id mlx4_pci_id_map[] = {
 
 static struct eth_driver mlx4_driver = {
        .pci_drv = {
-               .name = MLX4_DRIVER_NAME,
+               .driver = {
+                       .name = MLX4_DRIVER_NAME
+               },
                .id_table = mlx4_pci_id_map,
-               .devinit = mlx4_pci_devinit,
+               .probe = mlx4_pci_probe,
                .drv_flags = RTE_PCI_DRV_INTR_LSC,
        },
        .dev_private_size = sizeof(struct priv)
@@ -5922,12 +5904,10 @@ static struct eth_driver mlx4_driver = {
 /**
  * Driver initialization routine.
  */
-static int
-rte_mlx4_pmd_init(const char *name, const char *args)
+RTE_INIT(rte_mlx4_pmd_init);
+static void
+rte_mlx4_pmd_init(void)
 {
-       (void)name;
-       (void)args;
-
        RTE_BUILD_BUG_ON(sizeof(wr_id_t) != sizeof(uint64_t));
        /*
         * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
@@ -5938,13 +5918,9 @@ rte_mlx4_pmd_init(const char *name, const char *args)
        setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
        ibv_fork_init();
        rte_eal_pci_register(&mlx4_driver.pci_drv);
-       return 0;
 }
 
-static struct rte_driver rte_mlx4_driver = {
-       .type = PMD_PDEV,
-       .init = rte_mlx4_pmd_init,
-};
-
-PMD_REGISTER_DRIVER(rte_mlx4_driver, mlx4);
-DRIVER_REGISTER_PCI_TABLE(mlx4, mlx4_pci_id_map);
+RTE_PMD_EXPORT_NAME(net_mlx4, __COUNTER__);
+RTE_PMD_REGISTER_PCI_TABLE(net_mlx4, mlx4_pci_id_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_mlx4,
+       "* ib_uverbs & mlx4_en & mlx4_core & mlx4_ib");
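
The mlx4 hunks above move the PMD from the old rte_driver/PMD_REGISTER_DRIVER scheme to the constructor-based registration used by this DPDK release (RTE_INIT constructor, RTE_PMD_EXPORT_NAME, PCI table and kernel-module dependency macros). A minimal sketch of that pattern follows; every "example"-prefixed name and the PCI IDs are placeholders, only the macros and structure layout come from the hunks above.

    #include <rte_eal.h>
    #include <rte_pci.h>
    #include <rte_dev.h>
    #include <rte_ethdev.h>

    /* Sketch only: placeholder PMD showing the registration pattern. */
    struct example_priv { int unused; };

    static const struct rte_pci_id example_pci_id_map[] = {
            { RTE_PCI_DEVICE(0x15b3, 0x1003) }, /* illustrative vendor/device */
            { .vendor_id = 0 }
    };

    static int
    example_pci_probe(struct rte_pci_driver *drv, struct rte_pci_device *dev)
    {
            (void)drv;
            (void)dev;
            return 0; /* a real probe allocates and fills rte_eth_dev here */
    }

    static struct eth_driver example_driver = {
            .pci_drv = {
                    .driver = {
                            .name = "net_example"
                    },
                    .id_table = example_pci_id_map,
                    .probe = example_pci_probe,
                    .drv_flags = RTE_PCI_DRV_INTR_LSC,
            },
            .dev_private_size = sizeof(struct example_priv),
    };

    RTE_INIT(example_pmd_init);
    static void
    example_pmd_init(void)
    {
            rte_eal_pci_register(&example_driver.pci_drv);
    }

    RTE_PMD_EXPORT_NAME(net_example, __COUNTER__);
    RTE_PMD_REGISTER_PCI_TABLE(net_example, example_pci_id_map);
    RTE_PMD_REGISTER_KMOD_DEP(net_example, "* ib_uverbs");

The RTE_PMD_REGISTER_KMOD_DEP() string lists the kernel modules the PMD expects at run time, which is what the last added line above records for mlx4.
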
index d0c7bc2..4c7505e 100644
@@ -96,7 +96,7 @@ enum {
        PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO = 0x1007,
 };
 
-#define MLX4_DRIVER_NAME "librte_pmd_mlx4"
+#define MLX4_DRIVER_NAME "net_mlx4"
 
 /* Bit-field manipulation. */
 #define BITFIELD_DECLARE(bf, type, size)                               \
index 7c07239..d4bd469 100644
@@ -181,9 +181,6 @@ mlx5_dev_close(struct rte_eth_dev *dev)
        }
        if (priv->reta_idx != NULL)
                rte_free(priv->reta_idx);
-
-    mlx5_stats_free(dev);
-
        priv_unlock(priv);
        memset(priv, 0, sizeof(*priv));
 }
@@ -202,6 +199,9 @@ static const struct eth_dev_ops mlx5_dev_ops = {
        .link_update = mlx5_link_update,
        .stats_get = mlx5_stats_get,
        .stats_reset = mlx5_stats_reset,
+       .xstats_get = mlx5_xstats_get,
+       .xstats_reset = mlx5_xstats_reset,
+       .xstats_get_names = mlx5_xstats_get_names,
        .dev_infos_get = mlx5_dev_infos_get,
        .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
        .vlan_filter_set = mlx5_vlan_filter_set,
@@ -257,7 +257,6 @@ mlx5_dev_idx(struct rte_pci_addr *pci_addr)
        return ret;
 }
 
-
 /**
  * Verify and store value for device argument.
  *
@@ -290,7 +289,7 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
        } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
                priv->txqs_inline = tmp;
        } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {
-               priv->mps = !!tmp;
+               priv->mps &= !!tmp; /* Enable MPW only if HW supports */
        } else {
                WARN("%s: unknown parameter", key);
                return -EINVAL;
@@ -298,8 +297,6 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
        return 0;
 }
 
-
-
 /**
  * Parse device parameters.
  *
@@ -336,16 +333,16 @@ mlx5_args(struct priv *priv, struct rte_devargs *devargs)
                if (rte_kvargs_count(kvlist, params[i])) {
                        ret = rte_kvargs_process(kvlist, params[i],
                                                 mlx5_args_check, priv);
-                       if (ret != 0)
+                       if (ret != 0) {
+                               rte_kvargs_free(kvlist);
                                return ret;
+                       }
                }
        }
        rte_kvargs_free(kvlist);
        return 0;
 }
 
-
-
 static struct eth_driver mlx5_driver;
 
 /**
@@ -363,7 +360,7 @@ static struct eth_driver mlx5_driver;
  *   0 on success, negative errno value on failure.
  */
 static int
-mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
+mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 {
        struct ibv_device **list;
        struct ibv_device *ibv_dev;
@@ -374,13 +371,6 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
        unsigned int mps;
        int idx;
        int i;
-    static int ibv_was_init=0;
-
-    if (ibv_was_init==0) {
-        ibv_fork_init();
-        ibv_was_init=1;
-    }
-
 
        (void)pci_drv;
        assert(pci_drv == &mlx5_driver.pci_drv);
@@ -423,10 +413,26 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                sriov = ((pci_dev->id.device_id ==
                       PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) ||
                      (pci_dev->id.device_id ==
-                      PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF));
-               /* Multi-packet send is only supported by ConnectX-4 Lx PF. */
-               mps = (pci_dev->id.device_id ==
-                      PCI_DEVICE_ID_MELLANOX_CONNECTX4LX);
+                      PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF) ||
+                     (pci_dev->id.device_id ==
+                      PCI_DEVICE_ID_MELLANOX_CONNECTX5VF) ||
+                     (pci_dev->id.device_id ==
+                      PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF));
+               /*
+                * Multi-packet send is supported by ConnectX-4 Lx PF as well
+                * as all ConnectX-5 devices.
+                */
+               switch (pci_dev->id.device_id) {
+               case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX:
+               case PCI_DEVICE_ID_MELLANOX_CONNECTX5:
+               case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
+               case PCI_DEVICE_ID_MELLANOX_CONNECTX5EX:
+               case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:
+                       mps = 1;
+                       break;
+               default:
+                       mps = 0;
+               }
                INFO("PCI information matches, using device \"%s\""
                     " (SR-IOV: %s, MPS: %s)",
                     list[i]->name,
@@ -526,16 +532,7 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                priv->mtu = ETHER_MTU;
                priv->mps = mps; /* Enable MPW by default if supported. */
                priv->cqe_comp = 1; /* Enable compression by default. */
-
-
-               err = mlx5_args(priv, pci_dev->devargs);
-
-               /* TREX PATCH */
-               /* set for maximum performance default */
-               priv->txq_inline  =64;
-               priv->txqs_inline =4;
-
-
+               err = mlx5_args(priv, pci_dev->device.devargs);
                if (err) {
                        ERROR("failed to process device arguments: %s",
                              strerror(err));
@@ -562,8 +559,9 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                priv->ind_table_max_size = exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size;
                /* Remove this check once DPDK supports larger/variable
                 * indirection tables. */
-               if (priv->ind_table_max_size > (unsigned int)RSS_INDIRECTION_TABLE_SIZE)
-                       priv->ind_table_max_size = RSS_INDIRECTION_TABLE_SIZE;
+               if (priv->ind_table_max_size >
+                               (unsigned int)ETH_RSS_RETA_SIZE_512)
+                       priv->ind_table_max_size = ETH_RSS_RETA_SIZE_512;
                DEBUG("maximum RX indirection table size is %u",
                      priv->ind_table_max_size);
                priv->hw_vlan_strip = !!(exp_device_attr.wq_vlan_offloads_cap &
@@ -641,7 +639,7 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 
                        snprintf(name, sizeof(name), "%s port %u",
                                 ibv_get_device_name(ibv_dev), port);
-                       eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_PCI);
+                       eth_dev = rte_eth_dev_allocate(name);
                }
                if (eth_dev == NULL) {
                        ERROR("can not allocate rte ethdev");
@@ -676,22 +674,19 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                        eth_dev->rx_pkt_burst = mlx5_rx_burst_secondary_setup;
                } else {
                        eth_dev->data->dev_private = priv;
-                       eth_dev->data->rx_mbuf_alloc_failed = 0;
-                       eth_dev->data->mtu = ETHER_MTU;
                        eth_dev->data->mac_addrs = priv->mac;
                }
 
-               eth_dev->pci_dev = pci_dev;
+               eth_dev->device = &pci_dev->device;
                rte_eth_copy_pci_info(eth_dev, pci_dev);
                eth_dev->driver = &mlx5_driver;
                priv->dev = eth_dev;
                eth_dev->dev_ops = &mlx5_dev_ops;
 
-               TAILQ_INIT(&eth_dev->link_intr_cbs);
-
                /* Bring Ethernet device up. */
                DEBUG("forcing Ethernet interface up");
                priv_set_flags(priv, ~IFF_UP, IFF_UP);
+               mlx5_link_update(priv->dev, 1);
                continue;
 
 port_error:
@@ -745,521 +740,21 @@ static const struct rte_pci_id mlx5_pci_id_map[] = {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                               PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF)
        },
-       {
-               .vendor_id = 0
-       }
-};
-
-static struct eth_driver mlx5_driver = {
-       .pci_drv = {
-               .name = MLX5_DRIVER_NAME,
-               .id_table = mlx5_pci_id_map,
-               .devinit = mlx5_pci_devinit,
-               .drv_flags = RTE_PCI_DRV_INTR_LSC,
-       },
-       .dev_private_size = sizeof(struct priv)
-};
-
-/**
- * Driver initialization routine.
- */
-static int
-rte_mlx5_pmd_init(const char *name, const char *args)
-{
-       (void)name;
-       (void)args;
-       /*
-        * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
-        * huge pages. Calling ibv_fork_init() during init allows
-        * applications to use fork() safely for purposes other than
-        * using this PMD, which is not supported in forked processes.
-        */
-       setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
-       rte_eal_pci_register(&mlx5_driver.pci_drv);
-       return 0;
-}
-
-static struct rte_driver rte_mlx5_driver = {
-       .type = PMD_PDEV,
-       .init = rte_mlx5_pmd_init,
-};
-
-PMD_REGISTER_DRIVER(rte_mlx5_driver, mlx5);
-DRIVER_REGISTER_PCI_TABLE(mlx5, mlx5_pci_id_map);
-
-
-
-
-
-
-#if 0
-/**
- * Verify and store value for device argument.
- *
- * @param[in] key
- *   Key argument to verify.
- * @param[in] val
- *   Value associated with key.
- * @param opaque
- *   User data.
- *
- * @return
- *   0 on success, negative errno value on failure.
- */
-static int
-mlx5_args_check(const char *key, const char *val, void *opaque)
-{
-       struct priv *priv = opaque;
-       unsigned long tmp;
-
-       errno = 0;
-       tmp = strtoul(val, NULL, 0);
-       if (errno) {
-               WARN("%s: \"%s\" is not a valid integer", key, val);
-               return errno;
-       }
-       if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) {
-               priv->cqe_comp = !!tmp;
-       } else if (strcmp(MLX5_TXQ_INLINE, key) == 0) {
-               priv->txq_inline = tmp;
-       } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
-               priv->txqs_inline = tmp;
-       } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {
-               priv->mps = !!tmp;
-       } else {
-               WARN("%s: unknown parameter", key);
-               return -EINVAL;
-       }
-       return 0;
-}
-
-/**
- * Parse device parameters.
- *
- * @param priv
- *   Pointer to private structure.
- * @param devargs
- *   Device arguments structure.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-mlx5_args(struct priv *priv, struct rte_devargs *devargs)
-{
-       const char **params = (const char *[]){
-               MLX5_RXQ_CQE_COMP_EN,
-               MLX5_TXQ_INLINE,
-               MLX5_TXQS_MIN_INLINE,
-               MLX5_TXQ_MPW_EN,
-               NULL,
-       };
-       struct rte_kvargs *kvlist;
-       int ret = 0;
-       int i;
-
-       if (devargs == NULL)
-               return 0;
-       /* Following UGLY cast is done to pass checkpatch. */
-       kvlist = rte_kvargs_parse(devargs->args, params);
-       if (kvlist == NULL)
-               return 0;
-       /* Process parameters. */
-       for (i = 0; (params[i] != NULL); ++i) {
-               if (rte_kvargs_count(kvlist, params[i])) {
-                       ret = rte_kvargs_process(kvlist, params[i],
-                                                mlx5_args_check, priv);
-                       if (ret != 0)
-                               return ret;
-               }
-       }
-       rte_kvargs_free(kvlist);
-       return 0;
-}
-
-static struct eth_driver mlx5_driver;
-
-/**
- * DPDK callback to register a PCI device.
- *
- * This function creates an Ethernet device for each port of a given
- * PCI device.
- *
- * @param[in] pci_drv
- *   PCI driver structure (mlx5_driver).
- * @param[in] pci_dev
- *   PCI device information.
- *
- * @return
- *   0 on success, negative errno value on failure.
- */
-static int
-mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
-{
-       struct ibv_device **list;
-       struct ibv_device *ibv_dev;
-       int err = 0;
-       struct ibv_context *attr_ctx = NULL;
-       struct ibv_device_attr device_attr;
-       unsigned int sriov;
-       unsigned int mps;
-       int idx;
-       int i;
-
-       (void)pci_drv;
-       assert(pci_drv == &mlx5_driver.pci_drv);
-       /* Get mlx5_dev[] index. */
-       idx = mlx5_dev_idx(&pci_dev->addr);
-       if (idx == -1) {
-               ERROR("this driver cannot support any more adapters");
-               return -ENOMEM;
-       }
-       DEBUG("using driver device index %d", idx);
-
-       /* Save PCI address. */
-       mlx5_dev[idx].pci_addr = pci_dev->addr;
-       list = ibv_get_device_list(&i);
-       if (list == NULL) {
-               assert(errno);
-               if (errno == ENOSYS) {
-                       WARN("cannot list devices, is ib_uverbs loaded?");
-                       return 0;
-               }
-               return -errno;
-       }
-       assert(i >= 0);
-       /*
-        * For each listed device, check related sysfs entry against
-        * the provided PCI ID.
-        */
-       while (i != 0) {
-               struct rte_pci_addr pci_addr;
-
-               --i;
-               DEBUG("checking device \"%s\"", list[i]->name);
-               if (mlx5_ibv_device_to_pci_addr(list[i], &pci_addr))
-                       continue;
-               if ((pci_dev->addr.domain != pci_addr.domain) ||
-                   (pci_dev->addr.bus != pci_addr.bus) ||
-                   (pci_dev->addr.devid != pci_addr.devid) ||
-                   (pci_dev->addr.function != pci_addr.function))
-                       continue;
-               sriov = ((pci_dev->id.device_id ==
-                      PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) ||
-                     (pci_dev->id.device_id ==
-                      PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF));
-               /* Multi-packet send is only supported by ConnectX-4 Lx PF. */
-               mps = (pci_dev->id.device_id ==
-                      PCI_DEVICE_ID_MELLANOX_CONNECTX4LX);
-               INFO("PCI information matches, using device \"%s\""
-                    " (SR-IOV: %s, MPS: %s)",
-                    list[i]->name,
-                    sriov ? "true" : "false",
-                    mps ? "true" : "false");
-               attr_ctx = ibv_open_device(list[i]);
-               err = errno;
-               break;
-       }
-       if (attr_ctx == NULL) {
-               ibv_free_device_list(list);
-               switch (err) {
-               case 0:
-                       WARN("cannot access device, is mlx5_ib loaded?");
-                       return 0;
-               case EINVAL:
-                       WARN("cannot use device, are drivers up to date?");
-                       return 0;
-               }
-               assert(err > 0);
-               return -err;
-       }
-       ibv_dev = list[i];
-
-       DEBUG("device opened");
-       if (ibv_query_device(attr_ctx, &device_attr))
-               goto error;
-       INFO("%u port(s) detected", device_attr.phys_port_cnt);
-
-       for (i = 0; i < device_attr.phys_port_cnt; i++) {
-               uint32_t port = i + 1; /* ports are indexed from one */
-               uint32_t test = (1 << i);
-               struct ibv_context *ctx = NULL;
-               struct ibv_port_attr port_attr;
-               struct ibv_pd *pd = NULL;
-               struct priv *priv = NULL;
-               struct rte_eth_dev *eth_dev;
-               struct ibv_exp_device_attr exp_device_attr;
-               struct ether_addr mac;
-               uint16_t num_vfs = 0;
-
-               exp_device_attr.comp_mask =
-                       IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS |
-                       IBV_EXP_DEVICE_ATTR_RX_HASH |
-                       IBV_EXP_DEVICE_ATTR_VLAN_OFFLOADS |
-                       IBV_EXP_DEVICE_ATTR_RX_PAD_END_ALIGN |
-                       0;
-
-               DEBUG("using port %u (%08" PRIx32 ")", port, test);
-
-               ctx = ibv_open_device(ibv_dev);
-               if (ctx == NULL)
-                       goto port_error;
-
-               /* Check port status. */
-               err = ibv_query_port(ctx, port, &port_attr);
-               if (err) {
-                       ERROR("port query failed: %s", strerror(err));
-                       goto port_error;
-               }
-
-               if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
-                       ERROR("port %d is not configured in Ethernet mode",
-                             port);
-                       goto port_error;
-               }
-
-               if (port_attr.state != IBV_PORT_ACTIVE)
-                       DEBUG("port %d is not active: \"%s\" (%d)",
-                             port, ibv_port_state_str(port_attr.state),
-                             port_attr.state);
-
-               /* Allocate protection domain. */
-               pd = ibv_alloc_pd(ctx);
-               if (pd == NULL) {
-                       ERROR("PD allocation failure");
-                       err = ENOMEM;
-                       goto port_error;
-               }
-
-               mlx5_dev[idx].ports |= test;
-
-               /* from rte_ethdev.c */
-               priv = rte_zmalloc("ethdev private structure",
-                                  sizeof(*priv),
-                                  RTE_CACHE_LINE_SIZE);
-               if (priv == NULL) {
-                       ERROR("priv allocation failure");
-                       err = ENOMEM;
-                       goto port_error;
-               }
-
-               priv->ctx = ctx;
-               priv->device_attr = device_attr;
-               priv->port = port;
-               priv->pd = pd;
-               priv->mtu = ETHER_MTU;
-               priv->mps = mps; /* Enable MPW by default if supported. */
-               priv->cqe_comp = 1; /* Enable compression by default. */
-               err = mlx5_args(priv, pci_dev->device.devargs);
-               if (err) {
-                       ERROR("failed to process device arguments: %s",
-                             strerror(err));
-                       goto port_error;
-               }
-               if (ibv_exp_query_device(ctx, &exp_device_attr)) {
-                       ERROR("ibv_exp_query_device() failed");
-                       goto port_error;
-               }
-
-               priv->hw_csum =
-                       ((exp_device_attr.exp_device_cap_flags &
-                         IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT) &&
-                        (exp_device_attr.exp_device_cap_flags &
-                         IBV_EXP_DEVICE_RX_CSUM_IP_PKT));
-               DEBUG("checksum offloading is %ssupported",
-                     (priv->hw_csum ? "" : "not "));
-
-               priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags &
-                                        IBV_EXP_DEVICE_VXLAN_SUPPORT);
-               DEBUG("L2 tunnel checksum offloads are %ssupported",
-                     (priv->hw_csum_l2tun ? "" : "not "));
-
-               priv->ind_table_max_size = exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size;
-               /* Remove this check once DPDK supports larger/variable
-                * indirection tables. */
-               if (priv->ind_table_max_size > (unsigned int)RSS_INDIRECTION_TABLE_SIZE)
-                       priv->ind_table_max_size = RSS_INDIRECTION_TABLE_SIZE;
-               DEBUG("maximum RX indirection table size is %u",
-                     priv->ind_table_max_size);
-               priv->hw_vlan_strip = !!(exp_device_attr.wq_vlan_offloads_cap &
-                                        IBV_EXP_RECEIVE_WQ_CVLAN_STRIP);
-               DEBUG("VLAN stripping is %ssupported",
-                     (priv->hw_vlan_strip ? "" : "not "));
-
-               priv->hw_fcs_strip = !!(exp_device_attr.exp_device_cap_flags &
-                                       IBV_EXP_DEVICE_SCATTER_FCS);
-               DEBUG("FCS stripping configuration is %ssupported",
-                     (priv->hw_fcs_strip ? "" : "not "));
-
-               priv->hw_padding = !!exp_device_attr.rx_pad_end_addr_align;
-               DEBUG("hardware RX end alignment padding is %ssupported",
-                     (priv->hw_padding ? "" : "not "));
-
-               priv_get_num_vfs(priv, &num_vfs);
-               priv->sriov = (num_vfs || sriov);
-               if (priv->mps && !mps) {
-                       ERROR("multi-packet send not supported on this device"
-                             " (" MLX5_TXQ_MPW_EN ")");
-                       err = ENOTSUP;
-                       goto port_error;
-               }
-               /* Allocate and register default RSS hash keys. */
-               priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n,
-                                           sizeof((*priv->rss_conf)[0]), 0);
-               if (priv->rss_conf == NULL) {
-                       err = ENOMEM;
-                       goto port_error;
-               }
-               err = rss_hash_rss_conf_new_key(priv,
-                                               rss_hash_default_key,
-                                               rss_hash_default_key_len,
-                                               ETH_RSS_PROTO_MASK);
-               if (err)
-                       goto port_error;
-               /* Configure the first MAC address by default. */
-               if (priv_get_mac(priv, &mac.addr_bytes)) {
-                       ERROR("cannot get MAC address, is mlx5_en loaded?"
-                             " (errno: %s)", strerror(errno));
-                       goto port_error;
-               }
-               INFO("port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
-                    priv->port,
-                    mac.addr_bytes[0], mac.addr_bytes[1],
-                    mac.addr_bytes[2], mac.addr_bytes[3],
-                    mac.addr_bytes[4], mac.addr_bytes[5]);
-               /* Register MAC address. */
-               claim_zero(priv_mac_addr_add(priv, 0,
-                                            (const uint8_t (*)[ETHER_ADDR_LEN])
-                                            mac.addr_bytes));
-               /* Initialize FD filters list. */
-               err = fdir_init_filters_list(priv);
-               if (err)
-                       goto port_error;
-#ifndef NDEBUG
-               {
-                       char ifname[IF_NAMESIZE];
-
-                       if (priv_get_ifname(priv, &ifname) == 0)
-                               DEBUG("port %u ifname is \"%s\"",
-                                     priv->port, ifname);
-                       else
-                               DEBUG("port %u ifname is unknown", priv->port);
-               }
-#endif
-               /* Get actual MTU if possible. */
-               priv_get_mtu(priv, &priv->mtu);
-               DEBUG("port %u MTU is %u", priv->port, priv->mtu);
-
-               /* from rte_ethdev.c */
-               {
-                       char name[RTE_ETH_NAME_MAX_LEN];
-
-                       snprintf(name, sizeof(name), "%s port %u",
-                                ibv_get_device_name(ibv_dev), port);
-                       eth_dev = rte_eth_dev_allocate(name);
-               }
-               if (eth_dev == NULL) {
-                       ERROR("can not allocate rte ethdev");
-                       err = ENOMEM;
-                       goto port_error;
-               }
-
-               /* Secondary processes have to use local storage for their
-                * private data as well as a copy of eth_dev->data, but this
-                * pointer must not be modified before burst functions are
-                * actually called. */
-               if (mlx5_is_secondary()) {
-                       struct mlx5_secondary_data *sd =
-                               &mlx5_secondary_data[eth_dev->data->port_id];
-                       sd->primary_priv = eth_dev->data->dev_private;
-                       if (sd->primary_priv == NULL) {
-                               ERROR("no private data for port %u",
-                                               eth_dev->data->port_id);
-                               err = EINVAL;
-                               goto port_error;
-                       }
-                       sd->shared_dev_data = eth_dev->data;
-                       rte_spinlock_init(&sd->lock);
-                       memcpy(sd->data.name, sd->shared_dev_data->name,
-                                  sizeof(sd->data.name));
-                       sd->data.dev_private = priv;
-                       sd->data.rx_mbuf_alloc_failed = 0;
-                       sd->data.mtu = ETHER_MTU;
-                       sd->data.port_id = sd->shared_dev_data->port_id;
-                       sd->data.mac_addrs = priv->mac;
-                       eth_dev->tx_pkt_burst = mlx5_tx_burst_secondary_setup;
-                       eth_dev->rx_pkt_burst = mlx5_rx_burst_secondary_setup;
-               } else {
-                       eth_dev->data->dev_private = priv;
-                       eth_dev->data->rx_mbuf_alloc_failed = 0;
-                       eth_dev->data->mtu = ETHER_MTU;
-                       eth_dev->data->mac_addrs = priv->mac;
-               }
-
-               eth_dev->pci_dev = pci_dev;
-               rte_eth_copy_pci_info(eth_dev, pci_dev);
-               eth_dev->driver = &mlx5_driver;
-               priv->dev = eth_dev;
-               eth_dev->dev_ops = &mlx5_dev_ops;
-
-               TAILQ_INIT(&eth_dev->link_intr_cbs);
-
-               /* Bring Ethernet device up. */
-               DEBUG("forcing Ethernet interface up");
-               priv_set_flags(priv, ~IFF_UP, IFF_UP);
-               mlx5_link_update_unlocked(priv->dev, 1);
-               continue;
-
-port_error:
-               if (priv) {
-                       rte_free(priv->rss_conf);
-                       rte_free(priv);
-               }
-               if (pd)
-                       claim_zero(ibv_dealloc_pd(pd));
-               if (ctx)
-                       claim_zero(ibv_close_device(ctx));
-               break;
-       }
-
-       /*
-        * XXX if something went wrong in the loop above, there is a resource
-        * leak (ctx, pd, priv, dpdk ethdev) but we can do nothing about it as
-        * long as the dpdk does not provide a way to deallocate a ethdev and a
-        * way to enumerate the registered ethdevs to free the previous ones.
-        */
-
-       /* no port found, complain */
-       if (!mlx5_dev[idx].ports) {
-               err = ENODEV;
-               goto error;
-       }
-
-error:
-       if (attr_ctx)
-               claim_zero(ibv_close_device(attr_ctx));
-       if (list)
-               ibv_free_device_list(list);
-       assert(err >= 0);
-       return -err;
-}
-
-static const struct rte_pci_id mlx5_pci_id_map[] = {
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
-                              PCI_DEVICE_ID_MELLANOX_CONNECTX4)
+                              PCI_DEVICE_ID_MELLANOX_CONNECTX5)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
-                              PCI_DEVICE_ID_MELLANOX_CONNECTX4VF)
+                              PCI_DEVICE_ID_MELLANOX_CONNECTX5VF)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
-                              PCI_DEVICE_ID_MELLANOX_CONNECTX4LX)
+                              PCI_DEVICE_ID_MELLANOX_CONNECTX5EX)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
-                              PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF)
+                              PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF)
        },
        {
                .vendor_id = 0
@@ -1292,9 +787,10 @@ rte_mlx5_pmd_init(void)
         * using this PMD, which is not supported in forked processes.
         */
        setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
+       ibv_fork_init();
        rte_eal_pci_register(&mlx5_driver.pci_drv);
 }
 
 RTE_PMD_EXPORT_NAME(net_mlx5, __COUNTER__);
 RTE_PMD_REGISTER_PCI_TABLE(net_mlx5, mlx5_pci_id_map);
-#endif
+RTE_PMD_REGISTER_KMOD_DEP(net_mlx5, "* ib_uverbs & mlx5_core & mlx5_ib");
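
Among the smaller mlx5.c fixes above, mlx5_args() now releases the kvlist when rte_kvargs_process() fails instead of leaking it. A hedged sketch of that parse-and-always-free pattern is shown below; the callback and key names are placeholders, only the rte_kvargs calls mirror the code in the hunk.

    #include <rte_kvargs.h>
    #include <rte_devargs.h>

    /* Sketch only: accept every value; a real callback stores it somewhere. */
    static int
    check_cb(const char *key, const char *val, void *opaque)
    {
            (void)key;
            (void)val;
            (void)opaque;
            return 0;
    }

    static int
    parse_devargs(struct rte_devargs *devargs, void *opaque)
    {
            const char *params[] = { "rxq_cqe_comp_en", "txq_inline", NULL };
            struct rte_kvargs *kvlist;
            int ret = 0;
            int i;

            if (devargs == NULL)
                    return 0;
            kvlist = rte_kvargs_parse(devargs->args, params);
            if (kvlist == NULL)
                    return 0;
            for (i = 0; params[i] != NULL; ++i) {
                    if (rte_kvargs_count(kvlist, params[i])) {
                            ret = rte_kvargs_process(kvlist, params[i],
                                                     check_cb, opaque);
                            if (ret != 0)
                                    break;
                    }
            }
            /* Freed on both the error and the success path. */
            rte_kvargs_free(kvlist);
            return ret;
    }
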
index 83b29e1..879da5e 100644
@@ -59,6 +59,7 @@
 #include <rte_spinlock.h>
 #include <rte_interrupts.h>
 #include <rte_errno.h>
+#include <rte_flow.h>
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
@@ -82,36 +83,20 @@ enum {
        PCI_DEVICE_ID_MELLANOX_CONNECTX4VF = 0x1014,
        PCI_DEVICE_ID_MELLANOX_CONNECTX4LX = 0x1015,
        PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF = 0x1016,
+       PCI_DEVICE_ID_MELLANOX_CONNECTX5 = 0x1017,
+       PCI_DEVICE_ID_MELLANOX_CONNECTX5VF = 0x1018,
+       PCI_DEVICE_ID_MELLANOX_CONNECTX5EX = 0x1019,
+       PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF = 0x101a,
 };
 
-struct mlx5_stats_priv {
-
-    struct rte_eth_stats m_shadow;
-    uint32_t      n_stats; /* number of counters */
-
-    void    *  et_stats  ;/* point to ethtool counter struct ethtool_stats*/
-
-    /* index into ethtool */
-    uint16_t inx_rx_vport_unicast_bytes;
-    uint16_t inx_rx_vport_multicast_bytes;
-    uint16_t inx_rx_vport_broadcast_bytes;
-    uint16_t inx_rx_vport_unicast_packets;
-    uint16_t inx_rx_vport_multicast_packets;
-    uint16_t inx_rx_vport_broadcast_packets;
-    uint16_t inx_tx_vport_unicast_bytes;
-    uint16_t inx_tx_vport_multicast_bytes;
-    uint16_t inx_tx_vport_broadcast_bytes;
-    uint16_t inx_tx_vport_unicast_packets;
-    uint16_t inx_tx_vport_multicast_packets;
-    uint16_t inx_tx_vport_broadcast_packets;
-    uint16_t inx_rx_wqe_err;
-    uint16_t inx_rx_crc_errors_phy;
-    uint16_t inx_rx_in_range_len_errors_phy;
-    uint16_t inx_rx_symbol_err_phy;
-    uint16_t inx_tx_errors_phy;
+struct mlx5_xstats_ctrl {
+       /* Number of device stats. */
+       uint16_t stats_n;
+       /* Index in the device counters table. */
+       uint16_t dev_table_idx[MLX5_MAX_XSTATS];
+       uint64_t base[MLX5_MAX_XSTATS];
 };
 
-
 struct priv {
        struct rte_eth_dev *dev; /* Ethernet device. */
        struct ibv_context *ctx; /* Verbs context. */
@@ -163,9 +148,10 @@ struct priv {
        unsigned int reta_idx_n; /* RETA index size. */
        struct fdir_filter_list *fdir_filter_list; /* Flow director rules. */
        struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */
+       LIST_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
        uint32_t link_speed_capa; /* Link speed capabilities. */
+       struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
        rte_spinlock_t lock; /* Lock for control functions. */
-    struct mlx5_stats_priv m_stats;
 };
 
 /* Local storage for secondary process data. */
@@ -217,7 +203,6 @@ int priv_set_flags(struct priv *, unsigned int, unsigned int);
 int mlx5_dev_configure(struct rte_eth_dev *);
 void mlx5_dev_infos_get(struct rte_eth_dev *, struct rte_eth_dev_info *);
 const uint32_t *mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev);
-int mlx5_link_update_unlocked(struct rte_eth_dev *, int);
 int mlx5_link_update(struct rte_eth_dev *, int);
 int mlx5_dev_set_mtu(struct rte_eth_dev *, uint16_t);
 int mlx5_dev_get_flow_ctrl(struct rte_eth_dev *, struct rte_eth_fc_conf *);
@@ -273,9 +258,14 @@ void mlx5_allmulticast_disable(struct rte_eth_dev *);
 
 /* mlx5_stats.c */
 
+void priv_xstats_init(struct priv *);
 void mlx5_stats_get(struct rte_eth_dev *, struct rte_eth_stats *);
 void mlx5_stats_reset(struct rte_eth_dev *);
-void mlx5_stats_free(struct rte_eth_dev *dev);
+int mlx5_xstats_get(struct rte_eth_dev *,
+                   struct rte_eth_xstat *, unsigned int);
+void mlx5_xstats_reset(struct rte_eth_dev *);
+int mlx5_xstats_get_names(struct rte_eth_dev *,
+                         struct rte_eth_xstat_name *, unsigned int);
 
 /* mlx5_vlan.c */
 
@@ -298,4 +288,21 @@ void priv_fdir_enable(struct priv *);
 int mlx5_dev_filter_ctrl(struct rte_eth_dev *, enum rte_filter_type,
                         enum rte_filter_op, void *);
 
+/* mlx5_flow.c */
+
+int mlx5_flow_validate(struct rte_eth_dev *, const struct rte_flow_attr *,
+                      const struct rte_flow_item [],
+                      const struct rte_flow_action [],
+                      struct rte_flow_error *);
+struct rte_flow *mlx5_flow_create(struct rte_eth_dev *,
+                                 const struct rte_flow_attr *,
+                                 const struct rte_flow_item [],
+                                 const struct rte_flow_action [],
+                                 struct rte_flow_error *);
+int mlx5_flow_destroy(struct rte_eth_dev *, struct rte_flow *,
+                     struct rte_flow_error *);
+int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *);
+int priv_flow_start(struct priv *);
+void priv_flow_stop(struct priv *);
+
 #endif /* RTE_PMD_MLX5_H_ */
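
The xstats_get()/xstats_get_names() prototypes added above plug the PMD into the generic ethdev statistics API, replacing the private mlx5_stats_priv shadow copy that is removed. A rough sketch of an application-side caller, assuming a valid port_id and the standard rte_ethdev xstats calls:

    #include <inttypes.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <rte_ethdev.h>

    /* Sketch only: dump every extended counter exposed by one port. */
    static void
    dump_xstats(uint8_t port_id)
    {
            int n = rte_eth_xstats_get_names(port_id, NULL, 0);
            struct rte_eth_xstat_name *names;
            struct rte_eth_xstat *values;
            int i;

            if (n <= 0)
                    return;
            names = calloc(n, sizeof(*names));
            values = calloc(n, sizeof(*values));
            if (names != NULL && values != NULL &&
                rte_eth_xstats_get_names(port_id, names, n) == n &&
                rte_eth_xstats_get(port_id, values, n) == n) {
                    for (i = 0; i != n; ++i)
                            printf("%s: %" PRIu64 "\n",
                                   names[i].name, values[i].value);
            }
            free(names);
            free(values);
    }
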
index 30adfeb..e91d245 100644
@@ -54,9 +54,6 @@
  */
 #define MLX5_TX_COMP_THRESH 32
 
-/* RSS Indirection table size. */
-#define RSS_INDIRECTION_TABLE_SIZE 256
-
 /*
  * Maximum number of cached Memory Pools (MPs) per TX queue. Each RTE MP
  * from which buffers are to be transmitted will have to be mapped by this
 /* Alarm timeout. */
 #define MLX5_ALARM_TIMEOUT_US 100000
 
-
-//#ifdef TREX_PATCH_DPDK PATH for DPDK16.11 should be removed 
-
-/**
- * Mask of bits used to determine the status of RX IP checksum.
- * - PKT_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
- * - PKT_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
- * - PKT_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
- * - PKT_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet
- *   data, but the integrity of the IP header is verified.
- */
-#define PKT_RX_IP_CKSUM_MASK ((1ULL << 4) | (1ULL << 7))
-
-#define PKT_RX_IP_CKSUM_UNKNOWN 0
-#define PKT_RX_IP_CKSUM_BAD     (1ULL << 4)
-#define PKT_RX_IP_CKSUM_GOOD    (1ULL << 7)
-#define PKT_RX_IP_CKSUM_NONE    ((1ULL << 4) | (1ULL << 7))
-
-/**
- * Mask of bits used to determine the status of RX L4 checksum.
- * - PKT_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum
- * - PKT_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong
- * - PKT_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid
- * - PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet
- *   data, but the integrity of the L4 data is verified.
- */
-#define PKT_RX_L4_CKSUM_MASK ((1ULL << 3) | (1ULL << 8))
-
-#define PKT_RX_L4_CKSUM_UNKNOWN 0
-#define PKT_RX_L4_CKSUM_BAD     (1ULL << 3)
-#define PKT_RX_L4_CKSUM_GOOD    (1ULL << 8)
-#define PKT_RX_L4_CKSUM_NONE    ((1ULL << 3) | (1ULL << 8))
-
-
-//#endif
-
+/* Maximum number of extended statistics counters. */
+#define MLX5_MAX_XSTATS 32
 
 #endif /* RTE_PMD_MLX5_DEFS_H_ */
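
The block deleted above was a TRex-local copy of the PKT_RX_*_CKSUM_* definitions; the rte_mbuf.h shipped with this DPDK version already provides the same mask/GOOD/BAD/NONE values, which is why the mlx4 hunk at the top of this diff can use PKT_RX_L4_CKSUM_GOOD directly. A minimal sketch of what an RX path does with the standard flags:

    #include <rte_mbuf.h>

    /* Sketch only: mark a received mbuf as having valid L3/L4 checksums
     * using the flags taken from rte_mbuf.h instead of local copies. */
    static inline void
    mark_csum_good(struct rte_mbuf *m)
    {
            m->ol_flags |= PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD;
    }
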
index 85b8136..2145965 100644
 #include <net/if.h>
 #include <sys/ioctl.h>
 #include <sys/socket.h>
+#include <sys/utsname.h>
 #include <netinet/in.h>
 #include <linux/ethtool.h>
 #include <linux/sockios.h>
+#include <linux/version.h>
 #include <fcntl.h>
 
 /* DPDK headers don't like -pedantic. */
 #include "mlx5_rxtx.h"
 #include "mlx5_utils.h"
 
+/* Add defines in case the running kernel is not the same as user headers. */
+#ifndef ETHTOOL_GLINKSETTINGS
+struct ethtool_link_settings {
+       uint32_t cmd;
+       uint32_t speed;
+       uint8_t duplex;
+       uint8_t port;
+       uint8_t phy_address;
+       uint8_t autoneg;
+       uint8_t mdio_support;
+       uint8_t eth_to_mdix;
+       uint8_t eth_tp_mdix_ctrl;
+       int8_t link_mode_masks_nwords;
+       uint32_t reserved[8];
+       uint32_t link_mode_masks[];
+};
+
+#define ETHTOOL_GLINKSETTINGS 0x0000004c
+#define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5
+#define ETHTOOL_LINK_MODE_Autoneg_BIT 6
+#define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17
+#define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18
+#define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19
+#define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20
+#define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21
+#define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22
+#define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23
+#define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24
+#define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25
+#define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26
+#define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27
+#define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28
+#define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29
+#define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30
+#endif
+#ifndef HAVE_ETHTOOL_LINK_MODE_25G
+#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
+#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
+#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
+#endif
+#ifndef HAVE_ETHTOOL_LINK_MODE_50G
+#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
+#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
+#endif
+#ifndef HAVE_ETHTOOL_LINK_MODE_100G
+#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
+#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
+#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
+#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
+#endif
+
 /**
  * Return private structure associated with an Ethernet device.
  *
@@ -562,6 +615,8 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
        unsigned int max;
        char ifname[IF_NAMESIZE];
 
+       info->pci_dev = RTE_DEV_TO_PCI(dev->device);
+
        priv_lock(priv);
        /* FIXME: we should ask the device for these values. */
        info->min_rx_bufsize = 32;
@@ -626,7 +681,7 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
 }
 
 /**
- * Retrieve physical link information (unlocked version using legacy ioctl).
+ * DPDK callback to retrieve physical link information.
  *
  * @param dev
  *   Pointer to Ethernet device structure.
@@ -644,6 +699,8 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
        struct rte_eth_link dev_link;
        int link_speed = 0;
 
+       /* priv_lock() is not taken to allow concurrent calls. */
+
        (void)wait_to_complete;
        if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
                WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
@@ -690,8 +747,7 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
 }
 
 /**
- * Retrieve physical link information (unlocked version using new ioctl from
- * Linux 4.5).
+ * Retrieve physical link information (unlocked version using new ioctl).
  *
  * @param dev
  *   Pointer to Ethernet device structure.
@@ -701,7 +757,6 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
 static int
 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
 {
-#ifdef ETHTOOL_GLINKSETTINGS
        struct priv *priv = mlx5_get_priv(dev);
        struct ethtool_link_settings edata = {
                .cmd = ETHTOOL_GLINKSETTINGS,
@@ -728,7 +783,6 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
        sc = edata.link_mode_masks[0] |
                ((uint64_t)edata.link_mode_masks[1] << 32);
        priv->link_speed_capa = 0;
-       /* Link speeds available in kernel v4.5. */
        if (sc & ETHTOOL_LINK_MODE_Autoneg_BIT)
                priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
        if (sc & (ETHTOOL_LINK_MODE_1000baseT_Full_BIT |
@@ -751,25 +805,18 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
                  ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT |
                  ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT))
                priv->link_speed_capa |= ETH_LINK_SPEED_56G;
-       /* Link speeds available in kernel v4.6. */
-#ifdef HAVE_ETHTOOL_LINK_MODE_25G
        if (sc & (ETHTOOL_LINK_MODE_25000baseCR_Full_BIT |
                  ETHTOOL_LINK_MODE_25000baseKR_Full_BIT |
                  ETHTOOL_LINK_MODE_25000baseSR_Full_BIT))
                priv->link_speed_capa |= ETH_LINK_SPEED_25G;
-#endif
-#ifdef HAVE_ETHTOOL_LINK_MODE_50G
        if (sc & (ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT |
                  ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT))
                priv->link_speed_capa |= ETH_LINK_SPEED_50G;
-#endif
-#ifdef HAVE_ETHTOOL_LINK_MODE_100G
        if (sc & (ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT |
                  ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT |
                  ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT |
                  ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT))
                priv->link_speed_capa |= ETH_LINK_SPEED_100G;
-#endif
        dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
                                ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
        dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
@@ -779,33 +826,10 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
                dev->data->dev_link = dev_link;
                return 0;
        }
-#else
-       (void)dev;
-       (void)wait_to_complete;
-#endif
        /* Link status is still the same. */
        return -1;
 }
 
-/**
- * DPDK callback to retrieve physical link information (unlocked version).
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param wait_to_complete
- *   Wait for request completion (ignored).
- */
-int
-mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete)
-{
-       int ret;
-
-       ret = mlx5_link_update_unlocked_gs(dev, wait_to_complete);
-       if (ret < 0)
-               ret = mlx5_link_update_unlocked_gset(dev, wait_to_complete);
-       return ret;
-}
-
 /**
  * DPDK callback to retrieve physical link information.
  *
@@ -817,13 +841,15 @@ mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete)
 int
 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
 {
-       struct priv *priv = mlx5_get_priv(dev);
-       int ret;
-
-       priv_lock(priv);
-       ret = mlx5_link_update_unlocked(dev, wait_to_complete);
-       priv_unlock(priv);
-       return ret;
+       struct utsname utsname;
+       int ver[3];
+
+       if (uname(&utsname) == -1 ||
+           sscanf(utsname.release, "%d.%d.%d",
+                  &ver[0], &ver[1], &ver[2]) != 3 ||
+           KERNEL_VERSION(ver[0], ver[1], ver[2]) < KERNEL_VERSION(4, 9, 0))
+               return mlx5_link_update_unlocked_gset(dev, wait_to_complete);
+       return mlx5_link_update_unlocked_gs(dev, wait_to_complete);
 }
 
 /**
@@ -1141,7 +1167,7 @@ static int
 priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev)
 {
        struct ibv_async_event event;
-       int port_change = 0;
+       struct rte_eth_link *link = &dev->data->dev_link;
        int ret = 0;
 
        /* Read all message and acknowledge them. */
@@ -1149,29 +1175,24 @@ priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev)
                if (ibv_get_async_event(priv->ctx, &event))
                        break;
 
-               if (event.event_type == IBV_EVENT_PORT_ACTIVE ||
-                   event.event_type == IBV_EVENT_PORT_ERR)
-                       port_change = 1;
-               else
+               if (event.event_type != IBV_EVENT_PORT_ACTIVE &&
+                   event.event_type != IBV_EVENT_PORT_ERR)
                        DEBUG("event type %d on port %d not handled",
                              event.event_type, event.element.port_num);
                ibv_ack_async_event(&event);
        }
-
-       if (port_change ^ priv->pending_alarm) {
-               struct rte_eth_link *link = &dev->data->dev_link;
-
-               priv->pending_alarm = 0;
-               mlx5_link_update_unlocked(dev, 0);
-               if (((link->link_speed == 0) && link->link_status) ||
-                   ((link->link_speed != 0) && !link->link_status)) {
+       mlx5_link_update(dev, 0);
+       if (((link->link_speed == 0) && link->link_status) ||
+           ((link->link_speed != 0) && !link->link_status)) {
+               if (!priv->pending_alarm) {
                        /* Inconsistent status, check again later. */
                        priv->pending_alarm = 1;
                        rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
                                          mlx5_dev_link_status_handler,
                                          dev);
-               } else
-                       ret = 1;
+               }
+       } else {
+               ret = 1;
        }
        return ret;
 }
@@ -1191,10 +1212,11 @@ mlx5_dev_link_status_handler(void *arg)
 
        priv_lock(priv);
        assert(priv->pending_alarm == 1);
+       priv->pending_alarm = 0;
        ret = priv_dev_link_status_handler(priv, dev);
        priv_unlock(priv);
-       //if (ret)
-       //      _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
+       if (ret)
+               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
 }
 
 /**
@@ -1216,8 +1238,8 @@ mlx5_dev_interrupt_handler(struct rte_intr_handle *intr_handle, void *cb_arg)
        priv_lock(priv);
        ret = priv_dev_link_status_handler(priv, dev);
        priv_unlock(priv);
-       //if (ret)
-       //      _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
+       if (ret)
+               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
 }
 
 /**
@@ -1515,14 +1537,11 @@ void
 priv_select_tx_function(struct priv *priv)
 {
        priv->dev->tx_pkt_burst = mlx5_tx_burst;
-       /* Display warning for unsupported configurations. */
-       if (priv->sriov && priv->mps)
-               WARN("multi-packet send WQE cannot be used on a SR-IOV setup");
        /* Select appropriate TX function. */
-       if ((priv->sriov == 0) && priv->mps && priv->txq_inline) {
+       if (priv->mps && priv->txq_inline) {
                priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
                DEBUG("selected MPW inline TX function");
-       } else if ((priv->sriov == 0) && priv->mps) {
+       } else if (priv->mps) {
                priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw;
                DEBUG("selected MPW TX function");
        }
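
mlx5_link_update() above now probes the running kernel at run time instead of relying on compile-time ETHTOOL_GLINKSETTINGS detection, falling back to the legacy ETHTOOL_GSET path on kernels older than 4.9. A standalone sketch of that detection, assuming nothing beyond uname() and linux/version.h:

    #include <stdio.h>
    #include <sys/utsname.h>
    #include <linux/version.h>

    /* Sketch only: report whether the running kernel is at least maj.min.patch. */
    static int
    kernel_at_least(int maj, int min, int patch)
    {
            struct utsname uts;
            int v[3];

            if (uname(&uts) == -1 ||
                sscanf(uts.release, "%d.%d.%d", &v[0], &v[1], &v[2]) != 3)
                    return 0;
            return KERNEL_VERSION(v[0], v[1], v[2]) >=
                   KERNEL_VERSION(maj, min, patch);
    }
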
index f03e95e..f80c58b 100644
 #include <string.h>
 #include <errno.h>
 
-#define TREX_PATCH
-
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
 #ifdef PEDANTIC
 #pragma GCC diagnostic ignored "-Wpedantic"
 #endif
-#include <infiniband/verbs_exp.h>
+#include <infiniband/verbs.h>
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
@@ -57,6 +55,8 @@
 #include <rte_malloc.h>
 #include <rte_ethdev.h>
 #include <rte_common.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
@@ -69,9 +69,6 @@ struct fdir_flow_desc {
        uint16_t src_port;
        uint32_t src_ip[4];
        uint32_t dst_ip[4];
-    uint8_t  tos;
-    uint8_t  ip_id;
-    uint8_t  proto;
        uint8_t mac[6];
        uint16_t vlan_tag;
        enum hash_rxq_type type;
@@ -107,7 +104,6 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter,
        /* Set VLAN ID. */
        desc->vlan_tag = fdir_filter->input.flow_ext.vlan_tci;
 
-#ifndef TREX_PATCH
        /* Set MAC address. */
        if (mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
                rte_memcpy(desc->mac,
@@ -117,14 +113,6 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter,
                desc->type = HASH_RXQ_ETH;
                return;
        }
-#else
-    if (fdir_filter->input.flow.ip4_flow.ip_id == 2) {
-        desc->type = HASH_RXQ_ETH;
-        desc->ip_id = fdir_filter->input.flow.ip4_flow.ip_id;
-        return;
-    }
-#endif
-
 
        /* Set mode */
        switch (fdir_filter->input.flow_type) {
@@ -159,9 +147,6 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter,
        case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
                desc->src_ip[0] = fdir_filter->input.flow.ip4_flow.src_ip;
                desc->dst_ip[0] = fdir_filter->input.flow.ip4_flow.dst_ip;
-        desc->tos       = fdir_filter->input.flow.ip4_flow.ttl; /* TTL is mapped to TOS TREX_PATCH */
-        desc->ip_id     = fdir_filter->input.flow.ip4_flow.ip_id;
-        desc->proto     = fdir_filter->input.flow.ip4_flow.proto;
                break;
        case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
        case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
@@ -175,9 +160,6 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter,
                rte_memcpy(desc->dst_ip,
                           fdir_filter->input.flow.ipv6_flow.dst_ip,
                           sizeof(desc->dst_ip));
-        desc->tos       = (uint8_t)fdir_filter->input.flow.ipv6_flow.hop_limits;  /* TTL is mapped to TOS - TREX_PATCH */
-        desc->ip_id     = (uint8_t)fdir_filter->input.flow.ipv6_flow.flow_label;
-        desc->proto     = fdir_filter->input.flow.ipv6_flow.proto;
                break;
        default:
                break;
@@ -218,11 +200,6 @@ priv_fdir_overlap(const struct priv *priv,
            ((desc1->dst_port & mask->dst_port_mask) !=
             (desc2->dst_port & mask->dst_port_mask)))
                return 0;
-    if  ( (desc1->tos    != desc2->tos)  ||
-          (desc1->ip_id  != desc2->ip_id) ||
-          (desc1->proto  != desc2->proto) ) 
-        return 0;
-
        switch (desc1->type) {
        case HASH_RXQ_IPV4:
        case HASH_RXQ_UDPV4:
@@ -277,8 +254,8 @@ priv_fdir_flow_add(struct priv *priv,
        struct ibv_exp_flow_attr *attr = &data->attr;
        uintptr_t spec_offset = (uintptr_t)&data->spec;
        struct ibv_exp_flow_spec_eth *spec_eth;
-       struct ibv_exp_flow_spec_ipv4_ext *spec_ipv4;
-       struct ibv_exp_flow_spec_ipv6_ext *spec_ipv6;
+       struct ibv_exp_flow_spec_ipv4 *spec_ipv4;
+       struct ibv_exp_flow_spec_ipv6 *spec_ipv6;
        struct ibv_exp_flow_spec_tcp_udp *spec_tcp_udp;
        struct mlx5_fdir_filter *iter_fdir_filter;
        unsigned int i;
@@ -290,10 +267,8 @@ priv_fdir_flow_add(struct priv *priv,
                    (iter_fdir_filter->flow != NULL) &&
                    (priv_fdir_overlap(priv,
                                       &mlx5_fdir_filter->desc,
-                                      &iter_fdir_filter->desc))){
-            ERROR("overlap rules, please check your rules");
-            return EEXIST;
-        }
+                                      &iter_fdir_filter->desc)))
+                       return EEXIST;
 
        /*
         * No padding must be inserted by the compiler between attr and spec.
@@ -316,7 +291,6 @@ priv_fdir_flow_add(struct priv *priv,
        /* Update priority */
        attr->priority = 2;
 
-#ifndef TREX_PATCH
        if (fdir_mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
                /* MAC Address */
                for (i = 0; i != RTE_DIM(spec_eth->mask.dst_mac); ++i) {
@@ -326,14 +300,6 @@ priv_fdir_flow_add(struct priv *priv,
                }
                goto create_flow;
        }
-#else
-    // empty mask means "match everything". This rule will match all packets, no matter what is the ether type
-    if (desc->ip_id == 2) {
-        spec_eth->val.ether_type = 0x0806;
-        spec_eth->mask.ether_type = 0x0000;
-        goto create_flow;
-    }
-#endif    
 
        switch (desc->type) {
        case HASH_RXQ_IPV4:
@@ -342,10 +308,10 @@ priv_fdir_flow_add(struct priv *priv,
                spec_offset += spec_eth->size;
 
                /* Set IP spec */
-               spec_ipv4 = (struct ibv_exp_flow_spec_ipv4_ext *)spec_offset;
+               spec_ipv4 = (struct ibv_exp_flow_spec_ipv4 *)spec_offset;
 
                /* The second specification must be IP. */
-               assert(spec_ipv4->type == IBV_EXP_FLOW_SPEC_IPV4_EXT);
+               assert(spec_ipv4->type == IBV_EXP_FLOW_SPEC_IPV4);
                assert(spec_ipv4->size == sizeof(*spec_ipv4));
 
                spec_ipv4->val.src_ip =
@@ -355,21 +321,6 @@ priv_fdir_flow_add(struct priv *priv,
                spec_ipv4->mask.src_ip = mask->ipv4_mask.src_ip;
                spec_ipv4->mask.dst_ip = mask->ipv4_mask.dst_ip;
 
-        /* PROTO */
-        spec_ipv4->val.proto  = desc->proto & mask->ipv4_mask.proto;
-        spec_ipv4->mask.proto = mask->ipv4_mask.proto;
-
-#ifdef TREX_PATCH
-        /* TOS */
-        if (desc->ip_id == 1) {
-            spec_ipv4->mask.tos = 0x1;
-            spec_ipv4->val.tos = 0x1;
-        } else {
-            spec_ipv4->mask.tos = 0x0;
-            spec_ipv4->val.tos = 0x0;
-        }
-#endif
-
                /* Update priority */
                attr->priority = 1;
 
@@ -384,10 +335,10 @@ priv_fdir_flow_add(struct priv *priv,
                spec_offset += spec_eth->size;
 
                /* Set IP spec */
-               spec_ipv6 = (struct ibv_exp_flow_spec_ipv6_ext *)spec_offset;
+               spec_ipv6 = (struct ibv_exp_flow_spec_ipv6 *)spec_offset;
 
                /* The second specification must be IP. */
-               assert(spec_ipv6->type == IBV_EXP_FLOW_SPEC_IPV6_EXT);
+               assert(spec_ipv6->type == IBV_EXP_FLOW_SPEC_IPV6);
                assert(spec_ipv6->size == sizeof(*spec_ipv6));
 
                for (i = 0; i != RTE_DIM(desc->src_ip); ++i) {
@@ -403,20 +354,6 @@ priv_fdir_flow_add(struct priv *priv,
                           mask->ipv6_mask.dst_ip,
                           sizeof(spec_ipv6->mask.dst_ip));
 
-        spec_ipv6->val.next_hdr  = desc->proto & mask->ipv6_mask.proto;
-        spec_ipv6->mask.next_hdr = mask->ipv6_mask.proto;
-
-#ifdef TREX_PATCH
-        /* TOS */
-        if (desc->ip_id == 1) {
-            spec_ipv6->mask.traffic_class = 0x1;
-            spec_ipv6->val.traffic_class = 0x1;
-        } else {
-            spec_ipv6->mask.traffic_class = 0;
-            spec_ipv6->val.traffic_class = 0;
-        }
-#endif
-
                /* Update priority */
                attr->priority = 1;
 
@@ -894,10 +831,8 @@ priv_fdir_filter_add(struct priv *priv,
        /* Duplicate filters are currently unsupported. */
        mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
        if (mlx5_fdir_filter != NULL) {
-#ifndef TREX_PATCH
                ERROR("filter already exists");
-#endif
-        return EEXIST;
+               return EINVAL;
        }
 
        /* Create new flow director filter. */
@@ -1022,11 +957,9 @@ priv_fdir_filter_delete(struct priv *priv,
                return 0;
        }
 
-#ifndef TREX_PATCH
        ERROR("%p: flow director delete failed, cannot find filter",
              (void *)priv);
-#endif
-        return ENOENT;
+       return EINVAL;
 }
 
 /**
@@ -1111,6 +1044,14 @@ priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
        return ret;
 }
 
+static const struct rte_flow_ops mlx5_flow_ops = {
+       .validate = mlx5_flow_validate,
+       .create = mlx5_flow_create,
+       .destroy = mlx5_flow_destroy,
+       .flush = mlx5_flow_flush,
+       .query = NULL,
+};
+
 /**
  * Manage filter operations.
  *
@@ -1136,6 +1077,11 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
        struct priv *priv = dev->data->dev_private;
 
        switch (filter_type) {
+       case RTE_ETH_FILTER_GENERIC:
+               if (filter_op != RTE_ETH_FILTER_GET)
+                       return -EINVAL;
+               *(const void **)arg = &mlx5_flow_ops;
+               return 0;
        case RTE_ETH_FILTER_FDIR:
                priv_lock(priv);
                ret = priv_fdir_ctrl_func(priv, filter_op, arg);
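
The RTE_ETH_FILTER_GENERIC case added above is the hook through which the ethdev layer hands every rte_flow call to this PMD: the generic rte_flow wrappers ask the driver for its rte_flow_ops table once and then dispatch through it. A minimal sketch of that lookup, assuming the 17.02 rte_flow glue in lib/librte_ether (the helper name is illustrative, not part of this patch):

#include <rte_ethdev.h>
#include <rte_flow_driver.h>

/* Hypothetical helper mirroring what the ethdev layer does for every
 * rte_flow_*() call: query the PMD ops via the generic filter type. */
static const struct rte_flow_ops *
port_flow_ops(uint8_t port_id)
{
        struct rte_eth_dev *dev = &rte_eth_devices[port_id];
        const struct rte_flow_ops *ops = NULL;

        if (!dev->dev_ops->filter_ctrl ||
            dev->dev_ops->filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
                                      RTE_ETH_FILTER_GET, &ops) || !ops)
                return NULL; /* rte_flow unsupported on this port. */
        return ops; /* &mlx5_flow_ops on mlx5 ports. */
}
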
diff --git a/src/dpdk/drivers/net/mlx5/mlx5_flow.c b/src/dpdk/drivers/net/mlx5/mlx5_flow.c
new file mode 100644 (file)
index 0000000..23c1b5e
--- /dev/null
@@ -0,0 +1,1247 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2016 6WIND S.A.
+ *   Copyright 2016 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/queue.h>
+#include <string.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_ethdev.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_malloc.h>
+
+#include "mlx5.h"
+#include "mlx5_prm.h"
+
+static int
+mlx5_flow_create_eth(const struct rte_flow_item *item,
+                    const void *default_mask,
+                    void *data);
+
+static int
+mlx5_flow_create_vlan(const struct rte_flow_item *item,
+                     const void *default_mask,
+                     void *data);
+
+static int
+mlx5_flow_create_ipv4(const struct rte_flow_item *item,
+                     const void *default_mask,
+                     void *data);
+
+static int
+mlx5_flow_create_ipv6(const struct rte_flow_item *item,
+                     const void *default_mask,
+                     void *data);
+
+static int
+mlx5_flow_create_udp(const struct rte_flow_item *item,
+                    const void *default_mask,
+                    void *data);
+
+static int
+mlx5_flow_create_tcp(const struct rte_flow_item *item,
+                    const void *default_mask,
+                    void *data);
+
+static int
+mlx5_flow_create_vxlan(const struct rte_flow_item *item,
+                      const void *default_mask,
+                      void *data);
+
+struct rte_flow {
+       LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
+       struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
+       struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
+       struct ibv_qp *qp; /**< Verbs queue pair. */
+       struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
+       struct ibv_exp_wq *wq; /**< Verbs work queue. */
+       struct ibv_cq *cq; /**< Verbs completion queue. */
+       struct rxq *rxq; /**< Pointer to the queue, NULL if drop queue. */
+       uint32_t mark:1; /**< Set if the flow is marked. */
+};
+
+/** Static initializer for items. */
+#define ITEMS(...) \
+       (const enum rte_flow_item_type []){ \
+               __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
+       }
+
+/** Structure to generate a simple graph of layers supported by the NIC. */
+struct mlx5_flow_items {
+       /** List of possible actions for these items. */
+       const enum rte_flow_action_type *const actions;
+       /** Bit-masks corresponding to the possibilities for the item. */
+       const void *mask;
+       /**
+        * Default bit-masks to use when item->mask is not provided. When
+        * \default_mask is also NULL, the full supported bit-mask (\mask) is
+        * used instead.
+        */
+       const void *default_mask;
+       /** Bit-masks size in bytes. */
+       const unsigned int mask_sz;
+       /**
+        * Conversion function from rte_flow to NIC specific flow.
+        *
+        * @param item
+        *   rte_flow item to convert.
+        * @param default_mask
+        *   Default bit-masks to use when item->mask is not provided.
+        * @param data
+        *   Internal structure to store the conversion.
+        *
+        * @return
+        *   0 on success, negative value otherwise.
+        */
+       int (*convert)(const struct rte_flow_item *item,
+                      const void *default_mask,
+                      void *data);
+       /** Size in bytes of the destination structure. */
+       const unsigned int dst_sz;
+       /** List of possible following items.  */
+       const enum rte_flow_item_type *const items;
+};
+
+/** Valid action for this PMD. */
+static const enum rte_flow_action_type valid_actions[] = {
+       RTE_FLOW_ACTION_TYPE_DROP,
+       RTE_FLOW_ACTION_TYPE_QUEUE,
+       RTE_FLOW_ACTION_TYPE_MARK,
+       RTE_FLOW_ACTION_TYPE_END,
+};
+
+/** Graph of supported items and associated actions. */
+static const struct mlx5_flow_items mlx5_flow_items[] = {
+       [RTE_FLOW_ITEM_TYPE_END] = {
+               .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
+                              RTE_FLOW_ITEM_TYPE_VXLAN),
+       },
+       [RTE_FLOW_ITEM_TYPE_ETH] = {
+               .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
+                              RTE_FLOW_ITEM_TYPE_IPV4,
+                              RTE_FLOW_ITEM_TYPE_IPV6),
+               .actions = valid_actions,
+               .mask = &(const struct rte_flow_item_eth){
+                       .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+                       .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+               },
+               .default_mask = &rte_flow_item_eth_mask,
+               .mask_sz = sizeof(struct rte_flow_item_eth),
+               .convert = mlx5_flow_create_eth,
+               .dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
+       },
+       [RTE_FLOW_ITEM_TYPE_VLAN] = {
+               .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
+                              RTE_FLOW_ITEM_TYPE_IPV6),
+               .actions = valid_actions,
+               .mask = &(const struct rte_flow_item_vlan){
+                       .tci = -1,
+               },
+               .default_mask = &rte_flow_item_vlan_mask,
+               .mask_sz = sizeof(struct rte_flow_item_vlan),
+               .convert = mlx5_flow_create_vlan,
+               .dst_sz = 0,
+       },
+       [RTE_FLOW_ITEM_TYPE_IPV4] = {
+               .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
+                              RTE_FLOW_ITEM_TYPE_TCP),
+               .actions = valid_actions,
+               .mask = &(const struct rte_flow_item_ipv4){
+                       .hdr = {
+                               .src_addr = -1,
+                               .dst_addr = -1,
+                               .type_of_service = -1,
+                               .next_proto_id = -1,
+                       },
+               },
+               .default_mask = &rte_flow_item_ipv4_mask,
+               .mask_sz = sizeof(struct rte_flow_item_ipv4),
+               .convert = mlx5_flow_create_ipv4,
+               .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext),
+       },
+       [RTE_FLOW_ITEM_TYPE_IPV6] = {
+               .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
+                              RTE_FLOW_ITEM_TYPE_TCP),
+               .actions = valid_actions,
+               .mask = &(const struct rte_flow_item_ipv6){
+                       .hdr = {
+                               .src_addr = {
+                                       0xff, 0xff, 0xff, 0xff,
+                                       0xff, 0xff, 0xff, 0xff,
+                                       0xff, 0xff, 0xff, 0xff,
+                                       0xff, 0xff, 0xff, 0xff,
+                               },
+                               .dst_addr = {
+                                       0xff, 0xff, 0xff, 0xff,
+                                       0xff, 0xff, 0xff, 0xff,
+                                       0xff, 0xff, 0xff, 0xff,
+                                       0xff, 0xff, 0xff, 0xff,
+                               },
+                       },
+               },
+               .default_mask = &rte_flow_item_ipv6_mask,
+               .mask_sz = sizeof(struct rte_flow_item_ipv6),
+               .convert = mlx5_flow_create_ipv6,
+               .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6),
+       },
+       [RTE_FLOW_ITEM_TYPE_UDP] = {
+               .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
+               .actions = valid_actions,
+               .mask = &(const struct rte_flow_item_udp){
+                       .hdr = {
+                               .src_port = -1,
+                               .dst_port = -1,
+                       },
+               },
+               .default_mask = &rte_flow_item_udp_mask,
+               .mask_sz = sizeof(struct rte_flow_item_udp),
+               .convert = mlx5_flow_create_udp,
+               .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
+       },
+       [RTE_FLOW_ITEM_TYPE_TCP] = {
+               .actions = valid_actions,
+               .mask = &(const struct rte_flow_item_tcp){
+                       .hdr = {
+                               .src_port = -1,
+                               .dst_port = -1,
+                       },
+               },
+               .default_mask = &rte_flow_item_tcp_mask,
+               .mask_sz = sizeof(struct rte_flow_item_tcp),
+               .convert = mlx5_flow_create_tcp,
+               .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
+       },
+       [RTE_FLOW_ITEM_TYPE_VXLAN] = {
+               .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
+               .actions = valid_actions,
+               .mask = &(const struct rte_flow_item_vxlan){
+                       .vni = "\xff\xff\xff",
+               },
+               .default_mask = &rte_flow_item_vxlan_mask,
+               .mask_sz = sizeof(struct rte_flow_item_vxlan),
+               .convert = mlx5_flow_create_vxlan,
+               .dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
+       },
+};
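
This table behaves as a small grammar: a pattern is accepted only if each item is reachable from its predecessor through the .items list, starting from the END entry. As a hedged illustration (types only, no spec/mask), the chain below is accepted, while a pattern beginning directly with UDP is refused because END only links to ETH and VXLAN:

/* Accepted by the graph: END -> ETH -> IPV4 -> UDP -> END. */
static const struct rte_flow_item pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },
        { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
        { .type = RTE_FLOW_ITEM_TYPE_UDP },
        { .type = RTE_FLOW_ITEM_TYPE_END },
};
/* { UDP, END } would make priv_flow_validate() report "item not supported". */
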
+
+/** Structure to pass to the conversion function. */
+struct mlx5_flow {
+       struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
+       unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
+       uint32_t inner; /**< Set once VXLAN is encountered. */
+};
+
+struct mlx5_flow_action {
+       uint32_t queue:1; /**< Target is a receive queue. */
+       uint32_t drop:1; /**< Target is a drop queue. */
+       uint32_t mark:1; /**< Mark is present in the flow. */
+       uint32_t queue_id; /**< Identifier of the queue. */
+       uint32_t mark_id; /**< Mark identifier. */
+};
+
+/**
+ * Check support for a given item.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param mask[in]
+ *   Bit-masks covering supported fields to compare with spec, last and mask in
+ *   \item.
+ * @param size
+ *   Bit-Mask size in bytes.
+ *
+ * @return
+ *   0 on success.
+ */
+static int
+mlx5_flow_item_validate(const struct rte_flow_item *item,
+                       const uint8_t *mask, unsigned int size)
+{
+       int ret = 0;
+
+       if (!item->spec && (item->mask || item->last))
+               return -1;
+       if (item->spec && !item->mask) {
+               unsigned int i;
+               const uint8_t *spec = item->spec;
+
+               for (i = 0; i < size; ++i)
+                       if ((spec[i] | mask[i]) != mask[i])
+                               return -1;
+       }
+       if (item->last && !item->mask) {
+               unsigned int i;
+               const uint8_t *spec = item->last;
+
+               for (i = 0; i < size; ++i)
+                       if ((spec[i] | mask[i]) != mask[i])
+                               return -1;
+       }
+       if (item->mask) {
+               unsigned int i;
+               const uint8_t *spec = item->mask;
+
+               for (i = 0; i < size; ++i)
+                       if ((spec[i] | mask[i]) != mask[i])
+                               return -1;
+       }
+       if (item->spec && item->last) {
+               uint8_t spec[size];
+               uint8_t last[size];
+               const uint8_t *apply = mask;
+               unsigned int i;
+
+               if (item->mask)
+                       apply = item->mask;
+               for (i = 0; i < size; ++i) {
+                       spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
+                       last[i] = ((const uint8_t *)item->last)[i] & apply[i];
+               }
+               ret = memcmp(spec, last, size);
+       }
+       return ret;
+}
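
Put differently: every bit set in spec, last or mask must also be set in the PMD-supported mask, and when a spec/last range is given it must collapse to a single value under the applied mask. A hedged example of an item this check rejects, given the IPv4 mask declared above (source/destination address, TOS and protocol only):

/* Hypothetical item matching on IPv4 TTL; the TTL byte is outside the
 * supported mask, so mlx5_flow_item_validate() returns -1. */
static const struct rte_flow_item_ipv4 ttl_spec = { .hdr.time_to_live = 64 };
static const struct rte_flow_item_ipv4 ttl_mask = { .hdr.time_to_live = 0xff };
static const struct rte_flow_item bad_item = {
        .type = RTE_FLOW_ITEM_TYPE_IPV4,
        .spec = &ttl_spec,
        .mask = &ttl_mask,
};
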
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] pattern
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ * @param[in, out] flow
+ *   Flow structure to update.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_validate(struct priv *priv,
+                  const struct rte_flow_attr *attr,
+                  const struct rte_flow_item items[],
+                  const struct rte_flow_action actions[],
+                  struct rte_flow_error *error,
+                  struct mlx5_flow *flow)
+{
+       const struct mlx5_flow_items *cur_item = mlx5_flow_items;
+       struct mlx5_flow_action action = {
+               .queue = 0,
+               .drop = 0,
+               .mark = 0,
+       };
+
+       (void)priv;
+       if (attr->group) {
+               rte_flow_error_set(error, ENOTSUP,
+                                  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+                                  NULL,
+                                  "groups are not supported");
+               return -rte_errno;
+       }
+       if (attr->priority) {
+               rte_flow_error_set(error, ENOTSUP,
+                                  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+                                  NULL,
+                                  "priorities are not supported");
+               return -rte_errno;
+       }
+       if (attr->egress) {
+               rte_flow_error_set(error, ENOTSUP,
+                                  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+                                  NULL,
+                                  "egress is not supported");
+               return -rte_errno;
+       }
+       if (!attr->ingress) {
+               rte_flow_error_set(error, ENOTSUP,
+                                  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+                                  NULL,
+                                  "only ingress is supported");
+               return -rte_errno;
+       }
+       for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
+               const struct mlx5_flow_items *token = NULL;
+               unsigned int i;
+               int err;
+
+               if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+                       continue;
+               for (i = 0;
+                    cur_item->items &&
+                    cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
+                    ++i) {
+                       if (cur_item->items[i] == items->type) {
+                               token = &mlx5_flow_items[items->type];
+                               break;
+                       }
+               }
+               if (!token)
+                       goto exit_item_not_supported;
+               cur_item = token;
+               err = mlx5_flow_item_validate(items,
+                                             (const uint8_t *)cur_item->mask,
+                                             cur_item->mask_sz);
+               if (err)
+                       goto exit_item_not_supported;
+               if (flow->ibv_attr && cur_item->convert) {
+                       err = cur_item->convert(items,
+                                               (cur_item->default_mask ?
+                                                cur_item->default_mask :
+                                                cur_item->mask),
+                                               flow);
+                       if (err)
+                               goto exit_item_not_supported;
+               }
+               flow->offset += cur_item->dst_sz;
+       }
+       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
+               if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
+                       continue;
+               } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
+                       action.drop = 1;
+               } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
+                       const struct rte_flow_action_queue *queue =
+                               (const struct rte_flow_action_queue *)
+                               actions->conf;
+
+                       if (!queue || (queue->index > (priv->rxqs_n - 1)))
+                               goto exit_action_not_supported;
+                       action.queue = 1;
+               } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
+                       const struct rte_flow_action_mark *mark =
+                               (const struct rte_flow_action_mark *)
+                               actions->conf;
+
+                       if (!mark) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ACTION,
+                                                  actions,
+                                                  "mark must be defined");
+                               return -rte_errno;
+                       } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
+                               rte_flow_error_set(error, ENOTSUP,
+                                                  RTE_FLOW_ERROR_TYPE_ACTION,
+                                                  actions,
+                                                  "mark must be between 0"
+                                                  " and 16777199");
+                               return -rte_errno;
+                       }
+                       action.mark = 1;
+               } else {
+                       goto exit_action_not_supported;
+               }
+       }
+       if (action.mark && !flow->ibv_attr && !action.drop)
+               flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
+       if (!action.queue && !action.drop) {
+               rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
+                                  NULL, "no valid action");
+               return -rte_errno;
+       }
+       return 0;
+exit_item_not_supported:
+       rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+                          items, "item not supported");
+       return -rte_errno;
+exit_action_not_supported:
+       rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+                          actions, "action not supported");
+       return -rte_errno;
+}
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * @see rte_flow_validate()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_validate(struct rte_eth_dev *dev,
+                  const struct rte_flow_attr *attr,
+                  const struct rte_flow_item items[],
+                  const struct rte_flow_action actions[],
+                  struct rte_flow_error *error)
+{
+       struct priv *priv = dev->data->dev_private;
+       int ret;
+       struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };
+
+       priv_lock(priv);
+       ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
+       priv_unlock(priv);
+       return ret;
+}
+
+/**
+ * Convert Ethernet item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx5_flow_create_eth(const struct rte_flow_item *item,
+                    const void *default_mask,
+                    void *data)
+{
+       const struct rte_flow_item_eth *spec = item->spec;
+       const struct rte_flow_item_eth *mask = item->mask;
+       struct mlx5_flow *flow = (struct mlx5_flow *)data;
+       struct ibv_exp_flow_spec_eth *eth;
+       const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
+       unsigned int i;
+
+       ++flow->ibv_attr->num_of_specs;
+       flow->ibv_attr->priority = 2;
+       eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+       *eth = (struct ibv_exp_flow_spec_eth) {
+               .type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
+               .size = eth_size,
+       };
+       if (!spec)
+               return 0;
+       if (!mask)
+               mask = default_mask;
+       memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
+       memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
+       memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
+       memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
+       /* Remove unwanted bits from values. */
+       for (i = 0; i < ETHER_ADDR_LEN; ++i) {
+               eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
+               eth->val.src_mac[i] &= eth->mask.src_mac[i];
+       }
+       return 0;
+}
+
+/**
+ * Convert VLAN item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx5_flow_create_vlan(const struct rte_flow_item *item,
+                     const void *default_mask,
+                     void *data)
+{
+       const struct rte_flow_item_vlan *spec = item->spec;
+       const struct rte_flow_item_vlan *mask = item->mask;
+       struct mlx5_flow *flow = (struct mlx5_flow *)data;
+       struct ibv_exp_flow_spec_eth *eth;
+       const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
+
+       eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
+       if (!spec)
+               return 0;
+       if (!mask)
+               mask = default_mask;
+       eth->val.vlan_tag = spec->tci;
+       eth->mask.vlan_tag = mask->tci;
+       eth->val.vlan_tag &= eth->mask.vlan_tag;
+       return 0;
+}
+
+/**
+ * Convert IPv4 item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx5_flow_create_ipv4(const struct rte_flow_item *item,
+                     const void *default_mask,
+                     void *data)
+{
+       const struct rte_flow_item_ipv4 *spec = item->spec;
+       const struct rte_flow_item_ipv4 *mask = item->mask;
+       struct mlx5_flow *flow = (struct mlx5_flow *)data;
+       struct ibv_exp_flow_spec_ipv4_ext *ipv4;
+       unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);
+
+       ++flow->ibv_attr->num_of_specs;
+       flow->ibv_attr->priority = 1;
+       ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+       *ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
+               .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
+               .size = ipv4_size,
+       };
+       if (!spec)
+               return 0;
+       if (!mask)
+               mask = default_mask;
+       ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){
+               .src_ip = spec->hdr.src_addr,
+               .dst_ip = spec->hdr.dst_addr,
+               .proto = spec->hdr.next_proto_id,
+               .tos = spec->hdr.type_of_service,
+       };
+       ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){
+               .src_ip = mask->hdr.src_addr,
+               .dst_ip = mask->hdr.dst_addr,
+               .proto = mask->hdr.next_proto_id,
+               .tos = mask->hdr.type_of_service,
+       };
+       /* Remove unwanted bits from values. */
+       ipv4->val.src_ip &= ipv4->mask.src_ip;
+       ipv4->val.dst_ip &= ipv4->mask.dst_ip;
+       ipv4->val.proto &= ipv4->mask.proto;
+       ipv4->val.tos &= ipv4->mask.tos;
+       return 0;
+}
+
+/**
+ * Convert IPv6 item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx5_flow_create_ipv6(const struct rte_flow_item *item,
+                     const void *default_mask,
+                     void *data)
+{
+       const struct rte_flow_item_ipv6 *spec = item->spec;
+       const struct rte_flow_item_ipv6 *mask = item->mask;
+       struct mlx5_flow *flow = (struct mlx5_flow *)data;
+       struct ibv_exp_flow_spec_ipv6 *ipv6;
+       unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6);
+       unsigned int i;
+
+       ++flow->ibv_attr->num_of_specs;
+       flow->ibv_attr->priority = 1;
+       ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+       *ipv6 = (struct ibv_exp_flow_spec_ipv6) {
+               .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6,
+               .size = ipv6_size,
+       };
+       if (!spec)
+               return 0;
+       if (!mask)
+               mask = default_mask;
+       memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
+              RTE_DIM(ipv6->val.src_ip));
+       memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
+              RTE_DIM(ipv6->val.dst_ip));
+       memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
+              RTE_DIM(ipv6->mask.src_ip));
+       memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
+              RTE_DIM(ipv6->mask.dst_ip));
+       /* Remove unwanted bits from values. */
+       for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
+               ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
+               ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
+       }
+       return 0;
+}
+
+/**
+ * Convert UDP item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx5_flow_create_udp(const struct rte_flow_item *item,
+                    const void *default_mask,
+                    void *data)
+{
+       const struct rte_flow_item_udp *spec = item->spec;
+       const struct rte_flow_item_udp *mask = item->mask;
+       struct mlx5_flow *flow = (struct mlx5_flow *)data;
+       struct ibv_exp_flow_spec_tcp_udp *udp;
+       unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
+
+       ++flow->ibv_attr->num_of_specs;
+       flow->ibv_attr->priority = 0;
+       udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+       *udp = (struct ibv_exp_flow_spec_tcp_udp) {
+               .type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
+               .size = udp_size,
+       };
+       if (!spec)
+               return 0;
+       if (!mask)
+               mask = default_mask;
+       udp->val.dst_port = spec->hdr.dst_port;
+       udp->val.src_port = spec->hdr.src_port;
+       udp->mask.dst_port = mask->hdr.dst_port;
+       udp->mask.src_port = mask->hdr.src_port;
+       /* Remove unwanted bits from values. */
+       udp->val.src_port &= udp->mask.src_port;
+       udp->val.dst_port &= udp->mask.dst_port;
+       return 0;
+}
+
+/**
+ * Convert TCP item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx5_flow_create_tcp(const struct rte_flow_item *item,
+                    const void *default_mask,
+                    void *data)
+{
+       const struct rte_flow_item_tcp *spec = item->spec;
+       const struct rte_flow_item_tcp *mask = item->mask;
+       struct mlx5_flow *flow = (struct mlx5_flow *)data;
+       struct ibv_exp_flow_spec_tcp_udp *tcp;
+       unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
+
+       ++flow->ibv_attr->num_of_specs;
+       flow->ibv_attr->priority = 0;
+       tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+       *tcp = (struct ibv_exp_flow_spec_tcp_udp) {
+               .type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
+               .size = tcp_size,
+       };
+       if (!spec)
+               return 0;
+       if (!mask)
+               mask = default_mask;
+       tcp->val.dst_port = spec->hdr.dst_port;
+       tcp->val.src_port = spec->hdr.src_port;
+       tcp->mask.dst_port = mask->hdr.dst_port;
+       tcp->mask.src_port = mask->hdr.src_port;
+       /* Remove unwanted bits from values. */
+       tcp->val.src_port &= tcp->mask.src_port;
+       tcp->val.dst_port &= tcp->mask.dst_port;
+       return 0;
+}
+
+/**
+ * Convert VXLAN item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx5_flow_create_vxlan(const struct rte_flow_item *item,
+                      const void *default_mask,
+                      void *data)
+{
+       const struct rte_flow_item_vxlan *spec = item->spec;
+       const struct rte_flow_item_vxlan *mask = item->mask;
+       struct mlx5_flow *flow = (struct mlx5_flow *)data;
+       struct ibv_exp_flow_spec_tunnel *vxlan;
+       unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
+       union vni {
+               uint32_t vlan_id;
+               uint8_t vni[4];
+       } id;
+
+       ++flow->ibv_attr->num_of_specs;
+       flow->ibv_attr->priority = 0;
+       id.vni[0] = 0;
+       vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+       *vxlan = (struct ibv_exp_flow_spec_tunnel) {
+               .type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
+               .size = size,
+       };
+       flow->inner = IBV_EXP_FLOW_SPEC_INNER;
+       if (!spec)
+               return 0;
+       if (!mask)
+               mask = default_mask;
+       memcpy(&id.vni[1], spec->vni, 3);
+       vxlan->val.tunnel_id = id.vlan_id;
+       memcpy(&id.vni[1], mask->vni, 3);
+       vxlan->mask.tunnel_id = id.vlan_id;
+       /* Remove unwanted bits from values. */
+       vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
+       return 0;
+}
+
+/**
+ * Convert mark/flag action to Verbs specification.
+ *
+ * @param flow
+ *   Pointer to MLX5 flow structure.
+ * @param mark_id
+ *   Mark identifier.
+ */
+static int
+mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
+{
+       struct ibv_exp_flow_spec_action_tag *tag;
+       unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);
+
+       tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+       *tag = (struct ibv_exp_flow_spec_action_tag){
+               .type = IBV_EXP_FLOW_SPEC_ACTION_TAG,
+               .size = size,
+               .tag_id = mlx5_flow_mark_set(mark_id),
+       };
+       ++flow->ibv_attr->num_of_specs;
+       return 0;
+}
+
+/**
+ * Complete flow rule creation.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param ibv_attr
+ *   Verbs flow attributes.
+ * @param action
+ *   Target action structure.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A flow if the rule could be created.
+ */
+static struct rte_flow *
+priv_flow_create_action_queue(struct priv *priv,
+                             struct ibv_exp_flow_attr *ibv_attr,
+                             struct mlx5_flow_action *action,
+                             struct rte_flow_error *error)
+{
+       struct rxq_ctrl *rxq;
+       struct rte_flow *rte_flow;
+
+       assert(priv->pd);
+       assert(priv->ctx);
+       rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
+       if (!rte_flow) {
+               rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+                                  NULL, "cannot allocate flow memory");
+               return NULL;
+       }
+       if (action->drop) {
+               rte_flow->cq =
+                       ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
+                                         &(struct ibv_exp_cq_init_attr){
+                                                 .comp_mask = 0,
+                                         });
+               if (!rte_flow->cq) {
+                       rte_flow_error_set(error, ENOMEM,
+                                          RTE_FLOW_ERROR_TYPE_HANDLE,
+                                          NULL, "cannot allocate CQ");
+                       goto error;
+               }
+               rte_flow->wq = ibv_exp_create_wq(priv->ctx,
+                                                &(struct ibv_exp_wq_init_attr){
+                                                .wq_type = IBV_EXP_WQT_RQ,
+                                                .max_recv_wr = 1,
+                                                .max_recv_sge = 1,
+                                                .pd = priv->pd,
+                                                .cq = rte_flow->cq,
+                                                });
+       } else {
+               rxq = container_of((*priv->rxqs)[action->queue_id],
+                                  struct rxq_ctrl, rxq);
+               rte_flow->rxq = &rxq->rxq;
+               rxq->rxq.mark |= action->mark;
+               rte_flow->wq = rxq->wq;
+       }
+       rte_flow->mark = action->mark;
+       rte_flow->ibv_attr = ibv_attr;
+       rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
+               priv->ctx,
+               &(struct ibv_exp_rwq_ind_table_init_attr){
+                       .pd = priv->pd,
+                       .log_ind_tbl_size = 0,
+                       .ind_tbl = &rte_flow->wq,
+                       .comp_mask = 0,
+               });
+       if (!rte_flow->ind_table) {
+               rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+                                  NULL, "cannot allocate indirection table");
+               goto error;
+       }
+       rte_flow->qp = ibv_exp_create_qp(
+               priv->ctx,
+               &(struct ibv_exp_qp_init_attr){
+                       .qp_type = IBV_QPT_RAW_PACKET,
+                       .comp_mask =
+                               IBV_EXP_QP_INIT_ATTR_PD |
+                               IBV_EXP_QP_INIT_ATTR_PORT |
+                               IBV_EXP_QP_INIT_ATTR_RX_HASH,
+                       .pd = priv->pd,
+                       .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
+                               .rx_hash_function =
+                                       IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
+                               .rx_hash_key_len = rss_hash_default_key_len,
+                               .rx_hash_key = rss_hash_default_key,
+                               .rx_hash_fields_mask = 0,
+                               .rwq_ind_tbl = rte_flow->ind_table,
+                       },
+                       .port_num = priv->port,
+               });
+       if (!rte_flow->qp) {
+               rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+                                  NULL, "cannot allocate QP");
+               goto error;
+       }
+       if (!priv->started)
+               return rte_flow;
+       rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
+                                                rte_flow->ibv_attr);
+       if (!rte_flow->ibv_flow) {
+               rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+                                  NULL, "flow rule creation failure");
+               goto error;
+       }
+       return rte_flow;
+error:
+       assert(rte_flow);
+       if (rte_flow->qp)
+               ibv_destroy_qp(rte_flow->qp);
+       if (rte_flow->ind_table)
+               ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
+       if (!rte_flow->rxq && rte_flow->wq)
+               ibv_exp_destroy_wq(rte_flow->wq);
+       if (!rte_flow->rxq && rte_flow->cq)
+               ibv_destroy_cq(rte_flow->cq);
+       rte_free(rte_flow->ibv_attr);
+       rte_free(rte_flow);
+       return NULL;
+}
+
+/**
+ * Convert a flow.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] pattern
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A flow on success, NULL otherwise.
+ */
+static struct rte_flow *
+priv_flow_create(struct priv *priv,
+                const struct rte_flow_attr *attr,
+                const struct rte_flow_item items[],
+                const struct rte_flow_action actions[],
+                struct rte_flow_error *error)
+{
+       struct rte_flow *rte_flow;
+       struct mlx5_flow_action action;
+       struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
+       int err;
+
+       err = priv_flow_validate(priv, attr, items, actions, error, &flow);
+       if (err)
+               goto exit;
+       flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
+       flow.offset = sizeof(struct ibv_exp_flow_attr);
+       if (!flow.ibv_attr) {
+               rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+                                  NULL, "cannot allocate ibv_attr memory");
+               goto exit;
+       }
+       *flow.ibv_attr = (struct ibv_exp_flow_attr){
+               .type = IBV_EXP_FLOW_ATTR_NORMAL,
+               .size = sizeof(struct ibv_exp_flow_attr),
+               .priority = attr->priority,
+               .num_of_specs = 0,
+               .port = 0,
+               .flags = 0,
+               .reserved = 0,
+       };
+       flow.inner = 0;
+       claim_zero(priv_flow_validate(priv, attr, items, actions,
+                                     error, &flow));
+       action = (struct mlx5_flow_action){
+               .queue = 0,
+               .drop = 0,
+               .mark = 0,
+               .mark_id = MLX5_FLOW_MARK_DEFAULT,
+       };
+       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
+               if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
+                       continue;
+               } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
+                       action.queue = 1;
+                       action.queue_id =
+                               ((const struct rte_flow_action_queue *)
+                                actions->conf)->index;
+               } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
+                       action.drop = 1;
+                       action.mark = 0;
+               } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
+                       const struct rte_flow_action_mark *mark =
+                               (const struct rte_flow_action_mark *)
+                               actions->conf;
+
+                       if (mark)
+                               action.mark_id = mark->id;
+                       action.mark = !action.drop;
+               } else {
+                       rte_flow_error_set(error, ENOTSUP,
+                                          RTE_FLOW_ERROR_TYPE_ACTION,
+                                          actions, "unsupported action");
+                       goto exit;
+               }
+       }
+       if (action.mark) {
+               mlx5_flow_create_flag_mark(&flow, action.mark_id);
+               flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
+       }
+       rte_flow = priv_flow_create_action_queue(priv, flow.ibv_attr,
+                                                &action, error);
+       return rte_flow;
+exit:
+       rte_free(flow.ibv_attr);
+       return NULL;
+}
+
+/**
+ * Create a flow.
+ *
+ * @see rte_flow_create()
+ * @see rte_flow_ops
+ */
+struct rte_flow *
+mlx5_flow_create(struct rte_eth_dev *dev,
+                const struct rte_flow_attr *attr,
+                const struct rte_flow_item items[],
+                const struct rte_flow_action actions[],
+                struct rte_flow_error *error)
+{
+       struct priv *priv = dev->data->dev_private;
+       struct rte_flow *flow;
+
+       priv_lock(priv);
+       flow = priv_flow_create(priv, attr, items, actions, error);
+       if (flow) {
+               LIST_INSERT_HEAD(&priv->flows, flow, next);
+               DEBUG("Flow created %p", (void *)flow);
+       }
+       priv_unlock(priv);
+       return flow;
+}
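
From an application's point of view, everything above is reached through the public rte_flow API. A minimal, hedged usage sketch (port, queue and mark numbers are made up, and the port is assumed to be configured with at least four Rx queues) that validates and then installs a rule steering IPv4/UDP traffic to queue 3 with mark 7:

#include <rte_flow.h>

static struct rte_flow *
steer_udp_to_queue3(uint8_t port_id, struct rte_flow_error *err)
{
        const struct rte_flow_attr attr = { .ingress = 1 };
        const struct rte_flow_item pattern[] = {
                { .type = RTE_FLOW_ITEM_TYPE_ETH },
                { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
                { .type = RTE_FLOW_ITEM_TYPE_UDP },
                { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        const struct rte_flow_action actions[] = {
                { .type = RTE_FLOW_ACTION_TYPE_MARK,
                  .conf = &(const struct rte_flow_action_mark){ .id = 7 } },
                { .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                  .conf = &(const struct rte_flow_action_queue){ .index = 3 } },
                { .type = RTE_FLOW_ACTION_TYPE_END },
        };

        if (rte_flow_validate(port_id, &attr, pattern, actions, err))
                return NULL;
        return rte_flow_create(port_id, &attr, pattern, actions, err);
}
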
+
+/**
+ * Destroy a flow.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] flow
+ *   Flow to destroy.
+ */
+static void
+priv_flow_destroy(struct priv *priv,
+                 struct rte_flow *flow)
+{
+       (void)priv;
+       LIST_REMOVE(flow, next);
+       if (flow->ibv_flow)
+               claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
+       if (flow->qp)
+               claim_zero(ibv_destroy_qp(flow->qp));
+       if (flow->ind_table)
+               claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
+       if (!flow->rxq && flow->wq)
+               claim_zero(ibv_exp_destroy_wq(flow->wq));
+       if (!flow->rxq && flow->cq)
+               claim_zero(ibv_destroy_cq(flow->cq));
+       if (flow->mark) {
+               struct rte_flow *tmp;
+               uint32_t mark_n = 0;
+
+               for (tmp = LIST_FIRST(&priv->flows);
+                    tmp;
+                    tmp = LIST_NEXT(tmp, next)) {
+                       if ((flow->rxq == tmp->rxq) && tmp->mark)
+                               ++mark_n;
+               }
+               flow->rxq->mark = !!mark_n;
+       }
+       rte_free(flow->ibv_attr);
+       DEBUG("Flow destroyed %p", (void *)flow);
+       rte_free(flow);
+}
+
+/**
+ * Destroy a flow.
+ *
+ * @see rte_flow_destroy()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_destroy(struct rte_eth_dev *dev,
+                 struct rte_flow *flow,
+                 struct rte_flow_error *error)
+{
+       struct priv *priv = dev->data->dev_private;
+
+       (void)error;
+       priv_lock(priv);
+       priv_flow_destroy(priv, flow);
+       priv_unlock(priv);
+       return 0;
+}
+
+/**
+ * Destroy all flows.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static void
+priv_flow_flush(struct priv *priv)
+{
+       while (!LIST_EMPTY(&priv->flows)) {
+               struct rte_flow *flow;
+
+               flow = LIST_FIRST(&priv->flows);
+               priv_flow_destroy(priv, flow);
+       }
+}
+
+/**
+ * Destroy all flows.
+ *
+ * @see rte_flow_flush()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_flush(struct rte_eth_dev *dev,
+               struct rte_flow_error *error)
+{
+       struct priv *priv = dev->data->dev_private;
+
+       (void)error;
+       priv_lock(priv);
+       priv_flow_flush(priv);
+       priv_unlock(priv);
+       return 0;
+}
+
+/**
+ * Remove all flows.
+ *
+ * Called by dev_stop() to remove all flows.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+void
+priv_flow_stop(struct priv *priv)
+{
+       struct rte_flow *flow;
+
+       for (flow = LIST_FIRST(&priv->flows);
+            flow;
+            flow = LIST_NEXT(flow, next)) {
+               claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
+               flow->ibv_flow = NULL;
+               if (flow->mark)
+                       flow->rxq->mark = 0;
+               DEBUG("Flow %p removed", (void *)flow);
+       }
+}
+
+/**
+ * Add all flows.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, an errno value otherwise and rte_errno is set.
+ */
+int
+priv_flow_start(struct priv *priv)
+{
+       struct rte_flow *flow;
+
+       for (flow = LIST_FIRST(&priv->flows);
+            flow;
+            flow = LIST_NEXT(flow, next)) {
+               flow->ibv_flow = ibv_exp_create_flow(flow->qp,
+                                                    flow->ibv_attr);
+               if (!flow->ibv_flow) {
+                       DEBUG("Flow %p cannot be applied", (void *)flow);
+                       rte_errno = EINVAL;
+                       return rte_errno;
+               }
+               DEBUG("Flow %p applied", (void *)flow);
+               if (flow->rxq)
+                       flow->rxq->mark |= flow->mark;
+       }
+       return 0;
+}
index 8426adb..755b5d7 100644 (file)
@@ -34,6 +34,8 @@
 #ifndef RTE_PMD_MLX5_PRM_H_
 #define RTE_PMD_MLX5_PRM_H_
 
+#include <assert.h>
+
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
 #ifdef PEDANTIC
@@ -44,6 +46,7 @@
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 
+#include <rte_vect.h>
 #include "mlx5_autoconf.h"
 
 /* Get CQE owner bit. */
@@ -61,9 +64,6 @@
 /* Invalidate a CQE. */
 #define MLX5_CQE_INVALIDATE (MLX5_CQE_INVALID << 4)
 
-/* CQE value to inform that VLAN is stripped. */
-#define MLX5_CQE_VLAN_STRIPPED 0x1
-
 /* Maximum number of packets a multi-packet WQE can handle. */
 #define MLX5_MPW_DSEG_MAX 5
 
 /* Room for inline data in multi-packet WQE. */
 #define MLX5_MWQE64_INL_DATA 28
 
-//#ifndef HAVE_VERBS_MLX5_OPCODE_TSO
-//#define MLX5_OPCODE_TSO MLX5_OPCODE_LSO_MPW /* Compat with OFED 3.3. */
-//#endif
+#ifndef HAVE_VERBS_MLX5_OPCODE_TSO
+#define MLX5_OPCODE_TSO MLX5_OPCODE_LSO_MPW /* Compat with OFED 3.3. */
+#endif
+
+/* CQE value to inform that VLAN is stripped. */
+#define MLX5_CQE_VLAN_STRIPPED (1u << 0)
 
-/* IPv4 packet. */
-#define MLX5_CQE_RX_IPV4_PACKET (1u << 2)
+/* IPv4 options. */
+#define MLX5_CQE_RX_IP_EXT_OPTS_PACKET (1u << 1)
 
 /* IPv6 packet. */
-#define MLX5_CQE_RX_IPV6_PACKET (1u << 3)
+#define MLX5_CQE_RX_IPV6_PACKET (1u << 2)
+
+/* IPv4 packet. */
+#define MLX5_CQE_RX_IPV4_PACKET (1u << 3)
+
+/* TCP packet. */
+#define MLX5_CQE_RX_TCP_PACKET (1u << 4)
+
+/* UDP packet. */
+#define MLX5_CQE_RX_UDP_PACKET (1u << 5)
+
+/* IP is fragmented. */
+#define MLX5_CQE_RX_IP_FRAG_PACKET (1u << 7)
 
-/* Outer IPv4 packet. */
-#define MLX5_CQE_RX_OUTER_IPV4_PACKET (1u << 7)
+/* L2 header is valid. */
+#define MLX5_CQE_RX_L2_HDR_VALID (1u << 8)
 
-/* Outer IPv6 packet. */
-#define MLX5_CQE_RX_OUTER_IPV6_PACKET (1u << 8)
+/* L3 header is valid. */
+#define MLX5_CQE_RX_L3_HDR_VALID (1u << 9)
+
+/* L4 header is valid. */
+#define MLX5_CQE_RX_L4_HDR_VALID (1u << 10)
+
+/* Outer packet, 0 IPv4, 1 IPv6. */
+#define MLX5_CQE_RX_OUTER_PACKET (1u << 1)
 
 /* Tunnel packet bit in the CQE. */
-#define MLX5_CQE_RX_TUNNEL_PACKET (1u << 4)
+#define MLX5_CQE_RX_TUNNEL_PACKET (1u << 0)
 
-/* Outer IP checksum OK. */
-#define MLX5_CQE_RX_OUTER_IP_CSUM_OK (1u << 5)
+/* INVALID is used by packets matching no flow rules. */
+#define MLX5_FLOW_MARK_INVALID 0
 
-/* Outer UDP header and checksum OK. */
-#define MLX5_CQE_RX_OUTER_TCP_UDP_CSUM_OK (1u << 6)
+/* Maximum allowed value to mark a packet. */
+#define MLX5_FLOW_MARK_MAX 0xfffff0
+
+/* Default mark value used when none is provided. */
+#define MLX5_FLOW_MARK_DEFAULT 0xffffff
 
 /* Subset of struct mlx5_wqe_eth_seg. */
 struct mlx5_wqe_eth_seg_small {
@@ -114,12 +138,19 @@ struct mlx5_wqe_eth_seg_small {
        uint32_t rsvd2;
        uint16_t inline_hdr_sz;
        uint8_t inline_hdr[2];
-};
+} __rte_aligned(MLX5_WQE_DWORD_SIZE);
 
 struct mlx5_wqe_inl_small {
        uint32_t byte_cnt;
        uint8_t raw;
-};
+} __rte_aligned(MLX5_WQE_DWORD_SIZE);
+
+struct mlx5_wqe_ctrl {
+       uint32_t ctrl0;
+       uint32_t ctrl1;
+       uint32_t ctrl2;
+       uint32_t ctrl3;
+} __rte_aligned(MLX5_WQE_DWORD_SIZE);
 
 /* Small common part of the WQE. */
 struct mlx5_wqe {
@@ -127,11 +158,17 @@ struct mlx5_wqe {
        struct mlx5_wqe_eth_seg_small eseg;
 };
 
+/* Vectorize WQE header. */
+struct mlx5_wqe_v {
+       rte_v128u32_t ctrl;
+       rte_v128u32_t eseg;
+};
+
 /* WQE. */
 struct mlx5_wqe64 {
        struct mlx5_wqe hdr;
        uint8_t raw[32];
-} __rte_aligned(64);
+} __rte_aligned(MLX5_WQE_SIZE);
 
 /* MPW session status. */
 enum mlx5_mpw_state {
@@ -163,16 +200,72 @@ struct mlx5_cqe {
        uint32_t rx_hash_res;
        uint8_t rx_hash_type;
        uint8_t rsvd1[11];
-       uint8_t hds_ip_ext;
-       uint8_t l4_hdr_type_etc;
+       uint16_t hdr_type_etc;
        uint16_t vlan_info;
        uint8_t rsvd2[12];
        uint32_t byte_cnt;
        uint64_t timestamp;
-       uint8_t rsvd3[4];
+       uint32_t sop_drop_qpn;
        uint16_t wqe_counter;
        uint8_t rsvd4;
        uint8_t op_own;
 };
 
+/**
+ * Convert a user mark to flow mark.
+ *
+ * @param val
+ *   Mark value to convert.
+ *
+ * @return
+ *   Converted mark value.
+ */
+static inline uint32_t
+mlx5_flow_mark_set(uint32_t val)
+{
+       uint32_t ret;
+
+       /*
+        * Add one to the user value to differentiate un-marked flows from
+        * marked flows.
+        */
+       ++val;
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+       /*
+        * Mark is 24 bits (minus reserved values) but is stored on a 32 bit
+        * word, byte-swapped by the kernel on little-endian systems. In this
+        * case, left-shifting the resulting big-endian value ensures the
+        * least significant 24 bits are retained when converting it back.
+        */
+       ret = rte_cpu_to_be_32(val) >> 8;
+#else
+       ret = val;
+#endif
+       assert(ret <= MLX5_FLOW_MARK_MAX);
+       return ret;
+}
+
+/**
+ * Convert a mark to user mark.
+ *
+ * @param val
+ *   Mark value to convert.
+ *
+ * @return
+ *   Converted mark value.
+ */
+static inline uint32_t
+mlx5_flow_mark_get(uint32_t val)
+{
+       /*
+        * Subtract one from the retrieved value. It was added by
+        * mlx5_flow_mark_set() to distinguish unmarked flows.
+        */
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+       return (val >> 8) - 1;
+#else
+       return val - 1;
+#endif
+}
+
 #endif /* RTE_PMD_MLX5_PRM_H_ */
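
A worked example of the little-endian path, computed directly from mlx5_flow_mark_set() above (sketch only, not driver code): user mark 0 becomes 1 after the increment, 0x01000000 after the byte swap and 0x00010000 after the shift; mark 5 likewise becomes 0x00060000. Both results are non-zero and below MLX5_FLOW_MARK_MAX, so MLX5_FLOW_MARK_INVALID (0) keeps identifying unmarked packets.

/* On a little-endian host: */
assert(mlx5_flow_mark_set(0) == 0x00010000);
assert(mlx5_flow_mark_set(5) == 0x00060000);
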
index c5746fa..28e93d3 100644 (file)
@@ -102,7 +102,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
                                ETH_RSS_FRAG_IPV4),
                .flow_priority = 1,
                .flow_spec.ipv4 = {
-                       .type = IBV_EXP_FLOW_SPEC_IPV4_EXT,
+                       .type = IBV_EXP_FLOW_SPEC_IPV4,
                        .size = sizeof(hash_rxq_init[0].flow_spec.ipv4),
                },
                .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
@@ -140,7 +140,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
                                ETH_RSS_FRAG_IPV6),
                .flow_priority = 1,
                .flow_spec.ipv6 = {
-                       .type = IBV_EXP_FLOW_SPEC_IPV6_EXT,
+                       .type = IBV_EXP_FLOW_SPEC_IPV6,
                        .size = sizeof(hash_rxq_init[0].flow_spec.ipv6),
                },
                .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
index b56c0a1..88b0354 100644 (file)
 #include "mlx5_defs.h"
 #include "mlx5_prm.h"
 
-//#define MLX5_OPCODE_TSO 0xe
+static inline int
+check_cqe(volatile struct mlx5_cqe *cqe,
+         unsigned int cqes_n, const uint16_t ci)
+         __attribute__((always_inline));
+
+static inline void
+txq_complete(struct txq *txq) __attribute__((always_inline));
+
+static inline uint32_t
+txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
+       __attribute__((always_inline));
+
+static inline void
+mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe)
+       __attribute__((always_inline));
+
+static inline uint32_t
+rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
+       __attribute__((always_inline));
+
+static inline int
+mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
+                uint16_t cqe_cnt, uint32_t *rss_hash)
+                __attribute__((always_inline));
+
+static inline uint32_t
+rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe)
+                  __attribute__((always_inline));
 
 #ifndef NDEBUG
 
@@ -86,7 +113,7 @@ static inline int
 check_cqe_seen(volatile struct mlx5_cqe *cqe)
 {
        static const uint8_t magic[] = "seen";
-       volatile uint8_t (*buf)[sizeof(cqe->rsvd3)] = &cqe->rsvd3;
+       volatile uint8_t (*buf)[sizeof(cqe->rsvd0)] = &cqe->rsvd0;
        int ret = 1;
        unsigned int i;
 
@@ -100,11 +127,6 @@ check_cqe_seen(volatile struct mlx5_cqe *cqe)
 
 #endif /* NDEBUG */
 
-static inline int
-check_cqe(volatile struct mlx5_cqe *cqe,
-         unsigned int cqes_n, const uint16_t ci)
-         __attribute__((always_inline));
-
 /**
  * Check whether CQE is valid.
  *
@@ -154,8 +176,23 @@ check_cqe(volatile struct mlx5_cqe *cqe,
        return 0;
 }
 
-static inline void
-txq_complete(struct txq *txq) __attribute__((always_inline));
+/**
+ * Return the address of the WQE.
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ * @param  wqe_ci
+ *   WQE consumer index.
+ *
+ * @return
+ *   WQE address.
+ */
+static inline uintptr_t *
+tx_mlx5_wqe(struct txq *txq, uint16_t ci)
+{
+       ci &= ((1 << txq->wqe_n) - 1);
+       return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);
+}
 
 /**
  * Manage TX completions.
@@ -175,7 +212,7 @@ txq_complete(struct txq *txq)
        uint16_t elts_tail;
        uint16_t cq_ci = txq->cq_ci;
        volatile struct mlx5_cqe *cqe = NULL;
-       volatile struct mlx5_wqe *wqe;
+       volatile struct mlx5_wqe_ctrl *ctrl;
 
        do {
                volatile struct mlx5_cqe *tmp;
@@ -201,9 +238,10 @@ txq_complete(struct txq *txq)
        } while (1);
        if (unlikely(cqe == NULL))
                return;
-       wqe = &(*txq->wqes)[htons(cqe->wqe_counter) &
-                           ((1 << txq->wqe_n) - 1)].hdr;
-       elts_tail = wqe->ctrl[3];
+       txq->wqe_pi = ntohs(cqe->wqe_counter);
+       ctrl = (volatile struct mlx5_wqe_ctrl *)
+               tx_mlx5_wqe(txq, txq->wqe_pi);
+       elts_tail = ctrl->ctrl3;
        assert(elts_tail < (1 << txq->wqe_n));
        /* Free buffers. */
        while (elts_free != elts_tail) {
@@ -248,10 +286,6 @@ txq_mb2mp(struct rte_mbuf *buf)
        return buf->pool;
 }
 
-static inline uint32_t
-txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
-       __attribute__((always_inline));
-
 /**
  * Get Memory Region (MR) <-> Memory Pool (MP) association from txq->mp2mr[].
  * Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full,
@@ -294,57 +328,20 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
  *
  * @param txq
  *   Pointer to TX queue structure.
+ * @param wqe
+ *   Pointer to the last WQE posted in the NIC.
  */
 static inline void
-mlx5_tx_dbrec(struct txq *txq)
+mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe)
 {
-       uint8_t *dst = (uint8_t *)((uintptr_t)txq->bf_reg + txq->bf_offset);
-       uint32_t data[4] = {
-               htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND),
-               htonl(txq->qp_num_8s),
-               0,
-               0,
-       };
+       uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
+       volatile uint64_t *src = ((volatile uint64_t *)wqe);
+
        rte_wmb();
        *txq->qp_db = htonl(txq->wqe_ci);
        /* Ensure ordering between DB record and BF copy. */
        rte_wmb();
-       memcpy(dst, (uint8_t *)data, 16);
-       txq->bf_offset ^= (1 << txq->bf_buf_size);
-}
-
-/**
- * Prefetch a CQE.
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param cqe_ci
- *   CQE consumer index.
- */
-static inline void
-tx_prefetch_cqe(struct txq *txq, uint16_t ci)
-{
-       volatile struct mlx5_cqe *cqe;
-
-       cqe = &(*txq->cqes)[ci & ((1 << txq->cqe_n) - 1)];
-       rte_prefetch0(cqe);
-}
-
-/**
- * Prefetch a WQE.
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param  wqe_ci
- *   WQE consumer index.
- */
-static inline void
-tx_prefetch_wqe(struct txq *txq, uint16_t ci)
-{
-       volatile struct mlx5_wqe64 *wqe;
-
-       wqe = &(*txq->wqes)[ci & ((1 << txq->wqe_n) - 1)];
-       rte_prefetch0(wqe);
+       *dst = *src;
 }
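
The new doorbell routine performs two ordered stores: the doorbell record publishes the producer index, then the first 64 bits of the last WQE are copied to the BlueFlame/UAR register, with a write barrier before each store. A sketch of that ordering using C11 fences as stand-ins for rte_wmb(); the struct and field names here are illustrative, not the PMD's:

    #include <stdatomic.h>
    #include <stdint.h>

    /* Illustrative stand-ins for the txq fields touched by mlx5_tx_dbrec(). */
    struct example_txq {
            volatile uint32_t *qp_db;  /* Doorbell record in host memory. */
            volatile uint64_t *bf_reg; /* Mapped device register. */
            uint16_t wqe_ci;
    };

    /* Two ordered stores: publish the producer index, then copy the first
     * 8 bytes of the last WQE to the register.  The fences stand in for
     * rte_wmb(); the real code also converts wqe_ci with htonl(). */
    static void example_tx_doorbell(struct example_txq *txq, const uint64_t *wqe)
    {
            atomic_thread_fence(memory_order_release); /* WQE data before index. */
            *txq->qp_db = txq->wqe_ci;
            atomic_thread_fence(memory_order_release); /* Index before register. */
            *txq->bf_reg = *wqe;
    }

    int main(void)
    {
            uint32_t db = 0;
            uint64_t reg = 0;
            uint64_t wqe0 = 0x1122334455667788ULL;
            struct example_txq txq = { .qp_db = &db, .bf_reg = &reg, .wqe_ci = 7 };

            example_tx_doorbell(&txq, &wqe0);
            return !(db == 7 && reg == wqe0);
    }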
 
 /**
@@ -369,8 +366,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
        unsigned int i = 0;
        unsigned int j = 0;
        unsigned int max;
+       uint16_t max_wqe;
        unsigned int comp;
-       volatile struct mlx5_wqe *wqe = NULL;
+       volatile struct mlx5_wqe_v *wqe = NULL;
        unsigned int segs_n = 0;
        struct rte_mbuf *buf = NULL;
        uint8_t *raw;
@@ -378,19 +376,24 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
        if (unlikely(!pkts_n))
                return 0;
        /* Prefetch first packet cacheline. */
-       tx_prefetch_cqe(txq, txq->cq_ci);
-       tx_prefetch_cqe(txq, txq->cq_ci + 1);
        rte_prefetch0(*pkts);
        /* Start processing. */
        txq_complete(txq);
        max = (elts_n - (elts_head - txq->elts_tail));
        if (max > elts_n)
                max -= elts_n;
+       max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
+       if (unlikely(!max_wqe))
+               return 0;
        do {
-               volatile struct mlx5_wqe_data_seg *dseg = NULL;
+               volatile rte_v128u32_t *dseg = NULL;
                uint32_t length;
                unsigned int ds = 0;
                uintptr_t addr;
+               uint64_t naddr;
+               uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2;
+               uint16_t ehdr;
+               uint8_t cs_flags = 0;
 #ifdef MLX5_PMD_SOFT_COUNTERS
                uint32_t total_length = 0;
 #endif
@@ -409,13 +412,17 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                --segs_n;
                if (!segs_n)
                        --pkts_n;
-               wqe = &(*txq->wqes)[txq->wqe_ci &
-                                   ((1 << txq->wqe_n) - 1)].hdr;
-               tx_prefetch_wqe(txq, txq->wqe_ci + 1);
+               if (unlikely(--max_wqe == 0))
+                       break;
+               wqe = (volatile struct mlx5_wqe_v *)
+                       tx_mlx5_wqe(txq, txq->wqe_ci);
+               rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
                if (pkts_n > 1)
                        rte_prefetch0(*pkts);
                addr = rte_pktmbuf_mtod(buf, uintptr_t);
                length = DATA_LEN(buf);
+               ehdr = (((uint8_t *)addr)[1] << 8) |
+                      ((uint8_t *)addr)[0];
 #ifdef MLX5_PMD_SOFT_COUNTERS
                total_length = length;
 #endif
@@ -433,78 +440,88 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                /* Should we enable HW CKSUM offload */
                if (buf->ol_flags &
                    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
-                       wqe->eseg.cs_flags =
-                               MLX5_ETH_WQE_L3_CSUM |
-                               MLX5_ETH_WQE_L4_CSUM;
-               } else {
-                       wqe->eseg.cs_flags = 0;
+                       cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
                }
-               raw  = (uint8_t *)(uintptr_t)&wqe->eseg.inline_hdr[0];
-               /* Start the know and common part of the WQE structure. */
-               wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
-               wqe->ctrl[2] = 0;
-               wqe->ctrl[3] = 0;
-               wqe->eseg.rsvd0 = 0;
-               wqe->eseg.rsvd1 = 0;
-               wqe->eseg.mss = 0;
-               wqe->eseg.rsvd2 = 0;
-               /* Start by copying the Ethernet Header. */
-               memcpy((uint8_t *)raw, ((uint8_t *)addr), 16);
-               length -= MLX5_WQE_DWORD_SIZE;
-               addr += MLX5_WQE_DWORD_SIZE;
+               raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
                /* Replace the Ethernet type by the VLAN if necessary. */
                if (buf->ol_flags & PKT_TX_VLAN_PKT) {
                        uint32_t vlan = htonl(0x81000000 | buf->vlan_tci);
-
-                       memcpy((uint8_t *)(raw + MLX5_WQE_DWORD_SIZE -
-                                          sizeof(vlan)),
-                              &vlan, sizeof(vlan));
-                       addr -= sizeof(vlan);
-                       length += sizeof(vlan);
+                       unsigned int len = 2 * ETHER_ADDR_LEN - 2;
+
+                       addr += 2;
+                       length -= 2;
+                       /* Copy destination and source MAC addresses. */
+                       memcpy((uint8_t *)raw, ((uint8_t *)addr), len);
+                       /* Copy VLAN. */
+                       memcpy((uint8_t *)raw + len, &vlan, sizeof(vlan));
+                       /* Copy missing two bytes to end the DSeg. */
+                       memcpy((uint8_t *)raw + len + sizeof(vlan),
+                              ((uint8_t *)addr) + len, 2);
+                       addr += len + 2;
+                       length -= (len + 2);
+               } else {
+                       memcpy((uint8_t *)raw, ((uint8_t *)addr) + 2,
+                              MLX5_WQE_DWORD_SIZE);
+                       length -= pkt_inline_sz;
+                       addr += pkt_inline_sz;
                }
                /* Inline if enough room. */
-               if (txq->max_inline != 0) {
-                       uintptr_t end =
-                               (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n];
-                       uint16_t max_inline =
-                               txq->max_inline * RTE_CACHE_LINE_SIZE;
-                       uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
-                       uint16_t room;
+               if (txq->max_inline) {
+                       uintptr_t end = (uintptr_t)
+                               (((uintptr_t)txq->wqes) +
+                                (1 << txq->wqe_n) * MLX5_WQE_SIZE);
+                       unsigned int max_inline = txq->max_inline *
+                                                 RTE_CACHE_LINE_SIZE -
+                                                 MLX5_WQE_DWORD_SIZE;
+                       uintptr_t addr_end = (addr + max_inline) &
+                                            ~(RTE_CACHE_LINE_SIZE - 1);
+                       unsigned int copy_b = (addr_end > addr) ?
+                               RTE_MIN((addr_end - addr), length) :
+                               0;
 
                        raw += MLX5_WQE_DWORD_SIZE;
-                       room = end - (uintptr_t)raw;
-                       if (room > max_inline) {
-                               uintptr_t addr_end = (addr + max_inline) &
-                                       ~(RTE_CACHE_LINE_SIZE - 1);
-                               uint16_t copy_b = ((addr_end - addr) > length) ?
-                                                 length :
-                                                 (addr_end - addr);
+                       if (copy_b && ((end - (uintptr_t)raw) > copy_b)) {
+                               /*
+                                * One Dseg remains in the current WQE.  To
+                                * keep the computation positive, it is
+                                * removed after the bytes to Dseg conversion.
+                                */
+                               uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;
 
+                               if (unlikely(max_wqe < n))
+                                       break;
+                               max_wqe -= n;
                                rte_memcpy((void *)raw, (void *)addr, copy_b);
                                addr += copy_b;
                                length -= copy_b;
                                pkt_inline_sz += copy_b;
-                               /* Sanity check. */
-                               assert(addr <= addr_end);
                        }
-                       /* Store the inlined packet size in the WQE. */
-                       wqe->eseg.inline_hdr_sz = htons(pkt_inline_sz);
                        /*
-                        * 2 DWORDs consumed by the WQE header + 1 DSEG +
+                        * 2 DWORDs consumed by the WQE header + ETH segment +
                         * the size of the inline part of the packet.
                         */
                        ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
                        if (length > 0) {
-                               dseg = (struct mlx5_wqe_data_seg *)
-                                       ((uintptr_t)wqe +
-                                        (ds * MLX5_WQE_DWORD_SIZE));
-                               if ((uintptr_t)dseg >= end)
-                                       dseg = (struct mlx5_wqe_data_seg *)
-                                               ((uintptr_t)&(*txq->wqes)[0]);
+                               if (ds % (MLX5_WQE_SIZE /
+                                         MLX5_WQE_DWORD_SIZE) == 0) {
+                                       if (unlikely(--max_wqe == 0))
+                                               break;
+                                       dseg = (volatile rte_v128u32_t *)
+                                              tx_mlx5_wqe(txq, txq->wqe_ci +
+                                                          ds / 4);
+                               } else {
+                                       dseg = (volatile rte_v128u32_t *)
+                                               ((uintptr_t)wqe +
+                                                (ds * MLX5_WQE_DWORD_SIZE));
+                               }
                                goto use_dseg;
                        } else if (!segs_n) {
                                goto next_pkt;
                        } else {
+                               /* dseg will be advanced as part of next_seg */
+                               dseg = (volatile rte_v128u32_t *)
+                                       ((uintptr_t)wqe +
+                                        ((ds - 1) * MLX5_WQE_DWORD_SIZE));
                                goto next_seg;
                        }
                } else {
@@ -512,16 +529,17 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                         * No inline has been done in the packet, only the
                         * Ethernet header has been stored.
                         */
-                       wqe->eseg.inline_hdr_sz = htons(MLX5_WQE_DWORD_SIZE);
-                       dseg = (struct mlx5_wqe_data_seg *)
+                       dseg = (volatile rte_v128u32_t *)
                                ((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE));
                        ds = 3;
 use_dseg:
                        /* Add the remaining packet as a simple ds. */
-                       *dseg = (struct mlx5_wqe_data_seg) {
-                               .addr = htonll(addr),
-                               .byte_count = htonl(length),
-                               .lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+                       naddr = htonll(addr);
+                       *dseg = (rte_v128u32_t){
+                               htonl(length),
+                               txq_mp2mr(txq, txq_mb2mp(buf)),
+                               naddr,
+                               naddr >> 32,
                        };
                        ++ds;
                        if (!segs_n)
@@ -538,17 +556,13 @@ next_seg:
                 */
                assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE));
                if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) {
-                       unsigned int n = (txq->wqe_ci + ((ds + 3) / 4)) &
-                               ((1 << txq->wqe_n) - 1);
-
-                       dseg = (struct mlx5_wqe_data_seg *)
-                               ((uintptr_t)&(*txq->wqes)[n]);
-                       tx_prefetch_wqe(txq, n + 1);
-               } else if (!dseg) {
-            dseg = (struct mlx5_wqe_data_seg *)
-                ((uintptr_t)wqe +
-                 (ds * MLX5_WQE_DWORD_SIZE));
-        } else {
+                       if (unlikely(--max_wqe == 0))
+                               break;
+                       dseg = (volatile rte_v128u32_t *)
+                              tx_mlx5_wqe(txq, txq->wqe_ci + ds / 4);
+                       rte_prefetch0(tx_mlx5_wqe(txq,
+                                                 txq->wqe_ci + ds / 4 + 1));
+               } else {
                        ++dseg;
                }
                ++ds;
@@ -559,10 +573,12 @@ next_seg:
                total_length += length;
 #endif
                /* Store segment information. */
-               *dseg = (struct mlx5_wqe_data_seg) {
-                       .addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)),
-                       .byte_count = htonl(length),
-                       .lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+               naddr = htonll(rte_pktmbuf_mtod(buf, uintptr_t));
+               *dseg = (rte_v128u32_t){
+                       htonl(length),
+                       txq_mp2mr(txq, txq_mb2mp(buf)),
+                       naddr,
+                       naddr >> 32,
                };
                (*txq->elts)[elts_head] = buf;
                elts_head = (elts_head + 1) & (elts_n - 1);
@@ -574,7 +590,19 @@ next_seg:
                        --pkts_n;
 next_pkt:
                ++i;
-               wqe->ctrl[1] = htonl(txq->qp_num_8s | ds);
+               /* Initialize known and common part of the WQE structure. */
+               wqe->ctrl = (rte_v128u32_t){
+                       htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND),
+                       htonl(txq->qp_num_8s | ds),
+                       0,
+                       0,
+               };
+               wqe->eseg = (rte_v128u32_t){
+                       0,
+                       cs_flags,
+                       0,
+                       (ehdr << 16) | htons(pkt_inline_sz),
+               };
                txq->wqe_ci += (ds + 3) / 4;
 #ifdef MLX5_PMD_SOFT_COUNTERS
                /* Increment sent bytes counter. */
@@ -587,10 +615,13 @@ next_pkt:
        /* Check whether completion threshold has been reached. */
        comp = txq->elts_comp + i + j;
        if (comp >= MLX5_TX_COMP_THRESH) {
+               volatile struct mlx5_wqe_ctrl *w =
+                       (volatile struct mlx5_wqe_ctrl *)wqe;
+
                /* Request completion on last WQE. */
-               wqe->ctrl[2] = htonl(8);
+               w->ctrl2 = htonl(8);
                /* Save elts_head in unused "immediate" field of WQE. */
-               wqe->ctrl[3] = elts_head;
+               w->ctrl3 = elts_head;
                txq->elts_comp = 0;
        } else {
                txq->elts_comp = comp;
@@ -600,7 +631,7 @@ next_pkt:
        txq->stats.opackets += i;
 #endif
        /* Ring QP doorbell. */
-       mlx5_tx_dbrec(txq);
+       mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)wqe);
        txq->elts_head = elts_head;
        return i;
 }
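
Both the regular and MPW bursts budget the ring as max_wqe = ring_size - (wqe_ci - wqe_pi). Since both counters are free-running uint16_t values, the subtraction wraps correctly as long as no more than ring_size WQEs are ever in flight. A minimal sketch of that arithmetic with an assumed log2 ring size:

    #include <stdint.h>
    #include <stdio.h>

    /* Free WQE slots in a 2^log_n ring tracked by two free-running 16-bit
     * counters, mirroring the max_wqe computation in mlx5_tx_burst(). */
    static uint16_t example_free_wqes(uint16_t wqe_ci, uint16_t wqe_pi,
                                      unsigned int log_n)
    {
            return (uint16_t)((1u << log_n) - (uint16_t)(wqe_ci - wqe_pi));
    }

    int main(void)
    {
            /* 256-slot ring, counters already wrapped: 10 WQEs still in flight. */
            printf("%u\n", example_free_wqes(5, 65531, 8)); /* prints 246 */
            return 0;
    }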
@@ -621,13 +652,13 @@ mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
        uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
        volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
                (volatile struct mlx5_wqe_data_seg (*)[])
-               (uintptr_t)&(*txq->wqes)[(idx + 1) & ((1 << txq->wqe_n) - 1)];
+               tx_mlx5_wqe(txq, idx + 1);
 
        mpw->state = MLX5_MPW_STATE_OPENED;
        mpw->pkts_n = 0;
        mpw->len = length;
        mpw->total_len = 0;
-       mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr;
+       mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
        mpw->wqe->eseg.mss = htons(length);
        mpw->wqe->eseg.inline_hdr_sz = 0;
        mpw->wqe->eseg.rsvd0 = 0;
@@ -669,8 +700,8 @@ mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw)
                ++txq->wqe_ci;
        else
                txq->wqe_ci += 2;
-       tx_prefetch_wqe(txq, txq->wqe_ci);
-       tx_prefetch_wqe(txq, txq->wqe_ci + 1);
+       rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
+       rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
 }
 
 /**
@@ -695,6 +726,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
        unsigned int i = 0;
        unsigned int j = 0;
        unsigned int max;
+       uint16_t max_wqe;
        unsigned int comp;
        struct mlx5_mpw mpw = {
                .state = MLX5_MPW_STATE_CLOSED,
@@ -703,14 +735,16 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
        if (unlikely(!pkts_n))
                return 0;
        /* Prefetch first packet cacheline. */
-       tx_prefetch_cqe(txq, txq->cq_ci);
-       tx_prefetch_wqe(txq, txq->wqe_ci);
-       tx_prefetch_wqe(txq, txq->wqe_ci + 1);
+       rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
+       rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
        /* Start processing. */
        txq_complete(txq);
        max = (elts_n - (elts_head - txq->elts_tail));
        if (max > elts_n)
                max -= elts_n;
+       max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
+       if (unlikely(!max_wqe))
+               return 0;
        do {
                struct rte_mbuf *buf = *(pkts++);
                unsigned int elts_head_next;
@@ -744,6 +778,14 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                     (mpw.wqe->eseg.cs_flags != cs_flags)))
                        mlx5_mpw_close(txq, &mpw);
                if (mpw.state == MLX5_MPW_STATE_CLOSED) {
+                       /*
+                        * Multi-Packet WQE consumes at most two WQEs.
+                        * mlx5_mpw_new() expects to be able to use such
+                        * resources.
+                        */
+                       if (unlikely(max_wqe < 2))
+                               break;
+                       max_wqe -= 2;
                        mlx5_mpw_new(txq, &mpw, length);
                        mpw.wqe->eseg.cs_flags = cs_flags;
                }
@@ -808,7 +850,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
        /* Ring QP doorbell. */
        if (mpw.state == MLX5_MPW_STATE_OPENED)
                mlx5_mpw_close(txq, &mpw);
-       mlx5_tx_dbrec(txq);
+       mlx5_tx_dbrec(txq, mpw.wqe);
        txq->elts_head = elts_head;
        return i;
 }
@@ -833,7 +875,7 @@ mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
        mpw->pkts_n = 0;
        mpw->len = length;
        mpw->total_len = 0;
-       mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr;
+       mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
        mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
                                  (txq->wqe_ci << 8) |
                                  MLX5_OPCODE_TSO);
@@ -899,18 +941,30 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
        unsigned int i = 0;
        unsigned int j = 0;
        unsigned int max;
+       uint16_t max_wqe;
        unsigned int comp;
        unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE;
        struct mlx5_mpw mpw = {
                .state = MLX5_MPW_STATE_CLOSED,
        };
+       /*
+        * Compute the maximum number of WQEs which can be consumed by inline
+        * code.
+        * - 2 DSEG for:
+        *   - 1 control segment,
+        *   - 1 Ethernet segment,
+        * - N Dseg from the inline request.
+        */
+       const unsigned int wqe_inl_n =
+               ((2 * MLX5_WQE_DWORD_SIZE +
+                 txq->max_inline * RTE_CACHE_LINE_SIZE) +
+                RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
 
        if (unlikely(!pkts_n))
                return 0;
        /* Prefetch first packet cacheline. */
-       tx_prefetch_cqe(txq, txq->cq_ci);
-       tx_prefetch_wqe(txq, txq->wqe_ci);
-       tx_prefetch_wqe(txq, txq->wqe_ci + 1);
+       rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
+       rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
        /* Start processing. */
        txq_complete(txq);
        max = (elts_n - (elts_head - txq->elts_tail));
@@ -936,6 +990,11 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
                        break;
                max -= segs_n;
                --pkts_n;
+               /*
+                * Compute max_wqe in case fewer WQEs were consumed in the
+                * previous iteration.
+                */
+               max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
                /* Should we enable HW CKSUM offload */
                if (buf->ol_flags &
                    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
@@ -961,9 +1020,20 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
                if (mpw.state == MLX5_MPW_STATE_CLOSED) {
                        if ((segs_n != 1) ||
                            (length > inline_room)) {
+                               /*
+                                * Multi-Packet WQE consumes at most two WQEs.
+                                * mlx5_mpw_new() expects to be able to use
+                                * such resources.
+                                */
+                               if (unlikely(max_wqe < 2))
+                                       break;
+                               max_wqe -= 2;
                                mlx5_mpw_new(txq, &mpw, length);
                                mpw.wqe->eseg.cs_flags = cs_flags;
                        } else {
+                               if (unlikely(max_wqe < wqe_inl_n))
+                                       break;
+                               max_wqe -= wqe_inl_n;
                                mlx5_mpw_inline_new(txq, &mpw, length);
                                mpw.wqe->eseg.cs_flags = cs_flags;
                        }
@@ -1011,14 +1081,15 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
                        addr = rte_pktmbuf_mtod(buf, uintptr_t);
                        (*txq->elts)[elts_head] = buf;
                        /* Maximum number of bytes before wrapping. */
-                       max = ((uintptr_t)&(*txq->wqes)[1 << txq->wqe_n] -
+                       max = ((((uintptr_t)(txq->wqes)) +
+                               (1 << txq->wqe_n) *
+                               MLX5_WQE_SIZE) -
                               (uintptr_t)mpw.data.raw);
                        if (length > max) {
                                rte_memcpy((void *)(uintptr_t)mpw.data.raw,
                                           (void *)addr,
                                           max);
-                               mpw.data.raw =
-                                       (volatile void *)&(*txq->wqes)[0];
+                               mpw.data.raw = (volatile void *)txq->wqes;
                                rte_memcpy((void *)(uintptr_t)mpw.data.raw,
                                           (void *)(addr + max),
                                           length - max);
@@ -1027,13 +1098,15 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
                                rte_memcpy((void *)(uintptr_t)mpw.data.raw,
                                           (void *)addr,
                                           length);
-                               mpw.data.raw += length;
+
+                               if (length == max)
+                                       mpw.data.raw =
+                                               (volatile void *)txq->wqes;
+                               else
+                                       mpw.data.raw += length;
                        }
-                       if ((uintptr_t)mpw.data.raw ==
-                           (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n])
-                               mpw.data.raw =
-                                       (volatile void *)&(*txq->wqes)[0];
                        ++mpw.pkts_n;
+                       mpw.total_len += length;
                        ++j;
                        if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) {
                                mlx5_mpw_inline_close(txq, &mpw);
@@ -1043,7 +1116,6 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
                                inline_room -= length;
                        }
                }
-               mpw.total_len += length;
                elts_head = elts_head_next;
 #ifdef MLX5_PMD_SOFT_COUNTERS
                /* Increment sent bytes counter. */
@@ -1077,7 +1149,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
                mlx5_mpw_inline_close(txq, &mpw);
        else if (mpw.state == MLX5_MPW_STATE_OPENED)
                mlx5_mpw_close(txq, &mpw);
-       mlx5_tx_dbrec(txq);
+       mlx5_tx_dbrec(txq, mpw.wqe);
        txq->elts_head = elts_head;
        return i;
 }
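
wqe_inl_n above turns a byte budget (control segment, Ethernet segment, and max_inline bytes) into a count of WQE slots with a ceiling division by the slot size. The same rounding in isolation, with illustrative constants standing in for MLX5_WQE_DWORD_SIZE and RTE_CACHE_LINE_SIZE:

    #include <stdio.h>

    #define EX_DWORD_SIZE 16u  /* Illustration; stands in for MLX5_WQE_DWORD_SIZE. */
    #define EX_SLOT_SIZE  64u  /* One WQE slot / cache line in the real code. */

    /* Ceiling division: whole WQE slots needed for headers plus inline bytes,
     * as in the wqe_inl_n computation above. */
    static unsigned int example_inline_wqe_budget(unsigned int max_inline_bytes)
    {
            unsigned int bytes = 2 * EX_DWORD_SIZE + max_inline_bytes;

            return (bytes + EX_SLOT_SIZE - 1) / EX_SLOT_SIZE;
    }

    int main(void)
    {
            /* 128 inline bytes + 32 header bytes -> 160 bytes -> 3 slots. */
            printf("%u\n", example_inline_wqe_budget(128));
            return 0;
    }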
@@ -1097,30 +1169,28 @@ static inline uint32_t
 rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
 {
        uint32_t pkt_type;
-       uint8_t flags = cqe->l4_hdr_type_etc;
+       uint16_t flags = ntohs(cqe->hdr_type_etc);
 
-       if (cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET)
+       if (cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) {
                pkt_type =
-                       TRANSPOSE(flags,
-                                 MLX5_CQE_RX_OUTER_IPV4_PACKET,
-                                 RTE_PTYPE_L3_IPV4) |
-                       TRANSPOSE(flags,
-                                 MLX5_CQE_RX_OUTER_IPV6_PACKET,
-                                 RTE_PTYPE_L3_IPV6) |
                        TRANSPOSE(flags,
                                  MLX5_CQE_RX_IPV4_PACKET,
-                                 RTE_PTYPE_INNER_L3_IPV4) |
+                                 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN) |
                        TRANSPOSE(flags,
                                  MLX5_CQE_RX_IPV6_PACKET,
-                                 RTE_PTYPE_INNER_L3_IPV6);
-       else
+                                 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN);
+               pkt_type |= ((cqe->pkt_info & MLX5_CQE_RX_OUTER_PACKET) ?
+                            RTE_PTYPE_L3_IPV6_EXT_UNKNOWN :
+                            RTE_PTYPE_L3_IPV4_EXT_UNKNOWN);
+       } else {
                pkt_type =
                        TRANSPOSE(flags,
                                  MLX5_CQE_L3_HDR_TYPE_IPV6,
-                                 RTE_PTYPE_L3_IPV6) |
+                                 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN) |
                        TRANSPOSE(flags,
                                  MLX5_CQE_L3_HDR_TYPE_IPV4,
-                                 RTE_PTYPE_L3_IPV4);
+                                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN);
+       }
        return pkt_type;
 }
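
rxq_cq_to_pkt_type() builds the packet type with the PMD's TRANSPOSE() helper, which moves a flag from its position in the CQE word to its position in the mbuf field by scaling with the ratio of the two masks. A standalone sketch of that idea; the macro body and the masks below are illustrative assumptions, not the definitions from mlx5_utils.h:

    #include <stdint.h>
    #include <stdio.h>

    /* Move a single-bit flag from one mask position to another by scaling with
     * the ratio of the masks; a sketch of the TRANSPOSE() idea. */
    #define EXAMPLE_TRANSPOSE(val, from, to) \
            (((from) >= (to)) ? \
             (((val) & (from)) / ((from) / (to))) : \
             (((val) & (from)) * ((to) / (from))))

    int main(void)
    {
            uint16_t cqe_flags = 0x0040;    /* Illustrative flag in the CQE word. */
            uint32_t mbuf_bit = 0x00000010; /* Illustrative target bit. */

            /* Bit 0x0040 in the source becomes bit 0x0010 in the destination. */
            printf("%#x\n", EXAMPLE_TRANSPOSE(cqe_flags, 0x0040u, mbuf_bit));
            return 0;
    }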
 
@@ -1147,6 +1217,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
        struct rxq_zip *zip = &rxq->zip;
        uint16_t cqe_n = cqe_cnt + 1;
        int len = 0;
+       uint16_t idx, end;
 
        /* Process compressed data in the CQE and mini arrays. */
        if (zip->ai) {
@@ -1157,6 +1228,14 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
                len = ntohl((*mc)[zip->ai & 7].byte_cnt);
                *rss_hash = ntohl((*mc)[zip->ai & 7].rx_hash_result);
                if ((++zip->ai & 7) == 0) {
+                       /* Invalidate consumed CQEs */
+                       idx = zip->ca;
+                       end = zip->na;
+                       while (idx != end) {
+                               (*rxq->cqes)[idx & cqe_cnt].op_own =
+                                       MLX5_CQE_INVALIDATE;
+                               ++idx;
+                       }
                        /*
                         * Increment consumer index to skip the number of
                         * CQEs consumed. Hardware leaves holes in the CQ
@@ -1166,8 +1245,9 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
                        zip->na += 8;
                }
                if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
-                       uint16_t idx = rxq->cq_ci;
-                       uint16_t end = zip->cq_ci;
+                       /* Invalidate the rest */
+                       idx = zip->ca;
+                       end = zip->cq_ci;
 
                        while (idx != end) {
                                (*rxq->cqes)[idx & cqe_cnt].op_own =
@@ -1203,7 +1283,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
                         * special case the second one is located 7 CQEs after
                         * the initial CQE instead of 8 for subsequent ones.
                         */
-                       zip->ca = rxq->cq_ci & cqe_cnt;
+                       zip->ca = rxq->cq_ci;
                        zip->na = zip->ca + 7;
                        /* Compute the next non compressed CQE. */
                        --rxq->cq_ci;
@@ -1212,6 +1292,13 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
                        len = ntohl((*mc)[0].byte_cnt);
                        *rss_hash = ntohl((*mc)[0].rx_hash_result);
                        zip->ai = 1;
+                       /* Prefetch all the entries to be invalidated */
+                       idx = zip->ca;
+                       end = zip->cq_ci;
+                       while (idx != end) {
+                               rte_prefetch0(&(*rxq->cqes)[(idx) & cqe_cnt]);
+                               ++idx;
+                       }
                } else {
                        len = ntohl(cqe->byte_cnt);
                        *rss_hash = ntohl(cqe->rx_hash_res);
@@ -1238,28 +1325,22 @@ static inline uint32_t
 rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe)
 {
        uint32_t ol_flags = 0;
-       uint8_t l3_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L3_HDR_TYPE_MASK;
-       uint8_t l4_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L4_HDR_TYPE_MASK;
-
-       if ((l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV4) ||
-           (l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV6))
-               ol_flags |= TRANSPOSE(cqe->hds_ip_ext,
-                                     MLX5_CQE_L3_OK,
-                                     PKT_RX_IP_CKSUM_GOOD);
-       if ((l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP) ||
-           (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_EMP_ACK) ||
-           (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_ACK) ||
-           (l4_hdr == MLX5_CQE_L4_HDR_TYPE_UDP))
-               ol_flags |= TRANSPOSE(cqe->hds_ip_ext,
-                                     MLX5_CQE_L4_OK,
-                                     PKT_RX_L4_CKSUM_GOOD);
+       uint16_t flags = ntohs(cqe->hdr_type_etc);
+
+       ol_flags =
+               TRANSPOSE(flags,
+                         MLX5_CQE_RX_L3_HDR_VALID,
+                         PKT_RX_IP_CKSUM_GOOD) |
+               TRANSPOSE(flags,
+                         MLX5_CQE_RX_L4_HDR_VALID,
+                         PKT_RX_L4_CKSUM_GOOD);
        if ((cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
                ol_flags |=
-                       TRANSPOSE(cqe->l4_hdr_type_etc,
-                                 MLX5_CQE_RX_OUTER_IP_CSUM_OK,
+                       TRANSPOSE(flags,
+                                 MLX5_CQE_RX_L3_HDR_VALID,
                                  PKT_RX_IP_CKSUM_GOOD) |
-                       TRANSPOSE(cqe->l4_hdr_type_etc,
-                                 MLX5_CQE_RX_OUTER_TCP_UDP_CSUM_OK,
+                       TRANSPOSE(flags,
+                                 MLX5_CQE_RX_L4_HDR_VALID,
                                  PKT_RX_L4_CKSUM_GOOD);
        return ol_flags;
 }
@@ -1316,10 +1397,10 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        }
                        while (pkt != seg) {
                                assert(pkt != (*rxq->elts)[idx]);
-                               seg = NEXT(pkt);
+                               rep = NEXT(pkt);
                                rte_mbuf_refcnt_set(pkt, 0);
                                __rte_mbuf_raw_free(pkt);
-                               pkt = seg;
+                               pkt = rep;
                        }
                        break;
                }
@@ -1344,10 +1425,20 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        /* Update packet information. */
                        pkt->packet_type = 0;
                        pkt->ol_flags = 0;
-                       if (rxq->rss_hash) {
+                       if (rss_hash_res && rxq->rss_hash) {
                                pkt->hash.rss = rss_hash_res;
                                pkt->ol_flags = PKT_RX_RSS_HASH;
                        }
+                       if (rxq->mark &&
+                           ((cqe->sop_drop_qpn !=
+                             htonl(MLX5_FLOW_MARK_INVALID)) ||
+                            (cqe->sop_drop_qpn !=
+                             htonl(MLX5_FLOW_MARK_DEFAULT)))) {
+                               pkt->hash.fdir.hi =
+                                       mlx5_flow_mark_get(cqe->sop_drop_qpn);
+                               pkt->ol_flags &= ~PKT_RX_RSS_HASH;
+                               pkt->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+                       }
                        if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip |
                            rxq->crc_present) {
                                if (rxq->csum) {
@@ -1356,7 +1447,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                        pkt->ol_flags |=
                                                rxq_cq_to_ol_flags(rxq, cqe);
                                }
-                               if (cqe->l4_hdr_type_etc &
+                               if (cqe->hdr_type_etc &
                                    MLX5_CQE_VLAN_STRIPPED) {
                                        pkt->ol_flags |= PKT_RX_VLAN_PKT |
                                                PKT_RX_VLAN_STRIPPED;
index f45e3f5..41a34d7 100644
@@ -114,7 +114,8 @@ struct rxq {
        unsigned int elts_n:4; /* Log 2 of Mbufs. */
        unsigned int port_id:8;
        unsigned int rss_hash:1; /* RSS hash result is enabled. */
-       unsigned int :9; /* Remaining bits. */
+       unsigned int mark:1; /* Marked flow available on the queue. */
+       unsigned int :8; /* Remaining bits. */
        volatile uint32_t *rq_db;
        volatile uint32_t *cq_db;
        uint16_t rq_ci;
@@ -178,8 +179,8 @@ struct hash_rxq_init {
                        uint16_t size;
                } hdr;
                struct ibv_exp_flow_spec_tcp_udp tcp_udp;
-               struct ibv_exp_flow_spec_ipv4_ext ipv4;
-               struct ibv_exp_flow_spec_ipv6_ext ipv6;
+               struct ibv_exp_flow_spec_ipv4 ipv4;
+               struct ibv_exp_flow_spec_ipv6 ipv6;
                struct ibv_exp_flow_spec_eth eth;
        } flow_spec; /* Flow specification template. */
        const struct hash_rxq_init *underlayer; /* Pointer to underlayer. */
@@ -240,13 +241,6 @@ struct hash_rxq {
                [MLX5_MAX_SPECIAL_FLOWS][MLX5_MAX_VLAN_IDS];
 };
 
-/** C extension macro for environments lacking C11 features. */
-#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L
-#define RTE_STD_C11 __extension__
-#else
-#define RTE_STD_C11
-#endif
-
 /* TX queue descriptor. */
 RTE_STD_C11
 struct txq {
@@ -255,15 +249,14 @@ struct txq {
        uint16_t elts_comp; /* Counter since last completion request. */
        uint16_t cq_ci; /* Consumer index for completion queue. */
        uint16_t wqe_ci; /* Consumer index for work queue. */
+       uint16_t wqe_pi; /* Producer index for work queue. */
        uint16_t elts_n:4; /* (*elts)[] length (in log2). */
        uint16_t cqe_n:4; /* Number of CQ elements (in log2). */
        uint16_t wqe_n:4; /* Number of WQ elements (in log2). */
-       uint16_t bf_buf_size:4; /* Log2 Blueflame size. */
-       uint16_t bf_offset; /* Blueflame offset. */
        uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
        uint32_t qp_num_8s; /* QP number shifted by 8. */
        volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
-       volatile struct mlx5_wqe64 (*wqes)[]; /* Work queue. */
+       volatile void *wqes; /* Work queue (use volatile to write into). */
        volatile uint32_t *qp_db; /* Work queue doorbell. */
        volatile uint32_t *cq_db; /* Completion queue doorbell. */
        volatile void *bf_reg; /* Blueflame register. */
index c6087d4..20c957e 100644
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <linux/sockios.h>
+#include <linux/ethtool.h>
+
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
 #pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_ethdev.h>
+#include <rte_common.h>
+#include <rte_malloc.h>
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 #include "mlx5_rxtx.h"
 #include "mlx5_defs.h"
 
-#include <linux/ethtool.h>
-#include <linux/sockios.h>
-
-static void
-mlx5_stats_read_hw(struct rte_eth_dev *dev,
-                struct rte_eth_stats *stats){
-    struct priv *priv = mlx5_get_priv(dev);
-    struct mlx5_stats_priv * lps = &priv->m_stats;
-    unsigned int i;
-
-    struct rte_eth_stats tmp = {0};
-    struct ethtool_stats    *et_stats   = (struct ethtool_stats    *)lps->et_stats;
-    struct ifreq ifr;
-
-    et_stats->cmd = ETHTOOL_GSTATS;
-    et_stats->n_stats = lps->n_stats;
-
-    ifr.ifr_data = (caddr_t) et_stats;
-
-    if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { 
-        WARN("unable to get statistic values for mlnx5 "); 
-    }
-
-    tmp.ibytes += et_stats->data[lps->inx_rx_vport_unicast_bytes] +
-                  et_stats->data[lps->inx_rx_vport_multicast_bytes] +
-                  et_stats->data[lps->inx_rx_vport_broadcast_bytes];
-
-    tmp.ipackets += et_stats->data[lps->inx_rx_vport_unicast_packets] +
-                et_stats->data[lps->inx_rx_vport_multicast_packets] +
-                et_stats->data[lps->inx_rx_vport_broadcast_packets];
-
-    tmp.ierrors +=     (et_stats->data[lps->inx_rx_wqe_err] +
-                    et_stats->data[lps->inx_rx_crc_errors_phy] +
-                    et_stats->data[lps->inx_rx_in_range_len_errors_phy] +
-                    et_stats->data[lps->inx_rx_symbol_err_phy]);
+struct mlx5_counter_ctrl {
+       /* Name of the counter. */
+       char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE];
+       /* Name of the counter on the device table. */
+       char ctr_name[RTE_ETH_XSTATS_NAME_SIZE];
+};
+
+static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
+       {
+               .dpdk_name = "rx_port_unicast_bytes",
+               .ctr_name = "rx_vport_unicast_bytes",
+       },
+       {
+               .dpdk_name = "rx_port_multicast_bytes",
+               .ctr_name = "rx_vport_multicast_bytes",
+       },
+       {
+               .dpdk_name = "rx_port_broadcast_bytes",
+               .ctr_name = "rx_vport_broadcast_bytes",
+       },
+       {
+               .dpdk_name = "rx_port_unicast_packets",
+               .ctr_name = "rx_vport_unicast_packets",
+       },
+       {
+               .dpdk_name = "rx_port_multicast_packets",
+               .ctr_name = "rx_vport_multicast_packets",
+       },
+       {
+               .dpdk_name = "rx_port_broadcast_packets",
+               .ctr_name = "rx_vport_broadcast_packets",
+       },
+       {
+               .dpdk_name = "tx_port_unicast_bytes",
+               .ctr_name = "tx_vport_unicast_bytes",
+       },
+       {
+               .dpdk_name = "tx_port_multicast_bytes",
+               .ctr_name = "tx_vport_multicast_bytes",
+       },
+       {
+               .dpdk_name = "tx_port_broadcast_bytes",
+               .ctr_name = "tx_vport_broadcast_bytes",
+       },
+       {
+               .dpdk_name = "tx_port_unicast_packets",
+               .ctr_name = "tx_vport_unicast_packets",
+       },
+       {
+               .dpdk_name = "tx_port_multicast_packets",
+               .ctr_name = "tx_vport_multicast_packets",
+       },
+       {
+               .dpdk_name = "tx_port_broadcast_packets",
+               .ctr_name = "tx_vport_broadcast_packets",
+       },
+       {
+               .dpdk_name = "rx_wqe_err",
+               .ctr_name = "rx_wqe_err",
+       },
+       {
+               .dpdk_name = "rx_crc_errors_phy",
+               .ctr_name = "rx_crc_errors_phy",
+       },
+       {
+               .dpdk_name = "rx_in_range_len_errors_phy",
+               .ctr_name = "rx_in_range_len_errors_phy",
+       },
+       {
+               .dpdk_name = "rx_symbol_err_phy",
+               .ctr_name = "rx_symbol_err_phy",
+       },
+       {
+               .dpdk_name = "tx_errors_phy",
+               .ctr_name = "tx_errors_phy",
+       },
+};
+
+static const unsigned int xstats_n = RTE_DIM(mlx5_counters_init);
 
-    tmp.obytes += et_stats->data[lps->inx_tx_vport_unicast_bytes] +
-                  et_stats->data[lps->inx_tx_vport_multicast_bytes] +
-                  et_stats->data[lps->inx_tx_vport_broadcast_bytes];
-
-    tmp.opackets += (et_stats->data[lps->inx_tx_vport_unicast_packets] +
-                     et_stats->data[lps->inx_tx_vport_multicast_packets] +
-                     et_stats->data[lps->inx_tx_vport_broadcast_packets]);
-
-    tmp.oerrors += et_stats->data[lps->inx_tx_errors_phy];
-
-    /* SW Rx */
-    for (i = 0; (i != priv->rxqs_n); ++i) {
-        struct rxq *rxq = (*priv->rxqs)[i];
-        if (rxq) {
-            tmp.imissed += rxq->stats.idropped;
-            tmp.rx_nombuf += rxq->stats.rx_nombuf;
-        }
-    }
-
-    /*SW Tx */
-    for (i = 0; (i != priv->txqs_n); ++i) {
-        struct txq *txq = (*priv->txqs)[i];
-        if (txq) {
-            tmp.oerrors += txq->stats.odropped;
-        }
-    }
-
-    *stats =tmp;
+/**
+ * Read device counters table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[out] stats
+ *   Counters table output buffer.
+ *
+ * @return
+ *   0 on success and stats is filled, negative on error.
+ */
+static int
+priv_read_dev_counters(struct priv *priv, uint64_t *stats)
+{
+       struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
+       unsigned int i;
+       struct ifreq ifr;
+       unsigned int stats_sz = (xstats_ctrl->stats_n * sizeof(uint64_t)) +
+                                sizeof(struct ethtool_stats);
+       struct ethtool_stats et_stats[(stats_sz + (
+                                     sizeof(struct ethtool_stats) - 1)) /
+                                     sizeof(struct ethtool_stats)];
+
+       et_stats->cmd = ETHTOOL_GSTATS;
+       et_stats->n_stats = xstats_ctrl->stats_n;
+       ifr.ifr_data = (caddr_t)et_stats;
+       if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
+               WARN("unable to read statistic values from device");
+               return -1;
+       }
+       for (i = 0; i != xstats_n; ++i)
+               stats[i] = (uint64_t)
+                          et_stats->data[xstats_ctrl->dev_table_idx[i]];
+       return 0;
 }
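
priv_read_dev_counters() sizes its buffer as a struct ethtool_stats header followed by stats_n 64-bit values, which is the layout ETHTOOL_GSTATS expects through SIOCETHTOOL. A heap-based sketch of the same request outside the PMD; the socket, interface name, and counter count are assumptions supplied by the caller:

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <net/if.h>
    #include <linux/ethtool.h>
    #include <linux/sockios.h>

    /* Issue an ETHTOOL_GSTATS request for n_stats counters on 'ifname' through
     * the socket 'fd' (both assumed to be provided by the caller).  Returns a
     * malloc'ed buffer whose data[] member holds the counter values, or NULL. */
    static struct ethtool_stats *example_read_stats(int fd, const char *ifname,
                                                    unsigned int n_stats)
    {
            struct ethtool_stats *st;
            struct ifreq ifr;

            /* Header followed by one 64-bit value per counter. */
            st = calloc(1, sizeof(*st) + n_stats * sizeof(uint64_t));
            if (st == NULL)
                    return NULL;
            st->cmd = ETHTOOL_GSTATS;
            st->n_stats = n_stats;
            memset(&ifr, 0, sizeof(ifr));
            strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name) - 1);
            ifr.ifr_data = (char *)st;
            if (ioctl(fd, SIOCETHTOOL, &ifr) != 0) {
                    free(st);
                    return NULL;
            }
            return st;
    }

Any datagram socket works as the ioctl carrier; the kernel fills data[i] in the same order as the ETH_SS_STATS string set, which is how priv_xstats_init() maps each ctr_name to its index.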
 
+/**
+ * Init the structures to read device counters.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
 void
-mlx5_stats_free(struct rte_eth_dev *dev)
+priv_xstats_init(struct priv *priv)
 {
-    struct priv *priv = mlx5_get_priv(dev);
-    struct mlx5_stats_priv * lps = &priv->m_stats;
-
-    if ( lps->et_stats ){
-        free(lps->et_stats);
-        lps->et_stats=0;
-    }
+       struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
+       unsigned int i;
+       unsigned int j;
+       char ifname[IF_NAMESIZE];
+       struct ifreq ifr;
+       struct ethtool_drvinfo drvinfo;
+       struct ethtool_gstrings *strings = NULL;
+       unsigned int dev_stats_n;
+       unsigned int str_sz;
+
+       if (priv_get_ifname(priv, &ifname)) {
+               WARN("unable to get interface name");
+               return;
+       }
+       /* How many statistics are available. */
+       drvinfo.cmd = ETHTOOL_GDRVINFO;
+       ifr.ifr_data = (caddr_t)&drvinfo;
+       if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
+               WARN("unable to get driver info");
+               return;
+       }
+       dev_stats_n = drvinfo.n_stats;
+       if (dev_stats_n < 1) {
+               WARN("no extended statistics available");
+               return;
+       }
+       xstats_ctrl->stats_n = dev_stats_n;
+       /* Allocate memory to grab stat names and values. */
+       str_sz = dev_stats_n * ETH_GSTRING_LEN;
+       strings = (struct ethtool_gstrings *)
+                 rte_malloc("xstats_strings",
+                            str_sz + sizeof(struct ethtool_gstrings), 0);
+       if (!strings) {
+               WARN("unable to allocate memory for xstats");
+               return;
+       }
+       strings->cmd = ETHTOOL_GSTRINGS;
+       strings->string_set = ETH_SS_STATS;
+       strings->len = dev_stats_n;
+       ifr.ifr_data = (caddr_t)strings;
+       if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
+               WARN("unable to get statistic names");
+               goto free;
+       }
+       for (j = 0; j != xstats_n; ++j)
+               xstats_ctrl->dev_table_idx[j] = dev_stats_n;
+       for (i = 0; i != dev_stats_n; ++i) {
+               const char *curr_string = (const char *)
+                       &strings->data[i * ETH_GSTRING_LEN];
+
+               for (j = 0; j != xstats_n; ++j) {
+                       if (!strcmp(mlx5_counters_init[j].ctr_name,
+                                   curr_string)) {
+                               xstats_ctrl->dev_table_idx[j] = i;
+                               break;
+                       }
+               }
+       }
+       for (j = 0; j != xstats_n; ++j) {
+               if (xstats_ctrl->dev_table_idx[j] >= dev_stats_n) {
+                       WARN("counter \"%s\" is not recognized",
+                            mlx5_counters_init[j].dpdk_name);
+                       goto free;
+               }
+       }
+       /* Copy counters to the base on the first read. */
+       assert(xstats_n <= MLX5_MAX_XSTATS);
+       priv_read_dev_counters(priv, xstats_ctrl->base);
+free:
+       rte_free(strings);
 }
 
-
-static void
-mlx5_stats_init(struct rte_eth_dev *dev)
+/**
+ * Get device extended statistics.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[out] stats
+ *   Pointer to rte extended stats table.
+ *
+ * @return
+ *   Number of extended stats on success and stats is filled,
+ *   negative on error.
+ */
+static int
+priv_xstats_get(struct priv *priv, struct rte_eth_xstat *stats)
 {
-    struct priv *priv = mlx5_get_priv(dev);
-    struct mlx5_stats_priv * lps = &priv->m_stats;
-    struct rte_eth_stats tmp = {0};
-
-    unsigned int i;
-    unsigned int idx;
-    char ifname[IF_NAMESIZE];
-    struct ifreq ifr;
-
-    struct ethtool_stats    *et_stats   = NULL;
-    struct ethtool_drvinfo drvinfo;
-    struct ethtool_gstrings *strings = NULL;
-    unsigned int n_stats, sz_str, sz_stats;
-
-    if (priv_get_ifname(priv, &ifname)) {
-            WARN("unable to get interface name");
-            return;
-    }
-    /* How many statistics are available ? */
-    drvinfo.cmd = ETHTOOL_GDRVINFO;
-    ifr.ifr_data = (caddr_t) &drvinfo;
-    if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
-            WARN("unable to get driver info for %s", ifname);
-            return;
-    }
-
-    n_stats = drvinfo.n_stats;
-    if (n_stats < 1) {
-            WARN("no statistics available for %s", ifname);
-            return;
-    }
-    lps->n_stats = n_stats;
-
-    /* Allocate memory to grab stat names and values */ 
-    sz_str = n_stats * ETH_GSTRING_LEN; 
-    sz_stats = n_stats * sizeof(uint64_t); 
-    strings = calloc(1, sz_str + sizeof(struct ethtool_gstrings)); 
-    if (!strings) { 
-        WARN("unable to allocate memory for strings"); 
-        return;
-    } 
-
-    et_stats = calloc(1, sz_stats + sizeof(struct ethtool_stats)); 
-    if (!et_stats) { 
-        free(strings);
-        WARN("unable to allocate memory for stats"); 
-    } 
-
-    strings->cmd = ETHTOOL_GSTRINGS; 
-    strings->string_set = ETH_SS_STATS; 
-    strings->len = n_stats; 
-    ifr.ifr_data = (caddr_t) strings; 
-    if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { 
-        WARN("unable to get statistic names for %s", ifname); 
-        free(strings);
-        free(et_stats);
-        return;
-    } 
-
-    for (i = 0; (i != n_stats); ++i) {
-
-            const char * curr_string = (const char*) &(strings->data[i * ETH_GSTRING_LEN]);
-
-            if (!strcmp("rx_vport_unicast_bytes", curr_string)) lps->inx_rx_vport_unicast_bytes = i;
-            if (!strcmp("rx_vport_multicast_bytes", curr_string)) lps->inx_rx_vport_multicast_bytes = i;
-            if (!strcmp("rx_vport_broadcast_bytes", curr_string)) lps->inx_rx_vport_broadcast_bytes = i;
-
-            if (!strcmp("rx_vport_unicast_packets", curr_string)) lps->inx_rx_vport_unicast_packets = i;
-            if (!strcmp("rx_vport_multicast_packets", curr_string)) lps->inx_rx_vport_multicast_packets = i;
-            if (!strcmp("rx_vport_broadcast_packets", curr_string)) lps->inx_rx_vport_broadcast_packets = i;
-
-            if (!strcmp("tx_vport_unicast_bytes", curr_string)) lps->inx_tx_vport_unicast_bytes = i;
-            if (!strcmp("tx_vport_multicast_bytes", curr_string)) lps->inx_tx_vport_multicast_bytes = i;
-            if (!strcmp("tx_vport_broadcast_bytes", curr_string)) lps->inx_tx_vport_broadcast_bytes = i;
-
-            if (!strcmp("tx_vport_unicast_packets", curr_string)) lps->inx_tx_vport_unicast_packets = i;
-            if (!strcmp("tx_vport_multicast_packets", curr_string)) lps->inx_tx_vport_multicast_packets = i;
-            if (!strcmp("tx_vport_broadcast_packets", curr_string)) lps->inx_tx_vport_broadcast_packets = i;
-
-            if (!strcmp("rx_wqe_err", curr_string)) lps->inx_rx_wqe_err = i;
-            if (!strcmp("rx_crc_errors_phy", curr_string)) lps->inx_rx_crc_errors_phy = i;
-            if (!strcmp("rx_in_range_len_errors_phy", curr_string)) lps->inx_rx_in_range_len_errors_phy = i;
-            if (!strcmp("rx_symbol_err_phy", curr_string)) lps->inx_rx_symbol_err_phy = i;
-
-            if (!strcmp("tx_errors_phy", curr_string)) lps->inx_tx_errors_phy = i;
-    }
-
-    lps->et_stats =(void *)et_stats;
-
-    if (!lps->inx_rx_vport_unicast_bytes ||
-    !lps->inx_rx_vport_multicast_bytes ||
-    !lps->inx_rx_vport_broadcast_bytes || 
-    !lps->inx_rx_vport_unicast_packets ||
-    !lps->inx_rx_vport_multicast_packets ||
-    !lps->inx_rx_vport_broadcast_packets ||
-    !lps->inx_tx_vport_unicast_bytes || 
-    !lps->inx_tx_vport_multicast_bytes ||
-    !lps->inx_tx_vport_broadcast_bytes ||
-    !lps->inx_tx_vport_unicast_packets ||
-    !lps->inx_tx_vport_multicast_packets ||
-    !lps->inx_tx_vport_broadcast_packets ||
-    !lps->inx_rx_wqe_err ||
-    !lps->inx_rx_crc_errors_phy ||
-    !lps->inx_rx_in_range_len_errors_phy) {
-        WARN("Counters are not recognized %s", ifname);
-        return;
-    }
-
-    mlx5_stats_read_hw(dev,&tmp);
-
-    /* copy yo shadow at first time */
-    lps->m_shadow = tmp;
-
-    free(strings);
+       struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
+       unsigned int i;
+       unsigned int n = xstats_n;
+       uint64_t counters[n];
+
+       if (priv_read_dev_counters(priv, counters) < 0)
+               return -1;
+       for (i = 0; i != xstats_n; ++i) {
+               stats[i].id = i;
+               stats[i].value = (counters[i] - xstats_ctrl->base[i]);
+       }
+       return n;
 }
 
-
+/**
+ * Reset device extended statistics.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
 static void
-mlx5_stats_diff(struct rte_eth_stats *a,
-                struct rte_eth_stats *b,
-                struct rte_eth_stats *c){
-    #define MLX5_DIFF(cnt) { a->cnt = (b->cnt - c->cnt);  }
-
-    MLX5_DIFF(ipackets);
-    MLX5_DIFF(opackets); 
-    MLX5_DIFF(ibytes); 
-    MLX5_DIFF(obytes);
-    MLX5_DIFF(imissed);
-
-    MLX5_DIFF(ierrors); 
-    MLX5_DIFF(oerrors); 
-    MLX5_DIFF(rx_nombuf);
+priv_xstats_reset(struct priv *priv)
+{
+       struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
+       unsigned int i;
+       unsigned int n = xstats_n;
+       uint64_t counters[n];
+
+       if (priv_read_dev_counters(priv, counters) < 0)
+               return;
+       for (i = 0; i != n; ++i)
+               xstats_ctrl->base[i] = counters[i];
 }
 
-
-
+/**
+ * DPDK callback to get device statistics.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param[out] stats
+ *   Stats structure output buffer.
+ */
 void
 mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
        struct priv *priv = mlx5_get_priv(dev);
-
-    struct mlx5_stats_priv * lps = &priv->m_stats;
-    priv_lock(priv);
-
-    if (lps->et_stats == NULL) {
-        mlx5_stats_init(dev);
-    }
-    struct rte_eth_stats tmp = {0};
-
-    mlx5_stats_read_hw(dev,&tmp);
-
-    mlx5_stats_diff(stats,
-                    &tmp,
-                    &lps->m_shadow);
-
+       struct rte_eth_stats tmp = {0};
+       unsigned int i;
+       unsigned int idx;
+
+       priv_lock(priv);
+       /* Add software counters. */
+       for (i = 0; (i != priv->rxqs_n); ++i) {
+               struct rxq *rxq = (*priv->rxqs)[i];
+
+               if (rxq == NULL)
+                       continue;
+               idx = rxq->stats.idx;
+               if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+#ifdef MLX5_PMD_SOFT_COUNTERS
+                       tmp.q_ipackets[idx] += rxq->stats.ipackets;
+                       tmp.q_ibytes[idx] += rxq->stats.ibytes;
+#endif
+                       tmp.q_errors[idx] += (rxq->stats.idropped +
+                                             rxq->stats.rx_nombuf);
+               }
+#ifdef MLX5_PMD_SOFT_COUNTERS
+               tmp.ipackets += rxq->stats.ipackets;
+               tmp.ibytes += rxq->stats.ibytes;
+#endif
+               tmp.ierrors += rxq->stats.idropped;
+               tmp.rx_nombuf += rxq->stats.rx_nombuf;
+       }
+       for (i = 0; (i != priv->txqs_n); ++i) {
+               struct txq *txq = (*priv->txqs)[i];
+
+               if (txq == NULL)
+                       continue;
+               idx = txq->stats.idx;
+               if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+#ifdef MLX5_PMD_SOFT_COUNTERS
+                       tmp.q_opackets[idx] += txq->stats.opackets;
+                       tmp.q_obytes[idx] += txq->stats.obytes;
+#endif
+                       tmp.q_errors[idx] += txq->stats.odropped;
+               }
+#ifdef MLX5_PMD_SOFT_COUNTERS
+               tmp.opackets += txq->stats.opackets;
+               tmp.obytes += txq->stats.obytes;
+#endif
+               tmp.oerrors += txq->stats.odropped;
+       }
+#ifndef MLX5_PMD_SOFT_COUNTERS
+       /* FIXME: retrieve and add hardware counters. */
+#endif
+       *stats = tmp;
        priv_unlock(priv);
 }
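
Illustrative note (not part of the patch): from the application side this callback is reached through rte_eth_stats_get(). A minimal sketch of a caller, assuming the port is already configured and started; the helper name is made up for the example:

#include <inttypes.h>
#include <stdio.h>
#include <string.h>
#include <rte_ethdev.h>

static void
sketch_print_basic_stats(uint8_t port_id)
{
	struct rte_eth_stats st;

	memset(&st, 0, sizeof(st));
	rte_eth_stats_get(port_id, &st);
	printf("rx %" PRIu64 " pkts / %" PRIu64 " bytes, errors %" PRIu64 "\n",
	       st.ipackets, st.ibytes, st.ierrors);
	/* Per-queue counters are only maintained for the first
	 * RTE_ETHDEV_QUEUE_STAT_CNTRS queues, as enforced above. */
	printf("rxq0 %" PRIu64 " pkts\n", st.q_ipackets[0]);
}
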
 
@@ -294,20 +370,103 @@ void
 mlx5_stats_reset(struct rte_eth_dev *dev)
 {
        struct priv *priv = dev->data->dev_private;
-    struct mlx5_stats_priv * lps = &priv->m_stats;
-
-    priv_lock(priv);
-
-    if (lps->et_stats == NULL) {
-        mlx5_stats_init(dev);
-    }
-    struct rte_eth_stats tmp = {0};
-
+       unsigned int i;
+       unsigned int idx;
+
+       priv_lock(priv);
+       for (i = 0; (i != priv->rxqs_n); ++i) {
+               if ((*priv->rxqs)[i] == NULL)
+                       continue;
+               idx = (*priv->rxqs)[i]->stats.idx;
+               (*priv->rxqs)[i]->stats =
+                       (struct mlx5_rxq_stats){ .idx = idx };
+       }
+       for (i = 0; (i != priv->txqs_n); ++i) {
+               if ((*priv->txqs)[i] == NULL)
+                       continue;
+               idx = (*priv->txqs)[i]->stats.idx;
+               (*priv->txqs)[i]->stats =
+                       (struct mlx5_txq_stats){ .idx = idx };
+       }
+#ifndef MLX5_PMD_SOFT_COUNTERS
+       /* FIXME: reset hardware counters. */
+#endif
+       priv_unlock(priv);
+}
 
-    mlx5_stats_read_hw(dev,&tmp);
+/**
+ * DPDK callback to get extended device statistics.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param[out] stats
+ *   Stats table output buffer.
+ * @param n
+ *   The size of the stats table.
+ *
+ * @return
+ *   Number of xstats on success, negative on failure.
+ */
+int
+mlx5_xstats_get(struct rte_eth_dev *dev,
+               struct rte_eth_xstat *stats, unsigned int n)
+{
+       struct priv *priv = mlx5_get_priv(dev);
+       int ret = xstats_n;
+
+       if (n >= xstats_n && stats) {
+               priv_lock(priv);
+               ret = priv_xstats_get(priv, stats);
+               priv_unlock(priv);
+       }
+       return ret;
+}
 
-    /* copy to shadow */
-    lps->m_shadow = tmp;
+/**
+ * DPDK callback to clear device extended statistics.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+void
+mlx5_xstats_reset(struct rte_eth_dev *dev)
+{
+       struct priv *priv = mlx5_get_priv(dev);
 
+       priv_lock(priv);
+       priv_xstats_reset(priv);
        priv_unlock(priv);
 }
+
+/**
+ * DPDK callback to retrieve names of extended device statistics
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param[out] xstats_names
+ *   Buffer to insert names into.
+ * @param n
+ *   Number of names.
+ *
+ * @return
+ *   Number of xstats names.
+ */
+int
+mlx5_xstats_get_names(struct rte_eth_dev *dev,
+               struct rte_eth_xstat_name *xstats_names, unsigned int n)
+{
+       struct priv *priv = mlx5_get_priv(dev);
+       unsigned int i;
+
+       if (n >= xstats_n && xstats_names) {
+               priv_lock(priv);
+               for (i = 0; i != xstats_n; ++i) {
+                       strncpy(xstats_names[i].name,
+                               mlx5_counters_init[i].dpdk_name,
+                               RTE_ETH_XSTATS_NAME_SIZE);
+                       xstats_names[i].name[RTE_ETH_XSTATS_NAME_SIZE - 1] = 0;
+               }
+               priv_unlock(priv);
+       }
+       return xstats_n;
+}
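
Illustrative note (not part of the patch): taken together, mlx5_xstats_get_names() and mlx5_xstats_get() follow the usual ethdev contract: when the supplied table is too small (or NULL) they return the required count, xstats_n, without filling anything. A sketch of an application-side dump loop built on that contract; the helper name is made up and error handling is minimal:

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <rte_ethdev.h>

static void
sketch_dump_xstats(uint8_t port_id)
{
	int i, n;
	struct rte_eth_xstat_name *names;
	struct rte_eth_xstat *vals;

	/* First pass: learn how many entries the PMD exposes. */
	n = rte_eth_xstats_get_names(port_id, NULL, 0);
	if (n <= 0)
		return;
	names = calloc(n, sizeof(*names));
	vals = calloc(n, sizeof(*vals));
	if (names != NULL && vals != NULL &&
	    rte_eth_xstats_get_names(port_id, names, n) == n &&
	    rte_eth_xstats_get(port_id, vals, n) == n) {
		for (i = 0; i < n; i++)
			printf("%s: %" PRIu64 "\n",
			       names[vals[i].id].name, vals[i].value);
	}
	free(names);
	free(vals);
}
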
index d4dccd8..30addd2 100644 (file)
@@ -90,6 +90,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
        if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_NONE)
                priv_fdir_enable(priv);
        priv_dev_interrupt_handler_install(priv, dev);
+       err = priv_flow_start(priv);
+       priv_xstats_init(priv);
        priv_unlock(priv);
        return -err;
 }
@@ -120,6 +122,7 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
        priv_mac_addrs_disable(priv);
        priv_destroy_hash_rxqs(priv);
        priv_fdir_disable(priv);
+       priv_flow_stop(priv);
        priv_dev_interrupt_handler_uninstall(priv, dev);
        priv->started = 0;
        priv_unlock(priv);
index 053665d..949035b 100644 (file)
@@ -82,7 +82,9 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
        for (i = 0; (i != elts_n); ++i)
                (*txq_ctrl->txq.elts)[i] = NULL;
        for (i = 0; (i != (1u << txq_ctrl->txq.wqe_n)); ++i) {
-               volatile struct mlx5_wqe64 *wqe = &(*txq_ctrl->txq.wqes)[i];
+               volatile struct mlx5_wqe64 *wqe =
+                       (volatile struct mlx5_wqe64 *)
+                       txq_ctrl->txq.wqes + i;
 
                memset((void *)(uintptr_t)wqe, 0x0, sizeof(*wqe));
        }
@@ -214,14 +216,10 @@ txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
        }
        tmpl->txq.cqe_n = log2above(ibcq->cqe);
        tmpl->txq.qp_num_8s = qp->ctrl_seg.qp_num << 8;
-       tmpl->txq.wqes =
-               (volatile struct mlx5_wqe64 (*)[])
-               (uintptr_t)qp->gen_data.sqstart;
+       tmpl->txq.wqes = qp->gen_data.sqstart;
        tmpl->txq.wqe_n = log2above(qp->sq.wqe_cnt);
        tmpl->txq.qp_db = &qp->gen_data.db[MLX5_SND_DBR];
        tmpl->txq.bf_reg = qp->gen_data.bf->reg;
-       tmpl->txq.bf_offset = qp->gen_data.bf->offset;
-       tmpl->txq.bf_buf_size = log2above(qp->gen_data.bf->buf_size);
        tmpl->txq.cq_db = cq->dbrec;
        tmpl->txq.cqes =
                (volatile struct mlx5_cqe (*)[])
@@ -412,7 +410,7 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
                .obj = tmpl.qp,
                /* Enable multi-packet send if supported. */
                .family_flags =
-                       ((priv->mps && !priv->sriov) ?
+                       (priv->mps ?
                         IBV_EXP_QP_BURST_CREATE_ENABLE_MULTI_PACKET_SEND_WR :
                         0),
        };
index 7a24884..57203e2 100644 (file)
@@ -35,7 +35,7 @@
 #include <rte_ethdev.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
-#include <rte_dev.h>
+#include <rte_vdev.h>
 #include <rte_kvargs.h>
 #include <rte_spinlock.h>
 
@@ -88,7 +88,6 @@ struct pmd_internals {
 
 
 static struct ether_addr eth_addr = { .addr_bytes = {0} };
-static const char *drivername = "Null PMD";
 static struct rte_eth_link pmd_link = {
        .link_speed = ETH_SPEED_NUM_10G,
        .link_duplex = ETH_LINK_FULL_DUPLEX,
@@ -295,13 +294,11 @@ eth_dev_info(struct rte_eth_dev *dev,
                return;
 
        internals = dev->data->dev_private;
-       dev_info->driver_name = drivername;
        dev_info->max_mac_addrs = 1;
        dev_info->max_rx_pktlen = (uint32_t)-1;
        dev_info->max_rx_queues = RTE_DIM(internals->rx_null_queues);
        dev_info->max_tx_queues = RTE_DIM(internals->tx_null_queues);
        dev_info->min_rx_bufsize = 0;
-       dev_info->pci_dev = NULL;
        dev_info->reta_size = internals->reta_size;
        dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
 }
@@ -480,6 +477,8 @@ static const struct eth_dev_ops ops = {
        .rss_hash_conf_get = eth_rss_hash_conf_get
 };
 
+static struct rte_vdev_driver pmd_null_drv;
+
 int
 eth_dev_null_create(const char *name,
                const unsigned numa_node,
@@ -517,7 +516,7 @@ eth_dev_null_create(const char *name,
                goto error;
 
        /* reserve an ethdev entry */
-       eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
+       eth_dev = rte_eth_dev_allocate(name);
        if (eth_dev == NULL)
                goto error;
 
@@ -550,12 +549,10 @@ eth_dev_null_create(const char *name,
        eth_dev->data = data;
        eth_dev->dev_ops = &ops;
 
-       TAILQ_INIT(&eth_dev->link_intr_cbs);
-
        eth_dev->driver = NULL;
        data->dev_flags = RTE_ETH_DEV_DETACHABLE;
        data->kdrv = RTE_KDRV_NONE;
-       data->drv_name = drivername;
+       data->drv_name = pmd_null_drv.driver.name;
        data->numa_node = numa_node;
 
        /* finally assign rx and tx ops */
@@ -611,7 +608,7 @@ get_packet_copy_arg(const char *key __rte_unused,
 }
 
 static int
-rte_pmd_null_devinit(const char *name, const char *params)
+rte_pmd_null_probe(const char *name, const char *params)
 {
        unsigned numa_node;
        unsigned packet_size = default_packet_size;
@@ -663,7 +660,7 @@ free_kvlist:
 }
 
 static int
-rte_pmd_null_devuninit(const char *name)
+rte_pmd_null_remove(const char *name)
 {
        struct rte_eth_dev *eth_dev = NULL;
 
@@ -686,13 +683,13 @@ rte_pmd_null_devuninit(const char *name)
        return 0;
 }
 
-static struct rte_driver pmd_null_drv = {
-       .type = PMD_VDEV,
-       .init = rte_pmd_null_devinit,
-       .uninit = rte_pmd_null_devuninit,
+static struct rte_vdev_driver pmd_null_drv = {
+       .probe = rte_pmd_null_probe,
+       .remove = rte_pmd_null_remove,
 };
 
-PMD_REGISTER_DRIVER(pmd_null_drv, eth_null);
-DRIVER_REGISTER_PARAM_STRING(eth_null,
+RTE_PMD_REGISTER_VDEV(net_null, pmd_null_drv);
+RTE_PMD_REGISTER_ALIAS(net_null, eth_null);
+RTE_PMD_REGISTER_PARAM_STRING(net_null,
        "size=<int> "
        "copy=<int>");
index a7048c7..6f9cc1a 100644 (file)
@@ -38,7 +38,7 @@
 #include <rte_memcpy.h>
 #include <rte_memzone.h>
 #include <rte_string_fns.h>
-#include <rte_dev.h>
+#include <rte_vdev.h>
 #include <rte_kvargs.h>
 #include <rte_errno.h>
 
@@ -75,7 +75,6 @@ struct pmd_internals {
 };
 
 
-static const char *drivername = "Rings PMD";
 static struct rte_eth_link pmd_link = {
                .link_speed = ETH_SPEED_NUM_10G,
                .link_duplex = ETH_LINK_FULL_DUPLEX,
@@ -173,13 +172,11 @@ eth_dev_info(struct rte_eth_dev *dev,
                struct rte_eth_dev_info *dev_info)
 {
        struct pmd_internals *internals = dev->data->dev_private;
-       dev_info->driver_name = drivername;
        dev_info->max_mac_addrs = 1;
        dev_info->max_rx_pktlen = (uint32_t)-1;
        dev_info->max_rx_queues = (uint16_t)internals->max_rx_queues;
        dev_info->max_tx_queues = (uint16_t)internals->max_tx_queues;
        dev_info->min_rx_bufsize = 0;
-       dev_info->pci_dev = NULL;
 }
 
 static void
@@ -259,6 +256,8 @@ static const struct eth_dev_ops ops = {
        .mac_addr_add = eth_mac_addr_add,
 };
 
+static struct rte_vdev_driver pmd_ring_drv;
+
 static int
 do_eth_dev_ring_create(const char *name,
                struct rte_ring * const rx_queues[], const unsigned nb_rx_queues,
@@ -303,7 +302,7 @@ do_eth_dev_ring_create(const char *name,
        }
 
        /* reserve an ethdev entry */
-       eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
+       eth_dev = rte_eth_dev_allocate(name);
        if (eth_dev == NULL) {
                rte_errno = ENOSPC;
                goto error;
@@ -343,11 +342,9 @@ do_eth_dev_ring_create(const char *name,
        eth_dev->dev_ops = &ops;
        data->dev_flags = RTE_ETH_DEV_DETACHABLE;
        data->kdrv = RTE_KDRV_NONE;
-       data->drv_name = drivername;
+       data->drv_name = pmd_ring_drv.driver.name;
        data->numa_node = numa_node;
 
-       TAILQ_INIT(&(eth_dev->link_intr_cbs));
-
        /* finally assign rx and tx ops */
        eth_dev->rx_pkt_burst = eth_ring_rx;
        eth_dev->tx_pkt_burst = eth_ring_tx;
@@ -505,7 +502,7 @@ out:
 }
 
 static int
-rte_pmd_ring_devinit(const char *name, const char *params)
+rte_pmd_ring_probe(const char *name, const char *params)
 {
        struct rte_kvargs *kvlist = NULL;
        int ret = 0;
@@ -557,7 +554,7 @@ rte_pmd_ring_devinit(const char *name, const char *params)
                                goto out_free;
 
                        for (info->count = 0; info->count < info->total; info->count++) {
-                               ret = eth_dev_ring_create(name,
+                               ret = eth_dev_ring_create(info->list[info->count].name,
                                                          info->list[info->count].node,
                                                          info->list[info->count].action);
                                if ((ret == -1) &&
@@ -580,7 +577,7 @@ out_free:
 }
 
 static int
-rte_pmd_ring_devuninit(const char *name)
+rte_pmd_ring_remove(const char *name)
 {
        struct rte_eth_dev *eth_dev = NULL;
        struct pmd_internals *internals = NULL;
@@ -599,36 +596,34 @@ rte_pmd_ring_devuninit(const char *name)
 
        eth_dev_stop(eth_dev);
 
-       if (eth_dev->data) {
-               internals = eth_dev->data->dev_private;
-               if (internals->action == DEV_CREATE) {
-                       /*
-                        * it is only necessary to delete the rings in rx_queues because
-                        * they are the same used in tx_queues
-                        */
-                       for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
-                               r = eth_dev->data->rx_queues[i];
-                               rte_ring_free(r->rng);
-                       }
+       internals = eth_dev->data->dev_private;
+       if (internals->action == DEV_CREATE) {
+               /*
+                * it is only necessary to delete the rings in rx_queues because
+                * they are the same used in tx_queues
+                */
+               for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+                       r = eth_dev->data->rx_queues[i];
+                       rte_ring_free(r->rng);
                }
-
-               rte_free(eth_dev->data->rx_queues);
-               rte_free(eth_dev->data->tx_queues);
-               rte_free(eth_dev->data->dev_private);
        }
 
+       rte_free(eth_dev->data->rx_queues);
+       rte_free(eth_dev->data->tx_queues);
+       rte_free(eth_dev->data->dev_private);
+
        rte_free(eth_dev->data);
 
        rte_eth_dev_release_port(eth_dev);
        return 0;
 }
 
-static struct rte_driver pmd_ring_drv = {
-       .type = PMD_VDEV,
-       .init = rte_pmd_ring_devinit,
-       .uninit = rte_pmd_ring_devuninit,
+static struct rte_vdev_driver pmd_ring_drv = {
+       .probe = rte_pmd_ring_probe,
+       .remove = rte_pmd_ring_remove,
 };
 
-PMD_REGISTER_DRIVER(pmd_ring_drv, eth_ring);
-DRIVER_REGISTER_PARAM_STRING(eth_ring,
-       "nodeaction=[attach|detach]");
+RTE_PMD_REGISTER_VDEV(net_ring, pmd_ring_drv);
+RTE_PMD_REGISTER_ALIAS(net_ring, eth_ring);
+RTE_PMD_REGISTER_PARAM_STRING(net_ring,
+       ETH_RING_NUMA_NODE_ACTION_ARG "=name:node:action(ATTACH|CREATE)");
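
Illustrative note (not part of the patch): besides the vdev arguments, this PMD is commonly driven programmatically through rte_eth_from_rings(). A minimal sketch, assuming the EAL is already initialized; ring size, names and the single shared queue are illustrative choices:

#include <rte_ring.h>
#include <rte_lcore.h>
#include <rte_eth_ring.h>

static int
sketch_make_ring_port(void)
{
	struct rte_ring *r;
	struct rte_ring *q[1];

	r = rte_ring_create("sketch_ring", 1024, rte_socket_id(),
			    RING_F_SP_ENQ | RING_F_SC_DEQ);
	if (r == NULL)
		return -1;
	q[0] = r;
	/* The same ring backs both rx and tx, which the driver allows. */
	return rte_eth_from_rings("net_ring_sketch", q, 1, q, 1,
				  rte_socket_id());
}
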
index 483d789..fe7a6b3 100644 (file)
@@ -62,7 +62,7 @@
  */
 #define RTE_SZE2_PACKET_HEADER_SIZE_ALIGNED 8
 
-#define RTE_SZEDATA2_DRIVER_NAME rte_szedata2_pmd
+#define RTE_SZEDATA2_DRIVER_NAME net_szedata2
 #define RTE_SZEDATA2_PCI_DRIVER_NAME "rte_szedata2_pmd"
 
 #define SZEDATA2_DEV_PATH_FMT "/dev/szedataII%u"
@@ -91,6 +91,7 @@ struct pmd_internals {
        uint16_t max_rx_queues;
        uint16_t max_tx_queues;
        char sze_dev[PATH_MAX];
+       struct rte_mem_resource *pci_rsc;
 };
 
 static struct ether_addr eth_addr = {
@@ -1030,6 +1031,7 @@ eth_dev_info(struct rte_eth_dev *dev,
                struct rte_eth_dev_info *dev_info)
 {
        struct pmd_internals *internals = dev->data->dev_private;
+       dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device);
        dev_info->if_index = 0;
        dev_info->max_mac_addrs = 1;
        dev_info->max_rx_pktlen = (uint32_t)-1;
@@ -1144,8 +1146,10 @@ eth_link_update(struct rte_eth_dev *dev,
        struct rte_eth_link link;
        struct rte_eth_link *link_ptr = &link;
        struct rte_eth_link *dev_link = &dev->data->dev_link;
+       struct pmd_internals *internals = (struct pmd_internals *)
+               dev->data->dev_private;
        volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR(
-                       dev, SZEDATA2_CGMII_IBUF_BASE_OFF,
+                       internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF,
                        volatile struct szedata2_cgmii_ibuf *);
 
        switch (cgmii_link_speed(ibuf)) {
@@ -1180,11 +1184,13 @@ eth_link_update(struct rte_eth_dev *dev,
 static int
 eth_dev_set_link_up(struct rte_eth_dev *dev)
 {
+       struct pmd_internals *internals = (struct pmd_internals *)
+               dev->data->dev_private;
        volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR(
-                       dev, SZEDATA2_CGMII_IBUF_BASE_OFF,
+                       internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF,
                        volatile struct szedata2_cgmii_ibuf *);
        volatile struct szedata2_cgmii_obuf *obuf = SZEDATA2_PCI_RESOURCE_PTR(
-                       dev, SZEDATA2_CGMII_OBUF_BASE_OFF,
+                       internals->pci_rsc, SZEDATA2_CGMII_OBUF_BASE_OFF,
                        volatile struct szedata2_cgmii_obuf *);
 
        cgmii_ibuf_enable(ibuf);
@@ -1195,11 +1201,13 @@ eth_dev_set_link_up(struct rte_eth_dev *dev)
 static int
 eth_dev_set_link_down(struct rte_eth_dev *dev)
 {
+       struct pmd_internals *internals = (struct pmd_internals *)
+               dev->data->dev_private;
        volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR(
-                       dev, SZEDATA2_CGMII_IBUF_BASE_OFF,
+                       internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF,
                        volatile struct szedata2_cgmii_ibuf *);
        volatile struct szedata2_cgmii_obuf *obuf = SZEDATA2_PCI_RESOURCE_PTR(
-                       dev, SZEDATA2_CGMII_OBUF_BASE_OFF,
+                       internals->pci_rsc, SZEDATA2_CGMII_OBUF_BASE_OFF,
                        volatile struct szedata2_cgmii_obuf *);
 
        cgmii_ibuf_disable(ibuf);
@@ -1281,8 +1289,10 @@ eth_mac_addr_set(struct rte_eth_dev *dev __rte_unused,
 static void
 eth_promiscuous_enable(struct rte_eth_dev *dev)
 {
+       struct pmd_internals *internals = (struct pmd_internals *)
+               dev->data->dev_private;
        volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR(
-                       dev, SZEDATA2_CGMII_IBUF_BASE_OFF,
+                       internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF,
                        volatile struct szedata2_cgmii_ibuf *);
        cgmii_ibuf_mac_mode_write(ibuf, SZEDATA2_MAC_CHMODE_PROMISC);
 }
@@ -1290,8 +1300,10 @@ eth_promiscuous_enable(struct rte_eth_dev *dev)
 static void
 eth_promiscuous_disable(struct rte_eth_dev *dev)
 {
+       struct pmd_internals *internals = (struct pmd_internals *)
+               dev->data->dev_private;
        volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR(
-                       dev, SZEDATA2_CGMII_IBUF_BASE_OFF,
+                       internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF,
                        volatile struct szedata2_cgmii_ibuf *);
        cgmii_ibuf_mac_mode_write(ibuf, SZEDATA2_MAC_CHMODE_ONLY_VALID);
 }
@@ -1299,8 +1311,10 @@ eth_promiscuous_disable(struct rte_eth_dev *dev)
 static void
 eth_allmulticast_enable(struct rte_eth_dev *dev)
 {
+       struct pmd_internals *internals = (struct pmd_internals *)
+               dev->data->dev_private;
        volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR(
-                       dev, SZEDATA2_CGMII_IBUF_BASE_OFF,
+                       internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF,
                        volatile struct szedata2_cgmii_ibuf *);
        cgmii_ibuf_mac_mode_write(ibuf, SZEDATA2_MAC_CHMODE_ALL_MULTICAST);
 }
@@ -1308,8 +1322,10 @@ eth_allmulticast_enable(struct rte_eth_dev *dev)
 static void
 eth_allmulticast_disable(struct rte_eth_dev *dev)
 {
+       struct pmd_internals *internals = (struct pmd_internals *)
+               dev->data->dev_private;
        volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR(
-                       dev, SZEDATA2_CGMII_IBUF_BASE_OFF,
+                       internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF,
                        volatile struct szedata2_cgmii_ibuf *);
        cgmii_ibuf_mac_mode_write(ibuf, SZEDATA2_MAC_CHMODE_ONLY_VALID);
 }
@@ -1349,7 +1365,7 @@ static const struct eth_dev_ops ops = {
  *          -1 on error
  */
 static int
-get_szedata2_index(struct rte_eth_dev *dev, uint32_t *index)
+get_szedata2_index(const struct rte_pci_addr *pcislot_addr, uint32_t *index)
 {
        DIR *dir;
        struct dirent *entry;
@@ -1357,7 +1373,6 @@ get_szedata2_index(struct rte_eth_dev *dev, uint32_t *index)
        uint32_t tmp_index;
        FILE *fd;
        char pcislot_path[PATH_MAX];
-       struct rte_pci_addr pcislot_addr = dev->pci_dev->addr;
        uint32_t domain;
        uint32_t bus;
        uint32_t devid;
@@ -1392,10 +1407,10 @@ get_szedata2_index(struct rte_eth_dev *dev, uint32_t *index)
                if (ret != 4)
                        continue;
 
-               if (pcislot_addr.domain == domain &&
-                               pcislot_addr.bus == bus &&
-                               pcislot_addr.devid == devid &&
-                               pcislot_addr.function == function) {
+               if (pcislot_addr->domain == domain &&
+                               pcislot_addr->bus == bus &&
+                               pcislot_addr->devid == devid &&
+                               pcislot_addr->function == function) {
                        *index = tmp_index;
                        closedir(dir);
                        return 0;
@@ -1415,9 +1430,10 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev)
        struct szedata *szedata_temp;
        int ret;
        uint32_t szedata2_index;
-       struct rte_pci_addr *pci_addr = &dev->pci_dev->addr;
-       struct rte_pci_resource *pci_rsc =
-               &dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER];
+       struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
+       struct rte_pci_addr *pci_addr = &pci_dev->addr;
+       struct rte_mem_resource *pci_rsc =
+               &pci_dev->mem_resource[PCI_RESOURCE_NUMBER];
        char rsc_filename[PATH_MAX];
        void *pci_resource_ptr = NULL;
        int fd;
@@ -1427,7 +1443,7 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev)
                        pci_addr->function);
 
        /* Get index of szedata2 device file and create path to device file */
-       ret = get_szedata2_index(dev, &szedata2_index);
+       ret = get_szedata2_index(pci_addr, &szedata2_index);
        if (ret != 0) {
                RTE_LOG(ERR, PMD, "Failed to get szedata2 device index!\n");
                return -ENODEV;
@@ -1471,10 +1487,10 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev)
        /* Set function callbacks for Ethernet API */
        dev->dev_ops = &ops;
 
-       rte_eth_copy_pci_info(dev, dev->pci_dev);
+       rte_eth_copy_pci_info(dev, pci_dev);
 
-       /* mmap pci resource0 file to rte_pci_resource structure */
-       if (dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].phys_addr ==
+       /* mmap pci resource0 file to rte_mem_resource structure */
+       if (pci_dev->mem_resource[PCI_RESOURCE_NUMBER].phys_addr ==
                        0) {
                RTE_LOG(ERR, PMD, "Missing resource%u file\n",
                                PCI_RESOURCE_NUMBER);
@@ -1491,7 +1507,7 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev)
        }
 
        pci_resource_ptr = mmap(0,
-                       dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].len,
+                       pci_dev->mem_resource[PCI_RESOURCE_NUMBER].len,
                        PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        close(fd);
        if (pci_resource_ptr == NULL) {
@@ -1499,8 +1515,8 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev)
                                rsc_filename, fd);
                return -EINVAL;
        }
-       dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr =
-               pci_resource_ptr;
+       pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr = pci_resource_ptr;
+       internals->pci_rsc = pci_rsc;
 
        RTE_LOG(DEBUG, PMD, "resource%u phys_addr = 0x%llx len = %llu "
                        "virt addr = %llx\n", PCI_RESOURCE_NUMBER,
@@ -1516,8 +1532,8 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev)
                        RTE_CACHE_LINE_SIZE);
        if (data->mac_addrs == NULL) {
                RTE_LOG(ERR, PMD, "Could not alloc space for MAC address!\n");
-               munmap(dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr,
-                       dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].len);
+               munmap(pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr,
+                      pci_dev->mem_resource[PCI_RESOURCE_NUMBER].len);
                return -EINVAL;
        }
 
@@ -1537,12 +1553,13 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev)
 static int
 rte_szedata2_eth_dev_uninit(struct rte_eth_dev *dev)
 {
-       struct rte_pci_addr *pci_addr = &dev->pci_dev->addr;
+       struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
+       struct rte_pci_addr *pci_addr = &pci_dev->addr;
 
        rte_free(dev->data->mac_addrs);
        dev->data->mac_addrs = NULL;
-       munmap(dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr,
-               dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].len);
+       munmap(pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr,
+              pci_dev->mem_resource[PCI_RESOURCE_NUMBER].len);
 
        RTE_LOG(INFO, PMD, "szedata2 device ("
                        PCI_PRI_FMT ") successfully uninitialized\n",
@@ -1572,33 +1589,16 @@ static const struct rte_pci_id rte_szedata2_pci_id_table[] = {
 
 static struct eth_driver szedata2_eth_driver = {
        .pci_drv = {
-               .name     = RTE_SZEDATA2_PCI_DRIVER_NAME,
                .id_table = rte_szedata2_pci_id_table,
+               .probe = rte_eth_dev_pci_probe,
+               .remove = rte_eth_dev_pci_remove,
        },
        .eth_dev_init     = rte_szedata2_eth_dev_init,
        .eth_dev_uninit   = rte_szedata2_eth_dev_uninit,
        .dev_private_size = sizeof(struct pmd_internals),
 };
 
-static int
-rte_szedata2_init(const char *name __rte_unused,
-               const char *args __rte_unused)
-{
-       rte_eth_driver_register(&szedata2_eth_driver);
-       return 0;
-}
-
-static int
-rte_szedata2_uninit(const char *name __rte_unused)
-{
-       return 0;
-}
-
-static struct rte_driver rte_szedata2_driver = {
-       .type = PMD_PDEV,
-       .init = rte_szedata2_init,
-       .uninit = rte_szedata2_uninit,
-};
-
-PMD_REGISTER_DRIVER(rte_szedata2_driver, RTE_SZEDATA2_DRIVER_NAME);
-DRIVER_REGISTER_PCI_TABLE(RTE_SZEDATA2_DRIVER_NAME, rte_szedata2_pci_id_table);
+RTE_PMD_REGISTER_PCI(RTE_SZEDATA2_DRIVER_NAME, szedata2_eth_driver.pci_drv);
+RTE_PMD_REGISTER_PCI_TABLE(RTE_SZEDATA2_DRIVER_NAME, rte_szedata2_pci_id_table);
+RTE_PMD_REGISTER_KMOD_DEP(RTE_SZEDATA2_DRIVER_NAME,
+       "* combo6core & combov3 & szedata2 & szedata2_cv3");
index 522cf47..afe8a38 100644 (file)
@@ -117,94 +117,82 @@ struct szedata {
  * @return Byte from PCI resource at offset "offset".
  */
 static inline uint8_t
-pci_resource_read8(struct rte_eth_dev *dev, uint32_t offset)
+pci_resource_read8(struct rte_mem_resource *rsc, uint32_t offset)
 {
-       return *((uint8_t *)((uint8_t *)
-                       dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr +
-                       offset));
+       return *((uint8_t *)((uint8_t *)rsc->addr + offset));
 }
 
 /*
  * @return Two bytes from PCI resource starting at offset "offset".
  */
 static inline uint16_t
-pci_resource_read16(struct rte_eth_dev *dev, uint32_t offset)
+pci_resource_read16(struct rte_mem_resource *rsc, uint32_t offset)
 {
-       return rte_le_to_cpu_16(*((uint16_t *)((uint8_t *)
-                       dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr +
-                       offset)));
+       return rte_le_to_cpu_16(*((uint16_t *)((uint8_t *)rsc->addr +
+                                              offset)));
 }
 
 /*
  * @return Four bytes from PCI resource starting at offset "offset".
  */
 static inline uint32_t
-pci_resource_read32(struct rte_eth_dev *dev, uint32_t offset)
+pci_resource_read32(struct rte_mem_resource *rsc, uint32_t offset)
 {
-       return rte_le_to_cpu_32(*((uint32_t *)((uint8_t *)
-                       dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr +
-                       offset)));
+       return rte_le_to_cpu_32(*((uint32_t *)((uint8_t *)rsc->addr +
+                                              offset)));
 }
 
 /*
  * @return Eight bytes from PCI resource starting at offset "offset".
  */
 static inline uint64_t
-pci_resource_read64(struct rte_eth_dev *dev, uint32_t offset)
+pci_resource_read64(struct rte_mem_resource *rsc, uint32_t offset)
 {
-       return rte_le_to_cpu_64(*((uint64_t *)((uint8_t *)
-                       dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr +
-                       offset)));
+       return rte_le_to_cpu_64(*((uint64_t *)((uint8_t *)rsc->addr +
+                                              offset)));
 }
 
 /*
  * Write one byte to PCI resource address space at offset "offset".
  */
 static inline void
-pci_resource_write8(struct rte_eth_dev *dev, uint32_t offset, uint8_t val)
+pci_resource_write8(struct rte_mem_resource *rsc, uint32_t offset, uint8_t val)
 {
-       *((uint8_t *)((uint8_t *)
-                       dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr +
-                       offset)) = val;
+       *((uint8_t *)((uint8_t *)rsc->addr + offset)) = val;
 }
 
 /*
  * Write two bytes to PCI resource address space at offset "offset".
  */
 static inline void
-pci_resource_write16(struct rte_eth_dev *dev, uint32_t offset, uint16_t val)
+pci_resource_write16(struct rte_mem_resource *rsc, uint32_t offset,
+                    uint16_t val)
 {
-       *((uint16_t *)((uint8_t *)
-                       dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr +
-                       offset)) = rte_cpu_to_le_16(val);
+       *((uint16_t *)((uint8_t *)rsc->addr + offset)) = rte_cpu_to_le_16(val);
 }
 
 /*
  * Write four bytes to PCI resource address space at offset "offset".
  */
 static inline void
-pci_resource_write32(struct rte_eth_dev *dev, uint32_t offset, uint32_t val)
+pci_resource_write32(struct rte_mem_resource *rsc, uint32_t offset,
+                    uint32_t val)
 {
-       *((uint32_t *)((uint8_t *)
-                       dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr +
-                       offset)) = rte_cpu_to_le_32(val);
+       *((uint32_t *)((uint8_t *)rsc->addr + offset)) = rte_cpu_to_le_32(val);
 }
 
 /*
  * Write eight bytes to PCI resource address space at offset "offset".
  */
 static inline void
-pci_resource_write64(struct rte_eth_dev *dev, uint32_t offset, uint64_t val)
+pci_resource_write64(struct rte_mem_resource *rsc, uint32_t offset,
+                    uint64_t val)
 {
-       *((uint64_t *)((uint8_t *)
-                       dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr +
-                       offset)) = rte_cpu_to_le_64(val);
+       *((uint64_t *)((uint8_t *)rsc->addr + offset)) = rte_cpu_to_le_64(val);
 }
 
-#define SZEDATA2_PCI_RESOURCE_PTR(dev, offset, type) \
-       ((type)((uint8_t *) \
-       ((dev)->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr) \
-       + (offset)))
+#define SZEDATA2_PCI_RESOURCE_PTR(rsc, offset, type) \
+       ((type)(((uint8_t *)(rsc)->addr) + (offset)))
 
 enum szedata2_link_speed {
        SZEDATA2_LINK_SPEED_DEFAULT = 0,
index 35e67b9..1d572b5 100644 (file)
@@ -103,7 +103,8 @@ static int virtio_dev_queue_stats_mapping_set(
  * The set of PCI devices this driver supports
  */
 static const struct rte_pci_id pci_id_virtio_map[] = {
-       { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_DEVICEID_MIN) },
+       { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_LEGACY_DEVICEID_NET) },
+       { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_MODERN_DEVICEID_NET) },
        { .vendor_id = 0, /* sentinel */ },
 };
 
@@ -125,8 +126,8 @@ static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
        {"size_128_255_packets",   offsetof(struct virtnet_rx, stats.size_bins[3])},
        {"size_256_511_packets",   offsetof(struct virtnet_rx, stats.size_bins[4])},
        {"size_512_1023_packets",  offsetof(struct virtnet_rx, stats.size_bins[5])},
-       {"size_1024_1517_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
-       {"size_1518_max_packets",  offsetof(struct virtnet_rx, stats.size_bins[7])},
+       {"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
+       {"size_1519_max_packets",  offsetof(struct virtnet_rx, stats.size_bins[7])},
 };
 
 /* [rt]x_qX_ is prepended to the name string here */
@@ -142,8 +143,8 @@ static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
        {"size_128_255_packets",   offsetof(struct virtnet_tx, stats.size_bins[3])},
        {"size_256_511_packets",   offsetof(struct virtnet_tx, stats.size_bins[4])},
        {"size_512_1023_packets",  offsetof(struct virtnet_tx, stats.size_bins[5])},
-       {"size_1024_1517_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
-       {"size_1518_max_packets",  offsetof(struct virtnet_tx, stats.size_bins[7])},
+       {"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
+       {"size_1519_max_packets",  offsetof(struct virtnet_tx, stats.size_bins[7])},
 };
 
 #define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \
@@ -151,6 +152,8 @@ static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
 #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
                            sizeof(rte_virtio_txq_stat_strings[0]))
 
+struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
+
 static int
 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
                int *dlen, int pkt_num)
@@ -279,28 +282,65 @@ virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
        return 0;
 }
 
-void
-virtio_dev_queue_release(struct virtqueue *vq)
+static void
+virtio_dev_queue_release(void *queue __rte_unused)
 {
-       struct virtio_hw *hw;
+       /* do nothing */
+}
 
-       if (vq) {
-               hw = vq->hw;
-               if (vq->configured)
-                       hw->vtpci_ops->del_queue(hw, vq);
+static int
+virtio_get_queue_type(struct virtio_hw *hw, uint16_t vtpci_queue_idx)
+{
+       if (vtpci_queue_idx == hw->max_queue_pairs * 2)
+               return VTNET_CQ;
+       else if (vtpci_queue_idx % 2 == 0)
+               return VTNET_RQ;
+       else
+               return VTNET_TQ;
+}
 
-               rte_free(vq->sw_ring);
-               rte_free(vq);
-       }
+static uint16_t
+virtio_get_nr_vq(struct virtio_hw *hw)
+{
+       uint16_t nr_vq = hw->max_queue_pairs * 2;
+
+       if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
+               nr_vq += 1;
+
+       return nr_vq;
+}
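
Illustrative note (not part of the patch): virtio_get_queue_type() and virtio_get_nr_vq() encode the fixed virtio-net queue layout: even indices are receive queues, odd indices are transmit queues, and the optional control queue sits at 2 * max_queue_pairs when VIRTIO_NET_F_CTRL_VQ is negotiated. A small sketch of the same mapping from the other direction; the helper names are made up:

static inline unsigned int
sketch_rxq_to_vq(unsigned int rxq_idx)
{
	return 2 * rxq_idx;		/* even slots: VTNET_RQ */
}

static inline unsigned int
sketch_txq_to_vq(unsigned int txq_idx)
{
	return 2 * txq_idx + 1;		/* odd slots: VTNET_TQ */
}

static inline unsigned int
sketch_cq_vq(unsigned int max_queue_pairs)
{
	return 2 * max_queue_pairs;	/* last slot, control queue */
}
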
+
+static void
+virtio_init_vring(struct virtqueue *vq)
+{
+       int size = vq->vq_nentries;
+       struct vring *vr = &vq->vq_ring;
+       uint8_t *ring_mem = vq->vq_ring_virt_mem;
+
+       PMD_INIT_FUNC_TRACE();
+
+       /*
+        * Reinitialise since virtio port might have been stopped and restarted
+        */
+       memset(ring_mem, 0, vq->vq_ring_size);
+       vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN);
+       vq->vq_used_cons_idx = 0;
+       vq->vq_desc_head_idx = 0;
+       vq->vq_avail_idx = 0;
+       vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
+       vq->vq_free_cnt = vq->vq_nentries;
+       memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
+
+       vring_desc_init(vr->desc, size);
+
+       /*
+        * Disable device(host) interrupting guest
+        */
+       virtqueue_disable_intr(vq);
 }
 
-int virtio_dev_queue_setup(struct rte_eth_dev *dev,
-                       int queue_type,
-                       uint16_t queue_idx,
-                       uint16_t vtpci_queue_idx,
-                       uint16_t nb_desc,
-                       unsigned int socket_id,
-                       void **pvq)
+static int
+virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
 {
        char vq_name[VIRTQUEUE_MAX_NAME_SZ];
        char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
@@ -311,9 +351,9 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
        struct virtnet_tx *txvq = NULL;
        struct virtnet_ctl *cvq = NULL;
        struct virtqueue *vq;
-       const char *queue_names[] = {"rvq", "txq", "cvq"};
-       size_t sz_vq, sz_q = 0, sz_hdr_mz = 0;
+       size_t sz_hdr_mz = 0;
        void *sw_ring = NULL;
+       int queue_type = virtio_get_queue_type(hw, vtpci_queue_idx);
        int ret;
 
        PMD_INIT_LOG(DEBUG, "setting up queue: %u", vtpci_queue_idx);
@@ -322,8 +362,8 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
         * Read the virtqueue size from the Queue Size field
         * Always power of 2 and if 0 virtqueue does not exist
         */
-       vq_size = hw->vtpci_ops->get_queue_num(hw, vtpci_queue_idx);
-       PMD_INIT_LOG(DEBUG, "vq_size: %u nb_desc:%u", vq_size, nb_desc);
+       vq_size = VTPCI_OPS(hw)->get_queue_num(hw, vtpci_queue_idx);
+       PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
        if (vq_size == 0) {
                PMD_INIT_LOG(ERR, "virtqueue does not exist");
                return -EINVAL;
@@ -334,40 +374,35 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
                return -EINVAL;
        }
 
-       snprintf(vq_name, sizeof(vq_name), "port%d_%s%d",
-                dev->data->port_id, queue_names[queue_type], queue_idx);
+       snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
+                dev->data->port_id, vtpci_queue_idx);
 
-       sz_vq = RTE_ALIGN_CEIL(sizeof(*vq) +
+       size = RTE_ALIGN_CEIL(sizeof(*vq) +
                                vq_size * sizeof(struct vq_desc_extra),
                                RTE_CACHE_LINE_SIZE);
-       if (queue_type == VTNET_RQ) {
-               sz_q = sz_vq + sizeof(*rxvq);
-       } else if (queue_type == VTNET_TQ) {
-               sz_q = sz_vq + sizeof(*txvq);
+       if (queue_type == VTNET_TQ) {
                /*
                 * For each xmit packet, allocate a virtio_net_hdr
                 * and indirect ring elements
                 */
                sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
        } else if (queue_type == VTNET_CQ) {
-               sz_q = sz_vq + sizeof(*cvq);
                /* Allocate a page for control vq command, data and status */
                sz_hdr_mz = PAGE_SIZE;
        }
 
-       vq = rte_zmalloc_socket(vq_name, sz_q, RTE_CACHE_LINE_SIZE, socket_id);
+       vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
+                               SOCKET_ID_ANY);
        if (vq == NULL) {
                PMD_INIT_LOG(ERR, "can not allocate vq");
                return -ENOMEM;
        }
+       hw->vqs[vtpci_queue_idx] = vq;
+
        vq->hw = hw;
        vq->vq_queue_index = vtpci_queue_idx;
        vq->vq_nentries = vq_size;
 
-       if (nb_desc == 0 || nb_desc > vq_size)
-               nb_desc = vq_size;
-       vq->vq_free_cnt = nb_desc;
-
        /*
         * Reserve a memzone for vring elements
         */
@@ -376,7 +411,8 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
        PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
                     size, vq->vq_ring_size);
 
-       mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size, socket_id,
+       mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
+                                        SOCKET_ID_ANY,
                                         0, VIRTIO_PCI_VRING_ALIGN);
        if (mz == NULL) {
                if (rte_errno == EEXIST)
@@ -396,12 +432,13 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
        PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%" PRIx64,
                     (uint64_t)(uintptr_t)mz->addr);
 
+       virtio_init_vring(vq);
+
        if (sz_hdr_mz) {
-               snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_%s%d_hdr",
-                        dev->data->port_id, queue_names[queue_type],
-                        queue_idx);
+               snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
+                        dev->data->port_id, vtpci_queue_idx);
                hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
-                                                    socket_id, 0,
+                                                    SOCKET_ID_ANY, 0,
                                                     RTE_CACHE_LINE_SIZE);
                if (hdr_mz == NULL) {
                        if (rte_errno == EEXIST)
@@ -418,7 +455,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
                               sizeof(vq->sw_ring[0]);
 
                sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
-                                            RTE_CACHE_LINE_SIZE, socket_id);
+                               RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
                if (!sw_ring) {
                        PMD_INIT_LOG(ERR, "can not allocate RX soft ring");
                        ret = -ENOMEM;
@@ -426,37 +463,33 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
                }
 
                vq->sw_ring = sw_ring;
-               rxvq = (struct virtnet_rx *)RTE_PTR_ADD(vq, sz_vq);
+               rxvq = &vq->rxq;
                rxvq->vq = vq;
                rxvq->port_id = dev->data->port_id;
-               rxvq->queue_id = queue_idx;
                rxvq->mz = mz;
-               *pvq = rxvq;
        } else if (queue_type == VTNET_TQ) {
-               txvq = (struct virtnet_tx *)RTE_PTR_ADD(vq, sz_vq);
+               txvq = &vq->txq;
                txvq->vq = vq;
                txvq->port_id = dev->data->port_id;
-               txvq->queue_id = queue_idx;
                txvq->mz = mz;
                txvq->virtio_net_hdr_mz = hdr_mz;
                txvq->virtio_net_hdr_mem = hdr_mz->phys_addr;
-
-               *pvq = txvq;
        } else if (queue_type == VTNET_CQ) {
-               cvq = (struct virtnet_ctl *)RTE_PTR_ADD(vq, sz_vq);
+               cvq = &vq->cq;
                cvq->vq = vq;
                cvq->mz = mz;
                cvq->virtio_net_hdr_mz = hdr_mz;
                cvq->virtio_net_hdr_mem = hdr_mz->phys_addr;
                memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE);
-               *pvq = cvq;
+
+               hw->cvq = cvq;
        }
 
-       /* For virtio_user case (that is when dev->pci_dev is NULL), we use
+       /* For virtio_user case (that is when hw->dev is NULL), we use
         * virtual address. And we need properly set _offset_, please see
         * VIRTIO_MBUF_DATA_DMA_ADDR in virtqueue.h for more information.
         */
-       if (dev->pci_dev)
+       if (!hw->virtio_user_dev)
                vq->offset = offsetof(struct rte_mbuf, buf_physaddr);
        else {
                vq->vq_ring_mem = (uintptr_t)mz->addr;
@@ -488,13 +521,11 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
                }
        }
 
-       if (hw->vtpci_ops->setup_queue(hw, vq) < 0) {
+       if (VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) {
                PMD_INIT_LOG(ERR, "setup_queue failed");
-               virtio_dev_queue_release(vq);
                return -EINVAL;
        }
 
-       vq->configured = 1;
        return 0;
 
 fail_q_alloc:
@@ -506,58 +537,88 @@ fail_q_alloc:
        return ret;
 }
 
-static int
-virtio_dev_cq_queue_setup(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx,
-               uint32_t socket_id)
+static void
+virtio_free_queues(struct virtio_hw *hw)
 {
-       struct virtnet_ctl *cvq;
-       int ret;
-       struct virtio_hw *hw = dev->data->dev_private;
+       uint16_t nr_vq = virtio_get_nr_vq(hw);
+       struct virtqueue *vq;
+       int queue_type;
+       uint16_t i;
 
-       PMD_INIT_FUNC_TRACE();
-       ret = virtio_dev_queue_setup(dev, VTNET_CQ, VTNET_SQ_CQ_QUEUE_IDX,
-                       vtpci_queue_idx, 0, socket_id, (void **)&cvq);
-       if (ret < 0) {
-               PMD_INIT_LOG(ERR, "control vq initialization failed");
-               return ret;
+       for (i = 0; i < nr_vq; i++) {
+               vq = hw->vqs[i];
+               if (!vq)
+                       continue;
+
+               queue_type = virtio_get_queue_type(hw, i);
+               if (queue_type == VTNET_RQ) {
+                       rte_free(vq->sw_ring);
+                       rte_memzone_free(vq->rxq.mz);
+               } else if (queue_type == VTNET_TQ) {
+                       rte_memzone_free(vq->txq.mz);
+                       rte_memzone_free(vq->txq.virtio_net_hdr_mz);
+               } else {
+                       rte_memzone_free(vq->cq.mz);
+                       rte_memzone_free(vq->cq.virtio_net_hdr_mz);
+               }
+
+               rte_free(vq);
        }
 
-       hw->cvq = cvq;
-       return 0;
+       rte_free(hw->vqs);
 }
 
-static void
-virtio_free_queues(struct rte_eth_dev *dev)
+static int
+virtio_alloc_queues(struct rte_eth_dev *dev)
 {
-       unsigned int i;
-
-       for (i = 0; i < dev->data->nb_rx_queues; i++)
-               virtio_dev_rx_queue_release(dev->data->rx_queues[i]);
+       struct virtio_hw *hw = dev->data->dev_private;
+       uint16_t nr_vq = virtio_get_nr_vq(hw);
+       uint16_t i;
+       int ret;
 
-       dev->data->nb_rx_queues = 0;
+       hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0);
+       if (!hw->vqs) {
+               PMD_INIT_LOG(ERR, "failed to allocate vqs");
+               return -ENOMEM;
+       }
 
-       for (i = 0; i < dev->data->nb_tx_queues; i++)
-               virtio_dev_tx_queue_release(dev->data->tx_queues[i]);
+       for (i = 0; i < nr_vq; i++) {
+               ret = virtio_init_queue(dev, i);
+               if (ret < 0) {
+                       virtio_free_queues(hw);
+                       return ret;
+               }
+       }
 
-       dev->data->nb_tx_queues = 0;
+       return 0;
 }
 
+static void virtio_queues_unbind_intr(struct rte_eth_dev *dev);
+
 static void
 virtio_dev_close(struct rte_eth_dev *dev)
 {
        struct virtio_hw *hw = dev->data->dev_private;
+       struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
 
        PMD_INIT_LOG(DEBUG, "virtio_dev_close");
 
-       if (hw->started == 1)
-               virtio_dev_stop(dev);
-
        /* reset the NIC */
        if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
-               vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR);
+               VTPCI_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR);
+       if (intr_conf->rxq)
+               virtio_queues_unbind_intr(dev);
+
+       if (intr_conf->lsc || intr_conf->rxq) {
+               rte_intr_disable(dev->intr_handle);
+               rte_intr_efd_disable(dev->intr_handle);
+               rte_free(dev->intr_handle->intr_vec);
+               dev->intr_handle->intr_vec = NULL;
+       }
+
        vtpci_reset(hw);
        virtio_dev_free_mbufs(dev);
-       virtio_free_queues(dev);
+       virtio_free_queues(hw);
 }
 
 static void
@@ -569,7 +630,7 @@ virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
        int ret;
 
        if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
-               PMD_INIT_LOG(INFO, "host does not support rx control\n");
+               PMD_INIT_LOG(INFO, "host does not support rx control");
                return;
        }
 
@@ -592,7 +653,7 @@ virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
        int ret;
 
        if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
-               PMD_INIT_LOG(INFO, "host does not support rx control\n");
+               PMD_INIT_LOG(INFO, "host does not support rx control");
                return;
        }
 
@@ -615,7 +676,7 @@ virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
        int ret;
 
        if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
-               PMD_INIT_LOG(INFO, "host does not support rx control\n");
+               PMD_INIT_LOG(INFO, "host does not support rx control");
                return;
        }
 
@@ -638,7 +699,7 @@ virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
        int ret;
 
        if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
-               PMD_INIT_LOG(INFO, "host does not support rx control\n");
+               PMD_INIT_LOG(INFO, "host does not support rx control");
                return;
        }
 
@@ -652,6 +713,43 @@ virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
                PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
 }
 
+#define VLAN_TAG_LEN           4    /* 802.3ac tag (not DMA'd) */
+static int
+virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+       struct virtio_hw *hw = dev->data->dev_private;
+       uint32_t ether_hdr_len = ETHER_HDR_LEN + VLAN_TAG_LEN +
+                                hw->vtnet_hdr_size;
+       uint32_t frame_size = mtu + ether_hdr_len;
+
+       if (mtu < ETHER_MIN_MTU || frame_size > VIRTIO_MAX_RX_PKTLEN) {
+               PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
+                       ETHER_MIN_MTU, VIRTIO_MAX_RX_PKTLEN - ether_hdr_len);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static int
+virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+       struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
+       struct virtqueue *vq = rxvq->vq;
+
+       virtqueue_enable_intr(vq);
+       return 0;
+}
+
+static int
+virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+       struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
+       struct virtqueue *vq = rxvq->vq;
+
+       virtqueue_disable_intr(vq);
+       return 0;
+}
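
Illustrative note (not part of the patch): these two callbacks back the ethdev Rx-interrupt API. A minimal sketch of the application side, assuming the port was configured with intr_conf.rxq = 1 and the queue's eventfd is already hooked into an epoll set:

#include <rte_ethdev.h>

static void
sketch_wait_then_poll(uint8_t port_id, uint16_t queue_id)
{
	/* Arm the interrupt before sleeping... */
	rte_eth_dev_rx_intr_enable(port_id, queue_id);
	/* ...block here, e.g. on the fd registered via
	 * rte_eth_dev_rx_intr_ctl_q(), until traffic arrives... */
	rte_eth_dev_rx_intr_disable(port_id, queue_id);
	/* ...then go back to rte_eth_rx_burst() polling. */
}
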
+
 /*
  * dev_ops for virtio, bare necessities for basic operation
  */
@@ -664,7 +762,7 @@ static const struct eth_dev_ops virtio_eth_dev_ops = {
        .promiscuous_disable     = virtio_dev_promiscuous_disable,
        .allmulticast_enable     = virtio_dev_allmulticast_enable,
        .allmulticast_disable    = virtio_dev_allmulticast_disable,
-
+       .mtu_set                 = virtio_mtu_set,
        .dev_infos_get           = virtio_dev_info_get,
        .stats_get               = virtio_dev_stats_get,
        .xstats_get              = virtio_dev_xstats_get,
@@ -673,9 +771,12 @@ static const struct eth_dev_ops virtio_eth_dev_ops = {
        .xstats_reset            = virtio_dev_stats_reset,
        .link_update             = virtio_dev_link_update,
        .rx_queue_setup          = virtio_dev_rx_queue_setup,
-       .rx_queue_release        = virtio_dev_rx_queue_release,
+       .rx_queue_intr_enable    = virtio_dev_rx_queue_intr_enable,
+       .rx_queue_intr_disable   = virtio_dev_rx_queue_intr_disable,
+       .rx_queue_release        = virtio_dev_queue_release,
+       .rx_descriptor_done      = virtio_dev_rx_queue_done,
        .tx_queue_setup          = virtio_dev_tx_queue_setup,
-       .tx_queue_release        = virtio_dev_tx_queue_release,
+       .tx_queue_release        = virtio_dev_queue_release,
        /* collect stats per queue */
        .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
        .vlan_filter_set         = virtio_vlan_filter_set,
@@ -830,6 +931,7 @@ virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
                for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
                        xstats[count].value = *(uint64_t *)(((char *)rxvq) +
                                rte_virtio_rxq_stat_strings[t].offset);
+                       xstats[count].id = count;
                        count++;
                }
        }
@@ -845,6 +947,7 @@ virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
                for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
                        xstats[count].value = *(uint64_t *)(((char *)txvq) +
                                rte_virtio_txq_stat_strings[t].offset);
+                       xstats[count].id = count;
                        count++;
                }
        }
@@ -1042,17 +1145,16 @@ virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 }
 
 static int
-virtio_negotiate_features(struct virtio_hw *hw)
+virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
 {
        uint64_t host_features;
 
        /* Prepare guest_features: feature that driver wants to support */
-       hw->guest_features = VIRTIO_PMD_GUEST_FEATURES;
        PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
-               hw->guest_features);
+               req_features);
 
        /* Read device(host) feature bits */
-       host_features = hw->vtpci_ops->get_features(hw);
+       host_features = VTPCI_OPS(hw)->get_features(hw);
        PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
                host_features);
 
@@ -1060,6 +1162,7 @@ virtio_negotiate_features(struct virtio_hw *hw)
         * Negotiate features: Subset of device feature bits are written back
         * guest feature bits.
         */
+       hw->guest_features = req_features;
        hw->guest_features = vtpci_negotiate_features(hw, host_features);
        PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
                hw->guest_features);
@@ -1078,6 +1181,8 @@ virtio_negotiate_features(struct virtio_hw *hw)
                }
        }
 
+       hw->req_guest_features = req_features;
+
        return 0;
 }
 
@@ -1086,7 +1191,7 @@ virtio_negotiate_features(struct virtio_hw *hw)
  * if link state changed.
  */
 static void
-virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
+virtio_interrupt_handler(struct rte_intr_handle *handle,
                         void *param)
 {
        struct rte_eth_dev *dev = param;
@@ -1097,13 +1202,13 @@ virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
        isr = vtpci_isr(hw);
        PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
 
-       if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0)
+       if (rte_intr_enable(handle) < 0)
                PMD_DRV_LOG(ERR, "interrupt enable failed");
 
        if (isr & VIRTIO_PCI_ISR_CONFIG) {
                if (virtio_dev_link_update(dev, 0) == 0)
                        _rte_eth_dev_callback_process(dev,
-                                                     RTE_ETH_EVENT_INTR_LSC);
+                                                     RTE_ETH_EVENT_INTR_LSC, NULL);
        }
 
 }
@@ -1118,47 +1223,105 @@ rx_func_get(struct rte_eth_dev *eth_dev)
                eth_dev->rx_pkt_burst = &virtio_recv_pkts;
 }
 
-/*
- * This function is based on probe() function in virtio_pci.c
- * It returns 0 on success.
+/* Only support 1:1 queue/interrupt mapping so far.
+ * TODO: support n:1 queue/interrupt mapping when there are limited number of
+ * interrupt vectors (<N+1).
  */
-int
-eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
+static int
+virtio_queues_bind_intr(struct rte_eth_dev *dev)
 {
-       struct virtio_hw *hw = eth_dev->data->dev_private;
-       struct virtio_net_config *config;
-       struct virtio_net_config local_config;
-       struct rte_pci_device *pci_dev;
-       uint32_t dev_flags = RTE_ETH_DEV_DETACHABLE;
-       int ret;
+       uint32_t i;
+       struct virtio_hw *hw = dev->data->dev_private;
 
-       RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf));
+       PMD_INIT_LOG(INFO, "queue/interrupt binding");
+       for (i = 0; i < dev->data->nb_rx_queues; ++i) {
+               dev->intr_handle->intr_vec[i] = i + 1;
+               if (VTPCI_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
+                                                VIRTIO_MSI_NO_VECTOR) {
+                       PMD_DRV_LOG(ERR, "failed to set queue vector");
+                       return -EBUSY;
+               }
+       }
 
-       eth_dev->dev_ops = &virtio_eth_dev_ops;
-       eth_dev->tx_pkt_burst = &virtio_xmit_pkts;
+       return 0;
+}
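With this 1:1 binding the device needs nb_rx_queues + 1 MSI-X vectors. A
hypothetical layout for two rx queues (vector 0 is claimed for config/link
state by set_config_irq(hw, 0) in virtio_dev_configure() further down):

    /* Illustration only, assuming 2 rx queues:
     *   vector 0 -> config space / link state change
     *   vector 1 -> rx queue 0 (hw->vqs[0])
     *   vector 2 -> rx queue 1 (hw->vqs[2])
     * rx queue i sits at hw->vqs[2 * i] because virtqueues are laid out
     * RQ0, TQ0, RQ1, TQ1, ... with the control queue last.
     */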
 
-       if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
-               rx_func_get(eth_dev);
-               return 0;
+static void
+virtio_queues_unbind_intr(struct rte_eth_dev *dev)
+{
+       uint32_t i;
+       struct virtio_hw *hw = dev->data->dev_private;
+
+       PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
+       for (i = 0; i < dev->data->nb_rx_queues; ++i)
+               VTPCI_OPS(hw)->set_queue_irq(hw,
+                                            hw->vqs[i * VTNET_CQ],
+                                            VIRTIO_MSI_NO_VECTOR);
+}
+
+static int
+virtio_configure_intr(struct rte_eth_dev *dev)
+{
+       struct virtio_hw *hw = dev->data->dev_private;
+
+       if (!rte_intr_cap_multiple(dev->intr_handle)) {
+               PMD_INIT_LOG(ERR, "Multiple intr vector not supported");
+               return -ENOTSUP;
        }
 
-       /* Allocate memory for storing MAC addresses */
-       eth_dev->data->mac_addrs = rte_zmalloc("virtio", VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN, 0);
-       if (eth_dev->data->mac_addrs == NULL) {
-               PMD_INIT_LOG(ERR,
-                       "Failed to allocate %d bytes needed to store MAC addresses",
-                       VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN);
-               return -ENOMEM;
+       if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
+               PMD_INIT_LOG(ERR, "Fail to create eventfd");
+               return -1;
        }
 
-       pci_dev = eth_dev->pci_dev;
+       if (!dev->intr_handle->intr_vec) {
+               dev->intr_handle->intr_vec =
+                       rte_zmalloc("intr_vec",
+                                   hw->max_queue_pairs * sizeof(int), 0);
+               if (!dev->intr_handle->intr_vec) {
+                       PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
+                                    hw->max_queue_pairs);
+                       return -ENOMEM;
+               }
+       }
 
-       if (pci_dev) {
-               ret = vtpci_init(pci_dev, hw, &dev_flags);
-               if (ret)
-                       return ret;
+       /* Re-register callback to update max_intr */
+       rte_intr_callback_unregister(dev->intr_handle,
+                                    virtio_interrupt_handler,
+                                    dev);
+       rte_intr_callback_register(dev->intr_handle,
+                                  virtio_interrupt_handler,
+                                  dev);
+
+       /* DO NOT try to remove this! This function will enable msix, or QEMU
+        * will encounter SIGSEGV when DRIVER_OK is sent.
+        * And for legacy devices, this should be done before queue/vec binding
+        * to change the config size from 20 to 24, or VIRTIO_MSI_QUEUE_VECTOR
+        * (22) will be ignored.
+        */
+       if (rte_intr_enable(dev->intr_handle) < 0) {
+               PMD_DRV_LOG(ERR, "interrupt enable failed");
+               return -1;
        }
 
+       if (virtio_queues_bind_intr(dev) < 0) {
+               PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
+               return -1;
+       }
+
+       return 0;
+}
+
+/* reset device and renegotiate features if needed */
+static int
+virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
+{
+       struct virtio_hw *hw = eth_dev->data->dev_private;
+       struct virtio_net_config *config;
+       struct virtio_net_config local_config;
+       struct rte_pci_device *pci_dev = NULL;
+       int ret;
+
        /* Reset the device although not necessary at startup */
        vtpci_reset(hw);
 
@@ -1167,15 +1330,19 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 
        /* Tell the host we've known how to drive the device. */
        vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
-       if (virtio_negotiate_features(hw) < 0)
+       if (virtio_negotiate_features(hw, req_features) < 0)
                return -1;
 
+       if (eth_dev->device) {
+               pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
+               rte_eth_copy_pci_info(eth_dev, pci_dev);
+       }
+
        /* If host does not support status then disable LSC */
        if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS))
-               dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
-
-       rte_eth_copy_pci_info(eth_dev, pci_dev);
-       eth_dev->data->dev_flags = dev_flags;
+               eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
+       else
+               eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
 
        rx_func_get(eth_dev);
 
@@ -1223,16 +1390,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
                        config->max_virtqueue_pairs = 1;
                }
 
-               hw->max_rx_queues =
-                       (VIRTIO_MAX_RX_QUEUES < config->max_virtqueue_pairs) ?
-                       VIRTIO_MAX_RX_QUEUES : config->max_virtqueue_pairs;
-               hw->max_tx_queues =
-                       (VIRTIO_MAX_TX_QUEUES < config->max_virtqueue_pairs) ?
-                       VIRTIO_MAX_TX_QUEUES : config->max_virtqueue_pairs;
-
-               virtio_dev_cq_queue_setup(eth_dev,
-                                       config->max_virtqueue_pairs * 2,
-                                       SOCKET_ID_ANY);
+               hw->max_queue_pairs = config->max_virtqueue_pairs;
 
                PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
                                config->max_virtqueue_pairs);
@@ -1243,23 +1401,142 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
                                config->mac[2], config->mac[3],
                                config->mac[4], config->mac[5]);
        } else {
-               hw->max_rx_queues = 1;
-               hw->max_tx_queues = 1;
+               PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
+               hw->max_queue_pairs = 1;
        }
 
-       PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d   hw->max_tx_queues=%d",
-                       hw->max_rx_queues, hw->max_tx_queues);
+       ret = virtio_alloc_queues(eth_dev);
+       if (ret < 0)
+               return ret;
+
+       if (eth_dev->data->dev_conf.intr_conf.rxq) {
+               if (virtio_configure_intr(eth_dev) < 0) {
+                       PMD_INIT_LOG(ERR, "failed to configure interrupt");
+                       return -1;
+               }
+       }
+
+       vtpci_reinit_complete(hw);
+
        if (pci_dev)
                PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
                        eth_dev->data->port_id, pci_dev->id.vendor_id,
                        pci_dev->id.device_id);
 
+       return 0;
+}
+
+/*
+ * Remap the PCI device again (IO port map for legacy device and
+ * memory map for modern device), so that the secondary process
+ * can have the PCI device initialized correctly.
+ */
+static int
+virtio_remap_pci(struct rte_pci_device *pci_dev, struct virtio_hw *hw)
+{
+       if (hw->modern) {
+               /*
+                * We don't have to re-parse the PCI config space, since
+                * rte_eal_pci_map_device() makes sure the mapped address
+                * in secondary process would equal to the one mapped in
+                * the primary process: error will be returned if that
+                * requirement is not met.
+                *
+                * That said, we could simply reuse all cap pointers
+                * (such as dev_cfg, common_cfg, etc.) parsed from the
+                * primary process, which is stored in shared memory.
+                */
+               if (rte_eal_pci_map_device(pci_dev)) {
+                       PMD_INIT_LOG(DEBUG, "failed to map pci device!");
+                       return -1;
+               }
+       } else {
+               if (rte_eal_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0)
+                       return -1;
+       }
+
+       return 0;
+}
+
+static void
+virtio_set_vtpci_ops(struct virtio_hw *hw)
+{
+#ifdef RTE_VIRTIO_USER
+       if (hw->virtio_user_dev)
+               VTPCI_OPS(hw) = &virtio_user_ops;
+       else
+#endif
+       if (hw->modern)
+               VTPCI_OPS(hw) = &modern_ops;
+       else
+               VTPCI_OPS(hw) = &legacy_ops;
+}
+
+/*
+ * This function is based on probe() function in virtio_pci.c
+ * It returns 0 on success.
+ */
+int
+eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
+{
+       struct virtio_hw *hw = eth_dev->data->dev_private;
+       uint32_t dev_flags = RTE_ETH_DEV_DETACHABLE;
+       int ret;
+
+       RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf));
+
+       eth_dev->dev_ops = &virtio_eth_dev_ops;
+       eth_dev->tx_pkt_burst = &virtio_xmit_pkts;
+
+       if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+               if (!hw->virtio_user_dev) {
+                       ret = virtio_remap_pci(RTE_DEV_TO_PCI(eth_dev->device),
+                                              hw);
+                       if (ret)
+                               return ret;
+               }
+
+               virtio_set_vtpci_ops(hw);
+               if (hw->use_simple_rxtx) {
+                       eth_dev->tx_pkt_burst = virtio_xmit_pkts_simple;
+                       eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
+               } else {
+                       rx_func_get(eth_dev);
+               }
+               return 0;
+       }
+
+       /* Allocate memory for storing MAC addresses */
+       eth_dev->data->mac_addrs = rte_zmalloc("virtio", VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN, 0);
+       if (eth_dev->data->mac_addrs == NULL) {
+               PMD_INIT_LOG(ERR,
+                       "Failed to allocate %d bytes needed to store MAC addresses",
+                       VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN);
+               return -ENOMEM;
+       }
+
+       hw->port_id = eth_dev->data->port_id;
+       /* For virtio_user case the hw->virtio_user_dev is populated by
+        * virtio_user_eth_dev_alloc() before eth_virtio_dev_init() is called.
+        */
+       if (!hw->virtio_user_dev) {
+               ret = vtpci_init(RTE_DEV_TO_PCI(eth_dev->device), hw,
+                                &dev_flags);
+               if (ret)
+                       return ret;
+       }
+
+       eth_dev->data->dev_flags = dev_flags;
+
+       /* reset device and negotiate default features */
+       ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
+       if (ret < 0)
+               return ret;
+
        /* Setup interrupt callback  */
        if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
-               rte_intr_callback_register(&pci_dev->intr_handle,
-                                  virtio_interrupt_handler, eth_dev);
-
-       virtio_dev_cq_start(eth_dev);
+               rte_intr_callback_register(eth_dev->intr_handle,
+                       virtio_interrupt_handler, eth_dev);
 
        return 0;
 }
@@ -1267,35 +1544,28 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 static int
 eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
 {
-       struct rte_pci_device *pci_dev;
-       struct virtio_hw *hw = eth_dev->data->dev_private;
-
        PMD_INIT_FUNC_TRACE();
 
        if (rte_eal_process_type() == RTE_PROC_SECONDARY)
                return -EPERM;
 
-       /* Close it anyway since there's no way to know if closed */
+       virtio_dev_stop(eth_dev);
        virtio_dev_close(eth_dev);
 
-       pci_dev = eth_dev->pci_dev;
-
        eth_dev->dev_ops = NULL;
        eth_dev->tx_pkt_burst = NULL;
        eth_dev->rx_pkt_burst = NULL;
 
-       if (hw->cvq)
-               virtio_dev_queue_release(hw->cvq->vq);
-
        rte_free(eth_dev->data->mac_addrs);
        eth_dev->data->mac_addrs = NULL;
 
        /* reset interrupt callback  */
        if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
-               rte_intr_callback_unregister(&pci_dev->intr_handle,
+               rte_intr_callback_unregister(eth_dev->intr_handle,
                                                virtio_interrupt_handler,
                                                eth_dev);
-       rte_eal_pci_unmap_device(pci_dev);
+       if (eth_dev->device)
+               rte_eal_pci_unmap_device(RTE_DEV_TO_PCI(eth_dev->device));
 
        PMD_INIT_LOG(DEBUG, "dev_uninit completed");
 
@@ -1304,32 +1574,29 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
 
 static struct eth_driver rte_virtio_pmd = {
        .pci_drv = {
-               .name = "rte_virtio_pmd",
+               .driver = {
+                       .name = "net_virtio",
+               },
                .id_table = pci_id_virtio_map,
-               .drv_flags = RTE_PCI_DRV_DETACHABLE,
+               .drv_flags = 0,
+               .probe = rte_eth_dev_pci_probe,
+               .remove = rte_eth_dev_pci_remove,
        },
        .eth_dev_init = eth_virtio_dev_init,
        .eth_dev_uninit = eth_virtio_dev_uninit,
        .dev_private_size = sizeof(struct virtio_hw),
 };
 
-/*
- * Driver initialization routine.
- * Invoked once at EAL init time.
- * Register itself as the [Poll Mode] Driver of PCI virtio devices.
- * Returns 0 on success.
- */
-static int
-rte_virtio_pmd_init(const char *name __rte_unused,
-                   const char *param __rte_unused)
+RTE_INIT(rte_virtio_pmd_init);
+static void
+rte_virtio_pmd_init(void)
 {
        if (rte_eal_iopl_init() != 0) {
                PMD_INIT_LOG(ERR, "IOPL call failed - cannot use virtio PMD");
-               return -1;
+               return;
        }
 
-       rte_eth_driver_register(&rte_virtio_pmd);
-       return 0;
+       rte_eal_pci_register(&rte_virtio_pmd.pci_drv);
 }
 
 /*
@@ -1341,14 +1608,44 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 {
        const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
        struct virtio_hw *hw = dev->data->dev_private;
+       uint64_t req_features;
+       int ret;
 
        PMD_INIT_LOG(DEBUG, "configure");
+       req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
+       if (rxmode->hw_ip_checksum)
+               req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
+       if (rxmode->enable_lro)
+               req_features |=
+                       (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
+                       (1ULL << VIRTIO_NET_F_GUEST_TSO6);
+
+       /* if the requested features changed, reinit the device */
+       if (req_features != hw->req_guest_features) {
+               ret = virtio_init_device(dev, req_features);
+               if (ret < 0)
+                       return ret;
+       }
 
-       if (rxmode->hw_ip_checksum) {
-               PMD_DRV_LOG(ERR, "HW IP checksum not supported");
-               return -EINVAL;
+       if (rxmode->hw_ip_checksum &&
+               !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
+               PMD_DRV_LOG(NOTICE,
+                       "rx ip checksum not available on this host");
+               return -ENOTSUP;
+       }
+
+       if (rxmode->enable_lro &&
+               (!vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
+                       !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
+               PMD_DRV_LOG(NOTICE,
+                       "lro not available on this host");
+               return -ENOTSUP;
        }
 
+       /* start control queue */
+       if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
+               virtio_dev_cq_start(dev);
+
        hw->vlan_strip = rxmode->hw_vlan_strip;
 
        if (rxmode->hw_vlan_filter
@@ -1359,7 +1656,9 @@ virtio_dev_configure(struct rte_eth_dev *dev)
        }
 
        if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
-               if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) {
+               /* Enable vector (0) for Link State Interrupt */
+               if (VTPCI_OPS(hw)->set_config_irq(hw, 0) ==
+                               VIRTIO_MSI_NO_VECTOR) {
                        PMD_DRV_LOG(ERR, "failed to set config vector");
                        return -EBUSY;
                }
@@ -1372,9 +1671,9 @@ static int
 virtio_dev_start(struct rte_eth_dev *dev)
 {
        uint16_t nb_queues, i;
-       struct virtio_hw *hw = dev->data->dev_private;
        struct virtnet_rx *rxvq;
        struct virtnet_tx *txvq __rte_unused;
+       struct virtio_hw *hw = dev->data->dev_private;
 
        /* check if lsc interrupt feature is enabled */
        if (dev->data->dev_conf.intr_conf.lsc) {
@@ -1382,8 +1681,17 @@ virtio_dev_start(struct rte_eth_dev *dev)
                        PMD_DRV_LOG(ERR, "link status not supported by host");
                        return -ENOTSUP;
                }
+       }
+
+       /* Enable uio/vfio intr/eventfd mapping: although we already did that
+        * in device configure, it could be unmapped when the device is
+        * stopped.
+        */
+       if (dev->data->dev_conf.intr_conf.lsc ||
+           dev->data->dev_conf.intr_conf.rxq) {
+               rte_intr_disable(dev->intr_handle);
 
-               if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
+               if (rte_intr_enable(dev->intr_handle) < 0) {
                        PMD_DRV_LOG(ERR, "interrupt enable failed");
                        return -EIO;
                }
@@ -1392,29 +1700,19 @@ virtio_dev_start(struct rte_eth_dev *dev)
        /* Initialize Link state */
        virtio_dev_link_update(dev, 0);
 
-       /* On restart after stop do not touch queues */
-       if (hw->started)
-               return 0;
-
-       /* Do final configuration before rx/tx engine starts */
-       virtio_dev_rxtx_start(dev);
-       vtpci_reinit_complete(hw);
-
-       hw->started = 1;
-
        /*Notify the backend
         *Otherwise the tap backend might already stop its queue due to fullness.
         *vhost backend will have no chance to be waked up
         */
-       nb_queues = dev->data->nb_rx_queues;
-       if (nb_queues > 1) {
+       nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
+       if (hw->max_queue_pairs > 1) {
                if (virtio_set_multiple_queues(dev, nb_queues) != 0)
                        return -EINVAL;
        }
 
        PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
 
-       for (i = 0; i < nb_queues; i++) {
+       for (i = 0; i < dev->data->nb_rx_queues; i++) {
                rxvq = dev->data->rx_queues[i];
                virtqueue_notify(rxvq->vq);
        }
@@ -1486,14 +1784,12 @@ static void
 virtio_dev_stop(struct rte_eth_dev *dev)
 {
        struct rte_eth_link link;
-       struct virtio_hw *hw = dev->data->dev_private;
+       struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
 
        PMD_INIT_LOG(DEBUG, "stop");
 
-       hw->started = 0;
-
-       if (dev->data->dev_conf.intr_conf.lsc)
-               rte_intr_disable(&dev->pci_dev->intr_handle);
+       if (intr_conf->lsc || intr_conf->rxq)
+               rte_intr_disable(dev->intr_handle);
 
        memset(&link, 0, sizeof(link));
        virtio_dev_atomic_write_link_status(dev, &link);
@@ -1536,21 +1832,43 @@ virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complet
 static void
 virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
+       uint64_t tso_mask, host_features;
        struct virtio_hw *hw = dev->data->dev_private;
 
-       if (dev->pci_dev)
-               dev_info->driver_name = dev->driver->pci_drv.name;
-       else
-               dev_info->driver_name = "virtio_user PMD";
-       dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues;
-       dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues;
+       dev_info->pci_dev = dev->device ? RTE_DEV_TO_PCI(dev->device) : NULL;
+       dev_info->max_rx_queues =
+               RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
+       dev_info->max_tx_queues =
+               RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
        dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
        dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
        dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
        dev_info->default_txconf = (struct rte_eth_txconf) {
                .txq_flags = ETH_TXQ_FLAGS_NOOFFLOADS
        };
-       /* TRex patch */
+
+       host_features = VTPCI_OPS(hw)->get_features(hw);
+       dev_info->rx_offload_capa = 0;
+       if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
+               dev_info->rx_offload_capa |=
+                       DEV_RX_OFFLOAD_TCP_CKSUM |
+                       DEV_RX_OFFLOAD_UDP_CKSUM;
+       }
+       tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
+               (1ULL << VIRTIO_NET_F_GUEST_TSO6);
+       if ((host_features & tso_mask) == tso_mask)
+               dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO;
+
+       dev_info->tx_offload_capa = 0;
+       if (hw->guest_features & (1ULL << VIRTIO_NET_F_CSUM)) {
+               dev_info->tx_offload_capa |=
+                       DEV_TX_OFFLOAD_UDP_CKSUM |
+                       DEV_TX_OFFLOAD_TCP_CKSUM;
+       }
+       tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
+               (1ULL << VIRTIO_NET_F_HOST_TSO6);
+       if ((hw->guest_features & tso_mask) == tso_mask)
+               dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
        dev_info->speed_capa = ETH_LINK_SPEED_10G;
 }
 
@@ -1565,10 +1883,6 @@ __rte_unused uint8_t is_rx)
        return 0;
 }
 
-static struct rte_driver rte_virtio_driver = {
-       .type = PMD_PDEV,
-       .init = rte_virtio_pmd_init,
-};
-
-PMD_REGISTER_DRIVER(rte_virtio_driver, virtio_net);
-DRIVER_REGISTER_PCI_TABLE(virtio_net, pci_id_virtio_map);
+RTE_PMD_EXPORT_NAME(net_virtio, __COUNTER__);
+RTE_PMD_REGISTER_PCI_TABLE(net_virtio, pci_id_virtio_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_virtio, "* igb_uio | uio_pci_generic | vfio");
index 2ecec6e..777a14b 100644 (file)
 #define PAGE_SIZE 4096
 #endif
 
-#define VIRTIO_MAX_RX_QUEUES 128
-#define VIRTIO_MAX_TX_QUEUES 128
+#define VIRTIO_MAX_RX_QUEUES 128U
+#define VIRTIO_MAX_TX_QUEUES 128U
 #define VIRTIO_MAX_MAC_ADDRS 64
 #define VIRTIO_MIN_RX_BUFSIZE 64
 #define VIRTIO_MAX_RX_PKTLEN  9728
 
 /* Features desired/implemented by this driver. */
-#define VIRTIO_PMD_GUEST_FEATURES              \
+#define VIRTIO_PMD_DEFAULT_GUEST_FEATURES      \
        (1u << VIRTIO_NET_F_MAC           |     \
         1u << VIRTIO_NET_F_STATUS        |     \
         1u << VIRTIO_NET_F_MQ            |     \
         1u << VIRTIO_NET_F_CTRL_VQ       |     \
         1u << VIRTIO_NET_F_CTRL_RX       |     \
         1u << VIRTIO_NET_F_CTRL_VLAN     |     \
+        1u << VIRTIO_NET_F_CSUM          |     \
+        1u << VIRTIO_NET_F_HOST_TSO4     |     \
+        1u << VIRTIO_NET_F_HOST_TSO6     |     \
         1u << VIRTIO_NET_F_MRG_RXBUF     |     \
-        1ULL << VIRTIO_F_VERSION_1)
-
+        1u << VIRTIO_RING_F_INDIRECT_DESC |    \
+        1ULL << VIRTIO_F_VERSION_1       |     \
+        1ULL << VIRTIO_F_IOMMU_PLATFORM)
+
+#define VIRTIO_PMD_SUPPORTED_GUEST_FEATURES    \
+       (VIRTIO_PMD_DEFAULT_GUEST_FEATURES |    \
+        1u << VIRTIO_NET_F_GUEST_CSUM     |    \
+        1u << VIRTIO_NET_F_GUEST_TSO4     |    \
+        1u << VIRTIO_NET_F_GUEST_TSO6)
 /*
  * CQ function prototype
  */
@@ -73,31 +83,18 @@ void virtio_dev_cq_start(struct rte_eth_dev *dev);
 /*
  * RX/TX function prototypes
  */
-void virtio_dev_rxtx_start(struct rte_eth_dev *dev);
-
-int virtio_dev_queue_setup(struct rte_eth_dev *dev,
-                       int queue_type,
-                       uint16_t queue_idx,
-                       uint16_t vtpci_queue_idx,
-                       uint16_t nb_desc,
-                       unsigned int socket_id,
-                       void **pvq);
 
-void virtio_dev_queue_release(struct virtqueue *vq);
+int virtio_dev_rx_queue_done(void *rxq, uint16_t offset);
 
 int  virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
                uint16_t nb_rx_desc, unsigned int socket_id,
                const struct rte_eth_rxconf *rx_conf,
                struct rte_mempool *mb_pool);
 
-void virtio_dev_rx_queue_release(void *rxq);
-
 int  virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
                uint16_t nb_tx_desc, unsigned int socket_id,
                const struct rte_eth_txconf *tx_conf);
 
-void virtio_dev_tx_queue_release(void *txq);
-
 uint16_t virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                uint16_t nb_pkts);
 
@@ -115,13 +112,4 @@ uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
 
-/*
- * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
- * frames larger than 1514 bytes. We do not yet support software LRO
- * via tcp_lro_rx().
- */
-#define VTNET_LRO_FEATURES (VIRTIO_NET_F_GUEST_TSO4 | \
-                           VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN)
-
-
 #endif /* _VIRTIO_ETHDEV_H_ */
index f1a7ca7..ce9a9d3 100644 (file)
@@ -37,6 +37,8 @@
  #include <fcntl.h>
 #endif
 
+#include <rte_io.h>
+
 #include "virtio_pci.h"
 #include "virtio_logs.h"
 #include "virtqueue.h"
@@ -92,17 +94,17 @@ legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
        while (length > 0) {
                if (length >= 4) {
                        size = 4;
-                       rte_eal_pci_ioport_read(&hw->io, dst, size,
+                       rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size,
                                VIRTIO_PCI_CONFIG(hw) + offset);
                        *(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst);
                } else if (length >= 2) {
                        size = 2;
-                       rte_eal_pci_ioport_read(&hw->io, dst, size,
+                       rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size,
                                VIRTIO_PCI_CONFIG(hw) + offset);
                        *(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst);
                } else {
                        size = 1;
-                       rte_eal_pci_ioport_read(&hw->io, dst, size,
+                       rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size,
                                VIRTIO_PCI_CONFIG(hw) + offset);
                }
 
@@ -111,7 +113,7 @@ legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
                length -= size;
        }
 #else
-       rte_eal_pci_ioport_read(&hw->io, dst, length,
+       rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, length,
                                VIRTIO_PCI_CONFIG(hw) + offset);
 #endif
 }
@@ -131,16 +133,16 @@ legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
                if (length >= 4) {
                        size = 4;
                        tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src);
-                       rte_eal_pci_ioport_write(&hw->io, &tmp.u32, size,
+                       rte_eal_pci_ioport_write(VTPCI_IO(hw), &tmp.u32, size,
                                VIRTIO_PCI_CONFIG(hw) + offset);
                } else if (length >= 2) {
                        size = 2;
                        tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src);
-                       rte_eal_pci_ioport_write(&hw->io, &tmp.u16, size,
+                       rte_eal_pci_ioport_write(VTPCI_IO(hw), &tmp.u16, size,
                                VIRTIO_PCI_CONFIG(hw) + offset);
                } else {
                        size = 1;
-                       rte_eal_pci_ioport_write(&hw->io, src, size,
+                       rte_eal_pci_ioport_write(VTPCI_IO(hw), src, size,
                                VIRTIO_PCI_CONFIG(hw) + offset);
                }
 
@@ -149,7 +151,7 @@ legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
                length -= size;
        }
 #else
-       rte_eal_pci_ioport_write(&hw->io, src, length,
+       rte_eal_pci_ioport_write(VTPCI_IO(hw), src, length,
                                 VIRTIO_PCI_CONFIG(hw) + offset);
 #endif
 }
@@ -159,7 +161,8 @@ legacy_get_features(struct virtio_hw *hw)
 {
        uint32_t dst;
 
-       rte_eal_pci_ioport_read(&hw->io, &dst, 4, VIRTIO_PCI_HOST_FEATURES);
+       rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 4,
+                               VIRTIO_PCI_HOST_FEATURES);
        return dst;
 }
 
@@ -171,7 +174,7 @@ legacy_set_features(struct virtio_hw *hw, uint64_t features)
                        "only 32 bit features are allowed for legacy virtio!");
                return;
        }
-       rte_eal_pci_ioport_write(&hw->io, &features, 4,
+       rte_eal_pci_ioport_write(VTPCI_IO(hw), &features, 4,
                                 VIRTIO_PCI_GUEST_FEATURES);
 }
 
@@ -180,14 +183,14 @@ legacy_get_status(struct virtio_hw *hw)
 {
        uint8_t dst;
 
-       rte_eal_pci_ioport_read(&hw->io, &dst, 1, VIRTIO_PCI_STATUS);
+       rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_STATUS);
        return dst;
 }
 
 static void
 legacy_set_status(struct virtio_hw *hw, uint8_t status)
 {
-       rte_eal_pci_ioport_write(&hw->io, &status, 1, VIRTIO_PCI_STATUS);
+       rte_eal_pci_ioport_write(VTPCI_IO(hw), &status, 1, VIRTIO_PCI_STATUS);
 }
 
 static void
@@ -201,7 +204,7 @@ legacy_get_isr(struct virtio_hw *hw)
 {
        uint8_t dst;
 
-       rte_eal_pci_ioport_read(&hw->io, &dst, 1, VIRTIO_PCI_ISR);
+       rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_ISR);
        return dst;
 }
 
@@ -211,8 +214,23 @@ legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec)
 {
        uint16_t dst;
 
-       rte_eal_pci_ioport_write(&hw->io, &vec, 2, VIRTIO_MSI_CONFIG_VECTOR);
-       rte_eal_pci_ioport_read(&hw->io, &dst, 2, VIRTIO_MSI_CONFIG_VECTOR);
+       rte_eal_pci_ioport_write(VTPCI_IO(hw), &vec, 2,
+                                VIRTIO_MSI_CONFIG_VECTOR);
+       rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2,
+                               VIRTIO_MSI_CONFIG_VECTOR);
+       return dst;
+}
+
+static uint16_t
+legacy_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
+{
+       uint16_t dst;
+
+       rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
+                                VIRTIO_PCI_QUEUE_SEL);
+       rte_eal_pci_ioport_write(VTPCI_IO(hw), &vec, 2,
+                                VIRTIO_MSI_QUEUE_VECTOR);
+       rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_QUEUE_VECTOR);
        return dst;
 }
 
@@ -221,8 +239,9 @@ legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
 {
        uint16_t dst;
 
-       rte_eal_pci_ioport_write(&hw->io, &queue_id, 2, VIRTIO_PCI_QUEUE_SEL);
-       rte_eal_pci_ioport_read(&hw->io, &dst, 2, VIRTIO_PCI_QUEUE_NUM);
+       rte_eal_pci_ioport_write(VTPCI_IO(hw), &queue_id, 2,
+                                VIRTIO_PCI_QUEUE_SEL);
+       rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_PCI_QUEUE_NUM);
        return dst;
 }
 
@@ -234,10 +253,10 @@ legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
        if (!check_vq_phys_addr_ok(vq))
                return -1;
 
-       rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2,
+       rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
                         VIRTIO_PCI_QUEUE_SEL);
        src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
-       rte_eal_pci_ioport_write(&hw->io, &src, 4, VIRTIO_PCI_QUEUE_PFN);
+       rte_eal_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);
 
        return 0;
 }
@@ -247,15 +266,15 @@ legacy_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
        uint32_t src = 0;
 
-       rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2,
+       rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
                         VIRTIO_PCI_QUEUE_SEL);
-       rte_eal_pci_ioport_write(&hw->io, &src, 4, VIRTIO_PCI_QUEUE_PFN);
+       rte_eal_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);
 }
 
 static void
 legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
-       rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2,
+       rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
                         VIRTIO_PCI_QUEUE_NOTIFY);
 }
 
@@ -289,7 +308,7 @@ static int
 legacy_virtio_resource_init(struct rte_pci_device *pci_dev,
                            struct virtio_hw *hw, uint32_t *dev_flags)
 {
-       if (rte_eal_pci_ioport_map(pci_dev, 0, &hw->io) < 0)
+       if (rte_eal_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0)
                return -1;
 
        if (pci_dev->intr_handle.type != RTE_INTR_HANDLE_UNKNOWN)
@@ -300,7 +319,7 @@ legacy_virtio_resource_init(struct rte_pci_device *pci_dev,
        return 0;
 }
 
-static const struct virtio_pci_ops legacy_ops = {
+const struct virtio_pci_ops legacy_ops = {
        .read_dev_cfg   = legacy_read_dev_config,
        .write_dev_cfg  = legacy_write_dev_config,
        .reset          = legacy_reset,
@@ -310,54 +329,18 @@ static const struct virtio_pci_ops legacy_ops = {
        .set_features   = legacy_set_features,
        .get_isr        = legacy_get_isr,
        .set_config_irq = legacy_set_config_irq,
+       .set_queue_irq  = legacy_set_queue_irq,
        .get_queue_num  = legacy_get_queue_num,
        .setup_queue    = legacy_setup_queue,
        .del_queue      = legacy_del_queue,
        .notify_queue   = legacy_notify_queue,
 };
 
-
-static inline uint8_t
-io_read8(uint8_t *addr)
-{
-       return *(volatile uint8_t *)addr;
-}
-
-static inline void
-io_write8(uint8_t val, uint8_t *addr)
-{
-       *(volatile uint8_t *)addr = val;
-}
-
-static inline uint16_t
-io_read16(uint16_t *addr)
-{
-       return *(volatile uint16_t *)addr;
-}
-
-static inline void
-io_write16(uint16_t val, uint16_t *addr)
-{
-       *(volatile uint16_t *)addr = val;
-}
-
-static inline uint32_t
-io_read32(uint32_t *addr)
-{
-       return *(volatile uint32_t *)addr;
-}
-
-static inline void
-io_write32(uint32_t val, uint32_t *addr)
-{
-       *(volatile uint32_t *)addr = val;
-}
-
 static inline void
 io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
 {
-       io_write32(val & ((1ULL << 32) - 1), lo);
-       io_write32(val >> 32,                hi);
+       rte_write32(val & ((1ULL << 32) - 1), lo);
+       rte_write32(val >> 32,               hi);
 }
 
 static void
@@ -369,13 +352,13 @@ modern_read_dev_config(struct virtio_hw *hw, size_t offset,
        uint8_t old_gen, new_gen;
 
        do {
-               old_gen = io_read8(&hw->common_cfg->config_generation);
+               old_gen = rte_read8(&hw->common_cfg->config_generation);
 
                p = dst;
                for (i = 0;  i < length; i++)
-                       *p++ = io_read8((uint8_t *)hw->dev_cfg + offset + i);
+                       *p++ = rte_read8((uint8_t *)hw->dev_cfg + offset + i);
 
-               new_gen = io_read8(&hw->common_cfg->config_generation);
+               new_gen = rte_read8(&hw->common_cfg->config_generation);
        } while (old_gen != new_gen);
 }
 
@@ -387,7 +370,7 @@ modern_write_dev_config(struct virtio_hw *hw, size_t offset,
        const uint8_t *p = src;
 
        for (i = 0;  i < length; i++)
-               io_write8(*p++, (uint8_t *)hw->dev_cfg + offset + i);
+               rte_write8((*p++), (((uint8_t *)hw->dev_cfg) + offset + i));
 }
 
 static uint64_t
@@ -395,11 +378,11 @@ modern_get_features(struct virtio_hw *hw)
 {
        uint32_t features_lo, features_hi;
 
-       io_write32(0, &hw->common_cfg->device_feature_select);
-       features_lo = io_read32(&hw->common_cfg->device_feature);
+       rte_write32(0, &hw->common_cfg->device_feature_select);
+       features_lo = rte_read32(&hw->common_cfg->device_feature);
 
-       io_write32(1, &hw->common_cfg->device_feature_select);
-       features_hi = io_read32(&hw->common_cfg->device_feature);
+       rte_write32(1, &hw->common_cfg->device_feature_select);
+       features_hi = rte_read32(&hw->common_cfg->device_feature);
 
        return ((uint64_t)features_hi << 32) | features_lo;
 }
@@ -407,25 +390,25 @@ modern_get_features(struct virtio_hw *hw)
 static void
 modern_set_features(struct virtio_hw *hw, uint64_t features)
 {
-       io_write32(0, &hw->common_cfg->guest_feature_select);
-       io_write32(features & ((1ULL << 32) - 1),
-               &hw->common_cfg->guest_feature);
+       rte_write32(0, &hw->common_cfg->guest_feature_select);
+       rte_write32(features & ((1ULL << 32) - 1),
+                   &hw->common_cfg->guest_feature);
 
-       io_write32(1, &hw->common_cfg->guest_feature_select);
-       io_write32(features >> 32,
-               &hw->common_cfg->guest_feature);
+       rte_write32(1, &hw->common_cfg->guest_feature_select);
+       rte_write32(features >> 32,
+                   &hw->common_cfg->guest_feature);
 }
 
 static uint8_t
 modern_get_status(struct virtio_hw *hw)
 {
-       return io_read8(&hw->common_cfg->device_status);
+       return rte_read8(&hw->common_cfg->device_status);
 }
 
 static void
 modern_set_status(struct virtio_hw *hw, uint8_t status)
 {
-       io_write8(status, &hw->common_cfg->device_status);
+       rte_write8(status, &hw->common_cfg->device_status);
 }
 
 static void
@@ -438,21 +421,29 @@ modern_reset(struct virtio_hw *hw)
 static uint8_t
 modern_get_isr(struct virtio_hw *hw)
 {
-       return io_read8(hw->isr);
+       return rte_read8(hw->isr);
 }
 
 static uint16_t
 modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
 {
-       io_write16(vec, &hw->common_cfg->msix_config);
-       return io_read16(&hw->common_cfg->msix_config);
+       rte_write16(vec, &hw->common_cfg->msix_config);
+       return rte_read16(&hw->common_cfg->msix_config);
+}
+
+static uint16_t
+modern_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec)
+{
+       rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
+       rte_write16(vec, &hw->common_cfg->queue_msix_vector);
+       return rte_read16(&hw->common_cfg->queue_msix_vector);
 }
 
 static uint16_t
 modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
 {
-       io_write16(queue_id, &hw->common_cfg->queue_select);
-       return io_read16(&hw->common_cfg->queue_size);
+       rte_write16(queue_id, &hw->common_cfg->queue_select);
+       return rte_read16(&hw->common_cfg->queue_size);
 }
 
 static int
@@ -470,7 +461,7 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
                                                         ring[vq->vq_nentries]),
                                   VIRTIO_PCI_VRING_ALIGN);
 
-       io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
+       rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
 
        io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo,
                                      &hw->common_cfg->queue_desc_hi);
@@ -479,11 +470,11 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
        io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo,
                                      &hw->common_cfg->queue_used_hi);
 
-       notify_off = io_read16(&hw->common_cfg->queue_notify_off);
+       notify_off = rte_read16(&hw->common_cfg->queue_notify_off);
        vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
                                notify_off * hw->notify_off_multiplier);
 
-       io_write16(1, &hw->common_cfg->queue_enable);
+       rte_write16(1, &hw->common_cfg->queue_enable);
 
        PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
        PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr);
@@ -498,7 +489,7 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 static void
 modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
-       io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
+       rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
 
        io_write64_twopart(0, &hw->common_cfg->queue_desc_lo,
                                  &hw->common_cfg->queue_desc_hi);
@@ -507,16 +498,16 @@ modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
        io_write64_twopart(0, &hw->common_cfg->queue_used_lo,
                                  &hw->common_cfg->queue_used_hi);
 
-       io_write16(0, &hw->common_cfg->queue_enable);
+       rte_write16(0, &hw->common_cfg->queue_enable);
 }
 
 static void
 modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
 {
-       io_write16(1, vq->notify_addr);
+       rte_write16(1, vq->notify_addr);
 }
 
-static const struct virtio_pci_ops modern_ops = {
+const struct virtio_pci_ops modern_ops = {
        .read_dev_cfg   = modern_read_dev_config,
        .write_dev_cfg  = modern_write_dev_config,
        .reset          = modern_reset,
@@ -526,6 +517,7 @@ static const struct virtio_pci_ops modern_ops = {
        .set_features   = modern_set_features,
        .get_isr        = modern_get_isr,
        .set_config_irq = modern_set_config_irq,
+       .set_queue_irq  = modern_set_queue_irq,
        .get_queue_num  = modern_get_queue_num,
        .setup_queue    = modern_setup_queue,
        .del_queue      = modern_del_queue,
@@ -537,14 +529,14 @@ void
 vtpci_read_dev_config(struct virtio_hw *hw, size_t offset,
                      void *dst, int length)
 {
-       hw->vtpci_ops->read_dev_cfg(hw, offset, dst, length);
+       VTPCI_OPS(hw)->read_dev_cfg(hw, offset, dst, length);
 }
 
 void
 vtpci_write_dev_config(struct virtio_hw *hw, size_t offset,
                       const void *src, int length)
 {
-       hw->vtpci_ops->write_dev_cfg(hw, offset, src, length);
+       VTPCI_OPS(hw)->write_dev_cfg(hw, offset, src, length);
 }
 
 uint64_t
@@ -557,7 +549,7 @@ vtpci_negotiate_features(struct virtio_hw *hw, uint64_t host_features)
         * host all support.
         */
        features = host_features & hw->guest_features;
-       hw->vtpci_ops->set_features(hw, features);
+       VTPCI_OPS(hw)->set_features(hw, features);
 
        return features;
 }
@@ -565,9 +557,9 @@ vtpci_negotiate_features(struct virtio_hw *hw, uint64_t host_features)
 void
 vtpci_reset(struct virtio_hw *hw)
 {
-       hw->vtpci_ops->set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
+       VTPCI_OPS(hw)->set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
        /* flush status write */
-       hw->vtpci_ops->get_status(hw);
+       VTPCI_OPS(hw)->get_status(hw);
 }
 
 void
@@ -580,29 +572,21 @@ void
 vtpci_set_status(struct virtio_hw *hw, uint8_t status)
 {
        if (status != VIRTIO_CONFIG_STATUS_RESET)
-               status |= hw->vtpci_ops->get_status(hw);
+               status |= VTPCI_OPS(hw)->get_status(hw);
 
-       hw->vtpci_ops->set_status(hw, status);
+       VTPCI_OPS(hw)->set_status(hw, status);
 }
 
 uint8_t
 vtpci_get_status(struct virtio_hw *hw)
 {
-       return hw->vtpci_ops->get_status(hw);
+       return VTPCI_OPS(hw)->get_status(hw);
 }
 
 uint8_t
 vtpci_isr(struct virtio_hw *hw)
 {
-       return hw->vtpci_ops->get_isr(hw);
-}
-
-
-/* Enable one vector (0) for Link State Intrerrupt */
-uint16_t
-vtpci_irq_config(struct virtio_hw *hw, uint16_t vec)
-{
-       return hw->vtpci_ops->set_config_irq(hw, vec);
+       return VTPCI_OPS(hw)->get_isr(hw);
 }
 
 static void *
@@ -727,8 +711,6 @@ int
 vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw,
           uint32_t *dev_flags)
 {
-       hw->dev = dev;
-
        /*
         * Try if we can succeed reading virtio pci caps, which exists
         * only on modern pci device. If failed, we fallback to legacy
@@ -736,8 +718,8 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw,
         */
        if (virtio_read_caps(dev, hw) == 0) {
                PMD_INIT_LOG(INFO, "modern virtio pci detected.");
-               hw->vtpci_ops = &modern_ops;
-               hw->modern    = 1;
+               virtio_hw_internal[hw->port_id].vtpci_ops = &modern_ops;
+               hw->modern = 1;
                *dev_flags |= RTE_ETH_DEV_INTR_LSC;
                return 0;
        }
@@ -745,8 +727,9 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw,
        PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
        if (legacy_virtio_resource_init(dev, hw, dev_flags) < 0) {
                if (dev->kdrv == RTE_KDRV_UNKNOWN &&
-                   (!dev->devargs ||
-                    dev->devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)) {
+                   (!dev->device.devargs ||
+                    dev->device.devargs->type !=
+                       RTE_DEVTYPE_WHITELISTED_PCI)) {
                        PMD_INIT_LOG(INFO,
                                "skip kernel managed virtio device.");
                        return 1;
@@ -754,7 +737,7 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw,
                return -1;
        }
 
-       hw->vtpci_ops = &legacy_ops;
+       virtio_hw_internal[hw->port_id].vtpci_ops = &legacy_ops;
        hw->use_msix = legacy_virtio_has_msix(&dev->addr);
        hw->modern   = 0;
 
index dd7693f..59e45c4 100644 (file)
@@ -44,8 +44,8 @@ struct virtnet_ctl;
 
 /* VirtIO PCI vendor/device ID. */
 #define VIRTIO_PCI_VENDORID     0x1AF4
-#define VIRTIO_PCI_DEVICEID_MIN 0x1000
-#define VIRTIO_PCI_DEVICEID_MAX 0x103F
+#define VIRTIO_PCI_LEGACY_DEVICEID_NET 0x1000
+#define VIRTIO_PCI_MODERN_DEVICEID_NET 0x1041
 
 /* VirtIO ABI version, this must match exactly. */
 #define VIRTIO_PCI_ABI_VERSION 0
@@ -138,6 +138,7 @@ struct virtnet_ctl;
 #define VIRTIO_RING_F_INDIRECT_DESC    28
 
 #define VIRTIO_F_VERSION_1             32
+#define VIRTIO_F_IOMMU_PLATFORM        33
 
 /*
  * Some VirtIO feature bits (currently bits 28 through 31) are
@@ -145,7 +146,7 @@ struct virtnet_ctl;
  * rest are per-device feature bits.
  */
 #define VIRTIO_TRANSPORT_F_START 28
-#define VIRTIO_TRANSPORT_F_END   32
+#define VIRTIO_TRANSPORT_F_END   34
 
 /* The Guest publishes the used index for which it expects an interrupt
  * at the end of the avail ring. Host should ignore the avail->flags field. */
@@ -234,6 +235,9 @@ struct virtio_pci_ops {
 
        uint16_t (*set_config_irq)(struct virtio_hw *hw, uint16_t vec);
 
+       uint16_t (*set_queue_irq)(struct virtio_hw *hw, struct virtqueue *vq,
+                       uint16_t vec);
+
        uint16_t (*get_queue_num)(struct virtio_hw *hw, uint16_t queue_id);
        int (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq);
        void (*del_queue)(struct virtio_hw *hw, struct virtqueue *vq);
@@ -244,26 +248,43 @@ struct virtio_net_config;
 
 struct virtio_hw {
        struct virtnet_ctl *cvq;
-       struct rte_pci_ioport io;
+       uint64_t    req_guest_features;
        uint64_t    guest_features;
-       uint32_t    max_tx_queues;
-       uint32_t    max_rx_queues;
+       uint32_t    max_queue_pairs;
        uint16_t    vtnet_hdr_size;
        uint8_t     vlan_strip;
        uint8_t     use_msix;
-       uint8_t     started;
        uint8_t     modern;
+       uint8_t     use_simple_rxtx;
+       uint8_t     port_id;
        uint8_t     mac_addr[ETHER_ADDR_LEN];
        uint32_t    notify_off_multiplier;
        uint8_t     *isr;
        uint16_t    *notify_base;
-       struct rte_pci_device *dev;
        struct virtio_pci_common_cfg *common_cfg;
        struct virtio_net_config *dev_cfg;
-       const struct virtio_pci_ops *vtpci_ops;
        void        *virtio_user_dev;
+
+       struct virtqueue **vqs;
+};
+
+
+/*
+ * While virtio_hw is stored in shared memory, this structure holds the
+ * fields that must stay local to each process in the multi-process model.
+ * For example, the vtpci_ops pointer.
+ */
+struct virtio_hw_internal {
+       const struct virtio_pci_ops *vtpci_ops;
+       struct rte_pci_ioport io;
 };
 
+#define VTPCI_OPS(hw)  (virtio_hw_internal[(hw)->port_id].vtpci_ops)
+#define VTPCI_IO(hw)   (&virtio_hw_internal[(hw)->port_id].io)
+
+extern struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
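Keeping vtpci_ops inside the shared virtio_hw would hand secondary processes
function pointers that are only valid in the primary's address space; the
per-port virtio_hw_internal array keeps them process-local. A rough sketch of
how the accessor resolves:

    /* VTPCI_OPS(hw)->set_status(hw, status);
     * expands to
     * virtio_hw_internal[hw->port_id].vtpci_ops->set_status(hw, status);
     * so each process installs its own ops table (see virtio_set_vtpci_ops()
     * earlier in this patch) without touching shared memory.
     */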
+
+
 /*
  * This structure is just a reference to read
  * net device specific config space; it just a chodu structure
@@ -312,6 +333,8 @@ void vtpci_read_dev_config(struct virtio_hw *, size_t, void *, int);
 
 uint8_t vtpci_isr(struct virtio_hw *);
 
-uint16_t vtpci_irq_config(struct virtio_hw *, uint16_t);
+extern const struct virtio_pci_ops legacy_ops;
+extern const struct virtio_pci_ops modern_ops;
+extern const struct virtio_pci_ops virtio_user_ops;
 
 #endif /* _VIRTIO_PCI_H_ */
index 724517e..cab6e8f 100644 (file)
 #include <rte_string_fns.h>
 #include <rte_errno.h>
 #include <rte_byteorder.h>
+#include <rte_cpuflags.h>
+#include <rte_net.h>
+#include <rte_ip.h>
+#include <rte_udp.h>
+#include <rte_tcp.h>
 
 #include "virtio_logs.h"
 #include "virtio_ethdev.h"
 #define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
        ETH_TXQ_FLAGS_NOOFFLOADS)
 
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
-static int use_simple_rxtx;
-#endif
+int
+virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
+{
+       struct virtnet_rx *rxvq = rxq;
+       struct virtqueue *vq = rxvq->vq;
+
+       return VIRTQUEUE_NUSED(vq) >= offset;
+}
 
 static void
 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
@@ -123,7 +133,7 @@ virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
                cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
 
                if (unlikely(cookie == NULL)) {
-                       PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n",
+                       PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
                                vq->vq_used_cons_idx);
                        break;
                }
@@ -208,18 +218,76 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
        return 0;
 }
 
+/* When doing TSO, the IP length is not included in the pseudo header
+ * checksum of the packet given to the PMD, but for virtio it is
+ * expected.
+ */
+static void
+virtio_tso_fix_cksum(struct rte_mbuf *m)
+{
+       /* common case: header is not fragmented */
+       if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
+                       m->l4_len)) {
+               struct ipv4_hdr *iph;
+               struct ipv6_hdr *ip6h;
+               struct tcp_hdr *th;
+               uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
+               uint32_t tmp;
+
+               iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
+               th = RTE_PTR_ADD(iph, m->l3_len);
+               if ((iph->version_ihl >> 4) == 4) {
+                       iph->hdr_checksum = 0;
+                       iph->hdr_checksum = rte_ipv4_cksum(iph);
+                       ip_len = iph->total_length;
+                       ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
+                               m->l3_len);
+               } else {
+                       ip6h = (struct ipv6_hdr *)iph;
+                       ip_paylen = ip6h->payload_len;
+               }
+
+               /* calculate the new phdr checksum not including ip_paylen */
+               prev_cksum = th->cksum;
+               tmp = prev_cksum;
+               tmp += ip_paylen;
+               tmp = (tmp & 0xffff) + (tmp >> 16);
+               new_cksum = tmp;
+
+               /* replace it in the packet */
+               th->cksum = new_cksum;
+       }
+}
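The fix above only folds ip_paylen back into the one's-complement pseudo-header
sum; a worked example with made-up values:

    /* prev_cksum = 0xfffe, ip_paylen = 0x0003
     * tmp  = 0xfffe + 0x0003 = 0x10001
     * fold = (0x10001 & 0xffff) + (0x10001 >> 16) = 0x0001 + 0x0001
     * th->cksum = 0x0002
     */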
+
+static inline int
+tx_offload_enabled(struct virtio_hw *hw)
+{
+       return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
+               vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
+               vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
+}
+
+/* avoid a write operation when it is not needed, to lessen cache issues */
+#define ASSIGN_UNLESS_EQUAL(var, val) do {     \
+       if ((var) != (val))                     \
+               (var) = (val);                  \
+} while (0)
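The macro skips the store when the field already holds the target value, which
keeps the header cache line clean on the common path. For instance,
ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0) expands (inside the do/while wrapper) to:

    if (hdr->gso_type != 0)
            hdr->gso_type = 0;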
+
 static inline void
 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
                       uint16_t needed, int use_indirect, int can_push)
 {
+       struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
        struct vq_desc_extra *dxp;
        struct virtqueue *vq = txvq->vq;
        struct vring_desc *start_dp;
        uint16_t seg_num = cookie->nb_segs;
        uint16_t head_idx, idx;
        uint16_t head_size = vq->hw->vtnet_hdr_size;
-       unsigned long offs;
+       struct virtio_net_hdr *hdr;
+       int offload;
 
+       offload = tx_offload_enabled(vq->hw);
        head_idx = vq->vq_desc_head_idx;
        idx = head_idx;
        dxp = &vq->vq_descx[idx];
@@ -229,10 +297,18 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
        start_dp = vq->vq_ring.desc;
 
        if (can_push) {
-               /* put on zero'd transmit header (no offloads) */
-               void *hdr = rte_pktmbuf_prepend(cookie, head_size);
-
-               memset(hdr, 0, head_size);
+               /* prepend cannot fail, checked by caller */
+               hdr = (struct virtio_net_hdr *)
+                       rte_pktmbuf_prepend(cookie, head_size);
+               /* if offload disabled, it is not zeroed below, do it now */
+               if (offload == 0) {
+                       ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
+                       ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
+                       ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
+                       ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
+                       ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
+                       ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
+               }
        } else if (use_indirect) {
                /* setup tx ring slot to point to indirect
                 * descriptor list stored in reserved region.
@@ -240,14 +316,11 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
                 * the first slot in indirect ring is already preset
                 * to point to the header in reserved region
                 */
-               struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
-
-               offs = idx * sizeof(struct virtio_tx_region)
-                       + offsetof(struct virtio_tx_region, tx_indir);
-
-               start_dp[idx].addr  = txvq->virtio_net_hdr_mem + offs;
+               start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
+                       RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
                start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
                start_dp[idx].flags = VRING_DESC_F_INDIRECT;
+               hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
 
                /* loop below will fill in rest of the indirect elements */
                start_dp = txr[idx].tx_indir;
@@ -256,15 +329,59 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
                /* setup first tx ring slot to point to header
                 * stored in reserved region.
                 */
-               offs = idx * sizeof(struct virtio_tx_region)
-                       + offsetof(struct virtio_tx_region, tx_hdr);
-
-               start_dp[idx].addr  = txvq->virtio_net_hdr_mem + offs;
+               start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
+                       RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
                start_dp[idx].len   = vq->hw->vtnet_hdr_size;
                start_dp[idx].flags = VRING_DESC_F_NEXT;
+               hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
+
                idx = start_dp[idx].next;
        }
 
+       /* Checksum Offload / TSO */
+       if (offload) {
+               if (cookie->ol_flags & PKT_TX_TCP_SEG)
+                       cookie->ol_flags |= PKT_TX_TCP_CKSUM;
+
+               switch (cookie->ol_flags & PKT_TX_L4_MASK) {
+               case PKT_TX_UDP_CKSUM:
+                       hdr->csum_start = cookie->l2_len + cookie->l3_len;
+                       hdr->csum_offset = offsetof(struct udp_hdr,
+                               dgram_cksum);
+                       hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+                       break;
+
+               case PKT_TX_TCP_CKSUM:
+                       hdr->csum_start = cookie->l2_len + cookie->l3_len;
+                       hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
+                       hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+                       break;
+
+               default:
+                       ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
+                       ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
+                       ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
+                       break;
+               }
+
+               /* TCP Segmentation Offload */
+               if (cookie->ol_flags & PKT_TX_TCP_SEG) {
+                       virtio_tso_fix_cksum(cookie);
+                       hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
+                               VIRTIO_NET_HDR_GSO_TCPV6 :
+                               VIRTIO_NET_HDR_GSO_TCPV4;
+                       hdr->gso_size = cookie->tso_segsz;
+                       hdr->hdr_len =
+                               cookie->l2_len +
+                               cookie->l3_len +
+                               cookie->l4_len;
+               } else {
+                       ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
+                       ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
+                       ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
+               }
+       }
+
        do {
                start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
                start_dp[idx].len   = cookie->data_len;
@@ -282,207 +399,120 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
        vq_update_avail_ring(vq, head_idx);
 }
 
-static void
-virtio_dev_vring_start(struct virtqueue *vq)
-{
-       int size = vq->vq_nentries;
-       struct vring *vr = &vq->vq_ring;
-       uint8_t *ring_mem = vq->vq_ring_virt_mem;
-
-       PMD_INIT_FUNC_TRACE();
-
-       /*
-        * Reinitialise since virtio port might have been stopped and restarted
-        */
-       memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
-       vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN);
-       vq->vq_used_cons_idx = 0;
-       vq->vq_desc_head_idx = 0;
-       vq->vq_avail_idx = 0;
-       vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
-       vq->vq_free_cnt = vq->vq_nentries;
-       memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
-
-       vring_desc_init(vr->desc, size);
-
-       /*
-        * Disable device(host) interrupting guest
-        */
-       virtqueue_disable_intr(vq);
-}
-
 void
 virtio_dev_cq_start(struct rte_eth_dev *dev)
 {
        struct virtio_hw *hw = dev->data->dev_private;
 
        if (hw->cvq && hw->cvq->vq) {
-               virtio_dev_vring_start(hw->cvq->vq);
                VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
        }
 }
 
-void
-virtio_dev_rxtx_start(struct rte_eth_dev *dev)
+int
+virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
+                       uint16_t queue_idx,
+                       uint16_t nb_desc,
+                       unsigned int socket_id __rte_unused,
+                       __rte_unused const struct rte_eth_rxconf *rx_conf,
+                       struct rte_mempool *mp)
 {
-       /*
-        * Start receive and transmit vrings
-        * -    Setup vring structure for all queues
-        * -    Initialize descriptor for the rx vring
-        * -    Allocate blank mbufs for the each rx descriptor
-        *
-        */
-       uint16_t i;
+       uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
+       struct virtio_hw *hw = dev->data->dev_private;
+       struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
+       struct virtnet_rx *rxvq;
+       int error, nbufs;
+       struct rte_mbuf *m;
        uint16_t desc_idx;
 
        PMD_INIT_FUNC_TRACE();
 
-       /* Start rx vring. */
-       for (i = 0; i < dev->data->nb_rx_queues; i++) {
-               struct virtnet_rx *rxvq = dev->data->rx_queues[i];
-               struct virtqueue *vq = rxvq->vq;
-               int error, nbufs;
-               struct rte_mbuf *m;
-
-               virtio_dev_vring_start(vq);
-               if (rxvq->mpool == NULL) {
-                       rte_exit(EXIT_FAILURE,
-                               "Cannot allocate mbufs for rx virtqueue");
-               }
-
-               /* Allocate blank mbufs for the each rx descriptor */
-               nbufs = 0;
-               error = ENOSPC;
-
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
-               if (use_simple_rxtx) {
-                       for (desc_idx = 0; desc_idx < vq->vq_nentries;
-                            desc_idx++) {
-                               vq->vq_ring.avail->ring[desc_idx] = desc_idx;
-                               vq->vq_ring.desc[desc_idx].flags =
-                                       VRING_DESC_F_WRITE;
-                       }
-               }
-#endif
-               memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
-               for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
-                    desc_idx++) {
-                       vq->sw_ring[vq->vq_nentries + desc_idx] =
-                               &rxvq->fake_mbuf;
-               }
-
-               while (!virtqueue_full(vq)) {
-                       m = rte_mbuf_raw_alloc(rxvq->mpool);
-                       if (m == NULL)
-                               break;
+       if (nb_desc == 0 || nb_desc > vq->vq_nentries)
+               nb_desc = vq->vq_nentries;
+       vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
 
-                       /******************************************
-                       *         Enqueue allocated buffers        *
-                       *******************************************/
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
-                       if (use_simple_rxtx)
-                               error = virtqueue_enqueue_recv_refill_simple(vq, m);
-                       else
-#endif
-                               error = virtqueue_enqueue_recv_refill(vq, m);
-                       if (error) {
-                               rte_pktmbuf_free(m);
-                               break;
-                       }
-                       nbufs++;
-               }
+       rxvq = &vq->rxq;
+       rxvq->queue_id = queue_idx;
+       rxvq->mpool = mp;
+       if (rxvq->mpool == NULL) {
+               rte_exit(EXIT_FAILURE,
+                       "Cannot allocate mbufs for rx virtqueue");
+       }
+       dev->data->rx_queues[queue_idx] = rxvq;
 
-               vq_update_avail_idx(vq);
 
-               PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
+       /* Allocate blank mbufs for each rx descriptor */
+       nbufs = 0;
+       error = ENOSPC;
 
-               VIRTQUEUE_DUMP(vq);
+       if (hw->use_simple_rxtx) {
+               for (desc_idx = 0; desc_idx < vq->vq_nentries;
+                    desc_idx++) {
+                       vq->vq_ring.avail->ring[desc_idx] = desc_idx;
+                       vq->vq_ring.desc[desc_idx].flags =
+                               VRING_DESC_F_WRITE;
+               }
        }
 
-       /* Start tx vring. */
-       for (i = 0; i < dev->data->nb_tx_queues; i++) {
-               struct virtnet_tx *txvq = dev->data->tx_queues[i];
-               struct virtqueue *vq = txvq->vq;
-
-               virtio_dev_vring_start(vq);
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
-               if (use_simple_rxtx) {
-                       uint16_t mid_idx  = vq->vq_nentries >> 1;
-
-                       for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
-                               vq->vq_ring.avail->ring[desc_idx] =
-                                       desc_idx + mid_idx;
-                               vq->vq_ring.desc[desc_idx + mid_idx].next =
-                                       desc_idx;
-                               vq->vq_ring.desc[desc_idx + mid_idx].addr =
-                                       txvq->virtio_net_hdr_mem +
-                                       offsetof(struct virtio_tx_region, tx_hdr);
-                               vq->vq_ring.desc[desc_idx + mid_idx].len =
-                                       vq->hw->vtnet_hdr_size;
-                               vq->vq_ring.desc[desc_idx + mid_idx].flags =
-                                       VRING_DESC_F_NEXT;
-                               vq->vq_ring.desc[desc_idx].flags = 0;
-                       }
-                       for (desc_idx = mid_idx; desc_idx < vq->vq_nentries;
-                            desc_idx++)
-                               vq->vq_ring.avail->ring[desc_idx] = desc_idx;
-               }
-#endif
-               VIRTQUEUE_DUMP(vq);
+       memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
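+       /* sw_ring entries past the ring end all point at the zeroed fake
+        * mbuf, presumably so the vector RX path can over-read safely on
+        * wrap-around.
+        */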
+       for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
+            desc_idx++) {
+               vq->sw_ring[vq->vq_nentries + desc_idx] =
+                       &rxvq->fake_mbuf;
        }
-}
 
-int
-virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
-                       uint16_t queue_idx,
-                       uint16_t nb_desc,
-                       unsigned int socket_id,
-                       __rte_unused const struct rte_eth_rxconf *rx_conf,
-                       struct rte_mempool *mp)
-{
-       uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
-       struct virtnet_rx *rxvq;
-       int ret;
+       while (!virtqueue_full(vq)) {
+               m = rte_mbuf_raw_alloc(rxvq->mpool);
+               if (m == NULL)
+                       break;
 
-       PMD_INIT_FUNC_TRACE();
-       ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx,
-                       nb_desc, socket_id, (void **)&rxvq);
-       if (ret < 0) {
-               PMD_INIT_LOG(ERR, "rvq initialization failed");
-               return ret;
+               /* Enqueue allocated buffers */
+               if (hw->use_simple_rxtx)
+                       error = virtqueue_enqueue_recv_refill_simple(vq, m);
+               else
+                       error = virtqueue_enqueue_recv_refill(vq, m);
+
+               if (error) {
+                       rte_pktmbuf_free(m);
+                       break;
+               }
+               nbufs++;
        }
 
-       /* Create mempool for rx mbuf allocation */
-       rxvq->mpool = mp;
+       vq_update_avail_idx(vq);
 
-       dev->data->rx_queues[queue_idx] = rxvq;
+       PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
 
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
        virtio_rxq_vec_setup(rxvq);
-#endif
+
+       VIRTQUEUE_DUMP(vq);
 
        return 0;
 }
 
-void
-virtio_dev_rx_queue_release(void *rxq)
+static void
+virtio_update_rxtx_handler(struct rte_eth_dev *dev,
+                          const struct rte_eth_txconf *tx_conf)
 {
-       struct virtnet_rx *rxvq = rxq;
-       struct virtqueue *vq;
-       const struct rte_memzone *mz;
-
-       if (rxvq == NULL)
-               return;
-
-       /*
-        * rxvq is freed when vq is freed, and as mz should be freed after the
-        * del_queue, so we reserve the mz pointer first.
-        */
-       vq = rxvq->vq;
-       mz = rxvq->mz;
+       uint8_t use_simple_rxtx = 0;
+       struct virtio_hw *hw = dev->data->dev_private;
 
-       virtio_dev_queue_release(vq);
-       rte_memzone_free(mz);
+#if defined RTE_ARCH_X86
+       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE3))
+               use_simple_rxtx = 1;
+#elif defined RTE_ARCH_ARM64 || defined CONFIG_RTE_ARCH_ARM
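+       /* note: CONFIG_RTE_ARCH_ARM is likely never defined for the compiler
+        * (the CONFIG_ prefix lives only in the build config), so RTE_ARCH_ARM
+        * is probably the macro intended here.
+        */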
+       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
+               use_simple_rxtx = 1;
+#endif
+       /* Use simple rx/tx func if single segment and no offloads */
+       if (use_simple_rxtx &&
+           (tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
+           !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
+               PMD_INIT_LOG(INFO, "Using simple rx/tx path");
+               dev->tx_pkt_burst = virtio_xmit_pkts_simple;
+               dev->rx_pkt_burst = virtio_recv_pkts_vec;
+               hw->use_simple_rxtx = use_simple_rxtx;
+       }
 }
 
 /*
@@ -496,45 +526,26 @@ int
 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
                        uint16_t queue_idx,
                        uint16_t nb_desc,
-                       unsigned int socket_id,
+                       unsigned int socket_id __rte_unused,
                        const struct rte_eth_txconf *tx_conf)
 {
        uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
-
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
        struct virtio_hw *hw = dev->data->dev_private;
-#endif
+       struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
        struct virtnet_tx *txvq;
-       struct virtqueue *vq;
        uint16_t tx_free_thresh;
-       int ret;
+       uint16_t desc_idx;
 
        PMD_INIT_FUNC_TRACE();
 
-       if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS)
-           != ETH_TXQ_FLAGS_NOXSUMS) {
-               PMD_INIT_LOG(ERR, "TX checksum offload not supported\n");
-               return -EINVAL;
-       }
+       virtio_update_rxtx_handler(dev, tx_conf);
 
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
-       /* Use simple rx/tx func if single segment and no offloads */
-       if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
-            !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
-               PMD_INIT_LOG(INFO, "Using simple rx/tx path");
-               dev->tx_pkt_burst = virtio_xmit_pkts_simple;
-               dev->rx_pkt_burst = virtio_recv_pkts_vec;
-               use_simple_rxtx = 1;
-       }
-#endif
+       if (nb_desc == 0 || nb_desc > vq->vq_nentries)
+               nb_desc = vq->vq_nentries;
+       vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
 
-       ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
-                       nb_desc, socket_id, (void **)&txvq);
-       if (ret < 0) {
-               PMD_INIT_LOG(ERR, "tvq initialization failed");
-               return ret;
-       }
-       vq = txvq->vq;
+       txvq = &vq->txq;
+       txvq->queue_id = queue_idx;
 
        tx_free_thresh = tx_conf->tx_free_thresh;
        if (tx_free_thresh == 0)
@@ -552,32 +563,32 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
 
        vq->vq_free_thresh = tx_free_thresh;
 
-       dev->data->tx_queues[queue_idx] = txvq;
-       return 0;
-}
-
-void
-virtio_dev_tx_queue_release(void *txq)
-{
-       struct virtnet_tx *txvq = txq;
-       struct virtqueue *vq;
-       const struct rte_memzone *mz;
-       const struct rte_memzone *hdr_mz;
-
-       if (txvq == NULL)
-               return;
+       if (hw->use_simple_rxtx) {
+               uint16_t mid_idx  = vq->vq_nentries >> 1;
+
+               for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
+                       vq->vq_ring.avail->ring[desc_idx] =
+                               desc_idx + mid_idx;
+                       vq->vq_ring.desc[desc_idx + mid_idx].next =
+                               desc_idx;
+                       vq->vq_ring.desc[desc_idx + mid_idx].addr =
+                               txvq->virtio_net_hdr_mem +
+                               offsetof(struct virtio_tx_region, tx_hdr);
+                       vq->vq_ring.desc[desc_idx + mid_idx].len =
+                               vq->hw->vtnet_hdr_size;
+                       vq->vq_ring.desc[desc_idx + mid_idx].flags =
+                               VRING_DESC_F_NEXT;
+                       vq->vq_ring.desc[desc_idx].flags = 0;
+               }
+               for (desc_idx = mid_idx; desc_idx < vq->vq_nentries;
+                    desc_idx++)
+                       vq->vq_ring.avail->ring[desc_idx] = desc_idx;
+       }
 
-       /*
-        * txvq is freed when vq is freed, and as mz should be freed after the
-        * del_queue, so we reserve the mz pointer first.
-        */
-       vq = txvq->vq;
-       mz = txvq->mz;
-       hdr_mz = txvq->virtio_net_hdr_mz;
+       VIRTQUEUE_DUMP(vq);
 
-       virtio_dev_queue_release(vq);
-       rte_memzone_free(mz);
-       rte_memzone_free(hdr_mz);
+       dev->data->tx_queues[queue_idx] = txvq;
+       return 0;
 }
 
 static void
@@ -627,6 +638,86 @@ virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
        }
 }
 
+/* Optionally fill offload information in structure */
+static int
+virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
+{
+       struct rte_net_hdr_lens hdr_lens;
+       uint32_t hdrlen, ptype;
+       int l4_supported = 0;
+
+       /* nothing to do */
+       if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
+               return 0;
+
+       m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
+
+       ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
+       m->packet_type = ptype;
+       if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
+           (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
+           (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
+               l4_supported = 1;
+
+       if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+               hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
+               if (hdr->csum_start <= hdrlen && l4_supported) {
+                       m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
+               } else {
+                       /* Unknown proto or tunnel, do sw cksum. We can assume
+                        * the cksum field is in the first segment since the
+                        * buffers we provided to the host are large enough.
+                        * In case of SCTP, this will be wrong since it's a CRC
+                        * but there's nothing we can do.
+                        */
+                       uint16_t csum, off;
+
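+                       /* compute the raw sum over the payload; a result of
+                        * 0xffff is presumably left un-complemented, since a
+                        * zero UDP checksum would mean "no checksum".
+                        */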
+                       rte_raw_cksum_mbuf(m, hdr->csum_start,
+                               rte_pktmbuf_pkt_len(m) - hdr->csum_start,
+                               &csum);
+                       if (likely(csum != 0xffff))
+                               csum = ~csum;
+                       off = hdr->csum_offset + hdr->csum_start;
+                       if (rte_pktmbuf_data_len(m) >= off + 1)
+                               *rte_pktmbuf_mtod_offset(m, uint16_t *,
+                                       off) = csum;
+               }
+       } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
+               m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
+       }
+
+       /* GSO request, save required information in mbuf */
+       if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+               /* Check unsupported modes */
+               if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
+                   (hdr->gso_size == 0)) {
+                       return -EINVAL;
+               }
+
+               /* Update mss lengths in mbuf */
+               m->tso_segsz = hdr->gso_size;
+               switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+                       case VIRTIO_NET_HDR_GSO_TCPV4:
+                       case VIRTIO_NET_HDR_GSO_TCPV6:
+                               m->ol_flags |= PKT_RX_LRO | \
+                                       PKT_RX_L4_CKSUM_NONE;
+                               break;
+                       default:
+                               return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+
+static inline int
+rx_offload_enabled(struct virtio_hw *hw)
+{
+       return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
+               vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
+               vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
+}
+
 #define VIRTIO_MBUF_BURST_SZ 64
 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
 uint16_t
@@ -642,6 +733,8 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
        int error;
        uint32_t i, nb_enqueued;
        uint32_t hdr_size;
+       int offload;
+       struct virtio_net_hdr *hdr;
 
        nb_used = VIRTQUEUE_NUSED(vq);
 
@@ -659,6 +752,7 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
        nb_rx = 0;
        nb_enqueued = 0;
        hdr_size = hw->vtnet_hdr_size;
+       offload = rx_offload_enabled(hw);
 
        for (i = 0; i < num ; i++) {
                rxm = rcv_pkts[i];
@@ -683,9 +777,18 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
                rxm->data_len = (uint16_t)(len[i] - hdr_size);
 
+               hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
+                       RTE_PKTMBUF_HEADROOM - hdr_size);
+
                if (hw->vlan_strip)
                        rte_vlan_strip(rxm);
 
+               if (offload && virtio_rx_offload(rxm, hdr) < 0) {
+                       virtio_discard_rxbuf(vq, rxm);
+                       rxvq->stats.errors++;
+                       continue;
+               }
+
                VIRTIO_DUMP_PACKET(rxm, rxm->data_len);
 
                rx_pkts[nb_rx++] = rxm;
@@ -745,6 +848,7 @@ virtio_recv_mergeable_pkts(void *rx_queue,
        uint16_t extra_idx;
        uint32_t seg_res;
        uint32_t hdr_size;
+       int offload;
 
        nb_used = VIRTQUEUE_NUSED(vq);
 
@@ -760,6 +864,7 @@ virtio_recv_mergeable_pkts(void *rx_queue,
        extra_idx = 0;
        seg_res = 0;
        hdr_size = hw->vtnet_hdr_size;
+       offload = rx_offload_enabled(hw);
 
        while (i < nb_used) {
                struct virtio_net_hdr_mrg_rxbuf *header;
@@ -805,6 +910,12 @@ virtio_recv_mergeable_pkts(void *rx_queue,
                rx_pkts[nb_rx] = rxm;
                prev = rxm;
 
+               if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
+                       virtio_discard_rxbuf(vq, rxm);
+                       rxvq->stats.errors++;
+                       continue;
+               }
+
                seg_res = seg_num - 1;
 
                while (seg_res != 0) {
@@ -925,7 +1036,8 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                }
 
                /* optimize ring usage */
-               if (vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) &&
+               if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
+                     vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
                    rte_mbuf_refcnt_read(txm) == 1 &&
                    RTE_MBUF_DIRECT(txm) &&
                    txm->nb_segs == 1 &&
index 058b56a..28f82d6 100644 (file)
@@ -86,10 +86,9 @@ struct virtnet_ctl {
        const struct rte_memzone *mz;   /**< mem zone to populate RX ring. */
 };
 
-#ifdef RTE_MACHINE_CPUFLAG_SSSE3
 int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
 
 int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
        struct rte_mbuf *m);
-#endif
+
 #endif /* _VIRTIO_RXTX_H_ */
index 6517aa8..b651e53 100644 (file)
@@ -37,8 +37,6 @@
 #include <string.h>
 #include <errno.h>
 
-#include <tmmintrin.h>
-
 #include <rte_cycles.h>
 #include <rte_memory.h>
 #include <rte_memzone.h>
 #include <rte_errno.h>
 #include <rte_byteorder.h>
 
-#include "virtio_logs.h"
-#include "virtio_ethdev.h"
-#include "virtqueue.h"
-#include "virtio_rxtx.h"
-
-#define RTE_VIRTIO_VPMD_RX_BURST 32
-#define RTE_VIRTIO_DESC_PER_LOOP 8
-#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+#include "virtio_rxtx_simple.h"
 
 #ifndef __INTEL_COMPILER
 #pragma GCC diagnostic ignored "-Wcast-qual"
@@ -92,257 +83,6 @@ virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
        return 0;
 }
 
-static inline void
-virtio_rxq_rearm_vec(struct virtnet_rx *rxvq)
-{
-       int i;
-       uint16_t desc_idx;
-       struct rte_mbuf **sw_ring;
-       struct vring_desc *start_dp;
-       int ret;
-       struct virtqueue *vq = rxvq->vq;
-
-       desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1);
-       sw_ring = &vq->sw_ring[desc_idx];
-       start_dp = &vq->vq_ring.desc[desc_idx];
-
-       ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring,
-               RTE_VIRTIO_VPMD_RX_REARM_THRESH);
-       if (unlikely(ret)) {
-               rte_eth_devices[rxvq->port_id].data->rx_mbuf_alloc_failed +=
-                       RTE_VIRTIO_VPMD_RX_REARM_THRESH;
-               return;
-       }
-
-       for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) {
-               uintptr_t p;
-
-               p = (uintptr_t)&sw_ring[i]->rearm_data;
-               *(uint64_t *)p = rxvq->mbuf_initializer;
-
-               start_dp[i].addr =
-                       VIRTIO_MBUF_ADDR(sw_ring[i], vq) +
-                       RTE_PKTMBUF_HEADROOM - vq->hw->vtnet_hdr_size;
-               start_dp[i].len = sw_ring[i]->buf_len -
-                       RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size;
-       }
-
-       vq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
-       vq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH;
-       vq_update_avail_idx(vq);
-}
-
-/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP)
- *
- * This routine is for non-mergeable RX, one desc for each guest buffer.
- * This routine is based on the RX ring layout optimization. Each entry in the
- * avail ring points to the desc with the same index in the desc ring and this
- * will never be changed in the driver.
- *
- * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
- */
-uint16_t
-virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
-       uint16_t nb_pkts)
-{
-       struct virtnet_rx *rxvq = rx_queue;
-       struct virtqueue *vq = rxvq->vq;
-       uint16_t nb_used;
-       uint16_t desc_idx;
-       struct vring_used_elem *rused;
-       struct rte_mbuf **sw_ring;
-       struct rte_mbuf **sw_ring_end;
-       uint16_t nb_pkts_received;
-       __m128i shuf_msk1, shuf_msk2, len_adjust;
-
-       shuf_msk1 = _mm_set_epi8(
-               0xFF, 0xFF, 0xFF, 0xFF,
-               0xFF, 0xFF,             /* vlan tci */
-               5, 4,                   /* dat len */
-               0xFF, 0xFF, 5, 4,       /* pkt len */
-               0xFF, 0xFF, 0xFF, 0xFF  /* packet type */
-
-       );
-
-       shuf_msk2 = _mm_set_epi8(
-               0xFF, 0xFF, 0xFF, 0xFF,
-               0xFF, 0xFF,             /* vlan tci */
-               13, 12,                 /* dat len */
-               0xFF, 0xFF, 13, 12,     /* pkt len */
-               0xFF, 0xFF, 0xFF, 0xFF  /* packet type */
-       );
-
-       /* Subtract the header length.
-       *  In which case do we need the header length in used->len ?
-       */
-       len_adjust = _mm_set_epi16(
-               0, 0,
-               0,
-               (uint16_t)-vq->hw->vtnet_hdr_size,
-               0, (uint16_t)-vq->hw->vtnet_hdr_size,
-               0, 0);
-
-       if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
-               return 0;
-
-       nb_used = VIRTQUEUE_NUSED(vq);
-
-       rte_compiler_barrier();
-
-       if (unlikely(nb_used == 0))
-               return 0;
-
-       nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP);
-       nb_used = RTE_MIN(nb_used, nb_pkts);
-
-       desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
-       rused = &vq->vq_ring.used->ring[desc_idx];
-       sw_ring  = &vq->sw_ring[desc_idx];
-       sw_ring_end = &vq->sw_ring[vq->vq_nentries];
-
-       _mm_prefetch((const void *)rused, _MM_HINT_T0);
-
-       if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
-               virtio_rxq_rearm_vec(rxvq);
-               if (unlikely(virtqueue_kick_prepare(vq)))
-                       virtqueue_notify(vq);
-       }
-
-       for (nb_pkts_received = 0;
-               nb_pkts_received < nb_used;) {
-               __m128i desc[RTE_VIRTIO_DESC_PER_LOOP / 2];
-               __m128i mbp[RTE_VIRTIO_DESC_PER_LOOP / 2];
-               __m128i pkt_mb[RTE_VIRTIO_DESC_PER_LOOP];
-
-               mbp[0] = _mm_loadu_si128((__m128i *)(sw_ring + 0));
-               desc[0] = _mm_loadu_si128((__m128i *)(rused + 0));
-               _mm_storeu_si128((__m128i *)&rx_pkts[0], mbp[0]);
-
-               mbp[1] = _mm_loadu_si128((__m128i *)(sw_ring + 2));
-               desc[1] = _mm_loadu_si128((__m128i *)(rused + 2));
-               _mm_storeu_si128((__m128i *)&rx_pkts[2], mbp[1]);
-
-               mbp[2] = _mm_loadu_si128((__m128i *)(sw_ring + 4));
-               desc[2] = _mm_loadu_si128((__m128i *)(rused + 4));
-               _mm_storeu_si128((__m128i *)&rx_pkts[4], mbp[2]);
-
-               mbp[3] = _mm_loadu_si128((__m128i *)(sw_ring + 6));
-               desc[3] = _mm_loadu_si128((__m128i *)(rused + 6));
-               _mm_storeu_si128((__m128i *)&rx_pkts[6], mbp[3]);
-
-               pkt_mb[1] = _mm_shuffle_epi8(desc[0], shuf_msk2);
-               pkt_mb[0] = _mm_shuffle_epi8(desc[0], shuf_msk1);
-               pkt_mb[1] = _mm_add_epi16(pkt_mb[1], len_adjust);
-               pkt_mb[0] = _mm_add_epi16(pkt_mb[0], len_adjust);
-               _mm_storeu_si128((void *)&rx_pkts[1]->rx_descriptor_fields1,
-                       pkt_mb[1]);
-               _mm_storeu_si128((void *)&rx_pkts[0]->rx_descriptor_fields1,
-                       pkt_mb[0]);
-
-               pkt_mb[3] = _mm_shuffle_epi8(desc[1], shuf_msk2);
-               pkt_mb[2] = _mm_shuffle_epi8(desc[1], shuf_msk1);
-               pkt_mb[3] = _mm_add_epi16(pkt_mb[3], len_adjust);
-               pkt_mb[2] = _mm_add_epi16(pkt_mb[2], len_adjust);
-               _mm_storeu_si128((void *)&rx_pkts[3]->rx_descriptor_fields1,
-                       pkt_mb[3]);
-               _mm_storeu_si128((void *)&rx_pkts[2]->rx_descriptor_fields1,
-                       pkt_mb[2]);
-
-               pkt_mb[5] = _mm_shuffle_epi8(desc[2], shuf_msk2);
-               pkt_mb[4] = _mm_shuffle_epi8(desc[2], shuf_msk1);
-               pkt_mb[5] = _mm_add_epi16(pkt_mb[5], len_adjust);
-               pkt_mb[4] = _mm_add_epi16(pkt_mb[4], len_adjust);
-               _mm_storeu_si128((void *)&rx_pkts[5]->rx_descriptor_fields1,
-                       pkt_mb[5]);
-               _mm_storeu_si128((void *)&rx_pkts[4]->rx_descriptor_fields1,
-                       pkt_mb[4]);
-
-               pkt_mb[7] = _mm_shuffle_epi8(desc[3], shuf_msk2);
-               pkt_mb[6] = _mm_shuffle_epi8(desc[3], shuf_msk1);
-               pkt_mb[7] = _mm_add_epi16(pkt_mb[7], len_adjust);
-               pkt_mb[6] = _mm_add_epi16(pkt_mb[6], len_adjust);
-               _mm_storeu_si128((void *)&rx_pkts[7]->rx_descriptor_fields1,
-                       pkt_mb[7]);
-               _mm_storeu_si128((void *)&rx_pkts[6]->rx_descriptor_fields1,
-                       pkt_mb[6]);
-
-               if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) {
-                       if (sw_ring + nb_used <= sw_ring_end)
-                               nb_pkts_received += nb_used;
-                       else
-                               nb_pkts_received += sw_ring_end - sw_ring;
-                       break;
-               } else {
-                       if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >=
-                               sw_ring_end)) {
-                               nb_pkts_received += sw_ring_end - sw_ring;
-                               break;
-                       } else {
-                               nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP;
-
-                               rx_pkts += RTE_VIRTIO_DESC_PER_LOOP;
-                               sw_ring += RTE_VIRTIO_DESC_PER_LOOP;
-                               rused   += RTE_VIRTIO_DESC_PER_LOOP;
-                               nb_used -= RTE_VIRTIO_DESC_PER_LOOP;
-                       }
-               }
-       }
-
-       vq->vq_used_cons_idx += nb_pkts_received;
-       vq->vq_free_cnt += nb_pkts_received;
-       rxvq->stats.packets += nb_pkts_received;
-       return nb_pkts_received;
-}
-
-#define VIRTIO_TX_FREE_THRESH 32
-#define VIRTIO_TX_MAX_FREE_BUF_SZ 32
-#define VIRTIO_TX_FREE_NR 32
-/* TODO: vq->tx_free_cnt could mean num of free slots so we could avoid shift */
-static inline void
-virtio_xmit_cleanup(struct virtqueue *vq)
-{
-       uint16_t i, desc_idx;
-       uint32_t nb_free = 0;
-       struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ];
-
-       desc_idx = (uint16_t)(vq->vq_used_cons_idx &
-                  ((vq->vq_nentries >> 1) - 1));
-       m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
-       m = __rte_pktmbuf_prefree_seg(m);
-       if (likely(m != NULL)) {
-               free[0] = m;
-               nb_free = 1;
-               for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
-                       m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
-                       m = __rte_pktmbuf_prefree_seg(m);
-                       if (likely(m != NULL)) {
-                               if (likely(m->pool == free[0]->pool))
-                                       free[nb_free++] = m;
-                               else {
-                                       rte_mempool_put_bulk(free[0]->pool,
-                                               (void **)free,
-                                               RTE_MIN(RTE_DIM(free),
-                                                       nb_free));
-                                       free[0] = m;
-                                       nb_free = 1;
-                               }
-                       }
-               }
-               rte_mempool_put_bulk(free[0]->pool, (void **)free,
-                       RTE_MIN(RTE_DIM(free), nb_free));
-       } else {
-               for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
-                       m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
-                       m = __rte_pktmbuf_prefree_seg(m);
-                       if (m != NULL)
-                               rte_mempool_put(m->pool, m);
-               }
-       }
-
-       vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR;
-       vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1);
-}
-
 uint16_t
 virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
        uint16_t nb_pkts)
@@ -423,3 +163,13 @@ virtio_rxq_vec_setup(struct virtnet_rx *rxq)
 
        return 0;
 }
+
+/* Stub for linkage when arch specific implementation is not available */
+uint16_t __attribute__((weak))
+virtio_recv_pkts_vec(void *rx_queue __rte_unused,
+                    struct rte_mbuf **rx_pkts __rte_unused,
+                    uint16_t nb_pkts __rte_unused)
+{
+       rte_panic("Wrong weak function linked by linker\n");
+       return 0;
+}
diff --git a/src/dpdk/drivers/net/virtio/virtio_rxtx_simple.h b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple.h
new file mode 100644 (file)
index 0000000..b08f859
--- /dev/null
@@ -0,0 +1,136 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_RXTX_SIMPLE_H_
+#define _VIRTIO_RXTX_SIMPLE_H_
+
+#include <stdint.h>
+
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+#include "virtqueue.h"
+#include "virtio_rxtx.h"
+
+#define RTE_VIRTIO_VPMD_RX_BURST 32
+#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+
+static inline void
+virtio_rxq_rearm_vec(struct virtnet_rx *rxvq)
+{
+       int i;
+       uint16_t desc_idx;
+       struct rte_mbuf **sw_ring;
+       struct vring_desc *start_dp;
+       int ret;
+       struct virtqueue *vq = rxvq->vq;
+
+       desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1);
+       sw_ring = &vq->sw_ring[desc_idx];
+       start_dp = &vq->vq_ring.desc[desc_idx];
+
+       ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring,
+               RTE_VIRTIO_VPMD_RX_REARM_THRESH);
+       if (unlikely(ret)) {
+               rte_eth_devices[rxvq->port_id].data->rx_mbuf_alloc_failed +=
+                       RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+               return;
+       }
+
+       for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) {
+               uintptr_t p;
+
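+               /* a single 64-bit store of the precomputed template
+                * presumably re-initialises the fixed mbuf fields
+                * (data_off, refcnt, nb_segs, port) in one shot.
+                */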
+               p = (uintptr_t)&sw_ring[i]->rearm_data;
+               *(uint64_t *)p = rxvq->mbuf_initializer;
+
+               start_dp[i].addr =
+                       VIRTIO_MBUF_ADDR(sw_ring[i], vq) +
+                       RTE_PKTMBUF_HEADROOM - vq->hw->vtnet_hdr_size;
+               start_dp[i].len = sw_ring[i]->buf_len -
+                       RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size;
+       }
+
+       vq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+       vq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+       vq_update_avail_idx(vq);
+}
+
+#define VIRTIO_TX_FREE_THRESH 32
+#define VIRTIO_TX_MAX_FREE_BUF_SZ 32
+#define VIRTIO_TX_FREE_NR 32
+/* TODO: vq->tx_free_cnt could mean num of free slots so we could avoid shift */
+static inline void
+virtio_xmit_cleanup(struct virtqueue *vq)
+{
+       uint16_t i, desc_idx;
+       uint32_t nb_free = 0;
+       struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ];
+
+       desc_idx = (uint16_t)(vq->vq_used_cons_idx &
+                  ((vq->vq_nentries >> 1) - 1));
+       m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+       m = __rte_pktmbuf_prefree_seg(m);
+       if (likely(m != NULL)) {
+               free[0] = m;
+               nb_free = 1;
+               for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
+                       m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+                       m = __rte_pktmbuf_prefree_seg(m);
+                       if (likely(m != NULL)) {
+                               if (likely(m->pool == free[0]->pool))
+                                       free[nb_free++] = m;
+                               else {
+                                       rte_mempool_put_bulk(free[0]->pool,
+                                               (void **)free,
+                                               RTE_MIN(RTE_DIM(free),
+                                                       nb_free));
+                                       free[0] = m;
+                                       nb_free = 1;
+                               }
+                       }
+               }
+               rte_mempool_put_bulk(free[0]->pool, (void **)free,
+                       RTE_MIN(RTE_DIM(free), nb_free));
+       } else {
+               for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
+                       m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
+                       m = __rte_pktmbuf_prefree_seg(m);
+                       if (m != NULL)
+                               rte_mempool_put(m->pool, m);
+               }
+       }
+
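+       /* each used entry presumably represents a two-descriptor chain
+        * (header + data), hence twice VIRTIO_TX_FREE_NR descriptors are
+        * returned to the free count below.
+        */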
+       vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR;
+       vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1);
+}
+
+#endif /* _VIRTIO_RXTX_SIMPLE_H_ */
diff --git a/src/dpdk/drivers/net/virtio/virtio_rxtx_simple_neon.c b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple_neon.c
new file mode 100644 (file)
index 0000000..793eefb
--- /dev/null
@@ -0,0 +1,235 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) Cavium networks Ltd. 2016
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium networks nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <rte_byteorder.h>
+#include <rte_branch_prediction.h>
+#include <rte_cycles.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_prefetch.h>
+#include <rte_string_fns.h>
+#include <rte_vect.h>
+
+#include "virtio_rxtx_simple.h"
+
+#define RTE_VIRTIO_VPMD_RX_BURST 32
+#define RTE_VIRTIO_DESC_PER_LOOP 8
+#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+
+/* virtio vPMD receive routine, only accepts nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP
+ *
+ * This routine is for non-mergeable RX, one desc for each guest buffer.
+ * This routine is based on the RX ring layout optimization. Each entry in the
+ * avail ring points to the desc with the same index in the desc ring and this
+ * will never be changed in the driver.
+ *
+ * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
+ */
+uint16_t
+virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+       uint16_t nb_pkts)
+{
+       struct virtnet_rx *rxvq = rx_queue;
+       struct virtqueue *vq = rxvq->vq;
+       uint16_t nb_used;
+       uint16_t desc_idx;
+       struct vring_used_elem *rused;
+       struct rte_mbuf **sw_ring;
+       struct rte_mbuf **sw_ring_end;
+       uint16_t nb_pkts_received;
+
+       uint8x16_t shuf_msk1 = {
+               0xFF, 0xFF, 0xFF, 0xFF, /* packet type */
+               4, 5, 0xFF, 0xFF,       /* pkt len */
+               4, 5,                   /* dat len */
+               0xFF, 0xFF,             /* vlan tci */
+               0xFF, 0xFF, 0xFF, 0xFF
+       };
+
+       uint8x16_t shuf_msk2 = {
+               0xFF, 0xFF, 0xFF, 0xFF, /* packet type */
+               12, 13, 0xFF, 0xFF,     /* pkt len */
+               12, 13,                 /* dat len */
+               0xFF, 0xFF,             /* vlan tci */
+               0xFF, 0xFF, 0xFF, 0xFF
+       };
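+       /* the masks presumably scatter each 32-bit used->len into the
+        * pkt_len/data_len words of rx_descriptor_fields1; 0xFF lanes
+        * produce zero bytes.
+        */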
+
+       /* Subtract the header length.
+        *  In which case do we need the header length in used->len ?
+        */
+       uint16x8_t len_adjust = {
+               0, 0,
+               (uint16_t)vq->hw->vtnet_hdr_size, 0,
+               (uint16_t)vq->hw->vtnet_hdr_size,
+               0,
+               0, 0
+       };
+
+       if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
+               return 0;
+
+       nb_used = VIRTQUEUE_NUSED(vq);
+
+       rte_rmb();
+
+       if (unlikely(nb_used == 0))
+               return 0;
+
+       nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP);
+       nb_used = RTE_MIN(nb_used, nb_pkts);
+
+       desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+       rused = &vq->vq_ring.used->ring[desc_idx];
+       sw_ring  = &vq->sw_ring[desc_idx];
+       sw_ring_end = &vq->sw_ring[vq->vq_nentries];
+
+       rte_prefetch_non_temporal(rused);
+
+       if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
+               virtio_rxq_rearm_vec(rxvq);
+               if (unlikely(virtqueue_kick_prepare(vq)))
+                       virtqueue_notify(vq);
+       }
+
+       for (nb_pkts_received = 0;
+               nb_pkts_received < nb_used;) {
+               uint64x2_t desc[RTE_VIRTIO_DESC_PER_LOOP / 2];
+               uint64x2_t mbp[RTE_VIRTIO_DESC_PER_LOOP / 2];
+               uint64x2_t pkt_mb[RTE_VIRTIO_DESC_PER_LOOP];
+
+               mbp[0] = vld1q_u64((uint64_t *)(sw_ring + 0));
+               desc[0] = vld1q_u64((uint64_t *)(rused + 0));
+               vst1q_u64((uint64_t *)&rx_pkts[0], mbp[0]);
+
+               mbp[1] = vld1q_u64((uint64_t *)(sw_ring + 2));
+               desc[1] = vld1q_u64((uint64_t *)(rused + 2));
+               vst1q_u64((uint64_t *)&rx_pkts[2], mbp[1]);
+
+               mbp[2] = vld1q_u64((uint64_t *)(sw_ring + 4));
+               desc[2] = vld1q_u64((uint64_t *)(rused + 4));
+               vst1q_u64((uint64_t *)&rx_pkts[4], mbp[2]);
+
+               mbp[3] = vld1q_u64((uint64_t *)(sw_ring + 6));
+               desc[3] = vld1q_u64((uint64_t *)(rused + 6));
+               vst1q_u64((uint64_t *)&rx_pkts[6], mbp[3]);
+
+               pkt_mb[1] = vreinterpretq_u64_u8(vqtbl1q_u8(
+                               vreinterpretq_u8_u64(desc[0]), shuf_msk2));
+               pkt_mb[0] = vreinterpretq_u64_u8(vqtbl1q_u8(
+                               vreinterpretq_u8_u64(desc[0]), shuf_msk1));
+               pkt_mb[1] = vreinterpretq_u64_u16(vsubq_u16(
+                               vreinterpretq_u16_u64(pkt_mb[1]), len_adjust));
+               pkt_mb[0] = vreinterpretq_u64_u16(vsubq_u16(
+                               vreinterpretq_u16_u64(pkt_mb[0]), len_adjust));
+               vst1q_u64((void *)&rx_pkts[1]->rx_descriptor_fields1,
+                       pkt_mb[1]);
+               vst1q_u64((void *)&rx_pkts[0]->rx_descriptor_fields1,
+                       pkt_mb[0]);
+
+               pkt_mb[3] = vreinterpretq_u64_u8(vqtbl1q_u8(
+                               vreinterpretq_u8_u64(desc[1]), shuf_msk2));
+               pkt_mb[2] = vreinterpretq_u64_u8(vqtbl1q_u8(
+                               vreinterpretq_u8_u64(desc[1]), shuf_msk1));
+               pkt_mb[3] = vreinterpretq_u64_u16(vsubq_u16(
+                               vreinterpretq_u16_u64(pkt_mb[3]), len_adjust));
+               pkt_mb[2] = vreinterpretq_u64_u16(vsubq_u16(
+                               vreinterpretq_u16_u64(pkt_mb[2]), len_adjust));
+               vst1q_u64((void *)&rx_pkts[3]->rx_descriptor_fields1,
+                       pkt_mb[3]);
+               vst1q_u64((void *)&rx_pkts[2]->rx_descriptor_fields1,
+                       pkt_mb[2]);
+
+               pkt_mb[5] = vreinterpretq_u64_u8(vqtbl1q_u8(
+                               vreinterpretq_u8_u64(desc[2]), shuf_msk2));
+               pkt_mb[4] = vreinterpretq_u64_u8(vqtbl1q_u8(
+                               vreinterpretq_u8_u64(desc[2]), shuf_msk1));
+               pkt_mb[5] = vreinterpretq_u64_u16(vsubq_u16(
+                               vreinterpretq_u16_u64(pkt_mb[5]), len_adjust));
+               pkt_mb[4] = vreinterpretq_u64_u16(vsubq_u16(
+                               vreinterpretq_u16_u64(pkt_mb[4]), len_adjust));
+               vst1q_u64((void *)&rx_pkts[5]->rx_descriptor_fields1,
+                       pkt_mb[5]);
+               vst1q_u64((void *)&rx_pkts[4]->rx_descriptor_fields1,
+                       pkt_mb[4]);
+
+               pkt_mb[7] = vreinterpretq_u64_u8(vqtbl1q_u8(
+                               vreinterpretq_u8_u64(desc[3]), shuf_msk2));
+               pkt_mb[6] = vreinterpretq_u64_u8(vqtbl1q_u8(
+                               vreinterpretq_u8_u64(desc[3]), shuf_msk1));
+               pkt_mb[7] = vreinterpretq_u64_u16(vsubq_u16(
+                               vreinterpretq_u16_u64(pkt_mb[7]), len_adjust));
+               pkt_mb[6] = vreinterpretq_u64_u16(vsubq_u16(
+                               vreinterpretq_u16_u64(pkt_mb[6]), len_adjust));
+               vst1q_u64((void *)&rx_pkts[7]->rx_descriptor_fields1,
+                       pkt_mb[7]);
+               vst1q_u64((void *)&rx_pkts[6]->rx_descriptor_fields1,
+                       pkt_mb[6]);
+
+               if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) {
+                       if (sw_ring + nb_used <= sw_ring_end)
+                               nb_pkts_received += nb_used;
+                       else
+                               nb_pkts_received += sw_ring_end - sw_ring;
+                       break;
+               } else {
+                       if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >=
+                               sw_ring_end)) {
+                               nb_pkts_received += sw_ring_end - sw_ring;
+                               break;
+                       } else {
+                               nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP;
+
+                               rx_pkts += RTE_VIRTIO_DESC_PER_LOOP;
+                               sw_ring += RTE_VIRTIO_DESC_PER_LOOP;
+                               rused   += RTE_VIRTIO_DESC_PER_LOOP;
+                               nb_used -= RTE_VIRTIO_DESC_PER_LOOP;
+                       }
+               }
+       }
+
+       vq->vq_used_cons_idx += nb_pkts_received;
+       vq->vq_free_cnt += nb_pkts_received;
+       rxvq->stats.packets += nb_pkts_received;
+       return nb_pkts_received;
+}
diff --git a/src/dpdk/drivers/net/virtio/virtio_rxtx_simple_sse.c b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple_sse.c
new file mode 100644 (file)
index 0000000..87bb5c6
--- /dev/null
@@ -0,0 +1,222 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <tmmintrin.h>
+
+#include <rte_byteorder.h>
+#include <rte_branch_prediction.h>
+#include <rte_cycles.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_prefetch.h>
+#include <rte_string_fns.h>
+
+#include "virtio_rxtx_simple.h"
+
+#define RTE_VIRTIO_VPMD_RX_BURST 32
+#define RTE_VIRTIO_DESC_PER_LOOP 8
+#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST
+
+/* virtio vPMD receive routine, only accepts nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP
+ *
+ * This routine is for non-mergeable RX, one desc for each guest buffer.
+ * This routine is based on the RX ring layout optimization. Each entry in the
+ * avail ring points to the desc with the same index in the desc ring and this
+ * will never be changed in the driver.
+ *
+ * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
+ */
+uint16_t
+virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+       uint16_t nb_pkts)
+{
+       struct virtnet_rx *rxvq = rx_queue;
+       struct virtqueue *vq = rxvq->vq;
+       uint16_t nb_used;
+       uint16_t desc_idx;
+       struct vring_used_elem *rused;
+       struct rte_mbuf **sw_ring;
+       struct rte_mbuf **sw_ring_end;
+       uint16_t nb_pkts_received;
+       __m128i shuf_msk1, shuf_msk2, len_adjust;
+
+       shuf_msk1 = _mm_set_epi8(
+               0xFF, 0xFF, 0xFF, 0xFF,
+               0xFF, 0xFF,             /* vlan tci */
+               5, 4,                   /* dat len */
+               0xFF, 0xFF, 5, 4,       /* pkt len */
+               0xFF, 0xFF, 0xFF, 0xFF  /* packet type */
+
+       );
+
+       shuf_msk2 = _mm_set_epi8(
+               0xFF, 0xFF, 0xFF, 0xFF,
+               0xFF, 0xFF,             /* vlan tci */
+               13, 12,                 /* dat len */
+               0xFF, 0xFF, 13, 12,     /* pkt len */
+               0xFF, 0xFF, 0xFF, 0xFF  /* packet type */
+       );
+
+       /* Subtract the header length.
+       *  In which case do we need the header length in used->len ?
+       */
+       len_adjust = _mm_set_epi16(
+               0, 0,
+               0,
+               (uint16_t)-vq->hw->vtnet_hdr_size,
+               0, (uint16_t)-vq->hw->vtnet_hdr_size,
+               0, 0);
+
+       if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
+               return 0;
+
+       nb_used = VIRTQUEUE_NUSED(vq);
+
+       rte_compiler_barrier();
+
+       if (unlikely(nb_used == 0))
+               return 0;
+
+       nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP);
+       nb_used = RTE_MIN(nb_used, nb_pkts);
+
+       desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+       rused = &vq->vq_ring.used->ring[desc_idx];
+       sw_ring  = &vq->sw_ring[desc_idx];
+       sw_ring_end = &vq->sw_ring[vq->vq_nentries];
+
+       rte_prefetch0(rused);
+
+       if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
+               virtio_rxq_rearm_vec(rxvq);
+               if (unlikely(virtqueue_kick_prepare(vq)))
+                       virtqueue_notify(vq);
+       }
+
+       for (nb_pkts_received = 0;
+               nb_pkts_received < nb_used;) {
+               __m128i desc[RTE_VIRTIO_DESC_PER_LOOP / 2];
+               __m128i mbp[RTE_VIRTIO_DESC_PER_LOOP / 2];
+               __m128i pkt_mb[RTE_VIRTIO_DESC_PER_LOOP];
+
+               mbp[0] = _mm_loadu_si128((__m128i *)(sw_ring + 0));
+               desc[0] = _mm_loadu_si128((__m128i *)(rused + 0));
+               _mm_storeu_si128((__m128i *)&rx_pkts[0], mbp[0]);
+
+               mbp[1] = _mm_loadu_si128((__m128i *)(sw_ring + 2));
+               desc[1] = _mm_loadu_si128((__m128i *)(rused + 2));
+               _mm_storeu_si128((__m128i *)&rx_pkts[2], mbp[1]);
+
+               mbp[2] = _mm_loadu_si128((__m128i *)(sw_ring + 4));
+               desc[2] = _mm_loadu_si128((__m128i *)(rused + 4));
+               _mm_storeu_si128((__m128i *)&rx_pkts[4], mbp[2]);
+
+               mbp[3] = _mm_loadu_si128((__m128i *)(sw_ring + 6));
+               desc[3] = _mm_loadu_si128((__m128i *)(rused + 6));
+               _mm_storeu_si128((__m128i *)&rx_pkts[6], mbp[3]);
+
+               pkt_mb[1] = _mm_shuffle_epi8(desc[0], shuf_msk2);
+               pkt_mb[0] = _mm_shuffle_epi8(desc[0], shuf_msk1);
+               pkt_mb[1] = _mm_add_epi16(pkt_mb[1], len_adjust);
+               pkt_mb[0] = _mm_add_epi16(pkt_mb[0], len_adjust);
+               _mm_storeu_si128((void *)&rx_pkts[1]->rx_descriptor_fields1,
+                       pkt_mb[1]);
+               _mm_storeu_si128((void *)&rx_pkts[0]->rx_descriptor_fields1,
+                       pkt_mb[0]);
+
+               pkt_mb[3] = _mm_shuffle_epi8(desc[1], shuf_msk2);
+               pkt_mb[2] = _mm_shuffle_epi8(desc[1], shuf_msk1);
+               pkt_mb[3] = _mm_add_epi16(pkt_mb[3], len_adjust);
+               pkt_mb[2] = _mm_add_epi16(pkt_mb[2], len_adjust);
+               _mm_storeu_si128((void *)&rx_pkts[3]->rx_descriptor_fields1,
+                       pkt_mb[3]);
+               _mm_storeu_si128((void *)&rx_pkts[2]->rx_descriptor_fields1,
+                       pkt_mb[2]);
+
+               pkt_mb[5] = _mm_shuffle_epi8(desc[2], shuf_msk2);
+               pkt_mb[4] = _mm_shuffle_epi8(desc[2], shuf_msk1);
+               pkt_mb[5] = _mm_add_epi16(pkt_mb[5], len_adjust);
+               pkt_mb[4] = _mm_add_epi16(pkt_mb[4], len_adjust);
+               _mm_storeu_si128((void *)&rx_pkts[5]->rx_descriptor_fields1,
+                       pkt_mb[5]);
+               _mm_storeu_si128((void *)&rx_pkts[4]->rx_descriptor_fields1,
+                       pkt_mb[4]);
+
+               pkt_mb[7] = _mm_shuffle_epi8(desc[3], shuf_msk2);
+               pkt_mb[6] = _mm_shuffle_epi8(desc[3], shuf_msk1);
+               pkt_mb[7] = _mm_add_epi16(pkt_mb[7], len_adjust);
+               pkt_mb[6] = _mm_add_epi16(pkt_mb[6], len_adjust);
+               _mm_storeu_si128((void *)&rx_pkts[7]->rx_descriptor_fields1,
+                       pkt_mb[7]);
+               _mm_storeu_si128((void *)&rx_pkts[6]->rx_descriptor_fields1,
+                       pkt_mb[6]);
+
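+               /* Stop when fewer than a full stride remains or when sw_ring
+                * would pass the end of the ring; only entries up to the ring
+                * end are counted, and the consumed-index update below lets
+                * the next call continue from the wrapped position.
+                */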
+               if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) {
+                       if (sw_ring + nb_used <= sw_ring_end)
+                               nb_pkts_received += nb_used;
+                       else
+                               nb_pkts_received += sw_ring_end - sw_ring;
+                       break;
+               } else {
+                       if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >=
+                               sw_ring_end)) {
+                               nb_pkts_received += sw_ring_end - sw_ring;
+                               break;
+                       } else {
+                               nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP;
+
+                               rx_pkts += RTE_VIRTIO_DESC_PER_LOOP;
+                               sw_ring += RTE_VIRTIO_DESC_PER_LOOP;
+                               rused   += RTE_VIRTIO_DESC_PER_LOOP;
+                               nb_used -= RTE_VIRTIO_DESC_PER_LOOP;
+                       }
+               }
+       }
+
+       vq->vq_used_cons_idx += nb_pkts_received;
+       vq->vq_free_cnt += nb_pkts_received;
+       rxvq->stats.packets += nb_pkts_received;
+       return nb_pkts_received;
+}
diff --git a/src/dpdk/drivers/net/virtio/virtio_user/vhost.h b/src/dpdk/drivers/net/virtio/virtio_user/vhost.h
new file mode 100644 (file)
index 0000000..5c983bd
--- /dev/null
@@ -0,0 +1,123 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+
+#include <stdint.h>
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#include "../virtio_pci.h"
+#include "../virtio_logs.h"
+#include "../virtqueue.h"
+
+struct vhost_vring_state {
+       unsigned int index;
+       unsigned int num;
+};
+
+struct vhost_vring_file {
+       unsigned int index;
+       int fd;
+};
+
+struct vhost_vring_addr {
+       unsigned int index;
+       /* Option flags. */
+       unsigned int flags;
+       /* Flag values: */
+       /* Whether log address is valid. If set enables logging. */
+#define VHOST_VRING_F_LOG 0
+
+       /* Start of array of descriptors (virtually contiguous) */
+       uint64_t desc_user_addr;
+       /* Used structure address. Must be 32 bit aligned */
+       uint64_t used_user_addr;
+       /* Available structure address. Must be 16 bit aligned */
+       uint64_t avail_user_addr;
+       /* Logging support. */
+       /* Log writes to used structure, at offset calculated from specified
+        * address. Address must be 32 bit aligned.
+        */
+       uint64_t log_guest_addr;
+};
+
+enum vhost_user_request {
+       VHOST_USER_NONE = 0,
+       VHOST_USER_GET_FEATURES = 1,
+       VHOST_USER_SET_FEATURES = 2,
+       VHOST_USER_SET_OWNER = 3,
+       VHOST_USER_RESET_OWNER = 4,
+       VHOST_USER_SET_MEM_TABLE = 5,
+       VHOST_USER_SET_LOG_BASE = 6,
+       VHOST_USER_SET_LOG_FD = 7,
+       VHOST_USER_SET_VRING_NUM = 8,
+       VHOST_USER_SET_VRING_ADDR = 9,
+       VHOST_USER_SET_VRING_BASE = 10,
+       VHOST_USER_GET_VRING_BASE = 11,
+       VHOST_USER_SET_VRING_KICK = 12,
+       VHOST_USER_SET_VRING_CALL = 13,
+       VHOST_USER_SET_VRING_ERR = 14,
+       VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+       VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+       VHOST_USER_GET_QUEUE_NUM = 17,
+       VHOST_USER_SET_VRING_ENABLE = 18,
+       VHOST_USER_MAX
+};
+
+extern const char * const vhost_msg_strings[VHOST_USER_MAX];
+
+struct vhost_memory_region {
+       uint64_t guest_phys_addr;
+       uint64_t memory_size; /* bytes */
+       uint64_t userspace_addr;
+       uint64_t mmap_offset;
+};
+
+struct virtio_user_dev;
+
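+/* Dispatch table implemented by the two backends: ops_user speaks the
+ * vhost-user protocol over a UNIX socket, while ops_kernel drives the
+ * vhost-net kernel module through ioctls (typically on /dev/vhost-net).
+ */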
+struct virtio_user_backend_ops {
+       int (*setup)(struct virtio_user_dev *dev);
+       int (*send_request)(struct virtio_user_dev *dev,
+                           enum vhost_user_request req,
+                           void *arg);
+       int (*enable_qp)(struct virtio_user_dev *dev,
+                        uint16_t pair_idx,
+                        int enable);
+};
+
+extern struct virtio_user_backend_ops ops_user;
+extern struct virtio_user_backend_ops ops_kernel;
+
+#endif
diff --git a/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel.c b/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel.c
new file mode 100644 (file)
index 0000000..05aa6c6
--- /dev/null
@@ -0,0 +1,403 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <rte_memory.h>
+#include <rte_eal_memconfig.h>
+
+#include "vhost.h"
+#include "virtio_user_dev.h"
+#include "vhost_kernel_tap.h"
+
+struct vhost_memory_kernel {
+       uint32_t nregions;
+       uint32_t padding;
+       struct vhost_memory_region regions[0];
+};
+
+/* vhost kernel ioctls */
+#define VHOST_VIRTIO 0xAF
+#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64)
+#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64)
+#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01)
+#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02)
+#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory_kernel)
+#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
+#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
+#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state)
+#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr)
+#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
+#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
+#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file)
+#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
+#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
+#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
+
+static uint64_t max_regions = 64;
+
+static void
+get_vhost_kernel_max_regions(void)
+{
+       int fd;
+       char buf[20] = {'\0'};
+
+       fd = open("/sys/module/vhost/parameters/max_mem_regions", O_RDONLY);
+       if (fd < 0)
+               return;
+
+       if (read(fd, buf, sizeof(buf) - 1) > 0)
+               max_regions = strtoull(buf, NULL, 10);
+
+       close(fd);
+}
+
+static uint64_t vhost_req_user_to_kernel[] = {
+       [VHOST_USER_SET_OWNER] = VHOST_SET_OWNER,
+       [VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER,
+       [VHOST_USER_SET_FEATURES] = VHOST_SET_FEATURES,
+       [VHOST_USER_GET_FEATURES] = VHOST_GET_FEATURES,
+       [VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL,
+       [VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM,
+       [VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE,
+       [VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE,
+       [VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR,
+       [VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK,
+       [VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE,
+};
+
+/* By default, the vhost kernel module allows 64 regions, while DPDK allows up
+ * to 256 memory segments. To work around this, the function below merges
+ * virtually adjacent memsegs into a single region.
+ */
+static struct vhost_memory_kernel *
+prepare_vhost_memory_kernel(void)
+{
+       uint32_t i, j, k = 0;
+       struct rte_memseg *seg;
+       struct vhost_memory_region *mr;
+       struct vhost_memory_kernel *vm;
+
+       vm = malloc(sizeof(struct vhost_memory_kernel) +
+                   max_regions *
+                   sizeof(struct vhost_memory_region));
+       if (!vm)
+               return NULL;
+
+       for (i = 0; i < RTE_MAX_MEMSEG; ++i) {
+               seg = &rte_eal_get_configuration()->mem_config->memseg[i];
+               if (!seg->addr)
+                       break;
+
+               int new_region = 1;
+
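+               /* Try to extend an existing region forward or backward with
+                * this memseg before starting a new one.
+                */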
+               for (j = 0; j < k; ++j) {
+                       mr = &vm->regions[j];
+
+                       if (mr->userspace_addr + mr->memory_size ==
+                           (uint64_t)(uintptr_t)seg->addr) {
+                               mr->memory_size += seg->len;
+                               new_region = 0;
+                               break;
+                       }
+
+                       if ((uint64_t)(uintptr_t)seg->addr + seg->len ==
+                           mr->userspace_addr) {
+                               mr->guest_phys_addr =
+                                       (uint64_t)(uintptr_t)seg->addr;
+                               mr->userspace_addr =
+                                       (uint64_t)(uintptr_t)seg->addr;
+                               mr->memory_size += seg->len;
+                               new_region = 0;
+                               break;
+                       }
+               }
+
+               if (new_region == 0)
+                       continue;
+
+               mr = &vm->regions[k++];
+               /* use vaddr here! */
+               mr->guest_phys_addr = (uint64_t)(uintptr_t)seg->addr;
+               mr->userspace_addr = (uint64_t)(uintptr_t)seg->addr;
+               mr->memory_size = seg->len;
+               mr->mmap_offset = 0;
+
+               if (k >= max_regions) {
+                       free(vm);
+                       return NULL;
+               }
+       }
+
+       vm->nregions = k;
+       vm->padding = 0;
+       return vm;
+}
+
+/* With the features below, the vhost kernel backend does not need to compute
+ * checksums or perform TSO itself; this information is passed to virtio_user
+ * through the virtio net header.
+ */
+#define VHOST_KERNEL_GUEST_OFFLOADS_MASK       \
+       ((1ULL << VIRTIO_NET_F_GUEST_CSUM) |    \
+        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |    \
+        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |    \
+        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |    \
+        (1ULL << VIRTIO_NET_F_GUEST_UFO))
+
+/* With the features below, for flows going from virtio_user to the vhost
+ * kernel backend:
+ * (1) if a flow goes up through the kernel networking stack, the stack does
+ * not need to verify the checksum, which saves CPU cycles;
+ * (2) if a flow goes through a Linux bridge and out of a (kernel driver)
+ * interface, checksum and TSO are handled by GSO in the kernel or even
+ * offloaded to the physical device.
+ */
+#define VHOST_KERNEL_HOST_OFFLOADS_MASK                \
+       ((1ULL << VIRTIO_NET_F_HOST_TSO4) |     \
+        (1ULL << VIRTIO_NET_F_HOST_TSO6) |     \
+        (1ULL << VIRTIO_NET_F_CSUM))
+
+static int
+tap_supporte_mq(void)
+{
+       int tapfd;
+       unsigned int tap_features;
+
+       tapfd = open(PATH_NET_TUN, O_RDWR);
+       if (tapfd < 0) {
+               PMD_DRV_LOG(ERR, "fail to open %s: %s",
+                           PATH_NET_TUN, strerror(errno));
+               return -1;
+       }
+
+       if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) {
+               PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno));
+               close(tapfd);
+               return -1;
+       }
+
+       close(tapfd);
+       return tap_features & IFF_MULTI_QUEUE;
+}
+
+static int
+vhost_kernel_ioctl(struct virtio_user_dev *dev,
+                  enum vhost_user_request req,
+                  void *arg)
+{
+       int ret = -1;
+       unsigned int i;
+       uint64_t req_kernel;
+       struct vhost_memory_kernel *vm = NULL;
+       int vhostfd;
+       unsigned int queue_sel;
+
+       PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]);
+
+       req_kernel = vhost_req_user_to_kernel[req];
+
+       if (req_kernel == VHOST_SET_MEM_TABLE) {
+               vm = prepare_vhost_memory_kernel();
+               if (!vm)
+                       return -1;
+               arg = (void *)vm;
+       }
+
+       if (req_kernel == VHOST_SET_FEATURES) {
+               /* We don't need memory protection here */
+               *(uint64_t *)arg &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+
+               /* The vhost kernel module does not know about the flags below */
+               *(uint64_t *)arg &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK;
+               *(uint64_t *)arg &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK;
+
+               *(uint64_t *)arg &= ~(1ULL << VIRTIO_NET_F_MQ);
+       }
+
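+       /* vhost-net exposes one device fd per queue pair: for per-vring
+        * requests, pick the fd from queue_sel / 2 and rewrite the vring
+        * index to the local index (0 = rx, 1 = tx) within that fd.
+        */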
+       switch (req_kernel) {
+       case VHOST_SET_VRING_NUM:
+       case VHOST_SET_VRING_ADDR:
+       case VHOST_SET_VRING_BASE:
+       case VHOST_GET_VRING_BASE:
+       case VHOST_SET_VRING_KICK:
+       case VHOST_SET_VRING_CALL:
+               queue_sel = *(unsigned int *)arg;
+               vhostfd = dev->vhostfds[queue_sel / 2];
+               *(unsigned int *)arg = queue_sel % 2;
+               PMD_DRV_LOG(DEBUG, "vhostfd=%d, index=%u",
+                           vhostfd, *(unsigned int *)arg);
+               break;
+       default:
+               vhostfd = -1;
+       }
+       if (vhostfd == -1) {
+               for (i = 0; i < dev->max_queue_pairs; ++i) {
+                       if (dev->vhostfds[i] < 0)
+                               continue;
+
+                       ret = ioctl(dev->vhostfds[i], req_kernel, arg);
+                       if (ret < 0)
+                               break;
+               }
+       } else {
+               ret = ioctl(vhostfd, req_kernel, arg);
+       }
+
+       if (!ret && req_kernel == VHOST_GET_FEATURES) {
+               /* with tap as the backend, all these features are supported
+                * but not claimed by vhost-net, so we add them back when
+                * reporting to upper layer.
+                */
+               *((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK;
+               *((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK;
+
+               /* vhost_kernel will not declare this feature, but it does
+                * support multi-queue.
+                */
+               if (tap_supporte_mq())
+                       *(uint64_t *)arg |= (1ull << VIRTIO_NET_F_MQ);
+       }
+
+       if (vm)
+               free(vm);
+
+       if (ret < 0)
+               PMD_DRV_LOG(ERR, "%s failed: %s",
+                           vhost_msg_strings[req], strerror(errno));
+
+       return ret;
+}
+
+/**
+ * Set up environment to talk with a vhost kernel backend.
+ *
+ * @return
+ *   - (-1) if fail to set up;
+ *   - (>=0) if successful.
+ */
+static int
+vhost_kernel_setup(struct virtio_user_dev *dev)
+{
+       int vhostfd;
+       uint32_t i;
+
+       get_vhost_kernel_max_regions();
+
+       for (i = 0; i < dev->max_queue_pairs; ++i) {
+               vhostfd = open(dev->path, O_RDWR);
+               if (vhostfd < 0) {
+                       PMD_DRV_LOG(ERR, "fail to open %s, %s",
+                                   dev->path, strerror(errno));
+                       return -1;
+               }
+
+               dev->vhostfds[i] = vhostfd;
+       }
+
+       return 0;
+}
+
+static int
+vhost_kernel_set_backend(int vhostfd, int tapfd)
+{
+       struct vhost_vring_file f;
+
+       f.fd = tapfd;
+       f.index = 0;
+       if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) {
+               PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s",
+                               strerror(errno));
+               return -1;
+       }
+
+       f.index = 1;
+       if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) {
+               PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s",
+                               strerror(errno));
+               return -1;
+       }
+
+       return 0;
+}
+
+static int
+vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev,
+                              uint16_t pair_idx,
+                              int enable)
+{
+       int hdr_size;
+       int vhostfd;
+       int tapfd;
+       int req_mq = (dev->max_queue_pairs > 1);
+
+       vhostfd = dev->vhostfds[pair_idx];
+
+       if (!enable) {
+               if (dev->tapfds[pair_idx] >= 0) {
+                       close(dev->tapfds[pair_idx]);
+                       dev->tapfds[pair_idx] = -1;
+               }
+               return vhost_kernel_set_backend(vhostfd, -1);
+       } else if (dev->tapfds[pair_idx] >= 0) {
+               return 0;
+       }
+
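+       /* The vnet header size depends on the negotiated features: mergeable
+        * rx buffers and virtio 1.0 both use the larger mrg_rxbuf header.
+        */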
+       if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) ||
+           (dev->features & (1ULL << VIRTIO_F_VERSION_1)))
+               hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+       else
+               hdr_size = sizeof(struct virtio_net_hdr);
+
+       tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq);
+       if (tapfd < 0) {
+               PMD_DRV_LOG(ERR, "fail to open tap for vhost kernel");
+               return -1;
+       }
+
+       if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) {
+               PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel");
+               close(tapfd);
+               return -1;
+       }
+
+       dev->tapfds[pair_idx] = tapfd;
+       return 0;
+}
+
+struct virtio_user_backend_ops ops_kernel = {
+       .setup = vhost_kernel_setup,
+       .send_request = vhost_kernel_ioctl,
+       .enable_qp = vhost_kernel_enable_queue_pair
+};
diff --git a/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.c b/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.c
new file mode 100644 (file)
index 0000000..f585de8
--- /dev/null
@@ -0,0 +1,133 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <net/if.h>
+#include <errno.h>
+#include <string.h>
+#include <limits.h>
+
+#include "vhost_kernel_tap.h"
+#include "../virtio_logs.h"
+
+int
+vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq)
+{
+       unsigned int tap_features;
+       int sndbuf = INT_MAX;
+       struct ifreq ifr;
+       int tapfd;
+       unsigned int offload =
+                       TUN_F_CSUM |
+                       TUN_F_TSO4 |
+                       TUN_F_TSO6 |
+                       TUN_F_TSO_ECN |
+                       TUN_F_UFO;
+
+       /* TODO:
+        * 1. verify we can get/set vnet_hdr_len, tap_probe_vnet_hdr_len
+        * 2. get the number of memory regions from the vhost module parameter
+        * max_mem_regions, supported by newer Linux kernels
+        */
+       tapfd = open(PATH_NET_TUN, O_RDWR);
+       if (tapfd < 0) {
+               PMD_DRV_LOG(ERR, "fail to open %s: %s",
+                           PATH_NET_TUN, strerror(errno));
+               return -1;
+       }
+
+       /* Construct ifr */
+       memset(&ifr, 0, sizeof(ifr));
+       ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+
+       if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) {
+               PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno));
+               goto error;
+       }
+       if (tap_features & IFF_ONE_QUEUE)
+               ifr.ifr_flags |= IFF_ONE_QUEUE;
+
+       /* Let tap instead of vhost-net handle vnet header, as the latter does
+        * not support offloading. And in this case, we should not set feature
+        * bit VHOST_NET_F_VIRTIO_NET_HDR.
+        */
+       if (tap_features & IFF_VNET_HDR) {
+               ifr.ifr_flags |= IFF_VNET_HDR;
+       } else {
+               PMD_DRV_LOG(ERR, "TAP does not support IFF_VNET_HDR");
+               goto error;
+       }
+
+       if (req_mq)
+               ifr.ifr_flags |= IFF_MULTI_QUEUE;
+
+       if (*p_ifname)
+               strncpy(ifr.ifr_name, *p_ifname, IFNAMSIZ);
+       else
+               strncpy(ifr.ifr_name, "tap%d", IFNAMSIZ);
+       if (ioctl(tapfd, TUNSETIFF, (void *)&ifr) == -1) {
+               PMD_DRV_LOG(ERR, "TUNSETIFF failed: %s", strerror(errno));
+               goto error;
+       }
+
+       fcntl(tapfd, F_SETFL, O_NONBLOCK);
+
+       if (ioctl(tapfd, TUNSETVNETHDRSZ, &hdr_size) < 0) {
+               PMD_DRV_LOG(ERR, "TUNSETVNETHDRSZ failed: %s", strerror(errno));
+               goto error;
+       }
+
+       if (ioctl(tapfd, TUNSETSNDBUF, &sndbuf) < 0) {
+               PMD_DRV_LOG(ERR, "TUNSETSNDBUF failed: %s", strerror(errno));
+               goto error;
+       }
+
+       /* TODO: before setting the offload capabilities, we should (1) check
+        * the negotiated features to see whether offloads are needed; (2) query
+        * the tap device to see whether it supports them.
+        */
+       if (ioctl(tapfd, TUNSETOFFLOAD, offload) != 0)
+               PMD_DRV_LOG(ERR, "TUNSETOFFLOAD ioctl() failed: %s",
+                          strerror(errno));
+
+       if (!(*p_ifname))
+               *p_ifname = strdup(ifr.ifr_name);
+
+       return tapfd;
+error:
+       close(tapfd);
+       return -1;
+}
diff --git a/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.h b/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.h
new file mode 100644 (file)
index 0000000..eae340c
--- /dev/null
@@ -0,0 +1,67 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/ioctl.h>
+
+/* TUN ioctls */
+#define TUNSETIFF     _IOW('T', 202, int)
+#define TUNGETFEATURES _IOR('T', 207, unsigned int)
+#define TUNSETOFFLOAD  _IOW('T', 208, unsigned int)
+#define TUNGETIFF      _IOR('T', 210, unsigned int)
+#define TUNSETSNDBUF   _IOW('T', 212, int)
+#define TUNGETVNETHDRSZ _IOR('T', 215, int)
+#define TUNSETVNETHDRSZ _IOW('T', 216, int)
+#define TUNSETQUEUE  _IOW('T', 217, int)
+#define TUNSETVNETLE _IOW('T', 220, int)
+#define TUNSETVNETBE _IOW('T', 222, int)
+
+/* TUNSETIFF ifr flags */
+#define IFF_TAP          0x0002
+#define IFF_NO_PI        0x1000
+#define IFF_ONE_QUEUE    0x2000
+#define IFF_VNET_HDR     0x4000
+#define IFF_MULTI_QUEUE  0x0100
+#define IFF_ATTACH_QUEUE 0x0200
+#define IFF_DETACH_QUEUE 0x0400
+
+/* Features for GSO (TUNSETOFFLOAD). */
+#define TUN_F_CSUM     0x01    /* You can hand me unchecksummed packets. */
+#define TUN_F_TSO4     0x02    /* I can handle TSO for IPv4 packets */
+#define TUN_F_TSO6     0x04    /* I can handle TSO for IPv6 packets */
+#define TUN_F_TSO_ECN  0x08    /* I can handle TSO with ECN bits. */
+#define TUN_F_UFO      0x10    /* I can handle UFO packets */
+
+/* Constants */
+#define PATH_NET_TUN   "/dev/net/tun"
+
+int vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq);
diff --git a/src/dpdk/drivers/net/virtio/virtio_user/vhost_user.c b/src/dpdk/drivers/net/virtio/virtio_user/vhost_user.c
new file mode 100644 (file)
index 0000000..4ad7b21
--- /dev/null
@@ -0,0 +1,467 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/un.h>
+#include <string.h>
+#include <errno.h>
+
+#include "vhost.h"
+#include "virtio_user_dev.h"
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    0x1
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+struct vhost_memory {
+       uint32_t nregions;
+       uint32_t padding;
+       struct vhost_memory_region regions[VHOST_MEMORY_MAX_NREGIONS];
+};
+
+struct vhost_user_msg {
+       enum vhost_user_request request;
+
+#define VHOST_USER_VERSION_MASK     0x3
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+       uint32_t flags;
+       uint32_t size; /* the following payload size */
+       union {
+#define VHOST_USER_VRING_IDX_MASK   0xff
+#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
+               uint64_t u64;
+               struct vhost_vring_state state;
+               struct vhost_vring_addr addr;
+               struct vhost_memory memory;
+       } payload;
+       int fds[VHOST_MEMORY_MAX_NREGIONS];
+} __attribute((packed));
+
+#define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64)
+#define VHOST_USER_PAYLOAD_SIZE \
+       (sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE)
+
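+/* Send a vhost-user message; any file descriptors are attached to the same
+ * sendmsg() call as SCM_RIGHTS ancillary data.
+ */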
+static int
+vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num)
+{
+       int r;
+       struct msghdr msgh;
+       struct iovec iov;
+       size_t fd_size = fd_num * sizeof(int);
+       char control[CMSG_SPACE(fd_size)];
+       struct cmsghdr *cmsg;
+
+       memset(&msgh, 0, sizeof(msgh));
+       memset(control, 0, sizeof(control));
+
+       iov.iov_base = (uint8_t *)buf;
+       iov.iov_len = len;
+
+       msgh.msg_iov = &iov;
+       msgh.msg_iovlen = 1;
+       msgh.msg_control = control;
+       msgh.msg_controllen = sizeof(control);
+
+       cmsg = CMSG_FIRSTHDR(&msgh);
+       cmsg->cmsg_len = CMSG_LEN(fd_size);
+       cmsg->cmsg_level = SOL_SOCKET;
+       cmsg->cmsg_type = SCM_RIGHTS;
+       memcpy(CMSG_DATA(cmsg), fds, fd_size);
+
+       do {
+               r = sendmsg(fd, &msgh, 0);
+       } while (r < 0 && errno == EINTR);
+
+       return r;
+}
+
+static int
+vhost_user_read(int fd, struct vhost_user_msg *msg)
+{
+       uint32_t valid_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION;
+       int ret, sz_hdr = VHOST_USER_HDR_SIZE, sz_payload;
+
+       ret = recv(fd, (void *)msg, sz_hdr, 0);
+       if (ret < sz_hdr) {
+               PMD_DRV_LOG(ERR, "Failed to recv msg hdr: %d instead of %d.",
+                           ret, sz_hdr);
+               goto fail;
+       }
+
+       /* validate msg flags */
+       if (msg->flags != (valid_flags)) {
+               PMD_DRV_LOG(ERR, "Failed to recv msg: flags %x instead of %x.",
+                           msg->flags, valid_flags);
+               goto fail;
+       }
+
+       sz_payload = msg->size;
+       if (sz_payload) {
+               ret = recv(fd, (void *)((char *)msg + sz_hdr), sz_payload, 0);
+               if (ret < sz_payload) {
+                       PMD_DRV_LOG(ERR,
+                               "Failed to recv msg payload: %d instead of %d.",
+                               ret, msg->size);
+                       goto fail;
+               }
+       }
+
+       return 0;
+
+fail:
+       return -1;
+}
+
+struct hugepage_file_info {
+       uint64_t addr;            /**< virtual addr */
+       size_t   size;            /**< the file size */
+       char     path[PATH_MAX];  /**< path to backing file */
+};
+
+/* Two possible options:
+ * 1. Match HUGEPAGE_INFO_FMT to find the file storing the struct hugepage_file
+ * array. This is simple but cannot be used in a secondary process, because the
+ * secondary process will close and munmap that file.
+ * 2. Match HUGEFILE_FMT to find the hugepage files directly.
+ *
+ * We choose option 2.
+ */
+static int
+get_hugepage_file_info(struct hugepage_file_info huges[], int max)
+{
+       int idx;
+       FILE *f;
+       char buf[BUFSIZ], *tmp, *tail;
+       char *str_underline, *str_start;
+       int huge_index;
+       uint64_t v_start, v_end;
+
+       f = fopen("/proc/self/maps", "r");
+       if (!f) {
+               PMD_DRV_LOG(ERR, "cannot open /proc/self/maps");
+               return -1;
+       }
+
+       idx = 0;
+       while (fgets(buf, sizeof(buf), f) != NULL) {
+               if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) {
+                       PMD_DRV_LOG(ERR, "Failed to parse address");
+                       goto error;
+               }
+
+               tmp = strchr(buf, ' ') + 1; /** skip address */
+               tmp = strchr(tmp, ' ') + 1; /** skip perm */
+               tmp = strchr(tmp, ' ') + 1; /** skip offset */
+               tmp = strchr(tmp, ' ') + 1; /** skip dev */
+               tmp = strchr(tmp, ' ') + 1; /** skip inode */
+               while (*tmp == ' ')         /** skip spaces */
+                       tmp++;
+               tail = strrchr(tmp, '\n');  /** remove newline if exists */
+               if (tail)
+                       *tail = '\0';
+
+               /* Match HUGEFILE_FMT, aka "%s/%smap_%d",
+                * which is defined in eal_filesystem.h
+                */
+               str_underline = strrchr(tmp, '_');
+               if (!str_underline)
+                       continue;
+
+               str_start = str_underline - strlen("map");
+               if (str_start < tmp)
+                       continue;
+
+               if (sscanf(str_start, "map_%d", &huge_index) != 1)
+                       continue;
+
+               if (idx >= max) {
+                       PMD_DRV_LOG(ERR, "Exceed maximum of %d", max);
+                       goto error;
+               }
+               huges[idx].addr = v_start;
+               huges[idx].size = v_end - v_start;
+               snprintf(huges[idx].path, PATH_MAX, "%s", tmp);
+               idx++;
+       }
+
+       fclose(f);
+       return idx;
+
+error:
+       fclose(f);
+       return -1;
+}
+
+static int
+prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[])
+{
+       int i, num;
+       struct hugepage_file_info huges[VHOST_MEMORY_MAX_NREGIONS];
+       struct vhost_memory_region *mr;
+
+       num = get_hugepage_file_info(huges, VHOST_MEMORY_MAX_NREGIONS);
+       if (num < 0) {
+               PMD_INIT_LOG(ERR, "Failed to prepare memory for vhost-user");
+               return -1;
+       }
+
+       for (i = 0; i < num; ++i) {
+               mr = &msg->payload.memory.regions[i];
+               mr->guest_phys_addr = huges[i].addr; /* use vaddr! */
+               mr->userspace_addr = huges[i].addr;
+               mr->memory_size = huges[i].size;
+               mr->mmap_offset = 0;
+               fds[i] = open(huges[i].path, O_RDWR);
+       }
+
+       msg->payload.memory.nregions = num;
+       msg->payload.memory.padding = 0;
+
+       return 0;
+}
+
+static struct vhost_user_msg m;
+
+const char * const vhost_msg_strings[] = {
+       [VHOST_USER_SET_OWNER] = "VHOST_SET_OWNER",
+       [VHOST_USER_RESET_OWNER] = "VHOST_RESET_OWNER",
+       [VHOST_USER_SET_FEATURES] = "VHOST_SET_FEATURES",
+       [VHOST_USER_GET_FEATURES] = "VHOST_GET_FEATURES",
+       [VHOST_USER_SET_VRING_CALL] = "VHOST_SET_VRING_CALL",
+       [VHOST_USER_SET_VRING_NUM] = "VHOST_SET_VRING_NUM",
+       [VHOST_USER_SET_VRING_BASE] = "VHOST_SET_VRING_BASE",
+       [VHOST_USER_GET_VRING_BASE] = "VHOST_GET_VRING_BASE",
+       [VHOST_USER_SET_VRING_ADDR] = "VHOST_SET_VRING_ADDR",
+       [VHOST_USER_SET_VRING_KICK] = "VHOST_SET_VRING_KICK",
+       [VHOST_USER_SET_MEM_TABLE] = "VHOST_SET_MEM_TABLE",
+       [VHOST_USER_SET_VRING_ENABLE] = "VHOST_SET_VRING_ENABLE",
+};
+
+static int
+vhost_user_sock(struct virtio_user_dev *dev,
+               enum vhost_user_request req,
+               void *arg)
+{
+       struct vhost_user_msg msg;
+       struct vhost_vring_file *file = 0;
+       int need_reply = 0;
+       int fds[VHOST_MEMORY_MAX_NREGIONS];
+       int fd_num = 0;
+       int i, len;
+       int vhostfd = dev->vhostfd;
+
+       RTE_SET_USED(m);
+
+       PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]);
+
+       msg.request = req;
+       msg.flags = VHOST_USER_VERSION;
+       msg.size = 0;
+
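+       /* Marshal the request payload and collect any fds that must accompany
+        * it: vring kick/call/err carry an eventfd (or the NOFD flag),
+        * SET_LOG_FD carries its fd, and SET_MEM_TABLE carries one fd per
+        * hugepage-backed memory region.
+        */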
+       switch (req) {
+       case VHOST_USER_GET_FEATURES:
+               need_reply = 1;
+               break;
+
+       case VHOST_USER_SET_FEATURES:
+       case VHOST_USER_SET_LOG_BASE:
+               msg.payload.u64 = *((__u64 *)arg);
+               msg.size = sizeof(m.payload.u64);
+               break;
+
+       case VHOST_USER_SET_OWNER:
+       case VHOST_USER_RESET_OWNER:
+               break;
+
+       case VHOST_USER_SET_MEM_TABLE:
+               if (prepare_vhost_memory_user(&msg, fds) < 0)
+                       return -1;
+               fd_num = msg.payload.memory.nregions;
+               msg.size = sizeof(m.payload.memory.nregions);
+               msg.size += sizeof(m.payload.memory.padding);
+               msg.size += fd_num * sizeof(struct vhost_memory_region);
+               break;
+
+       case VHOST_USER_SET_LOG_FD:
+               fds[fd_num++] = *((int *)arg);
+               break;
+
+       case VHOST_USER_SET_VRING_NUM:
+       case VHOST_USER_SET_VRING_BASE:
+       case VHOST_USER_SET_VRING_ENABLE:
+               memcpy(&msg.payload.state, arg, sizeof(msg.payload.state));
+               msg.size = sizeof(m.payload.state);
+               break;
+
+       case VHOST_USER_GET_VRING_BASE:
+               memcpy(&msg.payload.state, arg, sizeof(msg.payload.state));
+               msg.size = sizeof(m.payload.state);
+               need_reply = 1;
+               break;
+
+       case VHOST_USER_SET_VRING_ADDR:
+               memcpy(&msg.payload.addr, arg, sizeof(msg.payload.addr));
+               msg.size = sizeof(m.payload.addr);
+               break;
+
+       case VHOST_USER_SET_VRING_KICK:
+       case VHOST_USER_SET_VRING_CALL:
+       case VHOST_USER_SET_VRING_ERR:
+               file = arg;
+               msg.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
+               msg.size = sizeof(m.payload.u64);
+               if (file->fd > 0)
+                       fds[fd_num++] = file->fd;
+               else
+                       msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
+               break;
+
+       default:
+               PMD_DRV_LOG(ERR, "trying to send unhandled msg type");
+               return -1;
+       }
+
+       len = VHOST_USER_HDR_SIZE + msg.size;
+       if (vhost_user_write(vhostfd, &msg, len, fds, fd_num) < 0) {
+               PMD_DRV_LOG(ERR, "%s failed: %s",
+                           vhost_msg_strings[req], strerror(errno));
+               return -1;
+       }
+
+       if (req == VHOST_USER_SET_MEM_TABLE)
+               for (i = 0; i < fd_num; ++i)
+                       close(fds[i]);
+
+       if (need_reply) {
+               if (vhost_user_read(vhostfd, &msg) < 0) {
+                       PMD_DRV_LOG(ERR, "Received msg failed: %s",
+                                   strerror(errno));
+                       return -1;
+               }
+
+               if (req != msg.request) {
+                       PMD_DRV_LOG(ERR, "Received unexpected msg type");
+                       return -1;
+               }
+
+               switch (req) {
+               case VHOST_USER_GET_FEATURES:
+                       if (msg.size != sizeof(m.payload.u64)) {
+                               PMD_DRV_LOG(ERR, "Received bad msg size");
+                               return -1;
+                       }
+                       *((__u64 *)arg) = msg.payload.u64;
+                       break;
+               case VHOST_USER_GET_VRING_BASE:
+                       if (msg.size != sizeof(m.payload.state)) {
+                               PMD_DRV_LOG(ERR, "Received bad msg size");
+                               return -1;
+                       }
+                       memcpy(arg, &msg.payload.state,
+                              sizeof(struct vhost_vring_state));
+                       break;
+               default:
+                       PMD_DRV_LOG(ERR, "Received unexpected msg type");
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+/**
+ * Set up environment to talk with a vhost user backend.
+ *
+ * @return
+ *   - (-1) if fail;
+ *   - (0) if succeed.
+ */
+static int
+vhost_user_setup(struct virtio_user_dev *dev)
+{
+       int fd;
+       int flag;
+       struct sockaddr_un un;
+
+       fd = socket(AF_UNIX, SOCK_STREAM, 0);
+       if (fd < 0) {
+               PMD_DRV_LOG(ERR, "socket() error, %s", strerror(errno));
+               return -1;
+       }
+
+       flag = fcntl(fd, F_GETFD);
+       if (fcntl(fd, F_SETFD, flag | FD_CLOEXEC) < 0)
+               PMD_DRV_LOG(WARNING, "fcntl failed, %s", strerror(errno));
+
+       memset(&un, 0, sizeof(un));
+       un.sun_family = AF_UNIX;
+       snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
+       if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
+               PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
+               close(fd);
+               return -1;
+       }
+
+       dev->vhostfd = fd;
+       return 0;
+}
+
+static int
+vhost_user_enable_queue_pair(struct virtio_user_dev *dev,
+                            uint16_t pair_idx,
+                            int enable)
+{
+       int i;
+
+       for (i = 0; i < 2; ++i) {
+               struct vhost_vring_state state = {
+                       .index = pair_idx * 2 + i,
+                       .num   = enable,
+               };
+
+               if (vhost_user_sock(dev, VHOST_USER_SET_VRING_ENABLE, &state))
+                       return -1;
+       }
+
+       return 0;
+}
+
+struct virtio_user_backend_ops ops_user = {
+       .setup = vhost_user_setup,
+       .send_request = vhost_user_sock,
+       .enable_qp = vhost_user_enable_queue_pair
+};
diff --git a/src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c b/src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c
new file mode 100644 (file)
index 0000000..21ed00d
--- /dev/null
@@ -0,0 +1,414 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <sys/eventfd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "vhost.h"
+#include "virtio_user_dev.h"
+#include "../virtio_ethdev.h"
+
+static int
+virtio_user_create_queue(struct virtio_user_dev *dev, uint32_t queue_sel)
+{
+       /* Of all per-virtqueue messages, make sure VHOST_USER_SET_VRING_CALL
+        * comes first, because vhost depends on this message to allocate the
+        * virtqueue pair.
+        */
+       int callfd;
+       struct vhost_vring_file file;
+
+       /* We could pass an invalid fd here, but some backends use kickfd and
+        * callfd to judge whether the device is alive, so use a real eventfd.
+        */
+       callfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
+       if (callfd < 0) {
+               PMD_DRV_LOG(ERR, "callfd error, %s", strerror(errno));
+               return -1;
+       }
+       file.index = queue_sel;
+       file.fd = callfd;
+       dev->ops->send_request(dev, VHOST_USER_SET_VRING_CALL, &file);
+       dev->callfds[queue_sel] = callfd;
+
+       return 0;
+}
+
+static int
+virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel)
+{
+       int kickfd;
+       struct vhost_vring_file file;
+       struct vhost_vring_state state;
+       struct vring *vring = &dev->vrings[queue_sel];
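+       /* The ring addresses below are this process's virtual addresses; a
+        * vhost-user backend translates them through the memory table shared
+        * earlier with VHOST_USER_SET_MEM_TABLE.
+        */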
+       struct vhost_vring_addr addr = {
+               .index = queue_sel,
+               .desc_user_addr = (uint64_t)(uintptr_t)vring->desc,
+               .avail_user_addr = (uint64_t)(uintptr_t)vring->avail,
+               .used_user_addr = (uint64_t)(uintptr_t)vring->used,
+               .log_guest_addr = 0,
+               .flags = 0, /* disable log */
+       };
+
+       state.index = queue_sel;
+       state.num = vring->num;
+       dev->ops->send_request(dev, VHOST_USER_SET_VRING_NUM, &state);
+
+       state.index = queue_sel;
+       state.num = 0; /* no reservation */
+       dev->ops->send_request(dev, VHOST_USER_SET_VRING_BASE, &state);
+
+       dev->ops->send_request(dev, VHOST_USER_SET_VRING_ADDR, &addr);
+
+       /* Of all per-virtqueue messages, make sure VHOST_USER_SET_VRING_KICK
+        * is sent last, because vhost depends on this message to judge if
+        * virtio is ready.
+        */
+       kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
+       if (kickfd < 0) {
+               PMD_DRV_LOG(ERR, "kickfd error, %s", strerror(errno));
+               return -1;
+       }
+       file.index = queue_sel;
+       file.fd = kickfd;
+       dev->ops->send_request(dev, VHOST_USER_SET_VRING_KICK, &file);
+       dev->kickfds[queue_sel] = kickfd;
+
+       return 0;
+}
+
+static int
+virtio_user_queue_setup(struct virtio_user_dev *dev,
+                       int (*fn)(struct virtio_user_dev *, uint32_t))
+{
+       uint32_t i, queue_sel;
+
+       for (i = 0; i < dev->max_queue_pairs; ++i) {
+               queue_sel = 2 * i + VTNET_SQ_RQ_QUEUE_IDX;
+               if (fn(dev, queue_sel) < 0) {
+                       PMD_DRV_LOG(INFO, "setup rx vq fails: %u", i);
+                       return -1;
+               }
+       }
+       for (i = 0; i < dev->max_queue_pairs; ++i) {
+               queue_sel = 2 * i + VTNET_SQ_TQ_QUEUE_IDX;
+               if (fn(dev, queue_sel) < 0) {
+                       PMD_DRV_LOG(INFO, "setup tx vq fails: %u", i);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+int
+virtio_user_start_device(struct virtio_user_dev *dev)
+{
+       uint64_t features;
+       int ret;
+
+       /* Step 0: tell vhost to create queues */
+       if (virtio_user_queue_setup(dev, virtio_user_create_queue) < 0)
+               goto error;
+
+       /* Step 1: set features */
+       features = dev->features;
+       /* Strip VIRTIO_NET_F_MAC, as MAC address is handled in vdev init */
+       features &= ~(1ull << VIRTIO_NET_F_MAC);
+       /* Strip VIRTIO_NET_F_CTRL_VQ, as devices do not really need to know */
+       features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ);
+       ret = dev->ops->send_request(dev, VHOST_USER_SET_FEATURES, &features);
+       if (ret < 0)
+               goto error;
+       PMD_DRV_LOG(INFO, "set features: %" PRIx64, features);
+
+       /* Step 2: share memory regions */
+       ret = dev->ops->send_request(dev, VHOST_USER_SET_MEM_TABLE, NULL);
+       if (ret < 0)
+               goto error;
+
+       /* Step 3: kick queues */
+       if (virtio_user_queue_setup(dev, virtio_user_kick_queue) < 0)
+               goto error;
+
+       /* Step 4: enable queues
+        * we enable the 1st queue pair by default.
+        */
+       dev->ops->enable_qp(dev, 0, 1);
+
+       return 0;
+error:
+       /* TODO: free resource here or caller to check */
+       return -1;
+}
+
+int virtio_user_stop_device(struct virtio_user_dev *dev)
+{
+       uint32_t i;
+
+       for (i = 0; i < dev->max_queue_pairs * 2; ++i) {
+               close(dev->callfds[i]);
+               close(dev->kickfds[i]);
+       }
+
+       for (i = 0; i < dev->max_queue_pairs; ++i)
+               dev->ops->enable_qp(dev, i, 0);
+
+       free(dev->ifname);
+       dev->ifname = NULL;
+
+       return 0;
+}
+
+static inline void
+parse_mac(struct virtio_user_dev *dev, const char *mac)
+{
+       int i, r;
+       uint32_t tmp[ETHER_ADDR_LEN];
+
+       if (!mac)
+               return;
+
+       r = sscanf(mac, "%x:%x:%x:%x:%x:%x", &tmp[0],
+                       &tmp[1], &tmp[2], &tmp[3], &tmp[4], &tmp[5]);
+       if (r == ETHER_ADDR_LEN) {
+               for (i = 0; i < ETHER_ADDR_LEN; ++i)
+                       dev->mac_addr[i] = (uint8_t)tmp[i];
+               dev->mac_specified = 1;
+       } else {
+               /* ignore the wrong mac, use random mac */
+               PMD_DRV_LOG(ERR, "wrong format of mac: %s", mac);
+       }
+}
+
+static int
+is_vhost_user_by_type(const char *path)
+{
+       struct stat sb;
+
+       if (stat(path, &sb) == -1)
+               return 0;
+
+       return S_ISSOCK(sb.st_mode);
+}
+
+static int
+virtio_user_dev_setup(struct virtio_user_dev *dev)
+{
+       uint32_t i, q;
+
+       dev->vhostfd = -1;
+       for (i = 0; i < VIRTIO_MAX_VIRTQUEUES * 2 + 1; ++i) {
+               dev->kickfds[i] = -1;
+               dev->callfds[i] = -1;
+       }
+
+       dev->vhostfds = NULL;
+       dev->tapfds = NULL;
+
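+       /* A UNIX socket path selects the vhost-user backend; anything else is
+        * treated as a vhost-net character device, with one vhost fd and one
+        * tap fd per queue pair.
+        */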
+       if (is_vhost_user_by_type(dev->path)) {
+               dev->ops = &ops_user;
+       } else {
+               dev->ops = &ops_kernel;
+
+               dev->vhostfds = malloc(dev->max_queue_pairs * sizeof(int));
+               dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
+               if (!dev->vhostfds || !dev->tapfds) {
+                       PMD_INIT_LOG(ERR, "Failed to malloc");
+                       return -1;
+               }
+
+               for (q = 0; q < dev->max_queue_pairs; ++q) {
+                       dev->vhostfds[q] = -1;
+                       dev->tapfds[q] = -1;
+               }
+       }
+
+       return dev->ops->setup(dev);
+}
+
+int
+virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
+                    int cq, int queue_size, const char *mac)
+{
+       snprintf(dev->path, PATH_MAX, "%s", path);
+       dev->max_queue_pairs = queues;
+       dev->queue_pairs = 1; /* mq disabled by default */
+       dev->queue_size = queue_size;
+       dev->mac_specified = 0;
+       parse_mac(dev, mac);
+
+       if (virtio_user_dev_setup(dev) < 0) {
+               PMD_INIT_LOG(ERR, "backend set up fails");
+               return -1;
+       }
+       if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL) < 0) {
+               PMD_INIT_LOG(ERR, "set_owner fails: %s", strerror(errno));
+               return -1;
+       }
+
+       if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
+                           &dev->device_features) < 0) {
+               PMD_INIT_LOG(ERR, "get_features failed: %s", strerror(errno));
+               return -1;
+       }
+       if (dev->mac_specified)
+               dev->device_features |= (1ull << VIRTIO_NET_F_MAC);
+
+       if (cq) {
+               /* device does not really need to know anything about CQ,
+                * so if necessary, we just claim to support CQ
+                */
+               dev->device_features |= (1ull << VIRTIO_NET_F_CTRL_VQ);
+       } else {
+               dev->device_features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ);
+               /* Also disable features that depend on VIRTIO_NET_F_CTRL_VQ */
+               dev->device_features &= ~(1ull << VIRTIO_NET_F_CTRL_RX);
+               dev->device_features &= ~(1ull << VIRTIO_NET_F_CTRL_VLAN);
+               dev->device_features &= ~(1ull << VIRTIO_NET_F_GUEST_ANNOUNCE);
+               dev->device_features &= ~(1ull << VIRTIO_NET_F_MQ);
+               dev->device_features &= ~(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR);
+       }
+
+       return 0;
+}
+
+void
+virtio_user_dev_uninit(struct virtio_user_dev *dev)
+{
+       uint32_t i;
+
+       virtio_user_stop_device(dev);
+
+       close(dev->vhostfd);
+
+       if (dev->vhostfds) {
+               for (i = 0; i < dev->max_queue_pairs; ++i)
+                       close(dev->vhostfds[i]);
+               free(dev->vhostfds);
+               free(dev->tapfds);
+       }
+}
+
+static uint8_t
+virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs)
+{
+       uint16_t i;
+       uint8_t ret = 0;
+
+       if (q_pairs > dev->max_queue_pairs) {
+               PMD_INIT_LOG(ERR, "multi-q config %u, but only %u supported",
+                            q_pairs, dev->max_queue_pairs);
+               return -1;
+       }
+
+       for (i = 0; i < q_pairs; ++i)
+               ret |= dev->ops->enable_qp(dev, i, 1);
+       for (i = q_pairs; i < dev->max_queue_pairs; ++i)
+               ret |= dev->ops->enable_qp(dev, i, 0);
+
+       dev->queue_pairs = q_pairs;
+
+       return ret;
+}
+
+static uint32_t
+virtio_user_handle_ctrl_msg(struct virtio_user_dev *dev, struct vring *vring,
+                           uint16_t idx_hdr)
+{
+       struct virtio_net_ctrl_hdr *hdr;
+       virtio_net_ctrl_ack status = ~0;
+       uint16_t i, idx_data, idx_status;
+       uint32_t n_descs = 0;
+
+       /* locate desc for header, data, and status */
+       idx_data = vring->desc[idx_hdr].next;
+       n_descs++;
+
+       i = idx_data;
+       while (vring->desc[i].flags == VRING_DESC_F_NEXT) {
+               i = vring->desc[i].next;
+               n_descs++;
+       }
+
+       /* locate desc for status */
+       idx_status = i;
+       n_descs++;
+
+       hdr = (void *)(uintptr_t)vring->desc[idx_hdr].addr;
+       if (hdr->class == VIRTIO_NET_CTRL_MQ &&
+           hdr->cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
+               uint16_t queues;
+
+               queues = *(uint16_t *)(uintptr_t)vring->desc[idx_data].addr;
+               status = virtio_user_handle_mq(dev, queues);
+       }
+
+       /* Update status */
+       *(virtio_net_ctrl_ack *)(uintptr_t)vring->desc[idx_status].addr = status;
+
+       return n_descs;
+}
+
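Editor's note: virtio_user_handle_ctrl_msg() above expects a control command to arrive as a descriptor chain of header -> data descriptor(s) -> status, and counts descriptors as it walks the chain (comparing flags == VRING_DESC_F_NEXT works here because the data descriptors of a control command normally carry only that flag). A simplified, self-contained walk over such a chain:

#include <stdint.h>
#include <stdio.h>

#define DESC_F_NEXT 1

struct toy_desc {
        uint16_t flags;
        uint16_t next;   /* index of the next descriptor in the chain */
};

/* Count descriptors in a header -> data... -> status chain rooted at idx_hdr. */
static uint32_t
count_chain(const struct toy_desc *ring, uint16_t idx_hdr)
{
        uint32_t n = 1;                        /* the header descriptor */
        uint16_t i = ring[idx_hdr].next;       /* first data descriptor */

        while (ring[i].flags & DESC_F_NEXT) {  /* one per data descriptor */
                i = ring[i].next;
                n++;
        }
        return n + 1;                          /* plus the trailing status descriptor */
}

int
main(void)
{
        /* header(0) -> data(1) -> status(2) */
        struct toy_desc ring[3] = {
                { DESC_F_NEXT, 1 },
                { DESC_F_NEXT, 2 },
                { 0, 0 },
        };

        printf("descriptors in chain: %u\n", count_chain(ring, 0));
        return 0;
}
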
+void
+virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx)
+{
+       uint16_t avail_idx, desc_idx;
+       struct vring_used_elem *uep;
+       uint32_t n_descs;
+       struct vring *vring = &dev->vrings[queue_idx];
+
+       /* Consume the avail ring, starting from the current used ring idx */
+       while (vring->used->idx != vring->avail->idx) {
+               avail_idx = (vring->used->idx) & (vring->num - 1);
+               desc_idx = vring->avail->ring[avail_idx];
+
+               n_descs = virtio_user_handle_ctrl_msg(dev, vring, desc_idx);
+
+               /* Update used ring */
+               uep = &vring->used->ring[avail_idx];
+               uep->id = avail_idx;
+               uep->len = n_descs;
+
+               vring->used->idx++;
+       }
+}
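Editor's note: virtio_user_handle_cq() drains the available ring starting at the current used index and masks slot indices with (vring->num - 1), which is valid because virtio ring sizes are powers of two. A toy version of that index arithmetic with placeholder types (the real handler additionally fills in one used-ring element per consumed chain):

#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 8   /* must be a power of two for the mask below */

struct toy_ring {
        uint16_t avail_idx;            /* free-running producer index */
        uint16_t used_idx;             /* free-running consumer index */
        uint16_t avail[RING_SIZE];     /* descriptor indices posted by the driver */
};

/* Consume every posted entry; returns how many were handled. */
static unsigned int
drain(struct toy_ring *r)
{
        unsigned int handled = 0;

        while (r->used_idx != r->avail_idx) {
                uint16_t slot = r->used_idx & (RING_SIZE - 1);
                uint16_t desc_idx = r->avail[slot];

                /* a real implementation would process desc_idx here */
                (void)desc_idx;
                r->used_idx++;
                handled++;
        }
        return handled;
}

int
main(void)
{
        struct toy_ring r = { .avail_idx = 3 };

        printf("handled %u entries\n", drain(&r));
        return 0;
}
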
diff --git a/src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.h b/src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.h
new file mode 100644 (file)
index 0000000..0d39f40
--- /dev/null
@@ -0,0 +1,75 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_USER_DEV_H
+#define _VIRTIO_USER_DEV_H
+
+#include <limits.h>
+#include "../virtio_pci.h"
+#include "../virtio_ring.h"
+#include "vhost.h"
+
+struct virtio_user_dev {
+       /* for vhost_user backend */
+       int             vhostfd;
+
+       /* for vhost_kernel backend */
+       char            *ifname;
+       int             *vhostfds;
+       int             *tapfds;
+
+       /* for both vhost_user and vhost_kernel */
+       int             callfds[VIRTIO_MAX_VIRTQUEUES * 2 + 1];
+       int             kickfds[VIRTIO_MAX_VIRTQUEUES * 2 + 1];
+       int             mac_specified;
+       uint32_t        max_queue_pairs;
+       uint32_t        queue_pairs;
+       uint32_t        queue_size;
+       uint64_t        features; /* the negotiated features with driver,
+                                  * and will be sync with device
+                                  */
+       uint64_t        device_features; /* supported features by device */
+       uint8_t         status;
+       uint8_t         mac_addr[ETHER_ADDR_LEN];
+       char            path[PATH_MAX];
+       struct vring    vrings[VIRTIO_MAX_VIRTQUEUES * 2 + 1];
+       struct virtio_user_backend_ops *ops;
+};
+
+int virtio_user_start_device(struct virtio_user_dev *dev);
+int virtio_user_stop_device(struct virtio_user_dev *dev);
+int virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
+                        int cq, int queue_size, const char *mac);
+void virtio_user_dev_uninit(struct virtio_user_dev *dev);
+void virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx);
+#endif
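Editor's note: the functions declared here form the life cycle driven by the virtio_user ethdev glue in the next file. A rough caller-side sketch, assuming it is compiled next to this header and that a vhost-user socket exists at the hypothetical path below; all values are examples only:

#include "virtio_user_dev.h"

/* Hypothetical caller; error handling shortened for brevity. */
static int
example_bring_up(struct virtio_user_dev *dev)
{
        char path[] = "/tmp/vhost-user.sock";   /* assumed vhost-user socket */

        if (virtio_user_dev_init(dev, path, 1 /* queue pairs */,
                                 0 /* no ctrl queue */, 256 /* ring size */,
                                 NULL /* random MAC */) < 0)
                return -1;

        /* In the PMD, feature negotiation and vring setup are driven by the
         * generic virtio ethdev code through the virtio_user PCI-ops shims
         * before the device is started. */

        if (virtio_user_start_device(dev) < 0)
                return -1;
        return 0;
}

static void
example_tear_down(struct virtio_user_dev *dev)
{
        virtio_user_stop_device(dev);
        virtio_user_dev_uninit(dev);
}
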
index daef09b..0b226ac 100644 (file)
@@ -37,6 +37,7 @@
 
 #include <rte_malloc.h>
 #include <rte_kvargs.h>
+#include <rte_vdev.h>
 
 #include "virtio_ethdev.h"
 #include "virtio_logs.h"
@@ -81,26 +82,29 @@ virtio_user_write_dev_config(struct virtio_hw *hw, size_t offset,
                for (i = 0; i < ETHER_ADDR_LEN; ++i)
                        dev->mac_addr[i] = ((const uint8_t *)src)[i];
        else
-               PMD_DRV_LOG(ERR, "not supported offset=%zu, len=%d\n",
+               PMD_DRV_LOG(ERR, "not supported offset=%zu, len=%d",
                            offset, length);
 }
 
 static void
-virtio_user_set_status(struct virtio_hw *hw, uint8_t status)
+virtio_user_reset(struct virtio_hw *hw)
 {
        struct virtio_user_dev *dev = virtio_user_get_dev(hw);
 
-       if (status & VIRTIO_CONFIG_STATUS_DRIVER_OK)
-               virtio_user_start_device(dev);
-       dev->status = status;
+       if (dev->status & VIRTIO_CONFIG_STATUS_DRIVER_OK)
+               virtio_user_stop_device(dev);
 }
 
 static void
-virtio_user_reset(struct virtio_hw *hw)
+virtio_user_set_status(struct virtio_hw *hw, uint8_t status)
 {
        struct virtio_user_dev *dev = virtio_user_get_dev(hw);
 
-       virtio_user_stop_device(dev);
+       if (status & VIRTIO_CONFIG_STATUS_DRIVER_OK)
+               virtio_user_start_device(dev);
+       else if (status == VIRTIO_CONFIG_STATUS_RESET)
+               virtio_user_reset(hw);
+       dev->status = status;
 }
 
 static uint8_t
@@ -116,7 +120,8 @@ virtio_user_get_features(struct virtio_hw *hw)
 {
        struct virtio_user_dev *dev = virtio_user_get_dev(hw);
 
-       return dev->features;
+       /* unmask feature bits defined in vhost user protocol */
+       return dev->device_features & VIRTIO_PMD_SUPPORTED_GUEST_FEATURES;
 }
 
 static void
@@ -124,7 +129,7 @@ virtio_user_set_features(struct virtio_hw *hw, uint64_t features)
 {
        struct virtio_user_dev *dev = virtio_user_get_dev(hw);
 
-       dev->features = features;
+       dev->features = features & dev->device_features;
 }
 
 static uint8_t
@@ -207,11 +212,11 @@ virtio_user_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
        }
 
        if (write(dev->kickfds[vq->vq_queue_index], &buf, sizeof(buf)) < 0)
-               PMD_DRV_LOG(ERR, "failed to kick backend: %s\n",
+               PMD_DRV_LOG(ERR, "failed to kick backend: %s",
                            strerror(errno));
 }
 
-static const struct virtio_pci_ops virtio_user_ops = {
+const struct virtio_pci_ops virtio_user_ops = {
        .read_dev_cfg   = virtio_user_read_dev_config,
        .write_dev_cfg  = virtio_user_write_dev_config,
        .reset          = virtio_user_reset,
@@ -269,6 +274,8 @@ get_integer_arg(const char *key __rte_unused,
        return 0;
 }
 
+static struct rte_vdev_driver virtio_user_driver;
+
 static struct rte_eth_dev *
 virtio_user_eth_dev_alloc(const char *name)
 {
@@ -277,7 +284,7 @@ virtio_user_eth_dev_alloc(const char *name)
        struct virtio_hw *hw;
        struct virtio_user_dev *dev;
 
-       eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
+       eth_dev = rte_eth_dev_allocate(name);
        if (!eth_dev) {
                PMD_INIT_LOG(ERR, "cannot alloc rte_eth_dev");
                return NULL;
@@ -300,25 +307,38 @@ virtio_user_eth_dev_alloc(const char *name)
                return NULL;
        }
 
-       hw->vtpci_ops = &virtio_user_ops;
+       hw->port_id = data->port_id;
+       virtio_hw_internal[hw->port_id].vtpci_ops = &virtio_user_ops;
        hw->use_msix = 0;
        hw->modern   = 0;
+       hw->use_simple_rxtx = 0;
        hw->virtio_user_dev = dev;
        data->dev_private = hw;
+       data->drv_name = virtio_user_driver.driver.name;
        data->numa_node = SOCKET_ID_ANY;
        data->kdrv = RTE_KDRV_NONE;
        data->dev_flags = RTE_ETH_DEV_DETACHABLE;
-       eth_dev->pci_dev = NULL;
        eth_dev->driver = NULL;
        return eth_dev;
 }
 
+static void
+virtio_user_eth_dev_free(struct rte_eth_dev *eth_dev)
+{
+       struct rte_eth_dev_data *data = eth_dev->data;
+       struct virtio_hw *hw = data->dev_private;
+
+       rte_free(hw->virtio_user_dev);
+       rte_free(hw);
+       rte_eth_dev_release_port(eth_dev);
+}
+
 /* Dev initialization routine. Invoked once for each virtio vdev at
  * EAL init time, see rte_eal_dev_init().
  * Returns 0 on success.
  */
 static int
-virtio_user_pmd_devinit(const char *name, const char *params)
+virtio_user_pmd_probe(const char *name, const char *params)
 {
        struct rte_kvargs *kvlist = NULL;
        struct rte_eth_dev *eth_dev;
@@ -343,23 +363,21 @@ virtio_user_pmd_devinit(const char *name, const char *params)
        }
 
        if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_PATH) == 1) {
-               ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_PATH,
-                                        &get_string_arg, &path);
-               if (ret < 0) {
+               if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_PATH,
+                                      &get_string_arg, &path) < 0) {
                        PMD_INIT_LOG(ERR, "error to parse %s",
                                     VIRTIO_USER_ARG_PATH);
                        goto end;
                }
        } else {
-               PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio_user\n",
+               PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio_user",
                          VIRTIO_USER_ARG_QUEUE_SIZE);
                goto end;
        }
 
        if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_MAC) == 1) {
-               ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_MAC,
-                                        &get_string_arg, &mac_addr);
-               if (ret < 0) {
+               if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_MAC,
+                                      &get_string_arg, &mac_addr) < 0) {
                        PMD_INIT_LOG(ERR, "error to parse %s",
                                     VIRTIO_USER_ARG_MAC);
                        goto end;
@@ -367,9 +385,8 @@ virtio_user_pmd_devinit(const char *name, const char *params)
        }
 
        if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE) == 1) {
-               ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE,
-                                        &get_integer_arg, &queue_size);
-               if (ret < 0) {
+               if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE,
+                                      &get_integer_arg, &queue_size) < 0) {
                        PMD_INIT_LOG(ERR, "error to parse %s",
                                     VIRTIO_USER_ARG_QUEUE_SIZE);
                        goto end;
@@ -377,9 +394,8 @@ virtio_user_pmd_devinit(const char *name, const char *params)
        }
 
        if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_QUEUES_NUM) == 1) {
-               ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUES_NUM,
-                                        &get_integer_arg, &queues);
-               if (ret < 0) {
+               if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUES_NUM,
+                                      &get_integer_arg, &queues) < 0) {
                        PMD_INIT_LOG(ERR, "error to parse %s",
                                     VIRTIO_USER_ARG_QUEUES_NUM);
                        goto end;
@@ -387,9 +403,8 @@ virtio_user_pmd_devinit(const char *name, const char *params)
        }
 
        if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) {
-               ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
-                                        &get_integer_arg, &cq);
-               if (ret < 0) {
+               if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
+                                      &get_integer_arg, &cq) < 0) {
                        PMD_INIT_LOG(ERR, "error to parse %s",
                                     VIRTIO_USER_ARG_CQ_NUM);
                        goto end;
@@ -411,12 +426,16 @@ virtio_user_pmd_devinit(const char *name, const char *params)
 
        hw = eth_dev->data->dev_private;
        if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq,
-                                queue_size, mac_addr) < 0)
+                                queue_size, mac_addr) < 0) {
+               PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");
+               virtio_user_eth_dev_free(eth_dev);
                goto end;
+       }
 
        /* previously called by rte_eal_pci_probe() for physical dev */
        if (eth_virtio_dev_init(eth_dev) < 0) {
                PMD_INIT_LOG(ERR, "eth_virtio_dev_init fails");
+               virtio_user_eth_dev_free(eth_dev);
                goto end;
        }
        ret = 0;
@@ -433,7 +452,7 @@ end:
 
 /** Called by rte_eth_dev_detach() */
 static int
-virtio_user_pmd_devuninit(const char *name)
+virtio_user_pmd_remove(const char *name)
 {
        struct rte_eth_dev *eth_dev;
        struct virtio_hw *hw;
@@ -442,7 +461,7 @@ virtio_user_pmd_devuninit(const char *name)
        if (!name)
                return -EINVAL;
 
-       PMD_DRV_LOG(INFO, "Un-Initializing %s\n", name);
+       PMD_DRV_LOG(INFO, "Un-Initializing %s", name);
        eth_dev = rte_eth_dev_allocated(name);
        if (!eth_dev)
                return -ENODEV;
@@ -461,14 +480,14 @@ virtio_user_pmd_devuninit(const char *name)
        return 0;
 }
 
-static struct rte_driver virtio_user_driver = {
-       .type   = PMD_VDEV,
-       .init   = virtio_user_pmd_devinit,
-       .uninit = virtio_user_pmd_devuninit,
+static struct rte_vdev_driver virtio_user_driver = {
+       .probe = virtio_user_pmd_probe,
+       .remove = virtio_user_pmd_remove,
 };
 
-PMD_REGISTER_DRIVER(virtio_user_driver, virtio_user);
-DRIVER_REGISTER_PARAM_STRING(virtio_user,
+RTE_PMD_REGISTER_VDEV(net_virtio_user, virtio_user_driver);
+RTE_PMD_REGISTER_ALIAS(net_virtio_user, virtio_user);
+RTE_PMD_REGISTER_PARAM_STRING(net_virtio_user,
        "path=<path> "
        "mac=<mac addr> "
        "cq=<int> "
index 7f60e3e..9ad77b8 100644 (file)
 #include "virtio_logs.h"
 #include "virtio_pci.h"
 
-void
-virtqueue_disable_intr(struct virtqueue *vq)
-{
-       /*
-        * Set VRING_AVAIL_F_NO_INTERRUPT to hint host
-        * not to interrupt when it consumes packets
-        * Note: this is only considered a hint to the host
-        */
-       vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
-}
-
 /*
  * Two types of mbuf to be cleaned:
  * 1) mbuf that has been consumed by backend but not used by virtio.
index 6737b81..f9e3736 100644 (file)
@@ -44,6 +44,7 @@
 #include "virtio_pci.h"
 #include "virtio_ring.h"
 #include "virtio_logs.h"
+#include "virtio_rxtx.h"
 
 struct rte_mbuf;
 
@@ -191,6 +192,12 @@ struct virtqueue {
        void *vq_ring_virt_mem;  /**< linear address of vring*/
        unsigned int vq_ring_size;
 
+       union {
+               struct virtnet_rx rxq;
+               struct virtnet_tx txq;
+               struct virtnet_ctl cq;
+       };
+
        phys_addr_t vq_ring_mem; /**< physical address of vring,
                                  * or virtual address for virtio_user. */
 
@@ -204,7 +211,6 @@ struct virtqueue {
        uint16_t  vq_queue_index;   /**< PCI queue index */
        uint16_t offset; /**< relative offset to obtain addr in mbuf */
        uint16_t  *notify_addr;
-       int configured;
        struct rte_mbuf **sw_ring;  /**< RX software ring. */
        struct vq_desc_extra vq_descx[0];
 };
@@ -223,6 +229,7 @@ struct virtqueue {
  */
 struct virtio_net_hdr {
 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1    /**< Use csum_start,csum_offset*/
+#define VIRTIO_NET_HDR_F_DATA_VALID 2    /**< Checksum is valid */
        uint8_t flags;
 #define VIRTIO_NET_HDR_GSO_NONE     0    /**< Not a GSO frame */
 #define VIRTIO_NET_HDR_GSO_TCPV4    1    /**< GSO frame, IPv4 TCP (TSO) */
@@ -267,7 +274,21 @@ vring_desc_init(struct vring_desc *dp, uint16_t n)
 /**
  * Tell the backend not to interrupt us.
  */
-void virtqueue_disable_intr(struct virtqueue *vq);
+static inline void
+virtqueue_disable_intr(struct virtqueue *vq)
+{
+       vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
+}
+
+/**
+ * Tell the backend to interrupt us.
+ */
+static inline void
+virtqueue_enable_intr(struct virtqueue *vq)
+{
+       vq->vq_ring.avail->flags &= (~VRING_AVAIL_F_NO_INTERRUPT);
+}
+
 /**
  *  Dump virtqueue internal structures, for debug purpose only.
  */
@@ -323,7 +344,7 @@ virtqueue_notify(struct virtqueue *vq)
         * For virtio on IA, the notificaiton is through io port operation
         * which is a serialization instruction itself.
         */
-       vq->hw->vtpci_ops->notify_queue(vq->hw, vq);
+       VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq);
 }
 
 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
index 47fdc3e..ff63a53 100644 (file)
@@ -69,6 +69,8 @@
 
 #define PROCESS_SYS_EVENTS 0
 
+#define        VMXNET3_TX_MAX_SEG      UINT8_MAX
+
 static int eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev);
 static int eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev);
 static int vmxnet3_dev_configure(struct rte_eth_dev *dev);
@@ -81,11 +83,11 @@ static void vmxnet3_dev_promiscuous_disable(struct rte_eth_dev *dev);
 static void vmxnet3_dev_allmulticast_enable(struct rte_eth_dev *dev);
 static void vmxnet3_dev_allmulticast_disable(struct rte_eth_dev *dev);
 static int vmxnet3_dev_link_update(struct rte_eth_dev *dev,
-                               int wait_to_complete);
+                                  int wait_to_complete);
 static void vmxnet3_dev_stats_get(struct rte_eth_dev *dev,
-                               struct rte_eth_stats *stats);
+                                 struct rte_eth_stats *stats);
 static void vmxnet3_dev_info_get(struct rte_eth_dev *dev,
-                               struct rte_eth_dev_info *dev_info);
+                                struct rte_eth_dev_info *dev_info);
 static const uint32_t *
 vmxnet3_dev_supported_ptypes_get(struct rte_eth_dev *dev);
 static int vmxnet3_dev_vlan_filter_set(struct rte_eth_dev *dev,
@@ -118,7 +120,7 @@ static const struct eth_dev_ops vmxnet3_eth_dev_ops = {
        .allmulticast_disable = vmxnet3_dev_allmulticast_disable,
        .link_update          = vmxnet3_dev_link_update,
        .stats_get            = vmxnet3_dev_stats_get,
-       .mac_addr_set         = vmxnet3_mac_addr_set,
+       .mac_addr_set         = vmxnet3_mac_addr_set,
        .dev_infos_get        = vmxnet3_dev_info_get,
        .dev_supported_ptypes_get = vmxnet3_dev_supported_ptypes_get,
        .vlan_filter_set      = vmxnet3_dev_vlan_filter_set,
@@ -131,20 +133,27 @@ static const struct eth_dev_ops vmxnet3_eth_dev_ops = {
 
 static const struct rte_memzone *
 gpa_zone_reserve(struct rte_eth_dev *dev, uint32_t size,
-               const char *post_string, int socket_id, uint16_t align)
+                const char *post_string, int socket_id,
+                uint16_t align, bool reuse)
 {
        char z_name[RTE_MEMZONE_NAMESIZE];
        const struct rte_memzone *mz;
 
        snprintf(z_name, sizeof(z_name), "%s_%d_%s",
-                                       dev->driver->pci_drv.name, dev->data->port_id, post_string);
+                dev->data->drv_name, dev->data->port_id, post_string);
 
        mz = rte_memzone_lookup(z_name);
+       if (!reuse) {
+               if (mz)
+                       rte_memzone_free(mz);
+               return rte_memzone_reserve_aligned(z_name, size, socket_id,
+                                                  0, align);
+       }
+
        if (mz)
                return mz;
 
-       return rte_memzone_reserve_aligned(z_name, size,
-                       socket_id, 0, align);
+       return rte_memzone_reserve_aligned(z_name, size, socket_id, 0, align);
 }
 
 /**
@@ -194,7 +203,7 @@ vmxnet3_dev_atomic_write_link_status(struct rte_eth_dev *dev,
        struct rte_eth_link *src = link;
 
        if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
-                                       *(uint64_t *)src) == 0)
+                               *(uint64_t *)src) == 0)
                return -1;
 
        return 0;
@@ -212,7 +221,7 @@ vmxnet3_disable_intr(struct vmxnet3_hw *hw)
 
        hw->shared->devRead.intrConf.intrCtrl |= VMXNET3_IC_DISABLE_ALL;
        for (i = 0; i < VMXNET3_MAX_INTRS; i++)
-                       VMXNET3_WRITE_BAR0_REG(hw, VMXNET3_REG_IMR + i * 8, 1);
+               VMXNET3_WRITE_BAR0_REG(hw, VMXNET3_REG_IMR + i * 8, 1);
 }
 
 /*
@@ -230,7 +239,8 @@ eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev)
        eth_dev->dev_ops = &vmxnet3_eth_dev_ops;
        eth_dev->rx_pkt_burst = &vmxnet3_recv_pkts;
        eth_dev->tx_pkt_burst = &vmxnet3_xmit_pkts;
-       pci_dev = eth_dev->pci_dev;
+       eth_dev->tx_pkt_prepare = vmxnet3_prep_pkts;
+       pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
 
        /*
         * for secondary processes, we don't initialize any further as primary
@@ -240,6 +250,7 @@ eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev)
                return 0;
 
        rte_eth_copy_pci_info(eth_dev, pci_dev);
+       eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE;
 
        /* Vendor and Device ID need to be set before init of shared code */
        hw->device_id = pci_dev->id.device_id;
@@ -274,8 +285,8 @@ eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev)
        /* Getting MAC Address */
        mac_lo = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_MACL);
        mac_hi = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_MACH);
-       memcpy(hw->perm_addr  , &mac_lo, 4);
-       memcpy(hw->perm_addr+4, &mac_hi, 2);
+       memcpy(hw->perm_addr, &mac_lo, 4);
+       memcpy(hw->perm_addr + 4, &mac_hi, 2);
 
        /* Allocate memory for storing MAC addresses */
        eth_dev->data->mac_addrs = rte_zmalloc("vmxnet3", ETHER_ADDR_LEN *
@@ -319,6 +330,7 @@ eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev)
        eth_dev->dev_ops = NULL;
        eth_dev->rx_pkt_burst = NULL;
        eth_dev->tx_pkt_burst = NULL;
+       eth_dev->tx_pkt_prepare = NULL;
 
        rte_free(eth_dev->data->mac_addrs);
        eth_dev->data->mac_addrs = NULL;
@@ -328,29 +340,16 @@ eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev)
 
 static struct eth_driver rte_vmxnet3_pmd = {
        .pci_drv = {
-               .name = "rte_vmxnet3_pmd",
                .id_table = pci_id_vmxnet3_map,
-               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_DETACHABLE,
+               .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
+               .probe = rte_eth_dev_pci_probe,
+               .remove = rte_eth_dev_pci_remove,
        },
        .eth_dev_init = eth_vmxnet3_dev_init,
        .eth_dev_uninit = eth_vmxnet3_dev_uninit,
        .dev_private_size = sizeof(struct vmxnet3_hw),
 };
 
-/*
- * Driver initialization routine.
- * Invoked once at EAL init time.
- * Register itself as the [Poll Mode] Driver of Virtual PCI VMXNET3 devices.
- */
-static int
-rte_vmxnet3_pmd_init(const char *name __rte_unused, const char *param __rte_unused)
-{
-       PMD_INIT_FUNC_TRACE();
-
-       rte_eth_driver_register(&rte_vmxnet3_pmd);
-       return 0;
-}
-
 static int
 vmxnet3_dev_configure(struct rte_eth_dev *dev)
 {
@@ -360,9 +359,16 @@ vmxnet3_dev_configure(struct rte_eth_dev *dev)
 
        PMD_INIT_FUNC_TRACE();
 
-       if (dev->data->nb_rx_queues > UINT8_MAX ||
-           dev->data->nb_tx_queues > UINT8_MAX)
+       if (dev->data->nb_tx_queues > VMXNET3_MAX_TX_QUEUES ||
+           dev->data->nb_rx_queues > VMXNET3_MAX_RX_QUEUES) {
+               PMD_INIT_LOG(ERR, "ERROR: Number of queues not supported");
                return -EINVAL;
+       }
+
+       if (!rte_is_power_of_2(dev->data->nb_rx_queues)) {
+               PMD_INIT_LOG(ERR, "ERROR: Number of rx queues not power of 2");
+               return -EINVAL;
+       }
 
        size = dev->data->nb_rx_queues * sizeof(struct Vmxnet3_TxQueueDesc) +
                dev->data->nb_tx_queues * sizeof(struct Vmxnet3_RxQueueDesc);
@@ -378,7 +384,7 @@ vmxnet3_dev_configure(struct rte_eth_dev *dev)
         * on current socket
         */
        mz = gpa_zone_reserve(dev, sizeof(struct Vmxnet3_DriverShared),
-                             "shared", rte_socket_id(), 8);
+                             "shared", rte_socket_id(), 8, 1);
 
        if (mz == NULL) {
                PMD_INIT_LOG(ERR, "ERROR: Creating shared zone");
@@ -391,10 +397,14 @@ vmxnet3_dev_configure(struct rte_eth_dev *dev)
 
        /*
         * Allocate a memzone for Vmxnet3_RxQueueDesc - Vmxnet3_TxQueueDesc
-        * on current socket
+        * on current socket.
+        *
+        * We cannot reuse this memzone from previous allocation as its size
+        * depends on the number of tx and rx queues, which could be different
+        * from one config to another.
         */
-       mz = gpa_zone_reserve(dev, size, "queuedesc",
-                             rte_socket_id(), VMXNET3_QUEUE_DESC_ALIGN);
+       mz = gpa_zone_reserve(dev, size, "queuedesc", rte_socket_id(),
+                             VMXNET3_QUEUE_DESC_ALIGN, 0);
        if (mz == NULL) {
                PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
                return -ENOMEM;
@@ -408,10 +418,10 @@ vmxnet3_dev_configure(struct rte_eth_dev *dev)
        hw->queue_desc_len = (uint16_t)size;
 
        if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_RSS) {
-
                /* Allocate memory structure for UPT1_RSSConf and configure */
-               mz = gpa_zone_reserve(dev, sizeof(struct VMXNET3_RSSConf), "rss_conf",
-                                     rte_socket_id(), RTE_CACHE_LINE_SIZE);
+               mz = gpa_zone_reserve(dev, sizeof(struct VMXNET3_RSSConf),
+                                     "rss_conf", rte_socket_id(),
+                                     RTE_CACHE_LINE_SIZE, 1);
                if (mz == NULL) {
                        PMD_INIT_LOG(ERR,
                                     "ERROR: Creating rss_conf structure zone");
@@ -459,8 +469,7 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev)
 
        /* Setting up Guest OS information */
        devRead->misc.driverInfo.gos.gosBits   = sizeof(void *) == 4 ?
-               VMXNET3_GOS_BITS_32 :
-               VMXNET3_GOS_BITS_64;
+               VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64;
        devRead->misc.driverInfo.gos.gosType   = VMXNET3_GOS_TYPE_LINUX;
        devRead->misc.driverInfo.vmxnet3RevSpt = 1;
        devRead->misc.driverInfo.uptVerSpt     = 1;
@@ -523,6 +532,11 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev)
        if (dev->data->dev_conf.rxmode.hw_ip_checksum)
                devRead->misc.uptFeatures |= VMXNET3_F_RXCSUM;
 
+       if (dev->data->dev_conf.rxmode.enable_lro) {
+               devRead->misc.uptFeatures |= VMXNET3_F_LRO;
+               devRead->misc.maxNumRxSG = 0;
+       }
+
        if (port_conf.rxmode.mq_mode == ETH_MQ_RX_RSS) {
                ret = vmxnet3_rss_configure(dev);
                if (ret != VMXNET3_SUCCESS)
@@ -535,7 +549,7 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev)
        }
 
        vmxnet3_dev_vlan_offload_set(dev,
-                            ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK);
+                                    ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK);
 
        vmxnet3_write_mac(hw, hw->perm_addr);
 
@@ -550,7 +564,7 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev)
 static int
 vmxnet3_dev_start(struct rte_eth_dev *dev)
 {
-       int status, ret;
+       int ret;
        struct vmxnet3_hw *hw = dev->data->dev_private;
 
        PMD_INIT_FUNC_TRACE();
@@ -567,11 +581,11 @@ vmxnet3_dev_start(struct rte_eth_dev *dev)
 
        /* Activate device by register write */
        VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_ACTIVATE_DEV);
-       status = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
+       ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
 
-       if (status != 0) {
+       if (ret != 0) {
                PMD_INIT_LOG(ERR, "Device activation: UNSUCCESSFUL");
-               return -1;
+               return -EINVAL;
        }
 
        /* Disable interrupts */
@@ -583,7 +597,7 @@ vmxnet3_dev_start(struct rte_eth_dev *dev)
         */
        ret = vmxnet3_dev_rxtx_init(dev);
        if (ret != VMXNET3_SUCCESS) {
-               PMD_INIT_LOG(ERR, "Device receive init: UNSUCCESSFUL");
+               PMD_INIT_LOG(ERR, "Device queue init: UNSUCCESSFUL");
                return ret;
        }
 
@@ -598,7 +612,7 @@ vmxnet3_dev_start(struct rte_eth_dev *dev)
        PMD_INIT_LOG(DEBUG, "Reading events: 0x%X", events);
        vmxnet3_process_events(hw);
 #endif
-       return status;
+       return VMXNET3_SUCCESS;
 }
 
 /*
@@ -664,16 +678,15 @@ vmxnet3_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
                struct UPT1_TxStats *txStats = &hw->tqd_start[i].stats;
 
                stats->q_opackets[i] = txStats->ucastPktsTxOK +
-                       txStats->mcastPktsTxOK +
-                       txStats->bcastPktsTxOK;
+                                       txStats->mcastPktsTxOK +
+                                       txStats->bcastPktsTxOK;
                stats->q_obytes[i] = txStats->ucastBytesTxOK +
-                       txStats->mcastBytesTxOK +
-                       txStats->bcastBytesTxOK;
+                                       txStats->mcastBytesTxOK +
+                                       txStats->bcastBytesTxOK;
 
                stats->opackets += stats->q_opackets[i];
                stats->obytes += stats->q_obytes[i];
-               stats->oerrors += txStats->pktsTxError +
-                       txStats->pktsTxDiscard;
+               stats->oerrors += txStats->pktsTxError + txStats->pktsTxDiscard;
        }
 
        RTE_BUILD_BUG_ON(RTE_ETHDEV_QUEUE_STAT_CNTRS < VMXNET3_MAX_RX_QUEUES);
@@ -681,12 +694,12 @@ vmxnet3_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
                struct UPT1_RxStats *rxStats = &hw->rqd_start[i].stats;
 
                stats->q_ipackets[i] = rxStats->ucastPktsRxOK +
-                       rxStats->mcastPktsRxOK +
-                       rxStats->bcastPktsRxOK;
+                                       rxStats->mcastPktsRxOK +
+                                       rxStats->bcastPktsRxOK;
 
                stats->q_ibytes[i] = rxStats->ucastBytesRxOK +
-                       rxStats->mcastBytesRxOK +
-                       rxStats->bcastBytesRxOK;
+                                       rxStats->mcastBytesRxOK +
+                                       rxStats->bcastBytesRxOK;
 
                stats->ipackets += stats->q_ipackets[i];
                stats->ibytes += stats->q_ibytes[i];
@@ -698,16 +711,17 @@ vmxnet3_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 }
 
 static void
-vmxnet3_dev_info_get(__attribute__((unused))struct rte_eth_dev *dev,
+vmxnet3_dev_info_get(struct rte_eth_dev *dev,
                     struct rte_eth_dev_info *dev_info)
 {
+       dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device);
+
        dev_info->max_rx_queues = VMXNET3_MAX_RX_QUEUES;
        dev_info->max_tx_queues = VMXNET3_MAX_TX_QUEUES;
        dev_info->min_rx_bufsize = 1518 + RTE_PKTMBUF_HEADROOM;
        dev_info->max_rx_pktlen = 16384; /* includes CRC, cf MAXFRS register */
-       dev_info->max_mac_addrs = VMXNET3_MAX_MAC_ADDRS;
-       /* TRex patch */
        dev_info->speed_capa = ETH_LINK_SPEED_10G;
+       dev_info->max_mac_addrs = VMXNET3_MAX_MAC_ADDRS;
 
        dev_info->default_txconf.txq_flags = ETH_TXQ_FLAGS_NOXSUMSCTP;
        dev_info->flow_type_rss_offloads = VMXNET3_RSS_OFFLOAD_ALL;
@@ -722,12 +736,15 @@ vmxnet3_dev_info_get(__attribute__((unused))struct rte_eth_dev *dev,
                .nb_max = VMXNET3_TX_RING_MAX_SIZE,
                .nb_min = VMXNET3_DEF_TX_RING_SIZE,
                .nb_align = 1,
+               .nb_seg_max = VMXNET3_TX_MAX_SEG,
+               .nb_mtu_seg_max = VMXNET3_MAX_TXD_PER_PKT,
        };
 
        dev_info->rx_offload_capa =
                DEV_RX_OFFLOAD_VLAN_STRIP |
                DEV_RX_OFFLOAD_UDP_CKSUM |
-               DEV_RX_OFFLOAD_TCP_CKSUM;
+               DEV_RX_OFFLOAD_TCP_CKSUM |
+               DEV_RX_OFFLOAD_TCP_LRO;
 
        dev_info->tx_offload_capa =
                DEV_TX_OFFLOAD_VLAN_INSERT |
@@ -760,14 +777,16 @@ vmxnet3_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
 
 /* return 0 means link status changed, -1 means not changed */
 static int
-vmxnet3_dev_link_update(struct rte_eth_dev *dev, __attribute__((unused)) int wait_to_complete)
+vmxnet3_dev_link_update(struct rte_eth_dev *dev,
+                       __rte_unused int wait_to_complete)
 {
        struct vmxnet3_hw *hw = dev->data->dev_private;
        struct rte_eth_link old, link;
        uint32_t ret;
 
+       /* Link status doesn't change for stopped dev */
        if (dev->data->dev_started == 0)
-               return -1; /* Link status doesn't change for stopped dev */
+               return -1;
 
        memset(&link, 0, sizeof(link));
        vmxnet3_dev_atomic_read_link_status(dev, &old);
@@ -789,8 +808,8 @@ vmxnet3_dev_link_update(struct rte_eth_dev *dev, __attribute__((unused)) int wai
 
 /* Updating rxmode through Vmxnet3_DriverShared structure in adapter */
 static void
-vmxnet3_dev_set_rxmode(struct vmxnet3_hw *hw, uint32_t feature, int set) {
-
+vmxnet3_dev_set_rxmode(struct vmxnet3_hw *hw, uint32_t feature, int set)
+{
        struct Vmxnet3_RxFilterConf *rxConf = &hw->shared->devRead.rxFilterConf;
 
        if (set)
@@ -923,11 +942,13 @@ vmxnet3_process_events(struct vmxnet3_hw *hw)
        /* Check if link state has changed */
        if (events & VMXNET3_ECR_LINK)
                PMD_INIT_LOG(ERR,
-                            "Process events in %s(): VMXNET3_ECR_LINK event", __func__);
+                            "Process events in %s(): VMXNET3_ECR_LINK event",
+                            __func__);
 
        /* Check if there is an error on xmit/recv queues */
        if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) {
-               VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_GET_QUEUE_STATUS);
+               VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
+                                      VMXNET3_CMD_GET_QUEUE_STATUS);
 
                if (hw->tqd_start->status.stopped)
                        PMD_INIT_LOG(ERR, "tq error 0x%x",
@@ -946,14 +967,9 @@ vmxnet3_process_events(struct vmxnet3_hw *hw)
 
        if (events & VMXNET3_ECR_DEBUG)
                PMD_INIT_LOG(ERR, "Debug event generated by device.");
-
 }
 #endif
 
-static struct rte_driver rte_vmxnet3_driver = {
-       .type = PMD_PDEV,
-       .init = rte_vmxnet3_pmd_init,
-};
-
-PMD_REGISTER_DRIVER(rte_vmxnet3_driver, vmxnet3);
-DRIVER_REGISTER_PCI_TABLE(vmxnet3, pci_id_vmxnet3_map);
+RTE_PMD_REGISTER_PCI(net_vmxnet3, rte_vmxnet3_pmd.pci_drv);
+RTE_PMD_REGISTER_PCI_TABLE(net_vmxnet3, pci_id_vmxnet3_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_vmxnet3, "* igb_uio | uio_pci_generic | vfio");
index 1be833a..348c840 100644 (file)
@@ -34,6 +34,8 @@
 #ifndef _VMXNET3_ETHDEV_H_
 #define _VMXNET3_ETHDEV_H_
 
+#include <rte_io.h>
+
 #define VMXNET3_MAX_MAC_ADDRS 1
 
 /* UPT feature to negotiate */
@@ -62,8 +64,7 @@
        ETH_RSS_NONFRAG_IPV6_TCP)
 
 /* RSS configuration structure - shared with device through GPA */
-typedef
-struct VMXNET3_RSSConf {
+typedef struct VMXNET3_RSSConf {
        uint16_t   hashType;
        uint16_t   hashFunc;
        uint16_t   hashKeySize;
@@ -76,15 +77,13 @@ struct VMXNET3_RSSConf {
        uint8_t    indTable[VMXNET3_RSS_MAX_IND_TABLE_SIZE];
 } VMXNET3_RSSConf;
 
-typedef
-struct vmxnet3_mf_table {
+typedef struct vmxnet3_mf_table {
        void          *mfTableBase; /* Multicast addresses list */
        uint64_t      mfTablePA;    /* Physical address of the list */
        uint16_t      num_addrs;    /* number of multicast addrs */
 } vmxnet3_mf_table_t;
 
 struct vmxnet3_hw {
-
        uint8_t *hw_addr0;      /* BAR0: PT-Passthrough Regs    */
        uint8_t *hw_addr1;      /* BAR1: VD-Virtual Device Regs */
        /* BAR2: MSI-X Regs */
@@ -111,10 +110,10 @@ struct vmxnet3_hw {
        uint64_t              queueDescPA;
        uint16_t              queue_desc_len;
 
-       VMXNET3_RSSConf          *rss_conf;
-       uint64_t                         rss_confPA;
-       vmxnet3_mf_table_t   *mf_table;
-       uint32_t              shadow_vfta[VMXNET3_VFT_SIZE];
+       VMXNET3_RSSConf       *rss_conf;
+       uint64_t              rss_confPA;
+       vmxnet3_mf_table_t    *mf_table;
+       uint32_t              shadow_vfta[VMXNET3_VFT_SIZE];
 #define VMXNET3_VFT_TABLE_SIZE     (VMXNET3_VFT_SIZE * sizeof(uint32_t))
 };
 
@@ -123,16 +122,15 @@ struct vmxnet3_hw {
 
 /* Config space read/writes */
 
-#define VMXNET3_PCI_REG(reg) (*((volatile uint32_t *)(reg)))
+#define VMXNET3_PCI_REG(reg) rte_read32(reg)
 
-static inline uint32_t vmxnet3_read_addr(volatile void *addr)
+static inline uint32_t
+vmxnet3_read_addr(volatile void *addr)
 {
        return VMXNET3_PCI_REG(addr);
 }
 
-#define VMXNET3_PCI_REG_WRITE(reg, value) do { \
-       VMXNET3_PCI_REG((reg)) = (value); \
-} while(0)
+#define VMXNET3_PCI_REG_WRITE(reg, value) rte_write32((value), (reg))
 
 #define VMXNET3_PCI_BAR0_REG_ADDR(hw, reg) \
        ((volatile uint32_t *)((char *)(hw)->hw_addr0 + (reg)))
@@ -158,20 +156,22 @@ void vmxnet3_dev_rx_queue_release(void *rxq);
 void vmxnet3_dev_tx_queue_release(void *txq);
 
 int  vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
-               uint16_t nb_rx_desc, unsigned int socket_id,
-               const struct rte_eth_rxconf *rx_conf,
-               struct rte_mempool *mb_pool);
+                               uint16_t nb_rx_desc, unsigned int socket_id,
+                               const struct rte_eth_rxconf *rx_conf,
+                               struct rte_mempool *mb_pool);
 int  vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
-               uint16_t nb_tx_desc, unsigned int socket_id,
-               const struct rte_eth_txconf *tx_conf);
+                               uint16_t nb_tx_desc, unsigned int socket_id,
+                               const struct rte_eth_txconf *tx_conf);
 
 int vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev);
 
 int vmxnet3_rss_configure(struct rte_eth_dev *dev);
 
 uint16_t vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
-               uint16_t nb_pkts);
+                          uint16_t nb_pkts);
 uint16_t vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
-               uint16_t nb_pkts);
+                          uint16_t nb_pkts);
+uint16_t vmxnet3_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+                       uint16_t nb_pkts);
 
 #endif /* _VMXNET3_ETHDEV_H_ */
index 69ff2de..b50d2b0 100644 (file)
@@ -96,12 +96,12 @@ vmxnet3_cmd_ring_desc_empty(struct vmxnet3_cmd_ring *ring)
 }
 
 typedef struct vmxnet3_comp_ring {
-       uint32_t               size;
-       uint32_t               next2proc;
-       uint8_t                gen;
-       uint8_t                intr_idx;
+       uint32_t               size;
+       uint32_t               next2proc;
+       uint8_t                gen;
+       uint8_t                intr_idx;
        Vmxnet3_GenericDesc    *base;
-       uint64_t               basePA;
+       uint64_t               basePA;
 } vmxnet3_comp_ring_t;
 
 struct vmxnet3_data_ring {
@@ -121,13 +121,13 @@ vmxnet3_comp_ring_adv_next2proc(struct vmxnet3_comp_ring *ring)
 }
 
 struct vmxnet3_txq_stats {
-       uint64_t        drop_total; /* # of pkts dropped by the driver,
+       uint64_t        drop_total; /* # of pkts dropped by the driver,
                                     * the counters below track droppings due to
                                     * different reasons
                                     */
-       uint64_t        drop_too_many_segs;
-       uint64_t        drop_tso;
-       uint64_t        tx_ring_full;
+       uint64_t        drop_too_many_segs;
+       uint64_t        drop_tso;
+       uint64_t        tx_ring_full;
 };
 
 typedef struct vmxnet3_tx_queue {
@@ -158,8 +158,8 @@ typedef struct vmxnet3_rx_queue {
        uint32_t                    qid1;
        uint32_t                    qid2;
        Vmxnet3_RxQueueDesc         *shared;
-       struct rte_mbuf             *start_seg;
-       struct rte_mbuf             *last_seg;
+       struct rte_mbuf             *start_seg;
+       struct rte_mbuf             *last_seg;
        struct vmxnet3_rxq_stats    stats;
        bool                        stopped;
        uint16_t                    queue_id;      /**< Device RX queue index. */
index 9deeb3f..b246884 100644 (file)
@@ -57,7 +57,6 @@
 #include <rte_lcore.h>
 #include <rte_atomic.h>
 #include <rte_branch_prediction.h>
-#include <rte_ring.h>
 #include <rte_mempool.h>
 #include <rte_malloc.h>
 #include <rte_mbuf.h>
@@ -70,6 +69,7 @@
 #include <rte_sctp.h>
 #include <rte_string_fns.h>
 #include <rte_errno.h>
+#include <rte_net.h>
 
 #include "base/vmxnet3_defs.h"
 #include "vmxnet3_ring.h"
 #include "vmxnet3_logs.h"
 #include "vmxnet3_ethdev.h"
 
+#define        VMXNET3_TX_OFFLOAD_MASK ( \
+               PKT_TX_VLAN_PKT | \
+               PKT_TX_L4_MASK |  \
+               PKT_TX_TCP_SEG)
+
+#define        VMXNET3_TX_OFFLOAD_NOTSUP_MASK  \
+       (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
+
 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
 
 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
@@ -141,10 +149,10 @@ vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
 #endif
 
 static void
-vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
+vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
 {
        while (ring->next2comp != ring->next2fill) {
-               /* No need to worry about tx desc ownership, device is quiesced by now. */
+               /* No need to worry about desc ownership, device is quiesced by now. */
                vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
 
                if (buf_info->m) {
@@ -157,21 +165,40 @@ vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
        }
 }
 
+static void
+vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
+{
+       uint32_t i;
+
+       for (i = 0; i < ring->size; i++) {
+               /* No need to worry about desc ownership, device is quiesced by now. */
+               vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
+
+               if (buf_info->m) {
+                       rte_pktmbuf_free_seg(buf_info->m);
+                       buf_info->m = NULL;
+                       buf_info->bufPA = 0;
+                       buf_info->len = 0;
+               }
+               vmxnet3_cmd_ring_adv_next2comp(ring);
+       }
+}
+
 static void
 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
 {
-       vmxnet3_cmd_ring_release_mbufs(ring);
        rte_free(ring->buf_info);
        ring->buf_info = NULL;
 }
 
-
 void
 vmxnet3_dev_tx_queue_release(void *txq)
 {
        vmxnet3_tx_queue_t *tq = txq;
 
        if (tq != NULL) {
+               /* Release mbufs */
+               vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
                /* Release the cmd_ring */
                vmxnet3_cmd_ring_release(&tq->cmd_ring);
        }
@@ -184,6 +211,10 @@ vmxnet3_dev_rx_queue_release(void *rxq)
        vmxnet3_rx_queue_t *rq = rxq;
 
        if (rq != NULL) {
+               /* Release mbufs */
+               for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
+                       vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
+
                /* Release both the cmd_rings */
                for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
                        vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
@@ -201,7 +232,7 @@ vmxnet3_dev_tx_queue_reset(void *txq)
 
        if (tq != NULL) {
                /* Release the cmd_ring mbufs */
-               vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
+               vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
        }
 
        /* Tx vmxnet rings structure initialization*/
@@ -230,7 +261,7 @@ vmxnet3_dev_rx_queue_reset(void *rxq)
        if (rq != NULL) {
                /* Release both the cmd_rings mbufs */
                for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
-                       vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
+                       vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
        }
 
        ring0 = &rq->cmd_ring[0];
@@ -327,6 +358,53 @@ vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
        PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
 }
 
+uint16_t
+vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+       uint16_t nb_pkts)
+{
+       int32_t ret;
+       uint32_t i;
+       uint64_t ol_flags;
+       struct rte_mbuf *m;
+
+       for (i = 0; i != nb_pkts; i++) {
+               m = tx_pkts[i];
+               ol_flags = m->ol_flags;
+
+               /* Non-TSO packet cannot occupy more than
+                * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
+                */
+               if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
+                               m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
+                       rte_errno = -EINVAL;
+                       return i;
+               }
+
+               /* check that only supported TX offloads are requested. */
+               if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
+                               (ol_flags & PKT_TX_L4_MASK) ==
+                               PKT_TX_SCTP_CKSUM) {
+                       rte_errno = -ENOTSUP;
+                       return i;
+               }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+               ret = rte_validate_tx_offload(m);
+               if (ret != 0) {
+                       rte_errno = ret;
+                       return i;
+               }
+#endif
+               ret = rte_net_intel_cksum_prepare(m);
+               if (ret != 0) {
+                       rte_errno = ret;
+                       return i;
+               }
+       }
+
+       return i;
+}
+
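Editor's note: the VMXNET3_TX_OFFLOAD_NOTSUP_MASK used above is built by XOR-ing the full PKT_TX_OFFLOAD_MASK with the subset vmxnet3 supports; because one mask is a subset of the other, the XOR leaves exactly the unsupported bits, so a single AND against a packet's ol_flags flags it for rejection. The same pattern with a toy flag space (names are illustrative, not DPDK's):

#include <stdint.h>
#include <stdio.h>

/* Toy flag space standing in for the mbuf ol_flags bits. */
#define F_VLAN  (1ULL << 0)
#define F_CKSUM (1ULL << 1)
#define F_TSO   (1ULL << 2)
#define F_SCTP  (1ULL << 3)

#define ALL_OFFLOADS   (F_VLAN | F_CKSUM | F_TSO | F_SCTP)
#define SUPPORTED      (F_VLAN | F_CKSUM | F_TSO)
/* XOR of a mask with one of its subsets yields the unsupported remainder. */
#define NOT_SUPPORTED  (ALL_OFFLOADS ^ SUPPORTED)

int
main(void)
{
        uint64_t requested = F_CKSUM | F_SCTP;

        if (requested & NOT_SUPPORTED)
                printf("packet requests an unsupported offload\n");
        return 0;
}
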
 uint16_t
 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                  uint16_t nb_pkts)
@@ -392,7 +470,8 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                        continue;
                }
 
-               if (txm->nb_segs == 1 && rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
+               if (txm->nb_segs == 1 &&
+                   rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
                        struct Vmxnet3_TxDataDesc *tdd;
 
                        tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
@@ -414,8 +493,8 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                        gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
                        if (copy_size)
                                gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
-                                                               txq->cmd_ring.next2fill *
-                                                               sizeof(struct Vmxnet3_TxDataDesc));
+                                                                  txq->cmd_ring.next2fill *
+                                                                  sizeof(struct Vmxnet3_TxDataDesc));
                        else
                                gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);
 
@@ -495,16 +574,41 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
        return nb_tx;
 }
 
+static inline void
+vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
+                  struct rte_mbuf *mbuf)
+{
+       uint32_t val = 0;
+       struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
+       struct Vmxnet3_RxDesc *rxd =
+               (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
+       vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
+
+       if (ring_id == 0)
+               val = VMXNET3_RXD_BTYPE_HEAD;
+       else
+               val = VMXNET3_RXD_BTYPE_BODY;
+
+       buf_info->m = mbuf;
+       buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
+       buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);
+
+       rxd->addr = buf_info->bufPA;
+       rxd->btype = val;
+       rxd->len = buf_info->len;
+       rxd->gen = ring->gen;
+
+       vmxnet3_cmd_ring_adv_next2fill(ring);
+}
 /*
  *  Allocates mbufs and clusters. Post rx descriptors with buffer details
  *  so that device can receive packets in those buffers.
- *     Ring layout:
- *      Among the two rings, 1st ring contains buffers of type 0 and type1.
+ *  Ring layout:
+ *      Among the two rings, 1st ring contains buffers of type 0 and type 1.
  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
  *      by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
  *      2nd ring contains buffers of type 1 alone. Second ring mostly be used
  *      only for LRO.
- *
  */
 static int
 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
@@ -549,8 +653,7 @@ vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
                buf_info->m = mbuf;
                buf_info->len = (uint16_t)(mbuf->buf_len -
                                           RTE_PKTMBUF_HEADROOM);
-               buf_info->bufPA =
-                       rte_mbuf_data_dma_addr_default(mbuf);
+               buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);
 
                /* Load Rx Descriptor with the buffer's GPA */
                rxd->addr = buf_info->bufPA;
@@ -636,9 +739,18 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
        }
 
        while (rcd->gen == rxq->comp_ring.gen) {
+               struct rte_mbuf *newm;
+
                if (nb_rx >= nb_pkts)
                        break;
 
+               newm = rte_mbuf_raw_alloc(rxq->mp);
+               if (unlikely(newm == NULL)) {
+                       PMD_RX_LOG(ERR, "Error allocating mbuf");
+                       rxq->stats.rx_buf_alloc_failure++;
+                       break;
+               }
+
                idx = rcd->rxdIdx;
                ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
                rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
@@ -676,7 +788,6 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                        goto rcd_done;
                }
 
-
                /* Initialize newly received packet buffer */
                rxm->port = rxq->port_id;
                rxm->nb_segs = 1;
@@ -736,10 +847,11 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
 rcd_done:
                rxq->cmd_ring[ring_idx].next2comp = idx;
-               VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
+               VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
+                                         rxq->cmd_ring[ring_idx].size);
 
-               /* It's time to allocate some new buf and renew descriptors */
-               vmxnet3_post_rx_bufs(rxq, ring_idx);
+               /* It's time to renew descriptors */
+               vmxnet3_renew_desc(rxq, ring_idx, newm);
                if (unlikely(rxq->shared->ctrl.updateRxProd)) {
                        VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
                                               rxq->cmd_ring[ring_idx].next2fill);
@@ -751,8 +863,7 @@ rcd_done:
                rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
                nb_rxd++;
                if (nb_rxd > rxq->cmd_ring[0].size) {
-                       PMD_RX_LOG(ERR,
-                                  "Used up quota of receiving packets,"
+                       PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
                                   " relinquish control.");
                        break;
                }
@@ -774,15 +885,15 @@ ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
        const struct rte_memzone *mz;
 
        snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
-                       dev->driver->pci_drv.name, ring_name,
-                       dev->data->port_id, queue_id);
+                dev->driver->pci_drv.driver.name, ring_name,
+                dev->data->port_id, queue_id);
 
        mz = rte_memzone_lookup(z_name);
        if (mz)
                return mz;
 
        return rte_memzone_reserve_aligned(z_name, ring_size,
-                       socket_id, 0, VMXNET3_RING_BA_ALIGN);
+                                          socket_id, 0, VMXNET3_RING_BA_ALIGN);
 }
 
 int
@@ -790,7 +901,7 @@ vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
                           uint16_t queue_idx,
                           uint16_t nb_desc,
                           unsigned int socket_id,
-                          __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
+                          __rte_unused const struct rte_eth_txconf *tx_conf)
 {
        struct vmxnet3_hw *hw = dev->data->dev_private;
        const struct rte_memzone *mz;
@@ -808,7 +919,8 @@ vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
                return -EINVAL;
        }
 
-       txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
+       txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
+                         RTE_CACHE_LINE_SIZE);
        if (txq == NULL) {
                PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
                return -ENOMEM;
@@ -891,12 +1003,12 @@ vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
                           uint16_t queue_idx,
                           uint16_t nb_desc,
                           unsigned int socket_id,
-                          __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
+                          __rte_unused const struct rte_eth_rxconf *rx_conf,
                           struct rte_mempool *mp)
 {
        const struct rte_memzone *mz;
        struct vmxnet3_rx_queue *rxq;
-       struct vmxnet3_hw     *hw = dev->data->dev_private;
+       struct vmxnet3_hw *hw = dev->data->dev_private;
        struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
        struct vmxnet3_comp_ring *comp_ring;
        int size;
@@ -905,7 +1017,8 @@ vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
 
        PMD_INIT_FUNC_TRACE();
 
-       rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
+       rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
+                         RTE_CACHE_LINE_SIZE);
        if (rxq == NULL) {
                PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
                return -ENOMEM;
@@ -979,7 +1092,9 @@ vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
                ring->rid = i;
                snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
 
-               ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
+               ring->buf_info = rte_zmalloc(mem_name,
+                                            ring->size * sizeof(vmxnet3_buf_info_t),
+                                            RTE_CACHE_LINE_SIZE);
                if (ring->buf_info == NULL) {
                        PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
                        return -ENOMEM;
@@ -1013,10 +1128,15 @@ vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
                        /* Passing 0 as alloc_num will allocate full ring */
                        ret = vmxnet3_post_rx_bufs(rxq, j);
                        if (ret <= 0) {
-                               PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
+                               PMD_INIT_LOG(ERR,
+                                            "ERROR: Posting Rxq: %d buffers ring: %d",
+                                            i, j);
                                return -ret;
                        }
-                       /* Updating device with the index:next2fill to fill the mbufs for coming packets */
+                       /*
+                        * Updating device with the index:next2fill to fill the
+                        * mbufs for coming packets.
+                        */
                        if (unlikely(rxq->shared->ctrl.updateRxProd)) {
                                VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
                                                       rxq->cmd_ring[j].next2fill);
@@ -1064,7 +1184,7 @@ vmxnet3_rss_configure(struct rte_eth_dev *dev)
        dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
        /* loading hashKeySize */
        dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
-       /* loading indTableSize : Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
+       /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
        dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
 
        if (port_rss_conf->rss_key == NULL) {
@@ -1073,7 +1193,8 @@ vmxnet3_rss_configure(struct rte_eth_dev *dev)
        }
 
        /* loading hashKey */
-       memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
+       memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
+              dev_rss_conf->hashKeySize);
 
        /* loading indTable */
        for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
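
For context, a minimal sketch of the lookup-then-reserve idiom that ring_dma_zone_reserve() above relies on; the zone name, length and socket id below are hypothetical, not part of this patch:

#include <rte_memory.h>
#include <rte_memzone.h>

/* Reuse an existing zone with the same name instead of failing the
 * second reservation attempt. */
static const struct rte_memzone *
get_or_reserve_zone(const char *name, size_t len, int socket_id)
{
	const struct rte_memzone *mz = rte_memzone_lookup(name);

	if (mz != NULL)
		return mz;

	return rte_memzone_reserve_aligned(name, len, socket_id, 0,
					   RTE_CACHE_LINE_SIZE);
}
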
index 99f6cc8..19bc09a 100644 (file)
@@ -56,7 +56,7 @@
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <rte_string_fns.h>
-#include <rte_dev.h>
+#include <rte_vdev.h>
 #include <cmdline_parse.h>
 #include <cmdline_parse_etheraddr.h>
 
@@ -70,8 +70,6 @@
 /* virtio_idx is increased after new device is created.*/
 static int virtio_idx = 0;
 
-static const char *drivername = "xen virtio PMD";
-
 static struct rte_eth_link pmd_link = {
                .link_speed = ETH_SPEED_NUM_10G,
                .link_duplex = ETH_LINK_FULL_DUPLEX,
@@ -331,13 +329,11 @@ eth_dev_info(struct rte_eth_dev *dev,
        struct pmd_internals *internals = dev->data->dev_private;
 
        RTE_SET_USED(internals);
-       dev_info->driver_name = drivername;
        dev_info->max_mac_addrs = 1;
        dev_info->max_rx_pktlen = (uint32_t)2048;
        dev_info->max_rx_queues = (uint16_t)1;
        dev_info->max_tx_queues = (uint16_t)1;
        dev_info->min_rx_bufsize = 0;
-       dev_info->pci_dev = NULL;
 }
 
 static void
@@ -620,6 +616,7 @@ enum dev_action {
        DEV_ATTACH
 };
 
+static struct rte_vdev_driver pmd_xenvirt_drv;
 
 static int
 eth_dev_xenvirt_create(const char *name, const char *params,
@@ -654,7 +651,7 @@ eth_dev_xenvirt_create(const char *name, const char *params,
                goto err;
 
        /* reserve an ethdev entry */
-       eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
+       eth_dev = rte_eth_dev_allocate(name);
        if (eth_dev == NULL)
                goto err;
 
@@ -673,9 +670,9 @@ eth_dev_xenvirt_create(const char *name, const char *params,
        eth_dev->data = data;
        eth_dev->dev_ops = &ops;
 
-       eth_dev->data->dev_flags = RTE_PCI_DRV_DETACHABLE;
+       eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE;
        eth_dev->data->kdrv = RTE_KDRV_NONE;
-       eth_dev->data->drv_name = drivername;
+       eth_dev->data->drv_name = pmd_xenvirt_drv.driver.name;
        eth_dev->driver = NULL;
        eth_dev->data->numa_node = numa_node;
 
@@ -729,7 +726,7 @@ eth_dev_xenvirt_free(const char *name, const unsigned numa_node)
 
 /*TODO: Support multiple process model */
 static int
-rte_pmd_xenvirt_devinit(const char *name, const char *params)
+rte_pmd_xenvirt_probe(const char *name, const char *params)
 {
        if (virtio_idx == 0) {
                if (xenstore_init() != 0) {
@@ -746,7 +743,7 @@ rte_pmd_xenvirt_devinit(const char *name, const char *params)
 }
 
 static int
-rte_pmd_xenvirt_devuninit(const char *name)
+rte_pmd_xenvirt_remove(const char *name)
 {
        eth_dev_xenvirt_free(name, rte_socket_id());
 
@@ -759,12 +756,12 @@ rte_pmd_xenvirt_devuninit(const char *name)
        return 0;
 }
 
-static struct rte_driver pmd_xenvirt_drv = {
-       .type = PMD_VDEV,
-       .init = rte_pmd_xenvirt_devinit,
-       .uninit = rte_pmd_xenvirt_devuninit,
+static struct rte_vdev_driver pmd_xenvirt_drv = {
+       .probe = rte_pmd_xenvirt_probe,
+       .remove = rte_pmd_xenvirt_remove,
 };
 
-PMD_REGISTER_DRIVER(pmd_xenvirt_drv, eth_xenvirt);
-DRIVER_REGISTER_PARAM_STRING(eth_xenvirt,
+RTE_PMD_REGISTER_VDEV(net_xenvirt, pmd_xenvirt_drv);
+RTE_PMD_REGISTER_ALIAS(net_xenvirt, eth_xenvirt);
+RTE_PMD_REGISTER_PARAM_STRING(net_xenvirt,
        "mac=<mac addr>");
index 4995a9b..598adc6 100644 (file)
@@ -39,7 +39,6 @@ extern "C" {
 #endif
 
 #include <rte_mempool.h>
-#include <rte_ring.h>
 
 /**
  * Creates mempool for xen virtio PMD.
index 09d6784..6664a55 100644 (file)
@@ -234,6 +234,10 @@ int
 rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data,
        uint32_t *results, uint32_t num, uint32_t categories);
 
+int
+rte_acl_classify_altivec(const struct rte_acl_ctx *ctx, const uint8_t **data,
+       uint32_t *results, uint32_t num, uint32_t categories);
+
 #ifdef __cplusplus
 }
 #endif /* __cplusplus */
index b2fc42c..a862ff6 100644 (file)
@@ -39,7 +39,9 @@
 
 #define MAX_SEARCHES_AVX16     16
 #define MAX_SEARCHES_SSE8      8
+#define MAX_SEARCHES_ALTIVEC8  8
 #define MAX_SEARCHES_SSE4      4
+#define MAX_SEARCHES_ALTIVEC4  4
 #define MAX_SEARCHES_SCALAR    2
 
 #define GET_NEXT_4BYTES(prm, idx)      \
@@ -67,10 +69,10 @@ struct acl_flow_data {
        uint32_t            trie;
        /* current trie index (0 to N-1) */
        uint32_t            cmplt_size;
+       /* maximum number of packets to process */
        uint32_t            total_packets;
-       uint32_t            categories;
        /* number of result categories per packet. */
-       /* maximum number of packets to process */
+       uint32_t            categories;
        const uint64_t     *trans;
        const uint8_t     **data;
        uint32_t           *results;
similarity index 76%
rename from src/dpdk/lib/librte_eal/bsdapp/eal/eal_log.c
rename to src/dpdk/lib/librte_acl/acl_run_altivec.c
index a425f7a..3523526 100644 (file)
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright (C) IBM Corporation 2016.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <stdio.h>
-#include <rte_common.h>
-#include <rte_log.h>
-
-#include <eal_private.h>
-
-/*
- * set the log to default function, called during eal init process,
- * once memzones are available.
- */
-int
-rte_eal_log_init(const char *id __rte_unused, int facility __rte_unused)
-{
-       if (rte_eal_common_log_init(stderr) < 0)
-               return -1;
-       return 0;
-}
+#include "acl_run_altivec.h"
 
 int
-rte_eal_log_early_init(void)
+rte_acl_classify_altivec(const struct rte_acl_ctx *ctx, const uint8_t **data,
+       uint32_t *results, uint32_t num, uint32_t categories)
 {
-       rte_openlog_stream(stderr);
-       return 0;
+       if (likely(num >= MAX_SEARCHES_ALTIVEC8))
+               return search_altivec_8(ctx, data, results, num, categories);
+       else if (num >= MAX_SEARCHES_ALTIVEC4)
+               return search_altivec_4(ctx, data, results, num, categories);
+       else
+               return rte_acl_classify_scalar(ctx, data, results, num,
+                       categories);
 }
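
A minimal caller sketch for the new AltiVec path, assuming an already built ACL context; rte_acl_set_ctx_classify() and rte_acl_classify() are the existing public entry points, the function and variable names are hypothetical and not part of this patch:

#include <rte_acl.h>

static int
classify_burst(struct rte_acl_ctx *acl_ctx, const uint8_t **data,
	uint32_t *results, uint32_t num)
{
	int ret;

	/* Ask for the AltiVec code path explicitly; the context keeps its
	 * previous algorithm if this call fails. */
	ret = rte_acl_set_ctx_classify(acl_ctx, RTE_ACL_CLASSIFY_ALTIVEC);
	if (ret != 0)
		return ret;

	/* One result category per packet in this sketch. */
	return rte_acl_classify(acl_ctx, data, results, num, 1);
}
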
diff --git a/src/dpdk/lib/librte_acl/acl_run_altivec.h b/src/dpdk/lib/librte_acl/acl_run_altivec.h
new file mode 100644 (file)
index 0000000..7d329bc
--- /dev/null
@@ -0,0 +1,329 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2016.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IBM Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "acl_run.h"
+#include "acl_vect.h"
+
+struct _altivec_acl_const {
+       rte_xmm_t xmm_shuffle_input;
+       rte_xmm_t xmm_index_mask;
+       rte_xmm_t xmm_ones_16;
+       rte_xmm_t range_base;
+} altivec_acl_const  __attribute__((aligned(RTE_CACHE_LINE_SIZE))) = {
+       {
+               .u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c}
+       },
+       {
+               .u32 = {RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX,
+               RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX}
+       },
+       {
+               .u16 = {1, 1, 1, 1, 1, 1, 1, 1}
+       },
+       {
+               .u32 = {0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c}
+       },
+};
+
+/*
+ * Resolve priority for multiple results (altivec version).
+ * This consists of comparing the priority of the current traversal with the
+ * running set of results for the packet.
+ * For each result, keep a running array of the result (rule number) and
+ * its priority for each category.
+ */
+static inline void
+resolve_priority_altivec(uint64_t transition, int n,
+       const struct rte_acl_ctx *ctx, struct parms *parms,
+       const struct rte_acl_match_results *p, uint32_t categories)
+{
+       uint32_t x;
+       xmm_t results, priority, results1, priority1;
+       vector bool int selector;
+       xmm_t *saved_results, *saved_priority;
+
+       for (x = 0; x < categories; x += RTE_ACL_RESULTS_MULTIPLIER) {
+
+               saved_results = (xmm_t *)(&parms[n].cmplt->results[x]);
+               saved_priority =
+                       (xmm_t *)(&parms[n].cmplt->priority[x]);
+
+               /* get results and priorities for completed trie */
+               results = *(const xmm_t *)&p[transition].results[x];
+               priority = *(const xmm_t *)&p[transition].priority[x];
+
+               /* if this is not the first completed trie */
+               if (parms[n].cmplt->count != ctx->num_tries) {
+
+                       /* get running best results and their priorities */
+                       results1 = *saved_results;
+                       priority1 = *saved_priority;
+
+                       /* select results that are highest priority */
+                       selector = vec_cmpgt(priority1, priority);
+                       results = vec_sel(results, results1, selector);
+                       priority = vec_sel(priority, priority1,
+                               selector);
+               }
+
+               /* save running best results and their priorities */
+               *saved_results = results;
+               *saved_priority = priority;
+       }
+}
+
+/*
+ * Check for any match in 4 transitions
+ */
+static inline __attribute__((always_inline)) uint32_t
+check_any_match_x4(uint64_t val[])
+{
+       return (val[0] | val[1] | val[2] | val[3]) & RTE_ACL_NODE_MATCH;
+}
+
+static inline __attribute__((always_inline)) void
+acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
+       struct acl_flow_data *flows, uint64_t transitions[])
+{
+       while (check_any_match_x4(transitions)) {
+               transitions[0] = acl_match_check(transitions[0], slot, ctx,
+                       parms, flows, resolve_priority_altivec);
+               transitions[1] = acl_match_check(transitions[1], slot + 1, ctx,
+                       parms, flows, resolve_priority_altivec);
+               transitions[2] = acl_match_check(transitions[2], slot + 2, ctx,
+                       parms, flows, resolve_priority_altivec);
+               transitions[3] = acl_match_check(transitions[3], slot + 3, ctx,
+                       parms, flows, resolve_priority_altivec);
+       }
+}
+
+/*
+ * Process 4 transitions (in 2 XMM registers) in parallel
+ */
+static inline __attribute__((optimize("O2"))) xmm_t
+transition4(xmm_t next_input, const uint64_t *trans,
+       xmm_t *indices1, xmm_t *indices2)
+{
+       xmm_t addr, tr_lo, tr_hi;
+       xmm_t in, node_type, r, t;
+       xmm_t dfa_ofs, quad_ofs;
+       xmm_t *index_mask, *tp;
+       vector bool int dfa_msk;
+       vector signed char zeroes = {};
+       union {
+               uint64_t d64[2];
+               uint32_t d32[4];
+       } v;
+
+       /* Move low 32 into tr_lo and high 32 into tr_hi */
+       tr_lo = (xmm_t){(*indices1)[0], (*indices1)[2],
+                       (*indices2)[0], (*indices2)[2]};
+       tr_hi = (xmm_t){(*indices1)[1], (*indices1)[3],
+                       (*indices2)[1], (*indices2)[3]};
+
+        /* Calculate the address (array index) for all 4 transitions. */
+       index_mask = (xmm_t *)&altivec_acl_const.xmm_index_mask.u32;
+       t = vec_xor(*index_mask, *index_mask);
+       in = vec_perm(next_input, (xmm_t){},
+               *(vector unsigned char *)&altivec_acl_const.xmm_shuffle_input);
+
+       /* Calc node type and node addr */
+       node_type = vec_and(vec_nor(*index_mask, *index_mask), tr_lo);
+       addr = vec_and(tr_lo, *index_mask);
+
+       /* mask for DFA type(0) nodes */
+       dfa_msk = vec_cmpeq(node_type, t);
+
+       /* DFA calculations. */
+       r = vec_sr(in, (vector unsigned int){30, 30, 30, 30});
+       tp = (xmm_t *)&altivec_acl_const.range_base.u32;
+       r = vec_add(r, *tp);
+       t = vec_sr(in, (vector unsigned int){24, 24, 24, 24});
+       r = vec_perm(tr_hi, (xmm_t){(uint16_t)0 << 16},
+               (vector unsigned char)r);
+
+       dfa_ofs = vec_sub(t, r);
+
+       /* QUAD/SINGLE calculations. */
+       t = (xmm_t)vec_cmpgt((vector signed char)in, (vector signed char)tr_hi);
+       t = (xmm_t)vec_sel(
+               vec_sel(
+                       (vector signed char)vec_sub(
+                               zeroes, (vector signed char)t),
+                       (vector signed char)t,
+                       vec_cmpgt((vector signed char)t, zeroes)),
+               zeroes,
+               vec_cmpeq((vector signed char)t, zeroes));
+
+       t = (xmm_t)vec_msum((vector signed char)t,
+               (vector unsigned char)t, (xmm_t){});
+       quad_ofs = (xmm_t)vec_msum((vector signed short)t,
+               *(vector signed short *)&altivec_acl_const.xmm_ones_16.u16,
+               (xmm_t){});
+
+       /* blend DFA and QUAD/SINGLE. */
+       t = vec_sel(quad_ofs, dfa_ofs, dfa_msk);
+
+       /* calculate address for next transitions. */
+       addr = vec_add(addr, t);
+
+       v.d64[0] = (uint64_t)trans[addr[0]];
+       v.d64[1] = (uint64_t)trans[addr[1]];
+       *indices1 = (xmm_t){v.d32[0], v.d32[1], v.d32[2], v.d32[3]};
+       v.d64[0] = (uint64_t)trans[addr[2]];
+       v.d64[1] = (uint64_t)trans[addr[3]];
+       *indices2 = (xmm_t){v.d32[0], v.d32[1], v.d32[2], v.d32[3]};
+
+       return vec_sr(next_input,
+               (vector unsigned int){CHAR_BIT, CHAR_BIT, CHAR_BIT, CHAR_BIT});
+}
+
+/*
+ * Execute trie traversal with 8 traversals in parallel
+ */
+static inline int
+search_altivec_8(const struct rte_acl_ctx *ctx, const uint8_t **data,
+       uint32_t *results, uint32_t total_packets, uint32_t categories)
+{
+       int n;
+       struct acl_flow_data flows;
+       uint64_t index_array[MAX_SEARCHES_ALTIVEC8];
+       struct completion cmplt[MAX_SEARCHES_ALTIVEC8];
+       struct parms parms[MAX_SEARCHES_ALTIVEC8];
+       xmm_t input0, input1;
+
+       acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+               total_packets, categories, ctx->trans_table);
+
+       for (n = 0; n < MAX_SEARCHES_ALTIVEC8; n++) {
+               cmplt[n].count = 0;
+               index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+       }
+
+        /* Check for any matches. */
+       acl_match_check_x4(0, ctx, parms, &flows, (uint64_t *)&index_array[0]);
+       acl_match_check_x4(4, ctx, parms, &flows, (uint64_t *)&index_array[4]);
+
+       while (flows.started > 0) {
+
+               /* Gather 4 bytes of input data for each stream. */
+               input0 = (xmm_t){GET_NEXT_4BYTES(parms, 0),
+                               GET_NEXT_4BYTES(parms, 1),
+                               GET_NEXT_4BYTES(parms, 2),
+                               GET_NEXT_4BYTES(parms, 3)};
+
+               input1 = (xmm_t){GET_NEXT_4BYTES(parms, 4),
+                               GET_NEXT_4BYTES(parms, 5),
+                               GET_NEXT_4BYTES(parms, 6),
+                               GET_NEXT_4BYTES(parms, 7)};
+
+                /* Process the 4 bytes of input on each stream. */
+
+               input0 = transition4(input0, flows.trans,
+                       (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+               input1 = transition4(input1, flows.trans,
+                       (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]);
+
+               input0 = transition4(input0, flows.trans,
+                       (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+               input1 = transition4(input1, flows.trans,
+                       (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]);
+
+               input0 = transition4(input0, flows.trans,
+                       (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+               input1 = transition4(input1, flows.trans,
+                       (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]);
+
+               input0 = transition4(input0, flows.trans,
+                       (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+               input1 = transition4(input1, flows.trans,
+                       (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]);
+
+                /* Check for any matches. */
+               acl_match_check_x4(0, ctx, parms, &flows,
+                       (uint64_t *)&index_array[0]);
+               acl_match_check_x4(4, ctx, parms, &flows,
+                       (uint64_t *)&index_array[4]);
+       }
+
+       return 0;
+}
+
+/*
+ * Execute trie traversal with 4 traversals in parallel
+ */
+static inline int
+search_altivec_4(const struct rte_acl_ctx *ctx, const uint8_t **data,
+        uint32_t *results, int total_packets, uint32_t categories)
+{
+       int n;
+       struct acl_flow_data flows;
+       uint64_t index_array[MAX_SEARCHES_ALTIVEC4];
+       struct completion cmplt[MAX_SEARCHES_ALTIVEC4];
+       struct parms parms[MAX_SEARCHES_ALTIVEC4];
+       xmm_t input;
+
+       acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+               total_packets, categories, ctx->trans_table);
+
+       for (n = 0; n < MAX_SEARCHES_ALTIVEC4; n++) {
+               cmplt[n].count = 0;
+               index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+       }
+
+       /* Check for any matches. */
+       acl_match_check_x4(0, ctx, parms, &flows, index_array);
+
+       while (flows.started > 0) {
+
+               /* Gather 4 bytes of input data for each stream. */
+               input = (xmm_t){GET_NEXT_4BYTES(parms, 0),
+                               GET_NEXT_4BYTES(parms, 1),
+                               GET_NEXT_4BYTES(parms, 2),
+                               GET_NEXT_4BYTES(parms, 3)};
+
+               /* Process the 4 bytes of input on each stream. */
+               input = transition4(input, flows.trans,
+                       (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+               input = transition4(input, flows.trans,
+                       (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+               input = transition4(input, flows.trans,
+                       (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+               input = transition4(input, flows.trans,
+                       (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+
+               /* Check for any matches. */
+               acl_match_check_x4(0, ctx, parms, &flows, index_array);
+       }
+
+       return 0;
+}
index 4ba9786..d1f40be 100644 (file)
@@ -75,12 +75,23 @@ rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx,
        return -ENOTSUP;
 }
 
+int __attribute__ ((weak))
+rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx,
+       __rte_unused const uint8_t **data,
+       __rte_unused uint32_t *results,
+       __rte_unused uint32_t num,
+       __rte_unused uint32_t categories)
+{
+       return -ENOTSUP;
+}
+
 static const rte_acl_classify_t classify_fns[] = {
        [RTE_ACL_CLASSIFY_DEFAULT] = rte_acl_classify_scalar,
        [RTE_ACL_CLASSIFY_SCALAR] = rte_acl_classify_scalar,
        [RTE_ACL_CLASSIFY_SSE] = rte_acl_classify_sse,
        [RTE_ACL_CLASSIFY_AVX2] = rte_acl_classify_avx2,
        [RTE_ACL_CLASSIFY_NEON] = rte_acl_classify_neon,
+       [RTE_ACL_CLASSIFY_ALTIVEC] = rte_acl_classify_altivec,
 };
 
 /* by default, use always available scalar code path. */
@@ -119,6 +130,8 @@ rte_acl_init(void)
 #elif defined(RTE_ARCH_ARM)
        if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
                alg =  RTE_ACL_CLASSIFY_NEON;
+#elif defined(RTE_ARCH_PPC_64)
+       alg = RTE_ACL_CLASSIFY_ALTIVEC;
 #else
 #ifdef CC_AVX2_SUPPORT
        if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
@@ -300,8 +313,7 @@ acl_check_rule(const struct rte_acl_rule_data *rd)
        if ((RTE_LEN2MASK(RTE_ACL_MAX_CATEGORIES, typeof(rd->category_mask)) &
                        rd->category_mask) == 0 ||
                        rd->priority > RTE_ACL_MAX_PRIORITY ||
-                       rd->priority < RTE_ACL_MIN_PRIORITY ||
-                       rd->userdata == RTE_ACL_INVALID_USERDATA)
+                       rd->priority < RTE_ACL_MIN_PRIORITY)
                return -EINVAL;
        return 0;
 }
index 0979a09..b53179a 100644 (file)
@@ -120,8 +120,6 @@ enum {
        RTE_ACL_MIN_PRIORITY = 0,
 };
 
-#define        RTE_ACL_INVALID_USERDATA        0
-
 #define        RTE_ACL_MASKLEN_TO_BITMASK(v, s)        \
 ((v) == 0 ? (v) : (typeof(v))((uint64_t)-1 << ((s) * CHAR_BIT - (v))))
 
@@ -144,7 +142,7 @@ struct rte_acl_rule_data {
        struct rte_acl_field field[fld_num];         \
 }
 
-RTE_ACL_RULE_DEF(rte_acl_rule, 0);
+RTE_ACL_RULE_DEF(rte_acl_rule,);
 
 #define        RTE_ACL_RULE_SZ(fld_num)        \
        (sizeof(struct rte_acl_rule) + sizeof(struct rte_acl_field) * (fld_num))
@@ -271,6 +269,7 @@ enum rte_acl_classify_alg {
        RTE_ACL_CLASSIFY_SSE = 2,     /**< requires SSE4.1 support. */
        RTE_ACL_CLASSIFY_AVX2 = 3,    /**< requires AVX2 support. */
        RTE_ACL_CLASSIFY_NEON = 4,    /**< requires NEON support. */
+       RTE_ACL_CLASSIFY_ALTIVEC = 5,    /**< requires ALTIVEC support. */
        RTE_ACL_CLASSIFY_NUM          /* should always be the last one. */
 };
 
index f649836..b40e6a1 100644 (file)
@@ -34,6 +34,8 @@
 #ifndef __INCLUDE_RTE_CFGFILE_H__
 #define __INCLUDE_RTE_CFGFILE_H__
 
+#include <stddef.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -86,7 +88,7 @@ struct rte_cfgfile *rte_cfgfile_load(const char *filename, int flags);
 * @param length
 *   Maximum section name length
 * @return
-*   0 on success, error code otherwise
+*   Number of sections
 */
 int rte_cfgfile_num_sections(struct rte_cfgfile *cfg, const char *sec_name,
        size_t length);
@@ -100,13 +102,13 @@ int rte_cfgfile_num_sections(struct rte_cfgfile *cfg, const char *sec_name,
 * @param cfg
 *   Config file
 * @param sections
-*   Array containing section names after successful invocation. Each elemen
+*   Array containing section names after successful invocation. Each element
 *   of this array should be preallocated by the user with at least
 *   CFG_NAME_LEN characters.
 * @param max_sections
 *   Maximum number of section names to be stored in sections array
 * @return
-*   0 on success, error code otherwise
+*   Number of populated section names
 */
 int rte_cfgfile_sections(struct rte_cfgfile *cfg, char *sections[],
        int max_sections);
@@ -134,12 +136,13 @@ int rte_cfgfile_has_section(struct rte_cfgfile *cfg, const char *sectionname);
 * @param sectionname
 *   Section name
 * @return
-*   Number of entries in section
+*   Number of entries in section on success, -1 otherwise
 */
 int rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg,
        const char *sectionname);
 
-/** Get section entries as key-value pairs
+/**
+* Get section entries as key-value pairs
 *
 * If multiple sections have the given name this function operates on the
 * first one.
@@ -154,14 +157,15 @@ int rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg,
 * @param max_entries
 *   Maximum number of section entries to be stored in entries array
 * @return
-*   0 on success, error code otherwise
+*   Number of entries populated on success, -1 otherwise
 */
 int rte_cfgfile_section_entries(struct rte_cfgfile *cfg,
        const char *sectionname,
        struct rte_cfgfile_entry *entries,
        int max_entries);
 
-/** Get section entries as key-value pairs
+/**
+* Get section entries as key-value pairs
 *
 * The index of a section is the same as the index of its name in the
 * result of rte_cfgfile_sections. This API can be used when there are
@@ -180,7 +184,7 @@ int rte_cfgfile_section_entries(struct rte_cfgfile *cfg,
 * @param max_entries
 *   Maximum number of section entries to be stored in entries array
 * @return
-*   Number of entries populated on success, negative error code otherwise
+*   Number of entries populated on success, -1 otherwise
 */
 int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg,
        int index,
@@ -188,7 +192,8 @@ int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg,
        struct rte_cfgfile_entry *entries,
        int max_entries);
 
-/** Get value of the named entry in named config file section
+/**
+* Get value of the named entry in named config file section
 *
 * If multiple sections have the given name this function operates on the
 * first one.
@@ -200,13 +205,14 @@ int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg,
 * @param entryname
 *   Entry name
 * @return
-*   Entry value
+*   Entry value on success, NULL otherwise
 */
 const char *rte_cfgfile_get_entry(struct rte_cfgfile *cfg,
        const char *sectionname,
        const char *entryname);
 
-/** Check if given entry exists in named config file section
+/**
+* Check if given entry exists in named config file section
 *
 * If multiple sections have the given name this function operates on the
 * first one.
@@ -223,12 +229,13 @@ const char *rte_cfgfile_get_entry(struct rte_cfgfile *cfg,
 int rte_cfgfile_has_entry(struct rte_cfgfile *cfg, const char *sectionname,
        const char *entryname);
 
-/** Close config file
+/**
+* Close config file
 *
 * @param cfg
 *   Config file
 * @return
-*   0 on success, error code otherwise
+*   0 on success, -1 otherwise
 */
 int rte_cfgfile_close(struct rte_cfgfile *cfg);
 
index c6ca3b9..da971de 100644 (file)
@@ -216,15 +216,19 @@ static int
 contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
                struct vm_object **obj, int nprot)
 {
+       uint64_t buffer_index;
+
        /*
         * The buffer index is encoded in the offset.  Divide the offset by
         *  PAGE_SIZE to get the index of the buffer requested by the user
         *  app.
         */
-       if ((*offset/PAGE_SIZE) >= contigmem_num_buffers)
+       buffer_index = *offset / PAGE_SIZE;
+       if (buffer_index >= contigmem_num_buffers)
                return EINVAL;
 
-       *offset = (vm_ooffset_t)vtophys(contigmem_buffers[*offset/PAGE_SIZE]);
+       memset(contigmem_buffers[buffer_index], 0, contigmem_buffer_size);
+       *offset = (vm_ooffset_t)vtophys(contigmem_buffers[buffer_index]);
        *obj = vm_pager_allocate(OBJT_DEVICE, cdev, size, nprot, *offset,
                        curthread->td_ucred);
 
index a0c8f8c..ee7c9de 100644 (file)
@@ -64,6 +64,7 @@
 #include <rte_string_fns.h>
 #include <rte_cpuflags.h>
 #include <rte_interrupts.h>
+#include <rte_bus.h>
 #include <rte_pci.h>
 #include <rte_dev.h>
 #include <rte_devargs.h>
@@ -496,14 +497,14 @@ rte_eal_init(int argc, char **argv)
        char cpuset[RTE_CPU_AFFINITY_STR_LEN];
        char thread_name[RTE_MAX_THREAD_NAME_LEN];
 
+       /* checks if the machine is adequate */
+       rte_cpu_check_supported();
+
        if (!rte_atomic32_test_and_set(&run_once))
                return -1;
 
        thread_id = pthread_self();
 
-       if (rte_eal_log_early_init() < 0)
-               rte_panic("Cannot init early logs\n");
-
        eal_log_level_parse(argc, argv);
 
        /* set log level as early as possible */
@@ -552,9 +553,6 @@ rte_eal_init(int argc, char **argv)
        if (rte_eal_tailqs_init() < 0)
                rte_panic("Cannot init tail queues for objects\n");
 
-/*     if (rte_eal_log_init(argv[0], internal_config.syslog_facility) < 0)
-               rte_panic("Cannot init logs\n");*/
-
        if (rte_eal_alarm_init() < 0)
                rte_panic("Cannot init interrupt-handling thread\n");
 
@@ -580,8 +578,8 @@ rte_eal_init(int argc, char **argv)
                rte_config.master_lcore, thread_id, cpuset,
                ret == 0 ? "" : "...");
 
-       if (rte_eal_dev_init() < 0)
-               rte_panic("Cannot init pmd devices\n");
+       if (rte_bus_scan())
+               rte_panic("Cannot scan the buses for devices\n");
 
        RTE_LCORE_FOREACH_SLAVE(i) {
 
@@ -615,10 +613,17 @@ rte_eal_init(int argc, char **argv)
        rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
        rte_eal_mp_wait_lcore();
 
+       /* Probe all the buses and devices/drivers on them */
+       if (rte_bus_probe())
+               rte_panic("Cannot probe devices\n");
+
        /* Probe & Initialize PCI devices */
        if (rte_eal_pci_probe())
                rte_panic("Cannot probe PCI\n");
 
+       if (rte_eal_dev_init() < 0)
+               rte_panic("Cannot init pmd devices\n");
+
        rte_eal_mcfg_complete();
 
        return fctret;
index 836e483..ea2afff 100644 (file)
 #include "eal_private.h"
 
 int
-rte_intr_callback_register(struct rte_intr_handle *intr_handle __rte_unused,
-                       rte_intr_callback_fn cb __rte_unused,
-                       void *cb_arg __rte_unused)
+rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
+                       rte_intr_callback_fn cb,
+                       void *cb_arg)
 {
+       RTE_SET_USED(intr_handle);
+       RTE_SET_USED(cb);
+       RTE_SET_USED(cb_arg);
+
        return -ENOTSUP;
 }
 
 int
-rte_intr_callback_unregister(struct rte_intr_handle *intr_handle __rte_unused,
-                       rte_intr_callback_fn cb_fn __rte_unused,
-                       void *cb_arg __rte_unused)
+rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
+                       rte_intr_callback_fn cb,
+                       void *cb_arg)
 {
+       RTE_SET_USED(intr_handle);
+       RTE_SET_USED(cb);
+       RTE_SET_USED(cb_arg);
+
        return -ENOTSUP;
 }
 
 int
-rte_intr_enable(struct rte_intr_handle *intr_handle __rte_unused)
+rte_intr_enable(const struct rte_intr_handle *intr_handle __rte_unused)
 {
        return -ENOTSUP;
 }
 
 int
-rte_intr_disable(struct rte_intr_handle *intr_handle __rte_unused)
+rte_intr_disable(const struct rte_intr_handle *intr_handle __rte_unused)
 {
        return -ENOTSUP;
 }
index 374b68f..3a5c315 100644 (file)
  * enabling bus master.
  */
 
-/* unbind kernel driver for this device */
-int
-pci_unbind_kernel_driver(struct rte_pci_device *dev __rte_unused)
-{
-       RTE_LOG(ERR, EAL, "RTE_PCI_DRV_FORCE_UNBIND flag is not implemented "
-               "for BSD\n");
-       return -ENOTSUP;
-}
-
 /* Map pci device */
 int
 rte_eal_pci_map_device(struct rte_pci_device *dev)
@@ -287,7 +278,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
        dev->max_vfs = 0;
 
        /* FreeBSD has no NUMA support (yet) */
-       dev->numa_node = 0;
+       dev->device.numa_node = 0;
 
        /* FreeBSD has only one pass through driver */
        dev->kdrv = RTE_KDRV_NIC_UIO;
@@ -406,6 +397,55 @@ error:
        return -1;
 }
 
+int
+pci_update_device(const struct rte_pci_addr *addr)
+{
+       int fd;
+       struct pci_conf matches[2];
+       struct pci_match_conf match = {
+               .pc_sel = {
+                       .pc_domain = addr->domain,
+                       .pc_bus = addr->bus,
+                       .pc_dev = addr->devid,
+                       .pc_func = addr->function,
+               },
+       };
+       struct pci_conf_io conf_io = {
+               .pat_buf_len = 0,
+               .num_patterns = 1,
+               .patterns = &match,
+               .match_buf_len = sizeof(matches),
+               .matches = &matches[0],
+       };
+
+       fd = open("/dev/pci", O_RDONLY);
+       if (fd < 0) {
+               RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
+               goto error;
+       }
+
+       if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) {
+               RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n",
+                               __func__, strerror(errno));
+               goto error;
+       }
+
+       if (conf_io.num_matches != 1)
+               goto error;
+
+       if (pci_scan_one(fd, &matches[0]) < 0)
+               goto error;
+
+       close(fd);
+
+       return 0;
+
+error:
+       if (fd >= 0)
+               close(fd);
+       return -1;
+}
+
 /* Read PCI config space. */
 int rte_eal_pci_read_config(const struct rte_pci_device *dev,
                            void *buf, size_t len, off_t offset)
@@ -623,9 +663,6 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p)
 int
 rte_eal_pci_init(void)
 {
-       TAILQ_INIT(&pci_driver_list);
-       TAILQ_INIT(&pci_device_list);
-
        /* for debug purposes, PCI can be disabled */
        if (internal_config.no_pci)
                return 0;
diff --git a/src/dpdk/lib/librte_eal/common/eal_common_bus.c b/src/dpdk/lib/librte_eal/common/eal_common_bus.c
new file mode 100644 (file)
index 0000000..4638e78
--- /dev/null
@@ -0,0 +1,133 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 NXP
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of NXP nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_bus.h>
+
+#include "eal_private.h"
+
+struct rte_bus_list rte_bus_list =
+       TAILQ_HEAD_INITIALIZER(rte_bus_list);
+
+void
+rte_bus_register(struct rte_bus *bus)
+{
+       RTE_VERIFY(bus);
+       RTE_VERIFY(bus->name && strlen(bus->name));
+       /* A bus must implement both scan and probe */
+       RTE_VERIFY(bus->scan);
+       RTE_VERIFY(bus->probe);
+
+       TAILQ_INSERT_TAIL(&rte_bus_list, bus, next);
+       RTE_LOG(DEBUG, EAL, "Registered [%s] bus.\n", bus->name);
+}
+
+void
+rte_bus_unregister(struct rte_bus *bus)
+{
+       TAILQ_REMOVE(&rte_bus_list, bus, next);
+       RTE_LOG(DEBUG, EAL, "Unregistered [%s] bus.\n", bus->name);
+}
+
+/* Scan all the buses for registered devices */
+int
+rte_bus_scan(void)
+{
+       int ret;
+       struct rte_bus *bus = NULL;
+
+       TAILQ_FOREACH(bus, &rte_bus_list, next) {
+               ret = bus->scan();
+               if (ret) {
+                       RTE_LOG(ERR, EAL, "Scan for (%s) bus failed.\n",
+                               bus->name);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+/* Probe all devices of all buses */
+int
+rte_bus_probe(void)
+{
+       int ret;
+       struct rte_bus *bus;
+
+       TAILQ_FOREACH(bus, &rte_bus_list, next) {
+               ret = bus->probe();
+               if (ret) {
+                       RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n",
+                               bus->name);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+/* Dump information of a single bus */
+static int
+bus_dump_one(FILE *f, struct rte_bus *bus)
+{
+       int ret;
+
+       /* For now, dump only the bus name */
+       ret = fprintf(f, " %s\n", bus->name);
+
+       /* Report an error if writing to the stream failed */
+       if (ret < 0)
+               return ret;
+
+       return 0;
+}
+
+void
+rte_bus_dump(FILE *f)
+{
+       int ret;
+       struct rte_bus *bus;
+
+       TAILQ_FOREACH(bus, &rte_bus_list, next) {
+               ret = bus_dump_one(f, bus);
+               if (ret) {
+                       RTE_LOG(ERR, EAL, "Unable to write to stream (%d)\n",
+                               ret);
+                       break;
+               }
+       }
+}
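
A minimal registration sketch that satisfies the RTE_VERIFY() checks in rte_bus_register() above; the bus name, the no-op callbacks and the constructor-based registration are illustrative assumptions, not part of this patch:

#include <rte_bus.h>

static int
dummy_bus_scan(void)
{
	return 0;	/* nothing to discover */
}

static int
dummy_bus_probe(void)
{
	return 0;	/* nothing to match */
}

static struct rte_bus dummy_bus = {
	.name = "dummy",
	.scan = dummy_bus_scan,
	.probe = dummy_bus_probe,
};

/* Register from a constructor so the bus is known before rte_eal_init(). */
static void __attribute__((constructor))
dummy_bus_init(void)
{
	rte_bus_register(&dummy_bus);
}
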
index ecb1240..b5f76f7 100644 (file)
 /**
  * Checks if the machine is adequate for running the binary. If it is not, the
  * program exits with status 1.
- * The function attribute forces this function to be called before main(). But
- * with ICC, the check is generated by the compiler.
  */
-#ifndef __INTEL_COMPILER
-void __attribute__ ((__constructor__))
-#else
 void
-#endif
 rte_cpu_check_supported(void)
 {
        /* This is generated at compile-time by the build system */
index a8a4146..4f3b493 100644 (file)
@@ -48,6 +48,9 @@
 /** Global list of device drivers. */
 static struct rte_driver_list dev_driver_list =
        TAILQ_HEAD_INITIALIZER(dev_driver_list);
+/** Global list of devices. */
+static struct rte_device_list dev_device_list =
+       TAILQ_HEAD_INITIALIZER(dev_device_list);
 
 /* register a driver */
 void
@@ -63,42 +66,25 @@ rte_eal_driver_unregister(struct rte_driver *driver)
        TAILQ_REMOVE(&dev_driver_list, driver, next);
 }
 
-int
-rte_eal_vdev_init(const char *name, const char *args)
+void rte_eal_device_insert(struct rte_device *dev)
 {
-       struct rte_driver *driver;
-
-       if (name == NULL)
-               return -EINVAL;
-
-       TAILQ_FOREACH(driver, &dev_driver_list, next) {
-               if (driver->type != PMD_VDEV)
-                       continue;
-
-               /*
-                * search a driver prefix in virtual device name.
-                * For example, if the driver is pcap PMD, driver->name
-                * will be "eth_pcap", but "name" will be "eth_pcapN".
-                * So use strncmp to compare.
-                */
-               if (!strncmp(driver->name, name, strlen(driver->name)))
-                       return driver->init(name, args);
-       }
+       TAILQ_INSERT_TAIL(&dev_device_list, dev, next);
+}
 
-       RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
-       return -EINVAL;
+void rte_eal_device_remove(struct rte_device *dev)
+{
+       TAILQ_REMOVE(&dev_device_list, dev, next);
 }
 
 int
 rte_eal_dev_init(void)
 {
        struct rte_devargs *devargs;
-       struct rte_driver *driver;
 
        /*
         * Note that the dev_driver_list is populated here
         * from calls made to rte_eal_driver_register from constructor functions
-        * embedded into PMD modules via the PMD_REGISTER_DRIVER macro
+        * embedded into PMD modules via the RTE_PMD_REGISTER_VDEV macro
         */
 
        /* call the init function for each virtual device */
@@ -115,38 +101,53 @@ rte_eal_dev_init(void)
                }
        }
 
-       /* Once the vdevs are initalized, start calling all the pdev drivers */
-       TAILQ_FOREACH(driver, &dev_driver_list, next) {
-               if (driver->type != PMD_PDEV)
-                       continue;
-               /* PDEV drivers don't get passed any parameters */
-               driver->init(NULL, NULL);
-       }
        return 0;
 }
 
-int
-rte_eal_vdev_uninit(const char *name)
+int rte_eal_dev_attach(const char *name, const char *devargs)
 {
-       struct rte_driver *driver;
+       struct rte_pci_addr addr;
 
-       if (name == NULL)
+       if (name == NULL || devargs == NULL) {
+               RTE_LOG(ERR, EAL, "Invalid device or arguments provided\n");
                return -EINVAL;
+       }
 
-       TAILQ_FOREACH(driver, &dev_driver_list, next) {
-               if (driver->type != PMD_VDEV)
-                       continue;
+       if (eal_parse_pci_DomBDF(name, &addr) == 0) {
+               if (rte_eal_pci_probe_one(&addr) < 0)
+                       goto err;
+
+       } else {
+               if (rte_eal_vdev_init(name, devargs))
+                       goto err;
+       }
+
+       return 0;
+
+err:
+       RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n", name);
+       return -EINVAL;
+}
+
+int rte_eal_dev_detach(const char *name)
+{
+       struct rte_pci_addr addr;
 
-               /*
-                * search a driver prefix in virtual device name.
-                * For example, if the driver is pcap PMD, driver->name
-                * will be "eth_pcap", but "name" will be "eth_pcapN".
-                * So use strncmp to compare.
-                */
-               if (!strncmp(driver->name, name, strlen(driver->name)))
-                       return driver->uninit(name);
+       if (name == NULL) {
+               RTE_LOG(ERR, EAL, "Invalid device provided.\n");
+               return -EINVAL;
        }
 
-       RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
+       if (eal_parse_pci_DomBDF(name, &addr) == 0) {
+               if (rte_eal_pci_detach(&addr) < 0)
+                       goto err;
+       } else {
+               if (rte_eal_vdev_uninit(name))
+                       goto err;
+       }
+       return 0;
+
+err:
+       RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n", name);
        return -EINVAL;
 }
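
A minimal hot-plug sketch using the attach helper above; the vdev name and its arguments are hypothetical, not part of this patch:

#include <rte_dev.h>

static int
attach_pcap_port(void)
{
	/* A PCI address such as "0000:01:00.0" would take the PCI branch
	 * of rte_eal_dev_attach() instead of the vdev one. */
	return rte_eal_dev_attach("net_pcap0", "iface=eth0");
}
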
index e403717..ffa8ad9 100644 (file)
@@ -72,6 +72,7 @@ rte_eal_parse_devargs_str(const char *devargs_str,
 
        if (*drvargs == NULL) {
                free(*drvname);
+               *drvname = NULL;
                return -1;
        }
        return 0;
index 7916c78..2197558 100644 (file)
@@ -48,11 +48,12 @@ struct rte_logs rte_logs = {
        .file = NULL,
 };
 
+/* Stream to use for logging if rte_logs.file is NULL */
 static FILE *default_log_stream;
 
 /**
  * This global structure stores some informations about the message
- * that is currently beeing processed by one lcore
+ * that is currently being processed by one lcore
  */
 struct log_cur_msg {
        uint32_t loglevel; /**< log level - see rte_log.h */
@@ -64,27 +65,11 @@ static RTE_DEFINE_PER_LCORE(struct log_cur_msg, log_cur_msg);
 
 /* default logs */
 
-int
-rte_log_add_in_history(const char *buf __rte_unused, size_t size __rte_unused)
-{
-       return 0;
-}
-
-void
-rte_log_set_history(int enable)
-{
-       if (enable)
-               RTE_LOG(WARNING, EAL, "The log history is deprecated.\n");
-}
-
 /* Change the stream that will be used by logging system */
 int
 rte_openlog_stream(FILE *f)
 {
-       if (f == NULL)
-               rte_logs.file = default_log_stream;
-       else
-               rte_logs.file = f;
+       rte_logs.file = f;
        return 0;
 }
 
@@ -131,12 +116,6 @@ int rte_log_cur_msg_logtype(void)
        return RTE_PER_LCORE(log_cur_msg).logtype;
 }
 
-/* Dump log history to file */
-void
-rte_log_dump_history(FILE *out __rte_unused)
-{
-}
-
 /*
  * Generates a log message The message will be sent in the stream
  * defined by the previous call to rte_openlog_stream().
@@ -146,6 +125,19 @@ rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap)
 {
        int ret;
        FILE *f = rte_logs.file;
+       if (f == NULL) {
+               f = default_log_stream;
+               if (f == NULL) {
+                       /*
+                        * Grab the current value of stderr here, rather than
+                        * just initializing default_log_stream to stderr. This
+                        * ensures that we will always use the current value
+                        * of stderr, even if the application closes and
+                        * reopens it.
+                        */
+                       f = stderr;
+               }
+       }
 
        if ((level > rte_logs.level) || !(logtype & rte_logs.type))
                return 0;
@@ -177,17 +169,15 @@ rte_log(uint32_t level, uint32_t logtype, const char *format, ...)
 }
 
 /*
- * called by environment-specific log init function
+ * Called by environment-specific initialization functions.
  */
-int
-rte_eal_common_log_init(FILE *default_log)
+void
+eal_log_set_default(FILE *default_log)
 {
        default_log_stream = default_log;
-       rte_openlog_stream(default_log);
 
-#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG
-       RTE_LOG(NOTICE, EAL, "Debug logs available - lower performance\n");
+#if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG
+       RTE_LOG(NOTICE, EAL,
+               "Debug dataplane logs available - lower performance\n");
 #endif
-
-       return 0;
 }
index 1bd0a33..64f4e0a 100644 (file)
@@ -337,19 +337,7 @@ rte_memzone_free(const struct rte_memzone *mz)
        idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone);
        idx = idx / sizeof(struct rte_memzone);
 
-#ifdef RTE_LIBRTE_IVSHMEM
-       /*
-        * If ioremap_addr is set, it's an IVSHMEM memzone and we cannot
-        * free it.
-        */
-       if (mcfg->memzone[idx].ioremap_addr != 0) {
-               rte_rwlock_write_unlock(&mcfg->mlock);
-               return -EINVAL;
-       }
-#endif
-
        addr = mcfg->memzone[idx].addr;
-
        if (addr == NULL)
                ret = -EINVAL;
        else if (mcfg->memzone_cnt == 0) {
index 1a1bab3..f36bc55 100644 (file)
@@ -118,7 +118,7 @@ static const char *default_solib_dir = RTE_EAL_PMD_PATH;
 /*
  * Stringified version of solib path used by dpdk-pmdinfo.py
  * Note: PLEASE DO NOT ALTER THIS without making a corresponding
- * change to tools/dpdk-pmdinfo.py
+ * change to usertools/dpdk-pmdinfo.py
  */
 static const char dpdk_solib_path[] __attribute__((used)) =
 "DPDK_PLUGIN_PATH=" RTE_EAL_PMD_PATH;
@@ -126,6 +126,7 @@ static const char dpdk_solib_path[] __attribute__((used)) =
 
 static int master_lcore_parsed;
 static int mem_parsed;
+static int core_parsed;
 
 void
 eal_reset_internal_config(struct internal_config *internal_cfg)
@@ -797,6 +798,7 @@ eal_parse_common_option(int opt, const char *optarg,
                        RTE_LOG(ERR, EAL, "invalid coremask\n");
                        return -1;
                }
+               core_parsed = 1;
                break;
        /* corelist */
        case 'l':
@@ -804,6 +806,7 @@ eal_parse_common_option(int opt, const char *optarg,
                        RTE_LOG(ERR, EAL, "invalid core list\n");
                        return -1;
                }
+               core_parsed = 1;
                break;
        /* size of memory */
        case 'm':
@@ -912,6 +915,7 @@ eal_parse_common_option(int opt, const char *optarg,
                                OPT_LCORES "\n");
                        return -1;
                }
+               core_parsed = 1;
                break;
 
        /* don't know what to do, leave this to caller */
@@ -923,12 +927,38 @@ eal_parse_common_option(int opt, const char *optarg,
        return 0;
 }
 
+static void
+eal_auto_detect_cores(struct rte_config *cfg)
+{
+       unsigned int lcore_id;
+       unsigned int removed = 0;
+       rte_cpuset_t affinity_set;
+       pthread_t tid = pthread_self();
+
+       if (pthread_getaffinity_np(tid, sizeof(rte_cpuset_t),
+                               &affinity_set) < 0)
+               CPU_ZERO(&affinity_set);
+
+       for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+               if (cfg->lcore_role[lcore_id] == ROLE_RTE &&
+                   !CPU_ISSET(lcore_id, &affinity_set)) {
+                       cfg->lcore_role[lcore_id] = ROLE_OFF;
+                       removed++;
+               }
+       }
+
+       cfg->lcore_count -= removed;
+}
+
 int
 eal_adjust_config(struct internal_config *internal_cfg)
 {
        int i;
        struct rte_config *cfg = rte_eal_get_configuration();
 
+       if (!core_parsed)
+               eal_auto_detect_cores(cfg);
+
        if (internal_config.process_type == RTE_PROC_AUTO)
                internal_config.process_type = eal_proc_type_detect();
 
@@ -1021,7 +1051,7 @@ eal_common_usage(void)
               "                      [NOTE: PCI whitelist cannot be used with -b option]\n"
               "  --"OPT_VDEV"              Add a virtual device.\n"
               "                      The argument format is <driver><id>[,key=val,...]\n"
-              "                      (ex: --vdev=eth_pcap0,iface=eth2).\n"
+              "                      (ex: --vdev=net_pcap0,iface=eth2).\n"
               "  -d LIB.so|DIR       Add a driver or driver directory\n"
               "                      (can be used multiple times)\n"
               "  --"OPT_VMWARE_TSC_MAP"    Use VMware TSC map instead of native RDTSC\n"
index 7248c38..72547bd 100644 (file)
 
 #include "eal_private.h"
 
-struct pci_driver_list pci_driver_list;
-struct pci_device_list pci_device_list;
+struct pci_driver_list pci_driver_list =
+       TAILQ_HEAD_INITIALIZER(pci_driver_list);
+struct pci_device_list pci_device_list =
+       TAILQ_HEAD_INITIALIZER(pci_device_list);
 
 #define SYSFS_PCI_DEVICES "/sys/bus/pci/devices"
 
@@ -151,7 +153,7 @@ pci_unmap_resource(void *requested_addr, size_t size)
 }
 
 /*
- * If vendor/device ID match, call the devinit() function of the
+ * If the vendor/device IDs match, call the probe() function of the
  * driver.
  */
 static int
@@ -183,42 +185,45 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *d
 
                RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
                                loc->domain, loc->bus, loc->devid, loc->function,
-                               dev->numa_node);
+                               dev->device.numa_node);
 
                /* no initialization when blacklisted, return without error */
-               if (dev->devargs != NULL &&
-                       dev->devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) {
+               if (dev->device.devargs != NULL &&
+                       dev->device.devargs->type ==
+                               RTE_DEVTYPE_BLACKLISTED_PCI) {
                        RTE_LOG(INFO, EAL, "  Device is blacklisted, not initializing\n");
                        return 1;
                }
 
                RTE_LOG(INFO, EAL, "  probe driver: %x:%x %s\n", dev->id.vendor_id,
-                               dev->id.device_id, dr->name);
+                               dev->id.device_id, dr->driver.name);
 
                if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
                        /* map resources for devices that use igb_uio */
                        ret = rte_eal_pci_map_device(dev);
                        if (ret != 0)
                                return ret;
-               } else if (dr->drv_flags & RTE_PCI_DRV_FORCE_UNBIND &&
-                               rte_eal_process_type() == RTE_PROC_PRIMARY) {
-                       /* unbind current driver */
-                       if (pci_unbind_kernel_driver(dev) < 0)
-                               return -1;
                }
 
                /* reference driver structure */
                dev->driver = dr;
 
-               /* call the driver devinit() function */
-               return dr->devinit(dr, dev);
+               /* call the driver probe() function */
+               ret = dr->probe(dr, dev);
+               if (ret) {
+                       dev->driver = NULL;
+                       if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)
+                               rte_eal_pci_unmap_device(dev);
+               }
+
+               return ret;
        }
        /* return positive value if driver doesn't support this device */
        return 1;
 }
 
 /*
- * If vendor/device ID match, call the devuninit() function of the
+ * If vendor/device ID match, call the remove() function of the
  * driver.
  */
 static int
@@ -250,12 +255,12 @@ rte_eal_pci_detach_dev(struct rte_pci_driver *dr,
 
                RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
                                loc->domain, loc->bus, loc->devid,
-                               loc->function, dev->numa_node);
+                               loc->function, dev->device.numa_node);
 
                RTE_LOG(DEBUG, EAL, "  remove driver: %x:%x %s\n", dev->id.vendor_id,
-                               dev->id.device_id, dr->name);
+                               dev->id.device_id, dr->driver.name);
 
-               if (dr->devuninit && (dr->devuninit(dev) < 0))
+               if (dr->remove && (dr->remove(dev) < 0))
                        return -1;      /* negative value is an error */
 
                /* clear driver structure */
@@ -273,7 +278,7 @@ rte_eal_pci_detach_dev(struct rte_pci_driver *dr,
 }
 
 /*
- * If vendor/device ID match, call the devinit() function of all
+ * If vendor/device ID match, call the probe() function of all
  * registered driver for the given device. Return -1 if initialization
  * failed, return 1 if no driver is found for this device.
  */
@@ -286,6 +291,10 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
        if (dev == NULL)
                return -1;
 
+       /* Check if a driver is already loaded */
+       if (dev->driver != NULL)
+               return 0;
+
        TAILQ_FOREACH(dr, &pci_driver_list, next) {
                rc = rte_eal_pci_probe_one_driver(dr, dev);
                if (rc < 0)
@@ -300,7 +309,7 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
 }
 
 /*
- * If vendor/device ID match, call the devuninit() function of all
+ * If vendor/device ID match, call the remove() function of all
  * registered driver for the given device. Return -1 if initialization
  * failed, return 1 if no driver is found for this device.
  */
@@ -339,21 +348,27 @@ rte_eal_pci_probe_one(const struct rte_pci_addr *addr)
        if (addr == NULL)
                return -1;
 
+       /* update current pci device in global list, kernel bindings might have
+        * changed since last time we looked at it.
+        */
+       if (pci_update_device(addr) < 0)
+               goto err_return;
+
        TAILQ_FOREACH(dev, &pci_device_list, next) {
                if (rte_eal_compare_pci_addr(&dev->addr, addr))
                        continue;
 
                ret = pci_probe_all_drivers(dev);
-               if (ret < 0)
+               if (ret)
                        goto err_return;
                return 0;
        }
        return -1;
 
 err_return:
-       RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT
-                       " cannot be used\n", dev->addr.domain, dev->addr.bus,
-                       dev->addr.devid, dev->addr.function);
+       RTE_LOG(WARNING, EAL,
+               "Requested device " PCI_PRI_FMT " cannot be used\n",
+               addr->domain, addr->bus, addr->devid, addr->function);
        return -1;
 }
 
@@ -378,6 +393,7 @@ rte_eal_pci_detach(const struct rte_pci_addr *addr)
                        goto err_return;
 
                TAILQ_REMOVE(&pci_device_list, dev, next);
+               free(dev);
                return 0;
        }
        return -1;
@@ -390,7 +406,7 @@ err_return:
 }
 
 /*
- * Scan the content of the PCI bus, and call the devinit() function for
+ * Scan the content of the PCI bus, and call the probe() function for
  * all registered drivers that have a matching entry in its id_table
  * for discovered devices.
  */
@@ -410,7 +426,7 @@ rte_eal_pci_probe(void)
                /* set devargs in PCI structure */
                devargs = pci_devargs_lookup(dev);
                if (devargs != NULL)
-                       dev->devargs = devargs;
+                       dev->device.devargs = devargs;
 
                /* probe all or only whitelisted devices */
                if (probe_all)
@@ -463,11 +479,13 @@ void
 rte_eal_pci_register(struct rte_pci_driver *driver)
 {
        TAILQ_INSERT_TAIL(&pci_driver_list, driver, next);
+       rte_eal_driver_register(&driver->driver);
 }
 
 /* unregister a driver */
 void
 rte_eal_pci_unregister(struct rte_pci_driver *driver)
 {
+       rte_eal_driver_unregister(&driver->driver);
        TAILQ_REMOVE(&pci_driver_list, driver, next);
 }
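As an illustrative sketch (not part of the upstream patch), a PMD registration under the renamed callbacks might look roughly like this; the vendor/device IDs, the "net_mypmd" name and both callbacks are placeholders:

    #include <rte_pci.h>

    static int my_pmd_probe(struct rte_pci_driver *dr, struct rte_pci_device *dev);
    static int my_pmd_remove(struct rte_pci_device *dev);

    static const struct rte_pci_id my_pmd_ids[] = {
            { RTE_PCI_DEVICE(0x8086, 0x10fb) },  /* placeholder vendor/device */
            { .vendor_id = 0 },                  /* sentinel */
    };

    static struct rte_pci_driver my_pmd = {
            .driver    = { .name = "net_mypmd" },
            .id_table  = my_pmd_ids,
            .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
            .probe     = my_pmd_probe,           /* was .devinit */
            .remove    = my_pmd_remove,          /* was .devuninit */
    };
    /* registered at startup through rte_eal_pci_register(&my_pmd) */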
index c4227cd..7265617 100644 (file)
 /* The frequency of the RDTSC timer resolution */
 static uint64_t eal_tsc_resolution_hz;
 
+/* Pointer to user delay function */
+void (*rte_delay_us)(unsigned int) = NULL;
+
 void
-rte_delay_us(unsigned us)
+rte_delay_us_block(unsigned int us)
 {
        const uint64_t start = rte_get_timer_cycles();
        const uint64_t ticks = (uint64_t)us * rte_get_timer_hz() / 1E6;
@@ -84,3 +87,15 @@ set_tsc_freq(void)
        RTE_LOG(DEBUG, EAL, "TSC frequency is ~%" PRIu64 " KHz\n", freq / 1000);
        eal_tsc_resolution_hz = freq;
 }
+
+void rte_delay_us_callback_register(void (*userfunc)(unsigned int))
+{
+       rte_delay_us = userfunc;
+}
+
+static void __attribute__((constructor))
+rte_timer_init(void)
+{
+       /* set rte_delay_us_block as a delay function */
+       rte_delay_us_callback_register(rte_delay_us_block);
+}
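As an illustrative sketch (not part of the upstream patch), an application could now swap the busy-wait delay for one that sleeps; usleep() granularity is assumed to be acceptable here:

    #include <unistd.h>
    #include <rte_cycles.h>

    /* user delay that yields the CPU instead of spinning on the TSC */
    static void sleeping_delay_us(unsigned int us)
    {
            usleep(us);
    }

    static void install_sleeping_delay(void)
    {
            /* rte_delay_us() now dispatches to sleeping_delay_us(); registering
             * rte_delay_us_block again restores the built-in busy-wait. */
            rte_delay_us_callback_register(sleeping_delay_us);
    }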
diff --git a/src/dpdk/lib/librte_eal/common/eal_common_vdev.c b/src/dpdk/lib/librte_eal/common/eal_common_vdev.c
new file mode 100644 (file)
index 0000000..7d6e54f
--- /dev/null
@@ -0,0 +1,124 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 RehiveTech. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of RehiveTech nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <sys/queue.h>
+
+#include <rte_vdev.h>
+#include <rte_common.h>
+
+struct vdev_driver_list vdev_driver_list =
+       TAILQ_HEAD_INITIALIZER(vdev_driver_list);
+
+/* register a driver */
+void
+rte_eal_vdrv_register(struct rte_vdev_driver *driver)
+{
+       TAILQ_INSERT_TAIL(&vdev_driver_list, driver, next);
+       rte_eal_driver_register(&driver->driver);
+}
+
+/* unregister a driver */
+void
+rte_eal_vdrv_unregister(struct rte_vdev_driver *driver)
+{
+       rte_eal_driver_unregister(&driver->driver);
+       TAILQ_REMOVE(&vdev_driver_list, driver, next);
+}
+
+int
+rte_eal_vdev_init(const char *name, const char *args)
+{
+       struct rte_vdev_driver *driver;
+
+       if (name == NULL)
+               return -EINVAL;
+
+       TAILQ_FOREACH(driver, &vdev_driver_list, next) {
+               /*
+                * search a driver prefix in virtual device name.
+                * For example, if the driver is pcap PMD, driver->name
+                * will be "net_pcap", but "name" will be "net_pcapN".
+                * So use strncmp to compare.
+                */
+               if (!strncmp(driver->driver.name, name,
+                           strlen(driver->driver.name)))
+                       return driver->probe(name, args);
+       }
+
+       /* Give new names precedence over aliases. */
+       TAILQ_FOREACH(driver, &vdev_driver_list, next) {
+               if (driver->driver.alias &&
+                   !strncmp(driver->driver.alias, name,
+                           strlen(driver->driver.alias)))
+                       return driver->probe(name, args);
+       }
+
+       RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
+       return -EINVAL;
+}
+
+int
+rte_eal_vdev_uninit(const char *name)
+{
+       struct rte_vdev_driver *driver;
+
+       if (name == NULL)
+               return -EINVAL;
+
+       TAILQ_FOREACH(driver, &vdev_driver_list, next) {
+               /*
+                * search a driver prefix in virtual device name.
+                * For example, if the driver is pcap PMD, driver->name
+                * will be "net_pcap", but "name" will be "net_pcapN".
+                * So use strncmp to compare.
+                */
+               if (!strncmp(driver->driver.name, name,
+                            strlen(driver->driver.name)))
+                       return driver->remove(name);
+       }
+
+       /* Give new names precedence over aliases. */
+       TAILQ_FOREACH(driver, &vdev_driver_list, next) {
+               if (driver->driver.alias &&
+                   !strncmp(driver->driver.alias, name,
+                           strlen(driver->driver.alias)))
+                       return driver->remove(name);
+       }
+
+       RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
+       return -EINVAL;
+}
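As an illustrative sketch (not part of the upstream patch), and assuming the pcap PMD registers the driver name "net_pcap" with the legacy alias "eth_pcap", both calls below resolve to the same probe() through the prefix match above:

    #include <rte_dev.h>

    static int attach_pcap_ports(void)
    {
            int ret;

            ret = rte_eal_vdev_init("net_pcap0", "iface=eth2");  /* matches driver name  */
            if (ret)
                    return ret;
            return rte_eal_vdev_init("eth_pcap1", "iface=eth3"); /* matches driver alias */
    }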
index fdb4a70..8acbd99 100644 (file)
@@ -97,17 +97,6 @@ eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id
        return buffer;
 }
 
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-static inline const char *
-eal_get_hugefile_temp_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
-{
-       snprintf(buffer, buflen, TEMP_HUGEFILE_FMT, hugedir,
-                       internal_config.hugefile_prefix, f_id);
-       buffer[buflen - 1] = '\0';
-       return buffer;
-}
-#endif
-
 /** define the default filename prefix for the %s values above */
 #define HUGEFILE_PREFIX_DEFAULT "rte"
 
index 38edac0..68369f2 100644 (file)
@@ -52,9 +52,6 @@ struct hugepage_file {
        int socket_id;      /**< NUMA socket ID */
        int file_id;        /**< the '%d' in HUGEFILE_FMT */
        int memseg_id;      /**< the memory segment to which page belongs */
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-       int repeated;           /**< number of times the page size is repeated */
-#endif
        char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */
 };
 
index 857dc3e..9e7d8f6 100644 (file)
@@ -47,7 +47,9 @@
 int rte_eal_memzone_init(void);
 
 /**
- * Common log initialization function (private to eal).
+ * Common log initialization function (private to eal).  Determines
+ * where log data is written when no call to rte_openlog_stream is
+ * in effect.
  *
  * @param default_log
  *   The default log stream to be used.
@@ -55,7 +57,7 @@ int rte_eal_memzone_init(void);
  *   - 0 on success
  *   - Negative on error
  */
-int rte_eal_common_log_init(FILE *default_log);
+void eal_log_set_default(FILE *default_log);
 
 /**
  * Fill configuration with number of physical and logical processors
@@ -96,16 +98,6 @@ int rte_eal_memory_init(void);
  */
 int rte_eal_timer_init(void);
 
-/**
- * Init early logs
- *
- * This function is private to EAL.
- *
- * @return
- *   0 on success, negative on error
- */
-int rte_eal_log_early_init(void);
-
 /**
  * Init the default log stream
  *
@@ -117,7 +109,7 @@ int rte_eal_log_early_init(void);
 int rte_eal_log_init(const char *id, int facility);
 
 /**
- * Init the default log stream
+ * Init the PCI infrastructure
  *
  * This function is private to EAL.
  *
@@ -126,30 +118,21 @@ int rte_eal_log_init(const char *id, int facility);
  */
 int rte_eal_pci_init(void);
 
-#ifdef RTE_LIBRTE_IVSHMEM
-/**
- * Init the memory from IVSHMEM devices
- *
- * This function is private to EAL.
- *
- * @return
- *  0 on success, negative on error
- */
-int rte_eal_ivshmem_init(void);
+struct rte_pci_driver;
+struct rte_pci_device;
 
 /**
- * Init objects in IVSHMEM devices
+ * Update a pci device object by asking the kernel for the latest information.
  *
  * This function is private to EAL.
  *
+ * @param addr
+ *     The PCI Bus-Device-Function address to look for
  * @return
- *  0 on success, negative on error
+ *   - 0 on success.
+ *   - negative on error.
  */
-int rte_eal_ivshmem_obj_init(void);
-#endif
-
-struct rte_pci_driver;
-struct rte_pci_device;
+int pci_update_device(const struct rte_pci_addr *addr);
 
 /**
  * Unbind kernel driver for this device
@@ -258,13 +241,6 @@ int rte_eal_intr_init(void);
  */
 int rte_eal_alarm_init(void);
 
-/**
- * This function initialises any virtual devices
- *
- * This function is private to the EAL.
- */
-int rte_eal_dev_init(void);
-
 /**
  * Function is to check if the kernel module(like, vfio, vfio_iommu_type1,
  * etc.) loaded.
index b20056b..4eac666 100644 (file)
@@ -38,6 +38,8 @@
 extern "C" {
 #endif
 
+#include <stdint.h>
+#include <rte_common.h>
 #include <emmintrin.h>
 #include "generic/rte_atomic.h"
 
@@ -59,6 +61,12 @@ extern "C" {
 
 #define rte_smp_rmb() rte_compiler_barrier()
 
+#define rte_io_mb() rte_mb()
+
+#define rte_io_wmb() rte_compiler_barrier()
+
+#define rte_io_rmb() rte_compiler_barrier()
+
 /*------------------------- 16 bit atomic operations -------------------------*/
 
 #ifndef RTE_FORCE_INTRINSICS
index 400d8a9..2e04c75 100644 (file)
  * All rights reserved.
  */
 
+#ifndef _RTE_ATOMIC_X86_H_
+#error do not include this file directly, use <rte_atomic.h> instead
+#endif
+
 #ifndef _RTE_ATOMIC_I686_H_
 #define _RTE_ATOMIC_I686_H_
 
+#include <stdint.h>
+#include <rte_common.h>
+#include <rte_atomic.h>
+
 /*------------------------- 64 bit atomic operations -------------------------*/
 
 #ifndef RTE_FORCE_INTRINSICS
@@ -47,6 +55,7 @@ static inline int
 rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
 {
        uint8_t res;
+       RTE_STD_C11
        union {
                struct {
                        uint32_t l32;
index 4de6600..1a53a76 100644 (file)
  * All rights reserved.
  */
 
+#ifndef _RTE_ATOMIC_X86_H_
+#error do not include this file directly, use <rte_atomic.h> instead
+#endif
+
 #ifndef _RTE_ATOMIC_X86_64_H_
 #define _RTE_ATOMIC_X86_64_H_
 
+#include <stdint.h>
+#include <rte_common.h>
+#include <rte_atomic.h>
+
 /*------------------------- 64 bit atomic operations -------------------------*/
 
 #ifndef RTE_FORCE_INTRINSICS
index ffdb6ef..251f11b 100644 (file)
@@ -38,6 +38,8 @@
 extern "C" {
 #endif
 
+#include <stdint.h>
+#include <rte_common.h>
 #include "generic/rte_byteorder.h"
 
 #ifndef RTE_BYTE_ORDER
index 51c306f..14d6483 100644 (file)
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef _RTE_BYTEORDER_X86_H_
+#error do not include this file directly, use <rte_byteorder.h> instead
+#endif
+
 #ifndef _RTE_BYTEORDER_I686_H_
 #define _RTE_BYTEORDER_I686_H_
 
+#include <stdint.h>
+#include <rte_byteorder.h>
+
 /*
  * An architecture-optimized byte swap for a 64-bit value.
  *
index dda572b..516ac05 100644 (file)
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef _RTE_BYTEORDER_X86_H_
+#error do not include this file directly, use <rte_byteorder.h> instead
+#endif
+
 #ifndef _RTE_BYTEORDER_X86_64_H_
 #define _RTE_BYTEORDER_X86_64_H_
 
+#include <stdint.h>
+#include <rte_common.h>
+
 /*
  * An architecture-optimized byte swap for a 64-bit value.
  *
index 6e3c7d8..5eb6ce9 100644 (file)
@@ -75,12 +75,14 @@ extern "C" {
 extern int rte_cycles_vmware_tsc_map;
 #include <rte_branch_prediction.h>
 #endif
+#include <rte_common.h>
 
 static inline uint64_t
 rte_rdtsc(void)
 {
        union {
                uint64_t tsc_64;
+               RTE_STD_C11
                struct {
                        uint32_t lo_32;
                        uint32_t hi_32;
diff --git a/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_io.h b/src/dpdk/lib/librte_eal/common/include/arch/x86/rte_io.h
new file mode 100644 (file)
index 0000000..c8d1404
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Cavium networks. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium networks nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_IO_X86_H_
+#define _RTE_IO_X86_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_io.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_IO_X86_H_ */
index 413035e..b9785e8 100644 (file)
@@ -69,6 +69,8 @@ rte_memcpy(void *dst, const void *src, size_t n) __attribute__((always_inline));
 
 #ifdef RTE_MACHINE_CPUFLAG_AVX512F
 
+#define ALIGNMENT_MASK 0x3F
+
 /**
  * AVX512 implementation below
  */
@@ -189,7 +191,7 @@ rte_mov512blocks(uint8_t *dst, const uint8_t *src, size_t n)
 }
 
 static inline void *
-rte_memcpy(void *dst, const void *src, size_t n)
+rte_memcpy_generic(void *dst, const void *src, size_t n)
 {
        uintptr_t dstu = (uintptr_t)dst;
        uintptr_t srcu = (uintptr_t)src;
@@ -308,6 +310,8 @@ COPY_BLOCK_128_BACK63:
 
 #elif defined RTE_MACHINE_CPUFLAG_AVX2
 
+#define ALIGNMENT_MASK 0x1F
+
 /**
  * AVX2 implementation below
  */
@@ -387,7 +391,7 @@ rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n)
 }
 
 static inline void *
-rte_memcpy(void *dst, const void *src, size_t n)
+rte_memcpy_generic(void *dst, const void *src, size_t n)
 {
        uintptr_t dstu = (uintptr_t)dst;
        uintptr_t srcu = (uintptr_t)src;
@@ -499,6 +503,8 @@ COPY_BLOCK_128_BACK31:
 
 #else /* RTE_MACHINE_CPUFLAG */
 
+#define ALIGNMENT_MASK 0x0F
+
 /**
  * SSE & AVX implementation below
  */
@@ -594,7 +600,7 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
  * - __m128i <xmm0> ~ <xmm8> must be pre-defined
  */
 #define MOVEUNALIGNED_LEFT47_IMM(dst, src, len, offset)                                                     \
-({                                                                                                          \
+__extension__ ({                                                                                            \
     int tmp;                                                                                                \
     while (len >= 128 + 16 - offset) {                                                                      \
         xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16));                  \
@@ -655,7 +661,7 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
  * - __m128i <xmm0> ~ <xmm8> used in MOVEUNALIGNED_LEFT47_IMM must be pre-defined
  */
 #define MOVEUNALIGNED_LEFT47(dst, src, len, offset)                   \
-({                                                                    \
+__extension__ ({                                                      \
     switch (offset) {                                                 \
     case 0x01: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x01); break;    \
     case 0x02: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x02); break;    \
@@ -677,7 +683,7 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
 })
 
 static inline void *
-rte_memcpy(void *dst, const void *src, size_t n)
+rte_memcpy_generic(void *dst, const void *src, size_t n)
 {
        __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
        uintptr_t dstu = (uintptr_t)dst;
@@ -821,6 +827,75 @@ COPY_BLOCK_64_BACK15:
 
 #endif /* RTE_MACHINE_CPUFLAG */
 
+static inline void *
+rte_memcpy_aligned(void *dst, const void *src, size_t n)
+{
+       void *ret = dst;
+
+       /* Copy size <= 16 bytes */
+       if (n < 16) {
+               if (n & 0x01) {
+                       *(uint8_t *)dst = *(const uint8_t *)src;
+                       src = (const uint8_t *)src + 1;
+                       dst = (uint8_t *)dst + 1;
+               }
+               if (n & 0x02) {
+                       *(uint16_t *)dst = *(const uint16_t *)src;
+                       src = (const uint16_t *)src + 1;
+                       dst = (uint16_t *)dst + 1;
+               }
+               if (n & 0x04) {
+                       *(uint32_t *)dst = *(const uint32_t *)src;
+                       src = (const uint32_t *)src + 1;
+                       dst = (uint32_t *)dst + 1;
+               }
+               if (n & 0x08)
+                       *(uint64_t *)dst = *(const uint64_t *)src;
+
+               return ret;
+       }
+
+       /* Copy 16 <= size <= 32 bytes */
+       if (n <= 32) {
+               rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+               rte_mov16((uint8_t *)dst - 16 + n,
+                               (const uint8_t *)src - 16 + n);
+
+               return ret;
+       }
+
+       /* Copy 32 < size <= 64 bytes */
+       if (n <= 64) {
+               rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+               rte_mov32((uint8_t *)dst - 32 + n,
+                               (const uint8_t *)src - 32 + n);
+
+               return ret;
+       }
+
+       /* Copy 64 bytes blocks */
+       for (; n >= 64; n -= 64) {
+               rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+               dst = (uint8_t *)dst + 64;
+               src = (const uint8_t *)src + 64;
+       }
+
+       /* Copy whatever left */
+       rte_mov64((uint8_t *)dst - 64 + n,
+                       (const uint8_t *)src - 64 + n);
+
+       return ret;
+}
+
+static inline void *
+rte_memcpy(void *dst, const void *src, size_t n)
+{
+       if (!(((uintptr_t)dst | (uintptr_t)src) & ALIGNMENT_MASK))
+               return rte_memcpy_aligned(dst, src, n);
+       else
+               return rte_memcpy_generic(dst, src, n);
+}
+
 #ifdef __cplusplus
 }
 #endif
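As an illustrative sketch (not part of the upstream patch), the new dispatch takes the aligned fast path only when both pointers clear ALIGNMENT_MASK (0x0F, 0x1F or 0x3F depending on the build target); requesting 64-byte alignment from rte_malloc() satisfies all three:

    #include <rte_malloc.h>
    #include <rte_memcpy.h>

    static void copy_example(size_t len)
    {
            void *src = rte_malloc(NULL, len, 64);  /* 64-byte aligned */
            void *dst = rte_malloc(NULL, len, 64);  /* 64-byte aligned */

            if (src != NULL && dst != NULL)
                    rte_memcpy(dst, src, len);      /* rte_memcpy_aligned() path */

            rte_free(src);
            rte_free(dst);
    }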
index 5dac47e..f464398 100644 (file)
@@ -38,6 +38,7 @@
 extern "C" {
 #endif
 
+#include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
 static inline void rte_prefetch0(const volatile void *p)
index 0649f79..ab09995 100644 (file)
@@ -20,6 +20,7 @@
 /* Official RTM intrinsics interface matching gcc/icc, but works
    on older gcc compatible compilers and binutils. */
 
+#include <rte_common.h>
 
 #ifdef __cplusplus
 extern "C" {
index b698797..1b4b85d 100644 (file)
@@ -31,8 +31,8 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifndef _RTE_VECT_H_
-#define _RTE_VECT_H_
+#ifndef _RTE_VECT_X86_H_
+#define _RTE_VECT_X86_H_
 
 /**
  * @file
@@ -40,6 +40,9 @@
  * RTE SSE/AVX related header.
  */
 
+#include <stdint.h>
+#include "generic/rte_vect.h"
+
 #if (defined(__ICC) || (__GNUC__ == 4 &&  __GNUC_MINOR__ < 4))
 
 #ifdef __SSE__
@@ -106,7 +109,8 @@ typedef union rte_ymm {
 #endif /* __AVX__ */
 
 #ifdef RTE_ARCH_I686
-#define _mm_cvtsi128_si64(a) ({ \
+#define _mm_cvtsi128_si64(a)    \
+__extension__ ({                \
        rte_xmm_t m;            \
        m.x = (a);              \
        (m.u64[0]);             \
@@ -117,7 +121,8 @@ typedef union rte_ymm {
  * Prior to version 12.1 icc doesn't support _mm_set_epi64x.
  */
 #if (defined(__ICC) && __ICC < 1210)
-#define _mm_set_epi64x(a, b)  ({ \
+#define _mm_set_epi64x(a, b)     \
+__extension__ ({                 \
        rte_xmm_t m;             \
        m.u64[0] = b;            \
        m.u64[1] = a;            \
@@ -129,4 +134,4 @@ typedef union rte_ymm {
 }
 #endif
 
-#endif /* _RTE_VECT_H_ */
+#endif /* _RTE_VECT_X86_H_ */
index bfb4fe4..7b81705 100644 (file)
@@ -42,6 +42,7 @@
  */
 
 #include <stdint.h>
+#include <rte_common.h>
 
 #ifdef __DOXYGEN__
 
@@ -99,6 +100,33 @@ static inline void rte_smp_wmb(void);
  */
 static inline void rte_smp_rmb(void);
 
+/**
+ * General memory barrier for I/O device
+ *
+ * Guarantees that the LOAD and STORE operations that precede the
+ * rte_io_mb() call are visible to the I/O device or CPU before the
+ * LOAD and STORE operations that follow it.
+ */
+static inline void rte_io_mb(void);
+
+/**
+ * Write memory barrier for I/O device
+ *
+ * Guarantees that the STORE operations that precede the
+ * rte_io_wmb() call are visible to the I/O device before the STORE
+ * operations that follow it.
+ */
+static inline void rte_io_wmb(void);
+
+/**
+ * Read memory barrier for I/O device
+ *
+ * Guarantees that the LOAD operations on the I/O device that precede the
+ * rte_io_rmb() call are visible to the CPU before the LOAD
+ * operations that follow it.
+ */
+static inline void rte_io_rmb(void);
+
 #endif /* __DOXYGEN__ */
 
 /**
index c1da357..71321f3 100644 (file)
@@ -44,6 +44,7 @@
 /**
  * Enumeration of all CPU features supported
  */
+__extension__
 enum rte_cpu_flag_t;
 
 /**
@@ -55,6 +56,7 @@ enum rte_cpu_flag_t;
  *     flag name
  *     NULL if flag ID is invalid
  */
+__extension__
 const char *
 rte_cpu_get_flag_name(enum rte_cpu_flag_t feature);
 
@@ -68,6 +70,7 @@ rte_cpu_get_flag_name(enum rte_cpu_flag_t feature);
  *     0 if flag is not available
  *     -ENOENT if flag is invalid
  */
+__extension__
 int
 rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature);
 
index 8cc21f2..00103ca 100644 (file)
@@ -180,15 +180,16 @@ rte_get_timer_hz(void)
        default: rte_panic("Invalid timer source specified\n");
        }
 }
-
 /**
  * Wait at least us microseconds.
+ * This function can be replaced with a user-defined function.
+ * @see rte_delay_us_callback_register
  *
  * @param us
  *   The number of microseconds to wait.
  */
-void
-rte_delay_us(unsigned us);
+extern void
+(*rte_delay_us)(unsigned int us);
 
 /**
  * Wait at least ms milliseconds.
@@ -202,4 +203,21 @@ rte_delay_ms(unsigned ms)
        rte_delay_us(ms * 1000);
 }
 
+/**
+ * Blocking delay function.
+ *
+ * @param us
+ *   Number of microseconds to wait.
+ */
+void rte_delay_us_block(unsigned int us);
+
+/**
+ * Replace rte_delay_us with user defined function.
+ *
+ * @param userfunc
+ *   User function which replaces rte_delay_us. Registering rte_delay_us_block
+ *   restores the built-in blocking delay function.
+ */
+void rte_delay_us_callback_register(void(*userfunc)(unsigned int));
+
 #endif /* _RTE_CYCLES_H_ */
diff --git a/src/dpdk/lib/librte_eal/common/include/generic/rte_io.h b/src/dpdk/lib/librte_eal/common/include/generic/rte_io.h
new file mode 100644 (file)
index 0000000..d82ee69
--- /dev/null
@@ -0,0 +1,381 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Cavium networks. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium networks nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_IO_H_
+#define _RTE_IO_H_
+
+#include <rte_atomic.h>
+
+/**
+ * @file
+ * I/O device memory operations
+ *
+ * This file defines the generic API for I/O device memory read/write operations
+ */
+
+#include <stdint.h>
+#include <rte_common.h>
+#include <rte_atomic.h>
+
+#ifdef __DOXYGEN__
+
+/**
+ * Read an 8-bit value from I/O device memory address *addr*.
+ *
+ * The relaxed version does not add an I/O memory barrier; it is useful for
+ * accessing the device registers of integrated controllers, which are
+ * implicitly strongly ordered with respect to memory access.
+ *
+ * @param addr
+ *  I/O memory address to read the value from
+ * @return
+ *  read value
+ */
+static inline uint8_t
+rte_read8_relaxed(const volatile void *addr);
+
+/**
+ * Read a 16-bit value from I/O device memory address *addr*.
+ *
+ * The relaxed version does not add an I/O memory barrier; it is useful for
+ * accessing the device registers of integrated controllers, which are
+ * implicitly strongly ordered with respect to memory access.
+ *
+ * @param addr
+ *  I/O memory address to read the value from
+ * @return
+ *  read value
+ */
+static inline uint16_t
+rte_read16_relaxed(const volatile void *addr);
+
+/**
+ * Read a 32-bit value from I/O device memory address *addr*.
+ *
+ * The relaxed version does not add an I/O memory barrier; it is useful for
+ * accessing the device registers of integrated controllers, which are
+ * implicitly strongly ordered with respect to memory access.
+ *
+ * @param addr
+ *  I/O memory address to read the value from
+ * @return
+ *  read value
+ */
+static inline uint32_t
+rte_read32_relaxed(const volatile void *addr);
+
+/**
+ * Read a 64-bit value from I/O device memory address *addr*.
+ *
+ * The relaxed version does not add an I/O memory barrier; it is useful for
+ * accessing the device registers of integrated controllers, which are
+ * implicitly strongly ordered with respect to memory access.
+ *
+ * @param addr
+ *  I/O memory address to read the value from
+ * @return
+ *  read value
+ */
+static inline uint64_t
+rte_read64_relaxed(const volatile void *addr);
+
+/**
+ * Write an 8-bit value to I/O device memory address *addr*.
+ *
+ * The relaxed version does not add an I/O memory barrier; it is useful for
+ * accessing the device registers of integrated controllers, which are
+ * implicitly strongly ordered with respect to memory access.
+ *
+ * @param value
+ *  Value to write
+ * @param addr
+ *  I/O memory address to write the value to
+ */
+
+static inline void
+rte_write8_relaxed(uint8_t value, volatile void *addr);
+
+/**
+ * Write a 16-bit value to I/O device memory address *addr*.
+ *
+ * The relaxed version does not add an I/O memory barrier; it is useful for
+ * accessing the device registers of integrated controllers, which are
+ * implicitly strongly ordered with respect to memory access.
+ *
+ * @param value
+ *  Value to write
+ * @param addr
+ *  I/O memory address to write the value to
+ */
+static inline void
+rte_write16_relaxed(uint16_t value, volatile void *addr);
+
+/**
+ * Write a 32-bit value to I/O device memory address *addr*.
+ *
+ * The relaxed version does not add an I/O memory barrier; it is useful for
+ * accessing the device registers of integrated controllers, which are
+ * implicitly strongly ordered with respect to memory access.
+ *
+ * @param value
+ *  Value to write
+ * @param addr
+ *  I/O memory address to write the value to
+ */
+static inline void
+rte_write32_relaxed(uint32_t value, volatile void *addr);
+
+/**
+ * Write a 64-bit value to I/O device memory address *addr*.
+ *
+ * The relaxed version does not add an I/O memory barrier; it is useful for
+ * accessing the device registers of integrated controllers, which are
+ * implicitly strongly ordered with respect to memory access.
+ *
+ * @param value
+ *  Value to write
+ * @param addr
+ *  I/O memory address to write the value to
+ */
+static inline void
+rte_write64_relaxed(uint64_t value, volatile void *addr);
+
+/**
+ * Read an 8-bit value from I/O device memory address *addr*.
+ *
+ * @param addr
+ *  I/O memory address to read the value from
+ * @return
+ *  read value
+ */
+static inline uint8_t
+rte_read8(const volatile void *addr);
+
+/**
+ * Read a 16-bit value from I/O device memory address *addr*.
+ *
+ *
+ * @param addr
+ *  I/O memory address to read the value from
+ * @return
+ *  read value
+ */
+static inline uint16_t
+rte_read16(const volatile void *addr);
+
+/**
+ * Read a 32-bit value from I/O device memory address *addr*.
+ *
+ * @param addr
+ *  I/O memory address to read the value from
+ * @return
+ *  read value
+ */
+static inline uint32_t
+rte_read32(const volatile void *addr);
+
+/**
+ * Read a 64-bit value from I/O device memory address *addr*.
+ *
+ * @param addr
+ *  I/O memory address to read the value from
+ * @return
+ *  read value
+ */
+static inline uint64_t
+rte_read64(const volatile void *addr);
+
+/**
+ * Write an 8-bit value to I/O device memory address *addr*.
+ *
+ * @param value
+ *  Value to write
+ * @param addr
+ *  I/O memory address to write the value to
+ */
+
+static inline void
+rte_write8(uint8_t value, volatile void *addr);
+
+/**
+ * Write a 16-bit value to I/O device memory address *addr*.
+ *
+ * @param value
+ *  Value to write
+ * @param addr
+ *  I/O memory address to write the value to
+ */
+static inline void
+rte_write16(uint16_t value, volatile void *addr);
+
+/**
+ * Write a 32-bit value to I/O device memory address *addr*.
+ *
+ * @param value
+ *  Value to write
+ * @param addr
+ *  I/O memory address to write the value to
+ */
+static inline void
+rte_write32(uint32_t value, volatile void *addr);
+
+/**
+ * Write a 64-bit value to I/O device memory address *addr*.
+ *
+ * @param value
+ *  Value to write
+ * @param addr
+ *  I/O memory address to write the value to
+ */
+static inline void
+rte_write64(uint64_t value, volatile void *addr);
+
+#endif /* __DOXYGEN__ */
+
+#ifndef RTE_OVERRIDE_IO_H
+
+static inline uint8_t __attribute__((always_inline))
+rte_read8_relaxed(const volatile void *addr)
+{
+       return *(const volatile uint8_t *)addr;
+}
+
+static inline uint16_t __attribute__((always_inline))
+rte_read16_relaxed(const volatile void *addr)
+{
+       return *(const volatile uint16_t *)addr;
+}
+
+static inline uint32_t __attribute__((always_inline))
+rte_read32_relaxed(const volatile void *addr)
+{
+       return *(const volatile uint32_t *)addr;
+}
+
+static inline uint64_t __attribute__((always_inline))
+rte_read64_relaxed(const volatile void *addr)
+{
+       return *(const volatile uint64_t *)addr;
+}
+
+static inline void __attribute__((always_inline))
+rte_write8_relaxed(uint8_t value, volatile void *addr)
+{
+       *(volatile uint8_t *)addr = value;
+}
+
+static inline void __attribute__((always_inline))
+rte_write16_relaxed(uint16_t value, volatile void *addr)
+{
+       *(volatile uint16_t *)addr = value;
+}
+
+static inline void __attribute__((always_inline))
+rte_write32_relaxed(uint32_t value, volatile void *addr)
+{
+       *(volatile uint32_t *)addr = value;
+}
+
+static inline void __attribute__((always_inline))
+rte_write64_relaxed(uint64_t value, volatile void *addr)
+{
+       *(volatile uint64_t *)addr = value;
+}
+
+static inline uint8_t __attribute__((always_inline))
+rte_read8(const volatile void *addr)
+{
+       uint8_t val;
+       val = rte_read8_relaxed(addr);
+       rte_io_rmb();
+       return val;
+}
+
+static inline uint16_t __attribute__((always_inline))
+rte_read16(const volatile void *addr)
+{
+       uint16_t val;
+       val = rte_read16_relaxed(addr);
+       rte_io_rmb();
+       return val;
+}
+
+static inline uint32_t  __attribute__((always_inline))
+rte_read32(const volatile void *addr)
+{
+       uint32_t val;
+       val = rte_read32_relaxed(addr);
+       rte_io_rmb();
+       return val;
+}
+
+static inline uint64_t __attribute__((always_inline))
+rte_read64(const volatile void *addr)
+{
+       uint64_t val;
+       val = rte_read64_relaxed(addr);
+       rte_io_rmb();
+       return val;
+}
+
+static inline void __attribute__((always_inline))
+rte_write8(uint8_t value, volatile void *addr)
+{
+       rte_io_wmb();
+       rte_write8_relaxed(value, addr);
+}
+
+static inline void __attribute__((always_inline))
+rte_write16(uint16_t value, volatile void *addr)
+{
+       rte_io_wmb();
+       rte_write16_relaxed(value, addr);
+}
+
+static inline void __attribute__((always_inline))
+rte_write32(uint32_t value, volatile void *addr)
+{
+       rte_io_wmb();
+       rte_write32_relaxed(value, addr);
+}
+
+static inline void __attribute__((always_inline))
+rte_write64(uint64_t value, volatile void *addr)
+{
+       rte_io_wmb();
+       rte_write64_relaxed(value, addr);
+}
+
+#endif /* RTE_OVERRIDE_IO_H */
+
+#endif /* _RTE_IO_H_ */
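As an illustrative sketch (not part of the upstream patch), REG_CTRL and REG_STATUS below are hypothetical register offsets inside a mapped BAR; the non-relaxed accessors add the rte_io_wmb()/rte_io_rmb() ordering defined above:

    #include <stdint.h>
    #include <rte_io.h>

    #define REG_CTRL   0x00  /* hypothetical control register offset */
    #define REG_STATUS 0x04  /* hypothetical status register offset  */

    static void start_device(volatile uint8_t *bar)
    {
            /* rte_io_wmb() inside rte_write32() orders earlier descriptor
             * stores before this doorbell-style write */
            rte_write32(1, bar + REG_CTRL);
    }

    static uint32_t poll_status(const volatile uint8_t *bar)
    {
            /* rte_io_rmb() inside rte_read32() orders the register load
             * before any dependent loads that follow */
            return rte_read32(bar + REG_STATUS);
    }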
index afb0afe..4e9d879 100644 (file)
@@ -64,6 +64,8 @@ rte_mov16(uint8_t *dst, const uint8_t *src);
 static inline void
 rte_mov32(uint8_t *dst, const uint8_t *src);
 
+#ifdef __DOXYGEN__
+
 /**
  * Copy 48 bytes from one location to another using optimised
  * instructions. The locations should not overlap.
@@ -76,6 +78,8 @@ rte_mov32(uint8_t *dst, const uint8_t *src);
 static inline void
 rte_mov48(uint8_t *dst, const uint8_t *src);
 
+#endif /* __DOXYGEN__ */
+
 /**
  * Copy 64 bytes from one location to another using optimised
  * instructions. The locations should not overlap.
diff --git a/src/dpdk/lib/librte_eal/common/include/generic/rte_vect.h b/src/dpdk/lib/librte_eal/common/include/generic/rte_vect.h
new file mode 100644 (file)
index 0000000..600ee9f
--- /dev/null
@@ -0,0 +1,214 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2016 6WIND S.A.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_VECT_H_
+#define _RTE_VECT_H_
+
+/**
+ * @file
+ * SIMD vector types
+ *
+ * This file defines types to use vector instructions with generic C code.
+ */
+
+#include <stdint.h>
+
+/* Unsigned vector types */
+
+/**
+ * 64 bits vector size to use with unsigned 8 bits elements.
+ *
+ * a = (rte_v64u8_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
+ */
+typedef uint8_t rte_v64u8_t __attribute__((vector_size(8), aligned(8)));
+
+/**
+ * 64 bits vector size to use with unsigned 16 bits elements.
+ *
+ * a = (rte_v64u16_t){ a0, a1, a2, a3 }
+ */
+typedef uint16_t rte_v64u16_t __attribute__((vector_size(8), aligned(8)));
+
+/**
+ * 64 bits vector size to use with unsigned 32 bits elements.
+ *
+ * a = (rte_v64u32_t){ a0, a1 }
+ */
+typedef uint32_t rte_v64u32_t __attribute__((vector_size(8), aligned(8)));
+
+/**
+ * 128 bits vector size to use with unsigned 8 bits elements.
+ *
+ * a = (rte_v128u8_t){ a00, a01, a02, a03, a04, a05, a06, a07,
+ *                     a08, a09, a10, a11, a12, a13, a14, a15 }
+ */
+typedef uint8_t rte_v128u8_t __attribute__((vector_size(16), aligned(16)));
+
+/**
+ * 128 bits vector size to use with unsigned 16 bits elements.
+ *
+ * a = (rte_v128u16_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
+ */
+typedef uint16_t rte_v128u16_t __attribute__((vector_size(16), aligned(16)));
+
+/**
+ * 128 bits vector size to use with unsigned 32 bits elements.
+ *
+ * a = (rte_v128u32_t){ a0, a1, a2, a3 }
+ */
+typedef uint32_t rte_v128u32_t __attribute__((vector_size(16), aligned(16)));
+
+/**
+ * 128 bits vector size to use with unsigned 64 bits elements.
+ *
+ * a = (rte_v128u64_t){ a0, a1 }
+ */
+typedef uint64_t rte_v128u64_t __attribute__((vector_size(16), aligned(16)));
+
+/**
+ * 256 bits vector size to use with unsigned 8 bits elements.
+ *
+ * a = (rte_v256u8_t){ a00, a01, a02, a03, a04, a05, a06, a07,
+ *                     a08, a09, a10, a11, a12, a13, a14, a15,
+ *                     a16, a17, a18, a19, a20, a21, a22, a23,
+ *                     a24, a25, a26, a27, a28, a29, a30, a31 }
+ */
+typedef uint8_t rte_v256u8_t __attribute__((vector_size(32), aligned(32)));
+
+/**
+ * 256 bits vector size to use with unsigned 16 bits elements.
+ *
+ * a = (rte_v256u16_t){ a00, a01, a02, a03, a04, a05, a06, a07,
+ *                      a08, a09, a10, a11, a12, a13, a14, a15 }
+ */
+typedef uint16_t rte_v256u16_t __attribute__((vector_size(32), aligned(32)));
+
+/**
+ * 256 bits vector size to use with unsigned 32 bits elements.
+ *
+ * a = (rte_v256u32_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
+ */
+typedef uint32_t rte_v256u32_t __attribute__((vector_size(32), aligned(32)));
+
+/**
+ * 256 bits vector size to use with unsigned 64 bits elements.
+ *
+ * a = (rte_v256u64_t){ a0, a1, a2, a3 }
+ */
+typedef uint64_t rte_v256u64_t __attribute__((vector_size(32), aligned(32)));
+
+
+/* Signed vector types */
+
+/**
+ * 64 bits vector size to use with 8 bits elements.
+ *
+ * a = (rte_v64s8_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
+ */
+typedef int8_t rte_v64s8_t __attribute__((vector_size(8), aligned(8)));
+
+/**
+ * 64 bits vector size to use with 16 bits elements.
+ *
+ * a = (rte_v64s16_t){ a0, a1, a2, a3 }
+ */
+typedef int16_t rte_v64s16_t __attribute__((vector_size(8), aligned(8)));
+
+/**
+ * 64 bits vector size to use with 32 bits elements.
+ *
+ * a = (rte_v64s32_t){ a0, a1 }
+ */
+typedef int32_t rte_v64s32_t __attribute__((vector_size(8), aligned(8)));
+
+/**
+ * 128 bits vector size to use with 8 bits elements.
+ *
+ * a = (rte_v128s8_t){ a00, a01, a02, a03, a04, a05, a06, a07,
+ *                     a08, a09, a10, a11, a12, a13, a14, a15 }
+ */
+typedef int8_t rte_v128s8_t __attribute__((vector_size(16), aligned(16)));
+
+/**
+ * 128 bits vector size to use with 16 bits elements.
+ *
+ * a = (rte_v128s16_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
+ */
+typedef int16_t rte_v128s16_t __attribute__((vector_size(16), aligned(16)));
+
+/**
+ * 128 bits vector size to use with 32 bits elements.
+ *
+ * a = (rte_v128s32_t){ a0, a1, a2, a3 }
+ */
+typedef int32_t rte_v128s32_t __attribute__((vector_size(16), aligned(16)));
+
+/**
+ * 128 bits vector size to use with 64 bits elements.
+ *
+ * a = (rte_v128s64_t){ a0, a1 }
+ */
+typedef int64_t rte_v128s64_t __attribute__((vector_size(16), aligned(16)));
+
+/**
+ * 256 bits vector size to use with 8 bits elements.
+ *
+ * a = (rte_v256s8_t){ a00, a01, a02, a03, a04, a05, a06, a07,
+ *                     a08, a09, a10, a11, a12, a13, a14, a15,
+ *                     a16, a17, a18, a19, a20, a21, a22, a23,
+ *                     a24, a25, a26, a27, a28, a29, a30, a31 }
+ */
+typedef int8_t rte_v256s8_t __attribute__((vector_size(32), aligned(32)));
+
+/**
+ * 256 bits vector size to use with 16 bits elements.
+ *
+ * a = (rte_v256s16_t){ a00, a01, a02, a03, a04, a05, a06, a07,
+ *                      a08, a09, a10, a11, a12, a13, a14, a15 }
+ */
+typedef int16_t rte_v256s16_t __attribute__((vector_size(32), aligned(32)));
+
+/**
+ * 256 bits vector size to use with 32 bits elements.
+ *
+ * a = (rte_v256s32_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
+ */
+typedef int32_t rte_v256s32_t __attribute__((vector_size(32), aligned(32)));
+
+/**
+ * 256 bits vector size to use with 64 bits elements.
+ *
+ * a = (rte_v256s64_t){ a0, a1, a2, a3 }
+ */
+typedef int64_t rte_v256s64_t __attribute__((vector_size(32), aligned(32)));
+
+#endif /* _RTE_VECT_H_ */
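As an illustrative sketch (not part of the upstream patch), these typedefs rely on the GCC/clang vector_size extension, so plain C operators work per lane and the compiler emits SIMD instructions where the target supports them:

    #include <rte_vect.h>

    static rte_v128u32_t add_lanes(rte_v128u32_t a, rte_v128u32_t b)
    {
            return a + b;                        /* per-lane 32-bit addition */
    }

    static void vect_example(void)
    {
            rte_v128u32_t x = { 1, 2, 3, 4 };
            rte_v128u32_t y = { 10, 20, 30, 40 };
            rte_v128u32_t z = add_lanes(x, y);   /* { 11, 22, 33, 44 } */
            (void)z;
    }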
diff --git a/src/dpdk/lib/librte_eal/common/include/rte_bus.h b/src/dpdk/lib/librte_eal/common/include/rte_bus.h
new file mode 100644 (file)
index 0000000..7c36969
--- /dev/null
@@ -0,0 +1,158 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 NXP
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of NXP nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_BUS_H_
+#define _RTE_BUS_H_
+
+/**
+ * @file
+ *
+ * DPDK device bus interface
+ *
+ * This file exposes API and interfaces for bus abstraction
+ * over the devices and drivers in EAL.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_dev.h>
+
+/** Double linked list of buses */
+TAILQ_HEAD(rte_bus_list, rte_bus);
+
+/**
+ * Bus-specific scan for devices attached to the bus.
+ * For each bus object, the scan is responsible for finding devices and
+ * adding them to its private device list.
+ *
+ * A bus must implement this method.
+ *
+ * @return
+ *     0 for successful scan
+ *     <0 for unsuccessful scan with error value
+ */
+typedef int (*rte_bus_scan_t)(void);
+
+/**
+ * Implementation specific probe function which is responsible for linking
+ * devices on that bus with applicable drivers.
+ *
+ * This is called while iterating over each registered bus.
+ *
+ * @return
+ *     0 for successful probe
+ *     !0 for any error while probing
+ */
+typedef int (*rte_bus_probe_t)(void);
+
+/**
+ * A structure describing a generic bus.
+ */
+struct rte_bus {
+       TAILQ_ENTRY(rte_bus) next;   /**< Next bus object in linked list */
+       const char *name;            /**< Name of the bus */
+       rte_bus_scan_t scan;         /**< Scan for devices attached to bus */
+       rte_bus_probe_t probe;       /**< Probe devices on bus */
+};
+
+/**
+ * Register a Bus handler.
+ *
+ * @param bus
+ *   A pointer to a rte_bus structure describing the bus
+ *   to be registered.
+ */
+void rte_bus_register(struct rte_bus *bus);
+
+/**
+ * Unregister a Bus handler.
+ *
+ * @param bus
+ *   A pointer to a rte_bus structure describing the bus
+ *   to be unregistered.
+ */
+void rte_bus_unregister(struct rte_bus *bus);
+
+/**
+ * Scan all the buses.
+ *
+ * @return
+ *   0 in case of success in scanning all buses
+ *  !0 in case of failure to scan
+ */
+int rte_bus_scan(void);
+
+/**
+ * For each device on the buses, perform a driver 'match' and call the
+ * driver-specific probe for device initialization.
+ *
+ * @return
+ *      0 for successful match/probe
+ *     !0 otherwise
+ */
+int rte_bus_probe(void);
+
+/**
+ * Dump information of all the buses registered with EAL.
+ *
+ * @param f
+ *      A valid and open output stream handle
+ */
+void rte_bus_dump(FILE *f);
+
+/**
+ * Helper for Bus registration.
+ * The constructor has higher priority than PMD constructors.
+ */
+#define RTE_REGISTER_BUS(nm, bus) \
+static void __attribute__((constructor(101), used)) businitfn_ ##nm(void) \
+{\
+       (bus).name = RTE_STR(nm);\
+       rte_bus_register(&bus); \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BUS_H */
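As an illustrative sketch (not part of the upstream patch), a hypothetical bus fills in the scan/probe callbacks and registers itself with the constructor-priority macro; all names below are placeholders:

    #include <rte_bus.h>

    static int my_bus_scan(void)
    {
            /* walk sysfs/firmware tables and add discovered devices here */
            return 0;
    }

    static int my_bus_probe(void)
    {
            /* match scanned devices against registered drivers here */
            return 0;
    }

    static struct rte_bus my_bus = {
            .scan  = my_bus_scan,
            .probe = my_bus_probe,
    };

    RTE_REGISTER_BUS(mybus, my_bus);  /* sets my_bus.name = "mybus" and registers it */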
index 332f2a4..8dda3e2 100644 (file)
@@ -59,6 +59,13 @@ extern "C" {
 #define asm __asm__
 #endif
 
+/** C extension macro for environments lacking C11 features. */
+#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L
+#define RTE_STD_C11 __extension__
+#else
+#define RTE_STD_C11
+#endif
+
 #ifdef RTE_ARCH_STRICT_ALIGN
 typedef uint64_t unaligned_uint64_t __attribute__ ((aligned(1)));
 typedef uint32_t unaligned_uint32_t __attribute__ ((aligned(1)));
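As an illustrative sketch (not part of the upstream patch), RTE_STD_C11 is meant to mark C11 constructs such as anonymous unions so headers still compile cleanly when a pre-C11 standard (e.g. -std=c99) is requested; the structure below is made up:

    #include <stdint.h>
    #include <rte_common.h>

    struct pkt_meta {
            uint32_t flags;
            RTE_STD_C11
            union {            /* anonymous union: a C11 feature */
                    uint64_t raw;
                    void *ptr;
            };
    };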
@@ -268,7 +275,8 @@ rte_align64pow2(uint64_t v)
 /**
  * Macro to return the minimum of two numbers
  */
-#define RTE_MIN(a, b) ({ \
+#define RTE_MIN(a, b) \
+       __extension__ ({ \
                typeof (a) _a = (a); \
                typeof (b) _b = (b); \
                _a < _b ? _a : _b; \
@@ -277,7 +285,8 @@ rte_align64pow2(uint64_t v)
 /**
  * Macro to return the maximum of two numbers
  */
-#define RTE_MAX(a, b) ({ \
+#define RTE_MAX(a, b) \
+       __extension__ ({ \
                typeof (a) _a = (a); \
                typeof (b) _b = (b); \
                _a > _b ? _a : _b; \
@@ -322,10 +331,39 @@ rte_bsf32(uint32_t v)
 #define offsetof(TYPE, MEMBER)  __builtin_offsetof (TYPE, MEMBER)
 #endif
 
+/**
+ * Return pointer to the wrapping struct instance.
+ *
+ * Example:
+ *
+ *  struct wrapper {
+ *      ...
+ *      struct child c;
+ *      ...
+ *  };
+ *
+ *  struct child *x = obtain(...);
+ *  struct wrapper *w = container_of(x, struct wrapper, c);
+ */
+#ifndef container_of
+#define container_of(ptr, type, member)        __extension__ ({                \
+                       typeof(((type *)0)->member) *_ptr = (ptr);      \
+                       (type *)(((char *)_ptr) - offsetof(type, member)); })
+#endif
+
 #define _RTE_STR(x) #x
 /** Take a macro value and get a string version of it */
 #define RTE_STR(x) _RTE_STR(x)
 
+/**
+ * ISO C helpers to modify format strings using variadic macros.
+ * This is a replacement for the ", ## __VA_ARGS__" GNU extension.
+ * An empty %s argument is appended to avoid a dangling comma.
+ */
+#define RTE_FMT(fmt, ...) fmt "%.0s", __VA_ARGS__ ""
+#define RTE_FMT_HEAD(fmt, ...) fmt
+#define RTE_FMT_TAIL(fmt, ...) __VA_ARGS__
+
 /** Mask value of type "tp" for the first "ln" bit set. */
 #define        RTE_LEN2MASK(ln, tp)    \
        ((tp)((uint64_t)-1 >> (sizeof(uint64_t) * CHAR_BIT - (ln))))
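As an illustrative sketch (not part of the upstream patch), a hypothetical logging wrapper built on the RTE_FMT/RTE_FMT_HEAD/RTE_FMT_TAIL helpers above works with or without extra arguments and without the GNU ", ## __VA_ARGS__" extension:

    #include <stdio.h>
    #include <rte_common.h>

    #define APP_LOG(...) \
            printf(RTE_FMT("app: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
                           RTE_FMT_TAIL(__VA_ARGS__,)))

    static void report(void)
    {
            APP_LOG("link is up");            /* no trailing arguments needed */
            APP_LOG("queue %u started", 3u);  /* arguments forwarded as usual */
    }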
index 95789f9..b17791f 100644 (file)
@@ -100,37 +100,56 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...)
        } \
 } while (0)
 
+/**
+ * A generic memory resource representation.
+ */
+struct rte_mem_resource {
+       uint64_t phys_addr; /**< Physical address, 0 if not resource. */
+       uint64_t len;       /**< Length of the resource. */
+       void *addr;         /**< Virtual address, NULL when not mapped. */
+};
 
 /** Double linked list of device drivers. */
 TAILQ_HEAD(rte_driver_list, rte_driver);
+/** Double linked list of devices. */
+TAILQ_HEAD(rte_device_list, rte_device);
+
+/* Forward declaration */
+struct rte_driver;
 
 /**
- * Initialization function called for each device driver once.
+ * A structure describing a generic device.
  */
-typedef int (rte_dev_init_t)(const char *name, const char *args);
+struct rte_device {
+       TAILQ_ENTRY(rte_device) next; /**< Next device */
+       const struct rte_driver *driver;/**< Associated driver */
+       int numa_node;                /**< NUMA node connection */
+       struct rte_devargs *devargs;  /**< Device user arguments */
+};
 
 /**
- * Uninitilization function called for each device driver once.
+ * Insert a device detected by a bus scanning.
+ *
+ * @param dev
+ *   A pointer to a rte_device structure describing the detected device.
  */
-typedef int (rte_dev_uninit_t)(const char *name);
+void rte_eal_device_insert(struct rte_device *dev);
 
 /**
- * Driver type enumeration
+ * Remove a device (e.g. when being unplugged).
+ *
+ * @param dev
+ *   A pointer to a rte_device structure describing the device to be removed.
  */
-enum pmd_type {
-       PMD_VDEV = 0,
-       PMD_PDEV = 1,
-};
+void rte_eal_device_remove(struct rte_device *dev);
 
 /**
  * A structure describing a device driver.
  */
 struct rte_driver {
        TAILQ_ENTRY(rte_driver) next;  /**< Next in list. */
-       enum pmd_type type;                /**< PMD Driver type */
        const char *name;                   /**< Driver name. */
-       rte_dev_init_t *init;              /**< Device init. function. */
-       rte_dev_uninit_t *uninit;          /**< Device uninit. function. */
+       const char *alias;              /**< Driver alias. */
 };
 
 /**
@@ -178,31 +197,73 @@ int rte_eal_vdev_init(const char *name, const char *args);
  */
 int rte_eal_vdev_uninit(const char *name);
 
-#define DRIVER_EXPORT_NAME_ARRAY(n, idx) n##idx[]
+/**
+ * Attach a device to a registered driver.
+ *
+ * @param name
+ *   The device name, which refers to a PCI device (or some private
+ *   way of designating a vdev device). Based on this device name, eal
+ *   will identify a driver capable of handling it and pass it to the
+ *   driver probing function.
+ * @param devargs
+ *   Device arguments to be passed to the driver.
+ * @return
+ *   0 on success, negative on error.
+ */
+int rte_eal_dev_attach(const char *name, const char *devargs);
 
-#define DRIVER_EXPORT_NAME(name, idx) \
-static const char DRIVER_EXPORT_NAME_ARRAY(this_pmd_name, idx) \
-__attribute__((used)) = RTE_STR(name)
+/**
+ * Detach a device from its driver.
+ *
+ * @param name
+ *   Same description as for rte_eal_dev_attach().
+ *   Here, eal will call the driver detaching function.
+ * @return
+ *   0 on success, negative on error.
+ */
+int rte_eal_dev_detach(const char *name);
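
A hedged usage sketch of the attach/detach pair; the PCI address and the empty devargs string are illustrative:

/* Hot-plug a device by PCI address, then remove it again later. */
if (rte_eal_dev_attach("0000:04:00.0", "") != 0)
        RTE_LOG(ERR, EAL, "failed to attach 0000:04:00.0\n");
/* ... use the device ... */
if (rte_eal_dev_detach("0000:04:00.0") != 0)
        RTE_LOG(ERR, EAL, "failed to detach 0000:04:00.0\n");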
 
-#define PMD_REGISTER_DRIVER(drv, nm)\
-void devinitfn_ ##drv(void);\
-void __attribute__((constructor, used)) devinitfn_ ##drv(void)\
-{\
-       (drv).name = RTE_STR(nm);\
-       rte_eal_driver_register(&drv);\
-} \
-DRIVER_EXPORT_NAME(nm, __COUNTER__)
+#define RTE_PMD_EXPORT_NAME_ARRAY(n, idx) n##idx[]
+
+#define RTE_PMD_EXPORT_NAME(name, idx) \
+static const char RTE_PMD_EXPORT_NAME_ARRAY(this_pmd_name, idx) \
+__attribute__((used)) = RTE_STR(name)
 
 #define DRV_EXP_TAG(name, tag) __##name##_##tag
 
-#define DRIVER_REGISTER_PCI_TABLE(name, table) \
+#define RTE_PMD_REGISTER_PCI_TABLE(name, table) \
 static const char DRV_EXP_TAG(name, pci_tbl_export)[] __attribute__((used)) = \
 RTE_STR(table)
 
-#define DRIVER_REGISTER_PARAM_STRING(name, str) \
+#define RTE_PMD_REGISTER_PARAM_STRING(name, str) \
 static const char DRV_EXP_TAG(name, param_string_export)[] \
 __attribute__((used)) = str
 
+/**
+ * Advertise the list of kernel modules required to run this driver
+ *
+ * This string lists the kernel modules required for the devices
+ * associated to a PMD. The format of each line of the string is:
+ * "<device-pattern> <kmod-expression>".
+ *
+ * The possible formats for the device pattern are:
+ *   "*"                     all devices supported by this driver
+ *   "pci:*"                 all PCI devices supported by this driver
+ *   "pci:v8086:d*:sv*:sd*"  all PCI devices supported by this driver
+ *                           whose vendor id is 0x8086.
+ *
+ * The format of the kernel modules list is a parenthesized expression
+ * containing logical-and (&) and logical-or (|).
+ *
+ * The device pattern and the kmod expression are separated by a space.
+ *
+ * Example:
+ * - "* igb_uio | uio_pci_generic | vfio"
+ */
+#define RTE_PMD_REGISTER_KMOD_DEP(name, str) \
+static const char DRV_EXP_TAG(name, kmod_dep_export)[] \
+__attribute__((used)) = str
+
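For example, a hypothetical PMD called net_mydrv that works with any of the usual userspace I/O modules could declare:

RTE_PMD_REGISTER_KMOD_DEP(net_mydrv, "* igb_uio | uio_pci_generic | vfio");
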
 #ifdef __cplusplus
 }
 #endif
index 53c59f5..88120a1 100644 (file)
@@ -76,6 +76,7 @@ struct rte_devargs {
        TAILQ_ENTRY(rte_devargs) next;
        /** Type of device. */
        enum rte_devtype type;
+       RTE_STD_C11
        union {
                /** Used if type is RTE_DEVTYPE_*_PCI. */
                struct {
@@ -106,8 +107,8 @@ extern struct rte_devargs_list devargs_list;
  * "04:00.0,arg=val".
  *
  * For virtual devices, the format of arguments string is "DRIVER_NAME*"
- * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring",
- * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1".
+ * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "net_ring",
+ * "net_ring0", "net_pmdAnything,arg=0:arg2=1".
  *
  * The function parses the arguments string to get driver name and driver
  * arguments.
@@ -134,8 +135,8 @@ int rte_eal_parse_devargs_str(const char *devargs_str,
  * "04:00.0,arg=val".
  *
  * For virtual devices, the format of arguments string is "DRIVER_NAME*"
- * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring",
- * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1". The validity of the
+ * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "net_ring",
+ * "net_ring0", "net_pmdAnything,arg=0:arg2=1". The validity of the
  * driver name is not checked by this function, it is done when probing
  * the drivers.
  *
index a71d6f5..03fee50 100644 (file)
@@ -44,6 +44,7 @@
 #include <sched.h>
 
 #include <rte_per_lcore.h>
+#include <rte_config.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -145,14 +146,19 @@ int rte_eal_iopl_init(void);
  * This behavior may change in the future.
  *
  * @param argc
- *   The argc argument that was given to the main() function.
+ *   A non-negative value.  If it is greater than 0, the array members
+ *   for argv[0] through argv[argc] (non-inclusive) shall contain pointers
+ *   to strings.
  * @param argv
- *   The argv argument that was given to the main() function.
+ *   An array of strings.  The contents of the array, as well as the strings
+ *   which are pointed to by the array, may be modified by this function.
  * @return
  *   - On success, the number of parsed arguments, which is greater or
  *     equal to zero. After the call to rte_eal_init(),
- *     all arguments argv[x] with x < ret may be modified and should
- *     not be accessed by the application.
+ *     all arguments argv[x] with x < ret may have been modified by this
+ *     function call and should not be further interpreted by the
+ *     application.  The EAL does not take any ownership of the memory used
+ *     for either the argv array, or its members.
  *   - On failure, a negative error value.
  */
 int rte_eal_init(int argc, char **argv);
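
A hedged sketch of the documented calling convention, with the application skipping the arguments consumed by the EAL:

#include <rte_eal.h>
#include <rte_debug.h>

int
main(int argc, char **argv)
{
        int ret = rte_eal_init(argc, argv);

        if (ret < 0)
                rte_panic("Cannot init EAL\n");
        /* the application's own arguments start after the EAL ones */
        argc -= ret;
        argv += ret;
        return 0;
}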
@@ -252,6 +258,9 @@ static inline int rte_gettid(void)
        return RTE_PER_LCORE(_thread_id);
 }
 
+#define RTE_INIT(func) \
+static void __attribute__((constructor, used)) func(void)
+
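A minimal sketch of the declaration pattern RTE_INIT() enables; the function name is hypothetical, and the registration macros elsewhere in this patch use exactly this shape:

RTE_INIT(my_module_init);              /* constructor declaration */
static void my_module_init(void)
{
        /* one-time setup, e.g. registering a driver, runs before main() */
}
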
 #ifdef __cplusplus
 }
 #endif
index ff11ef3..6cade01 100644 (file)
@@ -34,6 +34,8 @@
 #ifndef _RTE_INTERRUPTS_H_
 #define _RTE_INTERRUPTS_H_
 
+#include <rte_common.h>
+
 /**
  * @file
  *
@@ -68,7 +70,7 @@ typedef void (*rte_intr_callback_fn)(struct rte_intr_handle *intr_handle,
  *  - On success, zero.
  *  - On failure, a negative value.
  */
-int rte_intr_callback_register(struct rte_intr_handle *intr_handle,
+int rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
                                rte_intr_callback_fn cb, void *cb_arg);
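
A hedged usage sketch; dev stands for some struct rte_pci_device pointer and the callback name is hypothetical:

static void
my_intr_cb(struct rte_intr_handle *intr_handle, void *cb_arg)
{
        /* device-specific interrupt handling goes here */
}

/* register the callback, then unmask the interrupt */
rte_intr_callback_register(&dev->intr_handle, my_intr_cb, dev);
rte_intr_enable(&dev->intr_handle);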
 
 /**
@@ -86,7 +88,7 @@ int rte_intr_callback_register(struct rte_intr_handle *intr_handle,
  *  - On success, return the number of callback entities removed.
  *  - On failure, a negative value.
  */
-int rte_intr_callback_unregister(struct rte_intr_handle *intr_handle,
+int rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
                                rte_intr_callback_fn cb, void *cb_arg);
 
 /**
@@ -99,7 +101,7 @@ int rte_intr_callback_unregister(struct rte_intr_handle *intr_handle,
  *  - On success, zero.
  *  - On failure, a negative value.
  */
-int rte_intr_enable(struct rte_intr_handle *intr_handle);
+int rte_intr_enable(const struct rte_intr_handle *intr_handle);
 
 /**
  * It disables the interrupt for the specified handle.
@@ -111,7 +113,7 @@ int rte_intr_enable(struct rte_intr_handle *intr_handle);
  *  - On success, zero.
  *  - On failure, a negative value.
  */
-int rte_intr_disable(struct rte_intr_handle *intr_handle);
+int rte_intr_disable(const struct rte_intr_handle *intr_handle);
 
 #ifdef __cplusplus
 }
index b1add04..954b96c 100644 (file)
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -42,8 +42,6 @@
  * This file provides a log API to RTE applications.
  */
 
-#include "rte_common.h" /* for __rte_deprecated macro */
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -56,7 +54,7 @@ extern "C" {
 struct rte_logs {
        uint32_t type;  /**< Bitfield with enabled logs. */
        uint32_t level; /**< Log level. */
-       FILE *file;     /**< Pointer to current FILE* for logs. */
+       FILE *file;     /**< Output file set by rte_openlog_stream, or NULL. */
 };
 
 /** Global log information */
@@ -81,6 +79,7 @@ extern struct rte_logs rte_logs;
 #define RTE_LOGTYPE_PIPELINE 0x00008000 /**< Log related to pipeline. */
 #define RTE_LOGTYPE_MBUF    0x00010000 /**< Log related to mbuf. */
 #define RTE_LOGTYPE_CRYPTODEV 0x00020000 /**< Log related to cryptodev. */
+#define RTE_LOGTYPE_EFD     0x00040000 /**< Log related to EFD. */
 
 /* these log types can be used in an application */
 #define RTE_LOGTYPE_USER1   0x01000000 /**< User-defined log type 1. */
@@ -102,9 +101,6 @@ extern struct rte_logs rte_logs;
 #define RTE_LOG_INFO     7U  /**< Informational.                    */
 #define RTE_LOG_DEBUG    8U  /**< Debug-level messages.             */
 
-/** The default log stream. */
-extern FILE *eal_default_log_stream;
-
 /**
  * Change the stream that will be used by the logging system.
  *
@@ -123,9 +119,8 @@ int rte_openlog_stream(FILE *f);
 /**
  * Set the global log level.
  *
- * After this call, all logs that are lower or equal than level and
- * lower or equal than the RTE_LOG_LEVEL configuration option will be
- * displayed.
+ * After this call, logs with a level lower or equal than the level
+ * passed as argument will be displayed.
  *
  * @param level
  *   Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8).
@@ -180,45 +175,6 @@ int rte_log_cur_msg_loglevel(void);
  */
 int rte_log_cur_msg_logtype(void);
 
-/**
- * @deprecated
- * Enable or disable the history (enabled by default)
- *
- * @param enable
- *   true to enable, or 0 to disable history.
- */
-__rte_deprecated
-void rte_log_set_history(int enable);
-
-/**
- * @deprecated
- * Dump the log history to a file
- *
- * @param f
- *   A pointer to a file for output
- */
-__rte_deprecated
-void rte_log_dump_history(FILE *f);
-
-/**
- * @deprecated
- * Add a log message to the history.
- *
- * This function can be called from a user-defined log stream. It adds
- * the given message in the history that can be dumped using
- * rte_log_dump_history().
- *
- * @param buf
- *   A data buffer containing the message to be saved in the history.
- * @param size
- *   The length of the data buffer.
- * @return
- *   - 0: Success.
- *   - (-ENOBUFS) if there is no room to store the message.
- */
-__rte_deprecated
-int rte_log_add_in_history(const char *buf, size_t size);
-
 /**
  * Generates a log message.
  *
@@ -228,9 +184,8 @@ int rte_log_add_in_history(const char *buf, size_t size);
  * The level argument determines if the log should be displayed or
  * not, depending on the global rte_logs variable.
  *
- * The preferred alternative is the RTE_LOG() function because debug logs may
- * be removed at compilation time if optimization is enabled. Moreover,
- * logs are automatically prefixed by type when using the macro.
+ * The preferred alternative is the RTE_LOG() macro, because it adds the
+ * level and type to the logged string.
  *
  * @param level
  *   Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8).
@@ -261,8 +216,8 @@ int rte_log(uint32_t level, uint32_t logtype, const char *format, ...)
  * not, depending on the global rte_logs variable. A trailing
  * newline may be added if needed.
  *
- * The preferred alternative is the RTE_LOG() because debug logs may be
- * removed at compilation time.
+ * The preferred alternative is the RTE_LOG() macro, because it adds the
+ * level and type to the logged string.
  *
  * @param level
  *   Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8).
@@ -283,15 +238,8 @@ int rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap)
 /**
  * Generates a log message.
  *
- * The RTE_LOG() is equivalent to rte_log() with two differences:
-
- * - RTE_LOG() can be used to remove debug logs at compilation time,
- *   depending on RTE_LOG_LEVEL configuration option, and compilation
- *   optimization level. If optimization is enabled, the tests
- *   involving constants only are pre-computed. If compilation is done
- *   with -O0, these tests will be done at run time.
- * - The log level and log type names are smaller, for example:
- *   RTE_LOG(INFO, EAL, "this is a %s", "log");
+ * RTE_LOG() is a helper that prefixes the string with the log level
+ * and type, and then calls rte_log().
  *
  * @param l
  *   Log level. A value between EMERG (1) and DEBUG (8). The short name is
@@ -307,7 +255,31 @@ int rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap)
  *   - Negative on error.
  */
 #define RTE_LOG(l, t, ...)                                     \
-       (void)((RTE_LOG_ ## l <= RTE_LOG_LEVEL) ?               \
+        rte_log(RTE_LOG_ ## l,                                 \
+                RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__)
+
+/**
+ * Generates a log message for data path.
+ *
+ * Similar to RTE_LOG(), except that it is removed at compilation time
+ * if the RTE_LOG_DP_LEVEL configuration option is lower than the log
+ * level argument.
+ *
+ * @param l
+ *   Log level. A value between EMERG (1) and DEBUG (8). The short name is
+ *   expanded by the macro, so it cannot be an integer value.
+ * @param t
+ *   The log type, for example, EAL. The short name is expanded by the
+ *   macro, so it cannot be an integer value.
+ * @param ...
+ *   The fmt string, as in printf(3), followed by the variable arguments
+ *   required by the format.
+ * @return
+ *   - 0: Success.
+ *   - Negative on error.
+ */
+#define RTE_LOG_DP(l, t, ...)                                  \
+       (void)((RTE_LOG_ ## l <= RTE_LOG_DP_LEVEL) ?            \
         rte_log(RTE_LOG_ ## l,                                 \
                 RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__) :     \
         0)
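A hedged sketch contrasting the two macros (port_id and drops are illustrative variables):

/* control path: always compiled in, filtered at run time */
RTE_LOG(INFO, EAL, "port %u is up\n", port_id);

/* data path: compiled out entirely when RTE_LOG_DP_LEVEL is below
 * DEBUG, so it costs nothing in release builds */
RTE_LOG_DP(DEBUG, PMD, "dropped %u packets on port %u\n", drops, port_id);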
index 74bb78c..008ce13 100644 (file)
@@ -294,7 +294,7 @@ rte_malloc_get_socket_stats(int socket,
 /**
  * Dump statistics.
  *
- * Dump for the specified type to the console. If the type argument is
+ * Dump for the specified type to a file. If the type argument is
  * NULL, all memory types will be dumped.
  *
  * @param f
index 0661109..4aa5d1f 100644 (file)
@@ -44,6 +44,8 @@
 #include <stddef.h>
 #include <stdio.h>
 
+#include <rte_config.h>
+
 #ifdef RTE_EXEC_ENV_LINUXAPP
 #include <exec-env/rte_dom0_common.h>
 #endif
@@ -54,6 +56,7 @@ extern "C" {
 
 #include <rte_common.h>
 
+__extension__
 enum rte_page_sizes {
        RTE_PGSIZE_4K    = 1ULL << 12,
        RTE_PGSIZE_64K   = 1ULL << 16,
@@ -103,13 +106,11 @@ typedef uint64_t phys_addr_t; /**< Physical address definition. */
  */
 struct rte_memseg {
        phys_addr_t phys_addr;      /**< Start physical address. */
+       RTE_STD_C11
        union {
                void *addr;         /**< Start virtual address. */
                uint64_t addr_64;   /**< Makes sure addr is always 64 bits */
        };
-#ifdef RTE_LIBRTE_IVSHMEM
-       phys_addr_t ioremap_addr; /**< Real physical address inside the VM */
-#endif
        size_t len;               /**< Length of the segment. */
        uint64_t hugepage_sz;       /**< The pagesize of underlying memory */
        int32_t socket_id;          /**< NUMA socket ID. */
@@ -161,7 +162,7 @@ phys_addr_t rte_mem_virt2phy(const void *virt);
 const struct rte_memseg *rte_eal_get_physmem_layout(void);
 
 /**
- * Dump the physical memory layout to the console.
+ * Dump the physical memory layout to a file.
  *
  * @param f
  *   A pointer to a file for output
index f69b5a8..1d0827f 100644 (file)
@@ -53,6 +53,7 @@
 
 #include <stdio.h>
 #include <rte_memory.h>
+#include <rte_common.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -78,13 +79,11 @@ struct rte_memzone {
        char name[RTE_MEMZONE_NAMESIZE];  /**< Name of the memory zone. */
 
        phys_addr_t phys_addr;            /**< Start physical address. */
+       RTE_STD_C11
        union {
                void *addr;                   /**< Start virtual address. */
                uint64_t addr_64;             /**< Makes sure addr is always 64-bits */
        };
-#ifdef RTE_LIBRTE_IVSHMEM
-       phys_addr_t ioremap_addr;         /**< Real physical address inside the VM */
-#endif
        size_t len;                       /**< Length of the memzone. */
 
        uint64_t hugepage_sz;             /**< The page size of underlying memory */
@@ -256,12 +255,10 @@ const struct rte_memzone *rte_memzone_reserve_bounded(const char *name,
 /**
  * Free a memzone.
  *
- * Note: an IVSHMEM zone cannot be freed.
- *
  * @param mz
  *   A pointer to the memzone
  * @return
- *  -EINVAL - invalid parameter, IVSHMEM memzone.
+ *  -EINVAL - invalid parameter.
  *  0 - success
  */
 int rte_memzone_free(const struct rte_memzone *mz);
@@ -280,7 +277,7 @@ int rte_memzone_free(const struct rte_memzone *mz);
 const struct rte_memzone *rte_memzone_lookup(const char *name);
 
 /**
- * Dump all reserved memzones to the console.
+ * Dump all reserved memzones to a file.
  *
  * @param f
  *   A pointer to a file for output
index fa74962..8557e47 100644 (file)
@@ -82,7 +82,9 @@ extern "C" {
 #include <stdint.h>
 #include <inttypes.h>
 
+#include <rte_debug.h>
 #include <rte_interrupts.h>
+#include <rte_dev.h>
 
 TAILQ_HEAD(pci_device_list, rte_pci_device); /**< PCI devices in D-linked Q. */
 TAILQ_HEAD(pci_driver_list, rte_pci_driver); /**< PCI drivers in D-linked Q. */
@@ -95,6 +97,7 @@ const char *pci_get_sysfs_path(void);
 
 /** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */
 #define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
+#define PCI_PRI_STR_SIZE sizeof("XXXX:XX:XX.X")
 
 /** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */
 #define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
@@ -105,15 +108,6 @@ const char *pci_get_sysfs_path(void);
 /** Nb. of values in PCI resource format. */
 #define PCI_RESOURCE_FMT_NVAL 3
 
-/**
- * A structure describing a PCI resource.
- */
-struct rte_pci_resource {
-       uint64_t phys_addr;   /**< Physical address, 0 if no resource. */
-       uint64_t len;         /**< Length of the resource. */
-       void *addr;           /**< Virtual address, NULL when not mapped. */
-};
-
 /** Maximum number of PCI resources. */
 #define PCI_MAX_RESOURCE 6
 
@@ -155,17 +149,23 @@ enum rte_kernel_driver {
  */
 struct rte_pci_device {
        TAILQ_ENTRY(rte_pci_device) next;       /**< Next probed PCI device. */
+       struct rte_device device;               /**< Inherit core device */
        struct rte_pci_addr addr;               /**< PCI location. */
        struct rte_pci_id id;                   /**< PCI ID. */
-       struct rte_pci_resource mem_resource[PCI_MAX_RESOURCE];   /**< PCI Memory Resource */
+       struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE];
+                                               /**< PCI Memory Resource */
        struct rte_intr_handle intr_handle;     /**< Interrupt handle */
        struct rte_pci_driver *driver;          /**< Associated driver */
        uint16_t max_vfs;                       /**< sriov enable if not zero */
-       int numa_node;                          /**< NUMA node connection */
-       struct rte_devargs *devargs;            /**< Device user arguments */
        enum rte_kernel_driver kdrv;            /**< Kernel driver passthrough */
 };
 
+/**
+ * @internal
+ * Helper macro for drivers that need to convert to struct rte_pci_device.
+ */
+#define RTE_DEV_TO_PCI(ptr) container_of(ptr, struct rte_pci_device, device)
+
 /** Any PCI device identifier (vendor, device, ...) */
 #define PCI_ANY_ID (0xffff)
 #define RTE_CLASS_ANY_ID (0xffffff)
@@ -193,33 +193,29 @@ struct rte_pci_driver;
 /**
  * Initialisation function for the driver called during PCI probing.
  */
-typedef int (pci_devinit_t)(struct rte_pci_driver *, struct rte_pci_device *);
+typedef int (pci_probe_t)(struct rte_pci_driver *, struct rte_pci_device *);
 
 /**
  * Uninitialisation function for the driver called during hotplugging.
  */
-typedef int (pci_devuninit_t)(struct rte_pci_device *);
+typedef int (pci_remove_t)(struct rte_pci_device *);
 
 /**
  * A structure describing a PCI driver.
  */
 struct rte_pci_driver {
        TAILQ_ENTRY(rte_pci_driver) next;       /**< Next in list. */
-       const char *name;                       /**< Driver name. */
-       pci_devinit_t *devinit;                 /**< Device init. function. */
-       pci_devuninit_t *devuninit;             /**< Device uninit function. */
+       struct rte_driver driver;               /**< Inherit core driver. */
+       pci_probe_t *probe;                     /**< Device Probe function. */
+       pci_remove_t *remove;                   /**< Device Remove function. */
        const struct rte_pci_id *id_table;      /**< ID table, NULL terminated. */
        uint32_t drv_flags;                     /**< Flags controlling handling of device. */
 };
 
 /** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */
 #define RTE_PCI_DRV_NEED_MAPPING 0x0001
-/** Device needs to be unbound even if no module is provided */
-#define RTE_PCI_DRV_FORCE_UNBIND 0x0004
 /** Device driver supports link state interrupt */
 #define RTE_PCI_DRV_INTR_LSC   0x0008
-/** Device driver supports detaching capability */
-#define RTE_PCI_DRV_DETACHABLE 0x0010
 
 /**
  * A structure describing a PCI mapping.
@@ -308,6 +304,28 @@ eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr)
 }
 #undef GET_PCIADDR_FIELD
 
+/**
+ * Utility function to write a PCI device name; this name can later be
+ * used to retrieve the corresponding rte_pci_addr with the eal_parse_pci_*
+ * BDF helpers.
+ *
+ * @param addr
+ *     The PCI Bus-Device-Function address
+ * @param output
+ *     The output buffer string
+ * @param size
+ *     The output buffer size
+ */
+static inline void
+rte_eal_pci_device_name(const struct rte_pci_addr *addr,
+                   char *output, size_t size)
+{
+       RTE_VERIFY(size >= PCI_PRI_STR_SIZE);
+       RTE_VERIFY(snprintf(output, size, PCI_PRI_FMT,
+                           addr->domain, addr->bus,
+                           addr->devid, addr->function) >= 0);
+}
+
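A hedged usage sketch, where dev stands for some struct rte_pci_device pointer:

char name[PCI_PRI_STR_SIZE];

/* writes a "0000:04:00.0"-style string that the eal_parse_pci_*
 * helpers above can parse back into an rte_pci_addr */
rte_eal_pci_device_name(&dev->addr, name, sizeof(name));
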
 /* Compare two PCI device addresses. */
 /**
  * Utility function to compare two PCI device addresses.
@@ -442,7 +460,7 @@ int rte_eal_pci_probe_one(const struct rte_pci_addr *addr);
  * Close the single PCI device.
  *
  * Scan the content of the PCI bus, and find the pci device specified by pci
- * address, then call the devuninit() function for registered driver that has a
+ * address, then call the remove() function of the registered driver that has a
  * matching entry in its id_table for the discovered device.
  *
  * @param addr
@@ -470,6 +488,16 @@ void rte_eal_pci_dump(FILE *f);
  */
 void rte_eal_pci_register(struct rte_pci_driver *driver);
 
+/** Helper for PCI device registration from driver (eth, crypto) instance */
+#define RTE_PMD_REGISTER_PCI(nm, pci_drv) \
+RTE_INIT(pciinitfn_ ##nm); \
+static void pciinitfn_ ##nm(void) \
+{\
+       (pci_drv).driver.name = RTE_STR(nm);\
+       rte_eal_pci_register(&pci_drv); \
+} \
+RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
+
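A hedged sketch of how a PCI PMD might tie the reworked rte_pci_driver structure and the registration macros together (all mydrv names are hypothetical):

static struct rte_pci_driver mydrv_pmd = {
        .id_table  = mydrv_pci_id_map,    /* hypothetical rte_pci_id table */
        .probe     = mydrv_pci_probe,     /* hypothetical probe callback   */
        .remove    = mydrv_pci_remove,    /* hypothetical remove callback  */
        .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
};

RTE_PMD_REGISTER_PCI(net_mydrv, mydrv_pmd);
RTE_PMD_REGISTER_PCI_TABLE(net_mydrv, mydrv_pci_id_map);
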
 /**
  * Unregister a PCI driver.
  *
diff --git a/src/dpdk/lib/librte_eal/common/include/rte_pci_dev_ids.h b/src/dpdk/lib/librte_eal/common/include/rte_pci_dev_ids.h
deleted file mode 100644 (file)
index 6ec8ae8..0000000
+++ /dev/null
@@ -1,326 +0,0 @@
-/*-
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- *   redistributing this file, you may do so under either license.
- *
- *   GPL LICENSE SUMMARY
- *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
- *
- *   This program is free software; you can redistribute it and/or modify
- *   it under the terms of version 2 of the GNU General Public License as
- *   published by the Free Software Foundation.
- *
- *   This program is distributed in the hope that it will be useful, but
- *   WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *   General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program; if not, write to the Free Software
- *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *   The full GNU General Public License is included in this distribution
- *   in the file called LICENSE.GPL.
- *
- *   Contact Information:
- *   Intel Corporation
- *
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef RTE_PCI_DEV_ID_DECL_IGB
-#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev)
-#endif
-
-#ifndef RTE_PCI_DEV_ID_DECL_IGBVF
-#define RTE_PCI_DEV_ID_DECL_IGBVF(vend, dev)
-#endif
-
-#ifndef RTE_PCI_DEV_ID_DECL_IXGBE
-#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev)
-#endif
-
-#ifndef RTE_PCI_DEV_ID_DECL_IXGBEVF
-#define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev)
-#endif
-
-#ifndef PCI_VENDOR_ID_INTEL
-/** Vendor ID used by Intel devices */
-#define PCI_VENDOR_ID_INTEL 0x8086
-#endif
-
-/******************** Physical IGB devices from e1000_hw.h ********************/
-
-#define E1000_DEV_ID_82576                      0x10C9
-#define E1000_DEV_ID_82576_FIBER                0x10E6
-#define E1000_DEV_ID_82576_SERDES               0x10E7
-#define E1000_DEV_ID_82576_QUAD_COPPER          0x10E8
-#define E1000_DEV_ID_82576_QUAD_COPPER_ET2      0x1526
-#define E1000_DEV_ID_82576_NS                   0x150A
-#define E1000_DEV_ID_82576_NS_SERDES            0x1518
-#define E1000_DEV_ID_82576_SERDES_QUAD          0x150D
-#define E1000_DEV_ID_82575EB_COPPER             0x10A7
-#define E1000_DEV_ID_82575EB_FIBER_SERDES       0x10A9
-#define E1000_DEV_ID_82575GB_QUAD_COPPER        0x10D6
-#define E1000_DEV_ID_82580_COPPER               0x150E
-#define E1000_DEV_ID_82580_FIBER                0x150F
-#define E1000_DEV_ID_82580_SERDES               0x1510
-#define E1000_DEV_ID_82580_SGMII                0x1511
-#define E1000_DEV_ID_82580_COPPER_DUAL          0x1516
-#define E1000_DEV_ID_82580_QUAD_FIBER           0x1527
-#define E1000_DEV_ID_I350_COPPER                0x1521
-#define E1000_DEV_ID_I350_FIBER                 0x1522
-#define E1000_DEV_ID_I350_SERDES                0x1523
-#define E1000_DEV_ID_I350_SGMII                 0x1524
-#define E1000_DEV_ID_I350_DA4                   0x1546
-#define E1000_DEV_ID_I210_COPPER                0x1533
-#define E1000_DEV_ID_I210_COPPER_OEM1           0x1534
-#define E1000_DEV_ID_I210_COPPER_IT             0x1535
-#define E1000_DEV_ID_I210_FIBER                 0x1536
-#define E1000_DEV_ID_I210_SERDES                0x1537
-#define E1000_DEV_ID_I210_SGMII                 0x1538
-#define E1000_DEV_ID_I210_COPPER_FLASHLESS      0x157B
-#define E1000_DEV_ID_I210_SERDES_FLASHLESS      0x157C
-#define E1000_DEV_ID_I211_COPPER                0x1539
-#define E1000_DEV_ID_I354_BACKPLANE_1GBPS       0x1F40
-#define E1000_DEV_ID_I354_SGMII                 0x1F41
-#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS     0x1F45
-#define E1000_DEV_ID_DH89XXCC_SGMII             0x0438
-#define E1000_DEV_ID_DH89XXCC_SERDES            0x043A
-#define E1000_DEV_ID_DH89XXCC_BACKPLANE         0x043C
-#define E1000_DEV_ID_DH89XXCC_SFP               0x0440
-
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_FIBER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES_QUAD)
-
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_COPPER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER)
-
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_FIBER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SGMII)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER_DUAL)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_QUAD_FIBER)
-
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_COPPER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_FIBER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SGMII)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_DA4)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_OEM1)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_IT)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_FIBER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SGMII)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I211_COPPER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_SGMII)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SGMII)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SFP)
-
-/****************** Physical IXGBE devices from ixgbe_type.h ******************/
-
-#define IXGBE_DEV_ID_82598                      0x10B6
-#define IXGBE_DEV_ID_82598_BX                   0x1508
-#define IXGBE_DEV_ID_82598AF_DUAL_PORT          0x10C6
-#define IXGBE_DEV_ID_82598AF_SINGLE_PORT        0x10C7
-#define IXGBE_DEV_ID_82598AT                    0x10C8
-#define IXGBE_DEV_ID_82598AT2                   0x150B
-#define IXGBE_DEV_ID_82598EB_SFP_LOM            0x10DB
-#define IXGBE_DEV_ID_82598EB_CX4                0x10DD
-#define IXGBE_DEV_ID_82598_CX4_DUAL_PORT        0x10EC
-#define IXGBE_DEV_ID_82598_DA_DUAL_PORT         0x10F1
-#define IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM      0x10E1
-#define IXGBE_DEV_ID_82598EB_XF_LR              0x10F4
-#define IXGBE_DEV_ID_82599_KX4                  0x10F7
-#define IXGBE_DEV_ID_82599_KX4_MEZZ             0x1514
-#define IXGBE_DEV_ID_82599_KR                   0x1517
-#define IXGBE_DEV_ID_82599_COMBO_BACKPLANE      0x10F8
-#define IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ       0x000C
-#define IXGBE_DEV_ID_82599_CX4                  0x10F9
-#define IXGBE_DEV_ID_82599_SFP                  0x10FB
-#define IXGBE_SUBDEV_ID_82599_SFP               0x11A9
-#define IXGBE_SUBDEV_ID_82599_RNDC              0x1F72
-#define IXGBE_SUBDEV_ID_82599_560FLR            0x17D0
-#define IXGBE_SUBDEV_ID_82599_ECNA_DP           0x0470
-#define IXGBE_DEV_ID_82599_BACKPLANE_FCOE       0x152A
-#define IXGBE_DEV_ID_82599_SFP_FCOE             0x1529
-#define IXGBE_DEV_ID_82599_SFP_EM               0x1507
-#define IXGBE_DEV_ID_82599_SFP_SF2              0x154D
-#define IXGBE_DEV_ID_82599_SFP_SF_QP            0x154A
-#define IXGBE_DEV_ID_82599_QSFP_SF_QP           0x1558
-#define IXGBE_DEV_ID_82599EN_SFP                0x1557
-#define IXGBE_DEV_ID_82599_XAUI_LOM             0x10FC
-#define IXGBE_DEV_ID_82599_T3_LOM               0x151C
-#define IXGBE_DEV_ID_82599_LS                   0x154F
-#define IXGBE_DEV_ID_X540T                      0x1528
-#define IXGBE_DEV_ID_X540T1                     0x1560
-#define IXGBE_DEV_ID_X550EM_X_SFP               0x15AC
-#define IXGBE_DEV_ID_X550EM_X_10G_T             0x15AD
-#define IXGBE_DEV_ID_X550EM_X_1G_T              0x15AE
-#define IXGBE_DEV_ID_X550T                      0x1563
-#define IXGBE_DEV_ID_X550T1                     0x15D1
-#define IXGBE_DEV_ID_X550EM_A_KR                0x15C2
-#define IXGBE_DEV_ID_X550EM_A_KR_L              0x15C3
-#define IXGBE_DEV_ID_X550EM_A_SFP_N             0x15C4
-#define IXGBE_DEV_ID_X550EM_A_SGMII             0x15C6
-#define IXGBE_DEV_ID_X550EM_A_SGMII_L           0x15C7
-#define IXGBE_DEV_ID_X550EM_A_10G_T             0x15C8
-#define IXGBE_DEV_ID_X550EM_A_QSFP              0x15CA
-#define IXGBE_DEV_ID_X550EM_A_QSFP_N            0x15CC
-#define IXGBE_DEV_ID_X550EM_A_SFP               0x15CE
-#define IXGBE_DEV_ID_X550EM_A_1G_T              0x15E4
-#define IXGBE_DEV_ID_X550EM_A_1G_T_L            0x15E5
-#define IXGBE_DEV_ID_X550EM_X_KX4               0x15AA
-#define IXGBE_DEV_ID_X550EM_X_KR                0x15AB
-
-#ifdef RTE_NIC_BYPASS
-#define IXGBE_DEV_ID_82599_BYPASS               0x155D
-#endif
-
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_BX)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
-       IXGBE_DEV_ID_82598AF_SINGLE_PORT)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT2)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_CX4)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
-       IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_XF_LR)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KR)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
-       IXGBE_DEV_ID_82599_COMBO_BACKPLANE)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
-       IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_CX4)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_SFP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_RNDC)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_560FLR)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_ECNA_DP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_FCOE)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_EM)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF2)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF_QP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599EN_SFP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_XAUI_LOM)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_T3_LOM)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_LS)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T1)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_SFP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_10G_T)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_1G_T)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T1)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR_L)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SGMII)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SGMII_L)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_10G_T)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP_N)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T_L)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KX4)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KR)
-
-#ifdef RTE_NIC_BYPASS
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BYPASS)
-#endif
-
-/****************** Virtual IGB devices from e1000_hw.h ******************/
-
-#define E1000_DEV_ID_82576_VF                   0x10CA
-#define E1000_DEV_ID_82576_VF_HV                0x152D
-#define E1000_DEV_ID_I350_VF                    0x1520
-#define E1000_DEV_ID_I350_VF_HV                 0x152F
-
-RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF)
-RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF_HV)
-RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF)
-RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF_HV)
-
-/****************** Virtual IXGBE devices from ixgbe_type.h ******************/
-
-#define IXGBE_DEV_ID_82599_VF                   0x10ED
-#define IXGBE_DEV_ID_82599_VF_HV                0x152E
-#define IXGBE_DEV_ID_X540_VF                    0x1515
-#define IXGBE_DEV_ID_X540_VF_HV                 0x1530
-#define IXGBE_DEV_ID_X550_VF_HV                 0x1564
-#define IXGBE_DEV_ID_X550_VF                    0x1565
-#define IXGBE_DEV_ID_X550EM_A_VF                0x15C5
-#define IXGBE_DEV_ID_X550EM_A_VF_HV             0x15B4
-#define IXGBE_DEV_ID_X550EM_X_VF                0x15A8
-#define IXGBE_DEV_ID_X550EM_X_VF_HV             0x15A9
-
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF_HV)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF_HV)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF_HV)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF_HV)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF_HV)
-
-/*
- * Undef all RTE_PCI_DEV_ID_DECL_* here.
- */
-#undef RTE_PCI_DEV_ID_DECL_IGB
-#undef RTE_PCI_DEV_ID_DECL_IGBVF
-#undef RTE_PCI_DEV_ID_DECL_IXGBE
-#undef RTE_PCI_DEV_ID_DECL_IXGBEVF
index cc3c0f1..3aae098 100644 (file)
@@ -107,7 +107,7 @@ struct rte_tailq_elem {
        RTE_TAILQ_CAST(rte_eal_tailq_lookup(name), struct_name)
 
 /**
- * Dump tail queues to the console.
+ * Dump tail queues to a file.
  *
  * @param f
  *   A pointer to a file for output
@@ -148,8 +148,8 @@ struct rte_tailq_head *rte_eal_tailq_lookup(const char *name);
 int rte_eal_tailq_register(struct rte_tailq_elem *t);
 
 #define EAL_REGISTER_TAILQ(t) \
-void tailqinitfn_ ##t(void); \
-void __attribute__((constructor, used)) tailqinitfn_ ##t(void) \
+RTE_INIT(tailqinitfn_ ##t); \
+static void tailqinitfn_ ##t(void) \
 { \
        if (rte_eal_tailq_register(&t) < 0) \
                rte_panic("Cannot initialize tailq: %s\n", t.name); \
index 4b13b9c..28c6274 100644 (file)
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef _RTE_TIME_H_
+#define _RTE_TIME_H_
+
+#include <stdint.h>
+#include <time.h>
+
 #define NSEC_PER_SEC             1000000000L
 
 /**
@@ -120,3 +126,5 @@ rte_ns_to_timespec(uint64_t nsec)
 
        return ts;
 }
+
+#endif /* _RTE_TIME_H_ */
diff --git a/src/dpdk/lib/librte_eal/common/include/rte_vdev.h b/src/dpdk/lib/librte_eal/common/include/rte_vdev.h
new file mode 100644 (file)
index 0000000..784e837
--- /dev/null
@@ -0,0 +1,102 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 RehiveTech. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of RehiveTech nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_VDEV_H
+#define RTE_VDEV_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/queue.h>
+#include <rte_dev.h>
+
+/** Double linked list of virtual device drivers. */
+TAILQ_HEAD(vdev_driver_list, rte_vdev_driver);
+
+/**
+ * Probe function called for each virtual device driver once.
+ */
+typedef int (rte_vdev_probe_t)(const char *name, const char *args);
+
+/**
+ * Remove function called for each virtual device driver once.
+ */
+typedef int (rte_vdev_remove_t)(const char *name);
+
+/**
+ * A virtual device driver abstraction.
+ */
+struct rte_vdev_driver {
+       TAILQ_ENTRY(rte_vdev_driver) next; /**< Next in list. */
+       struct rte_driver driver;      /**< Inherited general driver. */
+       rte_vdev_probe_t *probe;       /**< Virtual device probe function. */
+       rte_vdev_remove_t *remove;     /**< Virtual device remove function. */
+};
+
+/**
+ * Register a virtual device driver.
+ *
+ * @param driver
+ *   A pointer to a rte_vdev_driver structure describing the driver
+ *   to be registered.
+ */
+void rte_eal_vdrv_register(struct rte_vdev_driver *driver);
+
+/**
+ * Unregister a virtual device driver.
+ *
+ * @param driver
+ *   A pointer to a rte_vdev_driver structure describing the driver
+ *   to be unregistered.
+ */
+void rte_eal_vdrv_unregister(struct rte_vdev_driver *driver);
+
+#define RTE_PMD_REGISTER_VDEV(nm, vdrv)\
+RTE_INIT(vdrvinitfn_ ##vdrv);\
+static const char *vdrvinit_ ## nm ## _alias;\
+static void vdrvinitfn_ ##vdrv(void)\
+{\
+       (vdrv).driver.name = RTE_STR(nm);\
+       (vdrv).driver.alias = vdrvinit_ ## nm ## _alias;\
+       rte_eal_vdrv_register(&vdrv);\
+} \
+RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
+
+#define RTE_PMD_REGISTER_ALIAS(nm, alias)\
+static const char *vdrvinit_ ## nm ## _alias = RTE_STR(alias)
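
A hedged sketch of a virtual PMD using these macros, following the same pattern as upstream drivers such as net_ring (the mydrv names are hypothetical; the alias keeps the pre-17.02 "eth_" name working):

static struct rte_vdev_driver mydrv_vdev = {
        .probe  = mydrv_vdev_probe,     /* hypothetical probe callback  */
        .remove = mydrv_vdev_remove,    /* hypothetical remove callback */
};

RTE_PMD_REGISTER_VDEV(net_mydrv, mydrv_vdev);
RTE_PMD_REGISTER_ALIAS(net_mydrv, eth_mydrv);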
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
index 615deb7..76bfe60 100644 (file)
@@ -45,6 +45,7 @@ extern "C" {
 
 #include <stdint.h>
 #include <string.h>
+#include <stdio.h>
 #include <rte_common.h>
 
 /**
@@ -55,12 +56,12 @@ extern "C" {
 /**
  * Major version/year number i.e. the yy in yy.mm.z
  */
-#define RTE_VER_YEAR 16
+#define RTE_VER_YEAR 17
 
 /**
  * Minor version/month number i.e. the mm in yy.mm.z
  */
-#define RTE_VER_MONTH 7
+#define RTE_VER_MONTH 2
 
 /**
  * Patch level number i.e. the z in yy.mm.z
@@ -70,14 +71,14 @@ extern "C" {
 /**
  * Extra string to be appended to version number
  */
-#define RTE_VER_SUFFIX ""
+#define RTE_VER_SUFFIX "-rc"
 
 /**
  * Patch release number
  *   0-15 = release candidates
  *   16   = release
  */
-#define RTE_VER_RELEASE 16
+#define RTE_VER_RELEASE 2
 
 /**
  * Macro to compute a version number usable for comparisons
index 763fa32..267a4c6 100644 (file)
@@ -221,14 +221,6 @@ rte_eal_malloc_heap_init(void)
        for (ms = &mcfg->memseg[0], ms_cnt = 0;
                        (ms_cnt < RTE_MAX_MEMSEG) && (ms->len > 0);
                        ms_cnt++, ms++) {
-#ifdef RTE_LIBRTE_IVSHMEM
-               /*
-                * if segment has ioremap address set, it's an IVSHMEM segment and
-                * it is not memory to allocate from.
-                */
-               if (ms->ioremap_addr != 0)
-                       continue;
-#endif
                malloc_heap_add_memseg(&mcfg->malloc_heaps[ms->socket_id], ms);
        }
 
index 3fb2188..bf6b818 100644 (file)
@@ -69,7 +69,9 @@
 #include <rte_string_fns.h>
 #include <rte_cpuflags.h>
 #include <rte_interrupts.h>
+#include <rte_bus.h>
 #include <rte_pci.h>
+#include <rte_dev.h>
 #include <rte_devargs.h>
 #include <rte_common.h>
 #include <rte_version.h>
@@ -238,7 +240,8 @@ rte_eal_config_attach(void)
        mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config),
                        PROT_READ, MAP_SHARED, mem_cfg_fd, 0);
        if (mem_config == MAP_FAILED)
-               rte_panic("Cannot mmap memory for rte_config\n");
+               rte_panic("Cannot mmap memory for rte_config! error %i (%s)\n",
+                         errno, strerror(errno));
 
        rte_config.mem_config = mem_config;
 }
@@ -263,9 +266,17 @@ rte_eal_config_reattach(void)
        mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr,
                        sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED,
                        mem_cfg_fd, 0);
+       if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) {
+               if (mem_config != MAP_FAILED)
+                       /* errno is stale, don't use */
+                       rte_panic("Cannot mmap memory for rte_config at [%p], got [%p]"
+                                 " - please use '--base-virtaddr' option\n",
+                                 rte_mem_cfg_addr, mem_config);
+               else
+                       rte_panic("Cannot mmap memory for rte_config! error %i (%s)\n",
+                                 errno, strerror(errno));
+       }
        close(mem_cfg_fd);
-       if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr)
-               rte_panic("Cannot mmap memory for rte_config\n");
 
        rte_config.mem_config = mem_config;
 }
@@ -740,6 +751,9 @@ rte_eal_init(int argc, char **argv)
        char cpuset[RTE_CPU_AFFINITY_STR_LEN];
        char thread_name[RTE_MAX_THREAD_NAME_LEN];
 
+       /* checks if the machine is adequate */
+       rte_cpu_check_supported();
+
        if (!rte_atomic32_test_and_set(&run_once))
                return -1;
 
@@ -748,9 +762,6 @@ rte_eal_init(int argc, char **argv)
 
        thread_id = pthread_self();
 
-       if (rte_eal_log_early_init() < 0)
-               rte_panic("Cannot init early logs\n");
-
        eal_log_level_parse(argc, argv);
 
        /* set log level as early as possible */
@@ -789,6 +800,9 @@ rte_eal_init(int argc, char **argv)
 
        rte_config_init();
 
+       if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0)
+               rte_panic("Cannot init logs\n");
+
        if (rte_eal_pci_init() < 0)
                rte_panic("Cannot init PCI\n");
 
@@ -797,11 +811,6 @@ rte_eal_init(int argc, char **argv)
                rte_panic("Cannot init VFIO\n");
 #endif
 
-#ifdef RTE_LIBRTE_IVSHMEM
-       if (rte_eal_ivshmem_init() < 0)
-               rte_panic("Cannot init IVSHMEM\n");
-#endif
-
        if (rte_eal_memory_init() < 0)
                rte_panic("Cannot init memory\n");
 
@@ -814,14 +823,6 @@ rte_eal_init(int argc, char **argv)
        if (rte_eal_tailqs_init() < 0)
                rte_panic("Cannot init tail queues for objects\n");
 
-#ifdef RTE_LIBRTE_IVSHMEM
-       if (rte_eal_ivshmem_obj_init() < 0)
-               rte_panic("Cannot init IVSHMEM objects\n");
-#endif
-
-       if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0)
-               rte_panic("Cannot init logs\n");
-
        if (rte_eal_alarm_init() < 0)
                rte_panic("Cannot init interrupt-handling thread\n");
 
@@ -841,12 +842,12 @@ rte_eal_init(int argc, char **argv)
                rte_config.master_lcore, (int)thread_id, cpuset,
                ret == 0 ? "" : "...");
 
-       if (rte_eal_dev_init() < 0)
-               rte_panic("Cannot init pmd devices\n");
-
        if (rte_eal_intr_init() < 0)
                rte_panic("Cannot init interrupt-handling thread\n");
 
+       if (rte_bus_scan())
+               rte_panic("Cannot scan the buses for devices\n");
+
        RTE_LCORE_FOREACH_SLAVE(i) {
 
                /*
@@ -883,10 +884,17 @@ rte_eal_init(int argc, char **argv)
        rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
        rte_eal_mp_wait_lcore();
 
+       /* Probe all the buses and devices/drivers on them */
+       if (rte_bus_probe())
+               rte_panic("Cannot probe devices\n");
+
        /* Probe & Initialize PCI devices */
        if (rte_eal_pci_probe())
                rte_panic("Cannot probe PCI\n");
 
+       if (rte_eal_dev_init() < 0)
+               rte_panic("Cannot init pmd devices\n");
+
        rte_eal_mcfg_complete();
 
        return fctret;
index 54ab625..b5b3f2b 100644 (file)
@@ -73,9 +73,6 @@
 
 static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */
 
-// TREX_PATCH
-int eal_err_read_from_file_is_error = 1;
-
 /**
  * union for pipe fds.
  */
@@ -139,7 +136,7 @@ static pthread_t intr_thread;
 
 /* enable legacy (INTx) interrupts */
 static int
-vfio_enable_intx(struct rte_intr_handle *intr_handle) {
+vfio_enable_intx(const struct rte_intr_handle *intr_handle) {
        struct vfio_irq_set *irq_set;
        char irq_set_buf[IRQ_SET_BUF_LEN];
        int len, ret;
@@ -186,7 +183,7 @@ vfio_enable_intx(struct rte_intr_handle *intr_handle) {
 
 /* disable legacy (INTx) interrupts */
 static int
-vfio_disable_intx(struct rte_intr_handle *intr_handle) {
+vfio_disable_intx(const struct rte_intr_handle *intr_handle) {
        struct vfio_irq_set *irq_set;
        char irq_set_buf[IRQ_SET_BUF_LEN];
        int len, ret;
@@ -229,7 +226,7 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) {
 
 /* enable MSI interrupts */
 static int
-vfio_enable_msi(struct rte_intr_handle *intr_handle) {
+vfio_enable_msi(const struct rte_intr_handle *intr_handle) {
        int len, ret;
        char irq_set_buf[IRQ_SET_BUF_LEN];
        struct vfio_irq_set *irq_set;
@@ -258,7 +255,7 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) {
 
 /* disable MSI interrupts */
 static int
-vfio_disable_msi(struct rte_intr_handle *intr_handle) {
+vfio_disable_msi(const struct rte_intr_handle *intr_handle) {
        struct vfio_irq_set *irq_set;
        char irq_set_buf[IRQ_SET_BUF_LEN];
        int len, ret;
@@ -281,9 +278,30 @@ vfio_disable_msi(struct rte_intr_handle *intr_handle) {
        return ret;
 }
 
+static int
+get_max_intr(const struct rte_intr_handle *intr_handle)
+{
+       struct rte_intr_source *src;
+
+       TAILQ_FOREACH(src, &intr_sources, next) {
+               if (src->intr_handle.fd != intr_handle->fd)
+                       continue;
+
+               if (!src->intr_handle.max_intr)
+                       src->intr_handle.max_intr = 1;
+               else if (src->intr_handle.max_intr > RTE_MAX_RXTX_INTR_VEC_ID)
+                       src->intr_handle.max_intr
+                               = RTE_MAX_RXTX_INTR_VEC_ID + 1;
+
+               return src->intr_handle.max_intr;
+       }
+
+       return -1;
+}
+
 /* enable MSI-X interrupts */
 static int
-vfio_enable_msix(struct rte_intr_handle *intr_handle) {
+vfio_enable_msix(const struct rte_intr_handle *intr_handle) {
        int len, ret;
        char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
        struct vfio_irq_set *irq_set;
@@ -293,12 +311,15 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
 
        irq_set = (struct vfio_irq_set *) irq_set_buf;
        irq_set->argsz = len;
-       if (!intr_handle->max_intr)
-               intr_handle->max_intr = 1;
-       else if (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID)
-               intr_handle->max_intr = RTE_MAX_RXTX_INTR_VEC_ID + 1;
 
-       irq_set->count = intr_handle->max_intr;
+       ret = get_max_intr(intr_handle);
+       if (ret < 0) {
+               RTE_LOG(ERR, EAL, "Invalid number of MSI-X irqs for fd %d\n",
+                       intr_handle->fd);
+               return -1;
+       }
+
+       irq_set->count = ret;
        irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
        irq_set->start = 0;
@@ -321,7 +342,7 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
 
 /* disable MSI-X interrupts */
 static int
-vfio_disable_msix(struct rte_intr_handle *intr_handle) {
+vfio_disable_msix(const struct rte_intr_handle *intr_handle) {
        struct vfio_irq_set *irq_set;
        char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
        int len, ret;
@@ -346,7 +367,7 @@ vfio_disable_msix(struct rte_intr_handle *intr_handle) {
 #endif
 
 static int
-uio_intx_intr_disable(struct rte_intr_handle *intr_handle)
+uio_intx_intr_disable(const struct rte_intr_handle *intr_handle)
 {
        unsigned char command_high;
 
@@ -370,7 +391,7 @@ uio_intx_intr_disable(struct rte_intr_handle *intr_handle)
 }
 
 static int
-uio_intx_intr_enable(struct rte_intr_handle *intr_handle)
+uio_intx_intr_enable(const struct rte_intr_handle *intr_handle)
 {
        unsigned char command_high;
 
@@ -394,7 +415,7 @@ uio_intx_intr_enable(struct rte_intr_handle *intr_handle)
 }
 
 static int
-uio_intr_disable(struct rte_intr_handle *intr_handle)
+uio_intr_disable(const struct rte_intr_handle *intr_handle)
 {
        const int value = 0;
 
@@ -408,7 +429,7 @@ uio_intr_disable(struct rte_intr_handle *intr_handle)
 }
 
 static int
-uio_intr_enable(struct rte_intr_handle *intr_handle)
+uio_intr_enable(const struct rte_intr_handle *intr_handle)
 {
        const int value = 1;
 
@@ -422,7 +443,7 @@ uio_intr_enable(struct rte_intr_handle *intr_handle)
 }
 
 int
-rte_intr_callback_register(struct rte_intr_handle *intr_handle,
+rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
                        rte_intr_callback_fn cb, void *cb_arg)
 {
        int ret, wake_thread;
@@ -494,7 +515,7 @@ rte_intr_callback_register(struct rte_intr_handle *intr_handle,
 }
 
 int
-rte_intr_callback_unregister(struct rte_intr_handle *intr_handle,
+rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
                        rte_intr_callback_fn cb_fn, void *cb_arg)
 {
        int ret;
@@ -558,7 +579,7 @@ rte_intr_callback_unregister(struct rte_intr_handle *intr_handle,
 }
 
 int
-rte_intr_enable(struct rte_intr_handle *intr_handle)
+rte_intr_enable(const struct rte_intr_handle *intr_handle)
 {
        if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
                return -1;
@@ -602,7 +623,7 @@ rte_intr_enable(struct rte_intr_handle *intr_handle)
 }
 
 int
-rte_intr_disable(struct rte_intr_handle *intr_handle)
+rte_intr_disable(const struct rte_intr_handle *intr_handle)
 {
        if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
                return -1;
@@ -712,19 +733,10 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds)
                                if (errno == EINTR || errno == EWOULDBLOCK)
                                        continue;
 
-                // TREX_PATCH. Because of issues with e1000, we want this message to
-                // have lower priority only if running on e1000 card
-                if (eal_err_read_from_file_is_error) {
-                    RTE_LOG(ERR, EAL, "Error reading from file "
-                            "descriptor %d: %s\n",
-                            events[n].data.fd,
-                            strerror(errno));
-                } else {
-                    RTE_LOG(INFO, EAL, "Error reading from file "
-                            "descriptor %d: %s\n",
-                            events[n].data.fd,
-                            strerror(errno));
-                }
+                               RTE_LOG(ERR, EAL, "Error reading from file "
+                                       "descriptor %d: %s\n",
+                                       events[n].data.fd,
+                                       strerror(errno));
                        } else if (bytes_read == 0)
                                RTE_LOG(ERR, EAL, "Read nothing from file "
                                        "descriptor %d\n", events[n].data.fd);
@@ -1169,7 +1181,7 @@ rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
                                RTE_LOG(ERR, EAL,
                                        "can't setup eventfd, error %i (%s)\n",
                                        errno, strerror(errno));
-                               return -1;
+                               return -errno;
                        }
                        intr_handle->efds[i] = fd;
                }
diff --git a/src/dpdk/lib/librte_eal/linuxapp/eal/eal_ivshmem.c b/src/dpdk/lib/librte_eal/linuxapp/eal/eal_ivshmem.c
deleted file mode 100644 (file)
index 67b3caf..0000000
+++ /dev/null
@@ -1,954 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifdef RTE_LIBRTE_IVSHMEM /* hide it from coverage */
-
-#include <stdint.h>
-#include <unistd.h>
-#include <inttypes.h>
-#include <sys/mman.h>
-#include <sys/file.h>
-#include <string.h>
-#include <sys/queue.h>
-
-#include <rte_log.h>
-#include <rte_pci.h>
-#include <rte_memory.h>
-#include <rte_eal.h>
-#include <rte_eal_memconfig.h>
-#include <rte_string_fns.h>
-#include <rte_errno.h>
-#include <rte_ring.h>
-#include <rte_malloc.h>
-#include <rte_common.h>
-#include <rte_ivshmem.h>
-
-#include "eal_internal_cfg.h"
-#include "eal_private.h"
-
-#define PCI_VENDOR_ID_IVSHMEM 0x1Af4
-#define PCI_DEVICE_ID_IVSHMEM 0x1110
-
-#define IVSHMEM_MAGIC 0x0BADC0DE
-
-#define IVSHMEM_RESOURCE_PATH "/sys/bus/pci/devices/%04x:%02x:%02x.%x/resource2"
-#define IVSHMEM_CONFIG_PATH "/var/run/.%s_ivshmem_config"
-
-#define PHYS 0x1
-#define VIRT 0x2
-#define IOREMAP 0x4
-#define FULL (PHYS|VIRT|IOREMAP)
-
-#define METADATA_SIZE_ALIGNED \
-       (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz))
-
-#define CONTAINS(x,y)\
-       (((y).addr_64 >= (x).addr_64) && ((y).addr_64 < (x).addr_64 + (x).len))
-
-#define DIM(x) (sizeof(x)/sizeof(x[0]))
-
-struct ivshmem_pci_device {
-       char path[PATH_MAX];
-       phys_addr_t ioremap_addr;
-};
-
-/* data type to store in config */
-struct ivshmem_segment {
-       struct rte_ivshmem_metadata_entry entry;
-       uint64_t align;
-       char path[PATH_MAX];
-};
-struct ivshmem_shared_config {
-       struct ivshmem_segment segment[RTE_MAX_MEMSEG];
-       uint32_t segment_idx;
-       struct ivshmem_pci_device pci_devs[RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS];
-       uint32_t pci_devs_idx;
-};
-static struct ivshmem_shared_config * ivshmem_config;
-static int memseg_idx;
-static int pagesz;
-
-/* Tailq heads to add rings to */
-TAILQ_HEAD(rte_ring_list, rte_tailq_entry);
-
-/*
- * Utility functions
- */
-
-static int
-is_ivshmem_device(struct rte_pci_device * dev)
-{
-       return dev->id.vendor_id == PCI_VENDOR_ID_IVSHMEM
-                       && dev->id.device_id == PCI_DEVICE_ID_IVSHMEM;
-}
-
-static void *
-map_metadata(int fd, uint64_t len)
-{
-       size_t metadata_len = sizeof(struct rte_ivshmem_metadata);
-       size_t aligned_len = METADATA_SIZE_ALIGNED;
-
-       return mmap(NULL, metadata_len, PROT_READ | PROT_WRITE,
-                       MAP_SHARED, fd, len - aligned_len);
-}
-
-static void
-unmap_metadata(void * ptr)
-{
-       munmap(ptr, sizeof(struct rte_ivshmem_metadata));
-}
-
-static int
-has_ivshmem_metadata(int fd, uint64_t len)
-{
-       struct rte_ivshmem_metadata metadata;
-       void * ptr;
-
-       ptr = map_metadata(fd, len);
-
-       if (ptr == MAP_FAILED)
-               return -1;
-
-       metadata = *(struct rte_ivshmem_metadata*) (ptr);
-
-       unmap_metadata(ptr);
-
-       return metadata.magic_number == IVSHMEM_MAGIC;
-}
-
-static void
-remove_segment(struct ivshmem_segment * ms, int len, int idx)
-{
-       int i;
-
-       for (i = idx; i < len - 1; i++)
-               memcpy(&ms[i], &ms[i+1], sizeof(struct ivshmem_segment));
-       memset(&ms[len-1], 0, sizeof(struct ivshmem_segment));
-}
-
-static int
-overlap(const struct rte_memzone * mz1, const struct rte_memzone * mz2)
-{
-       uint64_t start1, end1, start2, end2;
-       uint64_t p_start1, p_end1, p_start2, p_end2;
-       uint64_t i_start1, i_end1, i_start2, i_end2;
-       int result = 0;
-
-       /* gather virtual addresses */
-       start1 = mz1->addr_64;
-       end1 = mz1->addr_64 + mz1->len;
-       start2 = mz2->addr_64;
-       end2 = mz2->addr_64 + mz2->len;
-
-       /* gather physical addresses */
-       p_start1 = mz1->phys_addr;
-       p_end1 = mz1->phys_addr + mz1->len;
-       p_start2 = mz2->phys_addr;
-       p_end2 = mz2->phys_addr + mz2->len;
-
-       /* gather ioremap addresses */
-       i_start1 = mz1->ioremap_addr;
-       i_end1 = mz1->ioremap_addr + mz1->len;
-       i_start2 = mz2->ioremap_addr;
-       i_end2 = mz2->ioremap_addr + mz2->len;
-
-       /* check for overlap in virtual addresses */
-       if (start1 >= start2 && start1 < end2)
-               result |= VIRT;
-       if (start2 >= start1 && start2 < end1)
-               result |= VIRT;
-
-       /* check for overlap in physical addresses */
-       if (p_start1 >= p_start2 && p_start1 < p_end2)
-               result |= PHYS;
-       if (p_start2 >= p_start1 && p_start2 < p_end1)
-               result |= PHYS;
-
-       /* check for overlap in ioremap addresses */
-       if (i_start1 >= i_start2 && i_start1 < i_end2)
-               result |= IOREMAP;
-       if (i_start2 >= i_start1 && i_start2 < i_end1)
-               result |= IOREMAP;
-
-       return result;
-}
-
-static int
-adjacent(const struct rte_memzone * mz1, const struct rte_memzone * mz2)
-{
-       uint64_t start1, end1, start2, end2;
-       uint64_t p_start1, p_end1, p_start2, p_end2;
-       uint64_t i_start1, i_end1, i_start2, i_end2;
-       int result = 0;
-
-       /* gather virtual addresses */
-       start1 = mz1->addr_64;
-       end1 = mz1->addr_64 + mz1->len;
-       start2 = mz2->addr_64;
-       end2 = mz2->addr_64 + mz2->len;
-
-       /* gather physical addresses */
-       p_start1 = mz1->phys_addr;
-       p_end1 = mz1->phys_addr + mz1->len;
-       p_start2 = mz2->phys_addr;
-       p_end2 = mz2->phys_addr + mz2->len;
-
-       /* gather ioremap addresses */
-       i_start1 = mz1->ioremap_addr;
-       i_end1 = mz1->ioremap_addr + mz1->len;
-       i_start2 = mz2->ioremap_addr;
-       i_end2 = mz2->ioremap_addr + mz2->len;
-
-       /* check if segments are virtually adjacent */
-       if (start1 == end2)
-               result |= VIRT;
-       if (start2 == end1)
-               result |= VIRT;
-
-       /* check if segments are physically adjacent */
-       if (p_start1 == p_end2)
-               result |= PHYS;
-       if (p_start2 == p_end1)
-               result |= PHYS;
-
-       /* check if segments are ioremap-adjacent */
-       if (i_start1 == i_end2)
-               result |= IOREMAP;
-       if (i_start2 == i_end1)
-               result |= IOREMAP;
-
-       return result;
-}
-
-static int
-has_adjacent_segments(struct ivshmem_segment * ms, int len)
-{
-       int i, j;
-
-       for (i = 0; i < len; i++)
-               for (j = i + 1; j < len; j++) {
-                       /* we're only interested in fully adjacent segments; partially
-                        * adjacent segments can coexist.
-                        */
-                       if (adjacent(&ms[i].entry.mz, &ms[j].entry.mz) == FULL)
-                               return 1;
-               }
-       return 0;
-}
-
-static int
-has_overlapping_segments(struct ivshmem_segment * ms, int len)
-{
-       int i, j;
-
-       for (i = 0; i < len; i++)
-               for (j = i + 1; j < len; j++)
-                       if (overlap(&ms[i].entry.mz, &ms[j].entry.mz))
-                               return 1;
-       return 0;
-}
-
-static int
-seg_compare(const void * a, const void * b)
-{
-       const struct ivshmem_segment * s1 = (const struct ivshmem_segment*) a;
-       const struct ivshmem_segment * s2 = (const struct ivshmem_segment*) b;
-
-       /* move unallocated zones to the end */
-       if (s1->entry.mz.addr == NULL && s2->entry.mz.addr == NULL)
-               return 0;
-       if (s1->entry.mz.addr == 0)
-               return 1;
-       if (s2->entry.mz.addr == 0)
-               return -1;
-
-       return s1->entry.mz.phys_addr > s2->entry.mz.phys_addr;
-}
-
-#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
-static void
-entry_dump(struct rte_ivshmem_metadata_entry *e)
-{
-       RTE_LOG(DEBUG, EAL, "\tvirt: %p-%p\n", e->mz.addr,
-                       RTE_PTR_ADD(e->mz.addr, e->mz.len));
-       RTE_LOG(DEBUG, EAL, "\tphys: 0x%" PRIx64 "-0x%" PRIx64 "\n",
-                       e->mz.phys_addr,
-                       e->mz.phys_addr + e->mz.len);
-       RTE_LOG(DEBUG, EAL, "\tio: 0x%" PRIx64 "-0x%" PRIx64 "\n",
-                       e->mz.ioremap_addr,
-                       e->mz.ioremap_addr + e->mz.len);
-       RTE_LOG(DEBUG, EAL, "\tlen: 0x%" PRIx64 "\n", e->mz.len);
-       RTE_LOG(DEBUG, EAL, "\toff: 0x%" PRIx64 "\n", e->offset);
-}
-#endif
-
-
-
-/*
- * Actual useful code
- */
-
-/* read through metadata mapped from the IVSHMEM device */
-static int
-read_metadata(char * path, int path_len, int fd, uint64_t flen)
-{
-       struct rte_ivshmem_metadata metadata;
-       struct rte_ivshmem_metadata_entry * entry;
-       int idx, i;
-       void * ptr;
-
-       ptr = map_metadata(fd, flen);
-
-       if (ptr == MAP_FAILED)
-               return -1;
-
-       metadata = *(struct rte_ivshmem_metadata*) (ptr);
-
-       unmap_metadata(ptr);
-
-       RTE_LOG(DEBUG, EAL, "Parsing metadata for \"%s\"\n", metadata.name);
-
-       idx = ivshmem_config->segment_idx;
-
-       for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES &&
-               idx <= RTE_MAX_MEMSEG; i++) {
-
-               if (idx == RTE_MAX_MEMSEG) {
-                       RTE_LOG(ERR, EAL, "Not enough memory segments!\n");
-                       return -1;
-               }
-
-               entry = &metadata.entry[i];
-
-               /* stop on uninitialized memzone */
-               if (entry->mz.len == 0)
-                       break;
-
-               /* copy metadata entry */
-               memcpy(&ivshmem_config->segment[idx].entry, entry,
-                               sizeof(struct rte_ivshmem_metadata_entry));
-
-               /* copy path */
-               snprintf(ivshmem_config->segment[idx].path, path_len, "%s", path);
-
-               idx++;
-       }
-       ivshmem_config->segment_idx = idx;
-
-       return 0;
-}
-
-/* check through each segment and look for adjacent or overlapping ones. */
-static int
-cleanup_segments(struct ivshmem_segment * ms, int tbl_len)
-{
-       struct ivshmem_segment * s, * tmp;
-       int i, j, concat, seg_adjacent, seg_overlapping;
-       uint64_t start1, start2, end1, end2, p_start1, p_start2, i_start1, i_start2;
-
-       qsort(ms, tbl_len, sizeof(struct ivshmem_segment),
-                               seg_compare);
-
-       while (has_overlapping_segments(ms, tbl_len) ||
-                       has_adjacent_segments(ms, tbl_len)) {
-
-               for (i = 0; i < tbl_len; i++) {
-                       s = &ms[i];
-
-                       concat = 0;
-
-                       for (j = i + 1; j < tbl_len; j++) {
-                               tmp = &ms[j];
-
-                               /* check if this segment is overlapping with existing segment,
-                                * or is adjacent to existing segment */
-                               seg_overlapping = overlap(&s->entry.mz, &tmp->entry.mz);
-                               seg_adjacent = adjacent(&s->entry.mz, &tmp->entry.mz);
-
-                               /* check if segments fully overlap or are fully adjacent */
-                               if ((seg_adjacent == FULL) || (seg_overlapping == FULL)) {
-
-#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
-                                       RTE_LOG(DEBUG, EAL, "Concatenating segments\n");
-                                       RTE_LOG(DEBUG, EAL, "Segment %i:\n", i);
-                                       entry_dump(&s->entry);
-                                       RTE_LOG(DEBUG, EAL, "Segment %i:\n", j);
-                                       entry_dump(&tmp->entry);
-#endif
-
-                                       start1 = s->entry.mz.addr_64;
-                                       start2 = tmp->entry.mz.addr_64;
-                                       p_start1 = s->entry.mz.phys_addr;
-                                       p_start2 = tmp->entry.mz.phys_addr;
-                                       i_start1 = s->entry.mz.ioremap_addr;
-                                       i_start2 = tmp->entry.mz.ioremap_addr;
-                                       end1 = s->entry.mz.addr_64 + s->entry.mz.len;
-                                       end2 = tmp->entry.mz.addr_64 + tmp->entry.mz.len;
-
-                                       /* settle for minimum start address and maximum length */
-                                       s->entry.mz.addr_64 = RTE_MIN(start1, start2);
-                                       s->entry.mz.phys_addr = RTE_MIN(p_start1, p_start2);
-                                       s->entry.mz.ioremap_addr = RTE_MIN(i_start1, i_start2);
-                                       s->entry.offset = RTE_MIN(s->entry.offset, tmp->entry.offset);
-                                       s->entry.mz.len = RTE_MAX(end1, end2) - s->entry.mz.addr_64;
-                                       concat = 1;
-
-#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
-                                       RTE_LOG(DEBUG, EAL, "Resulting segment:\n");
-                                       entry_dump(&s->entry);
-
-#endif
-                               }
-                               /* if segments not fully overlap, we have an error condition.
-                                * adjacent segments can coexist.
-                                */
-                               else if (seg_overlapping > 0) {
-                                       RTE_LOG(ERR, EAL, "Segments %i and %i overlap!\n", i, j);
-#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
-                                       RTE_LOG(DEBUG, EAL, "Segment %i:\n", i);
-                                       entry_dump(&s->entry);
-                                       RTE_LOG(DEBUG, EAL, "Segment %i:\n", j);
-                                       entry_dump(&tmp->entry);
-#endif
-                                       return -1;
-                               }
-                               if (concat)
-                                       break;
-                       }
-                       /* if we concatenated, remove segment at j */
-                       if (concat) {
-                               remove_segment(ms, tbl_len, j);
-                               tbl_len--;
-                               break;
-                       }
-               }
-       }
-
-       return tbl_len;
-}
-
-static int
-create_shared_config(void)
-{
-       char path[PATH_MAX];
-       int fd;
-
-       /* build ivshmem config file path */
-       snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH,
-                       internal_config.hugefile_prefix);
-
-       fd = open(path, O_CREAT | O_RDWR, 0600);
-
-       if (fd < 0) {
-               RTE_LOG(ERR, EAL, "Could not open %s: %s\n", path, strerror(errno));
-               return -1;
-       }
-
-       /* try ex-locking first - if the file is locked, we have a problem */
-       if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
-               RTE_LOG(ERR, EAL, "Locking %s failed: %s\n", path, strerror(errno));
-               close(fd);
-               return -1;
-       }
-
-       if (ftruncate(fd, sizeof(struct ivshmem_shared_config)) < 0) {
-               RTE_LOG(ERR, EAL, "ftruncate failed: %s\n", strerror(errno));
-               return -1;
-       }
-
-       ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config),
-                       PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-
-       if (ivshmem_config == MAP_FAILED)
-               return -1;
-
-       memset(ivshmem_config, 0, sizeof(struct ivshmem_shared_config));
-
-       /* change the exclusive lock we got earlier to a shared lock */
-       if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
-               RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno));
-               return -1;
-       }
-
-       close(fd);
-
-       return 0;
-}
-
-/* open shared config file and, if present, map the config.
- * having no config file is not an error condition, as we later check if
- * ivshmem_config is NULL (if it is, that means nothing was mapped). */
-static int
-open_shared_config(void)
-{
-       char path[PATH_MAX];
-       int fd;
-
-       /* build ivshmem config file path */
-       snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH,
-                       internal_config.hugefile_prefix);
-
-       fd = open(path, O_RDONLY);
-
-       /* if the file doesn't exist, just return success */
-       if (fd < 0 && errno == ENOENT)
-               return 0;
-       /* else we have an error condition */
-       else if (fd < 0) {
-               RTE_LOG(ERR, EAL, "Could not open %s: %s\n",
-                               path, strerror(errno));
-               return -1;
-       }
-
-       /* try ex-locking first - if the lock *does* succeed, this means it's a
-        * stray config file, so it should be deleted.
-        */
-       if (flock(fd, LOCK_EX | LOCK_NB) != -1) {
-
-               /* if we can't remove the file, something is wrong */
-               if (unlink(path) < 0) {
-                       RTE_LOG(ERR, EAL, "Could not remove %s: %s\n", path,
-                                       strerror(errno));
-                       return -1;
-               }
-
-               /* release the lock */
-               flock(fd, LOCK_UN);
-               close(fd);
-
-               /* return success as having a stray config file is equivalent to not
-                * having config file at all.
-                */
-               return 0;
-       }
-
-       ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config),
-                       PROT_READ, MAP_SHARED, fd, 0);
-
-       if (ivshmem_config == MAP_FAILED)
-               return -1;
-
-       /* place a shared lock on config file */
-       if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
-               RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno));
-               return -1;
-       }
-
-       close(fd);
-
-       return 0;
-}
-
-/*
- * This function does the following:
- *
- * 1) Builds a table of ivshmem_segments with proper offset alignment
- * 2) Cleans up that table so that we don't have any overlapping or adjacent
- *    memory segments
- * 3) Creates memsegs from this table and maps them into memory.
- */
-static inline int
-map_all_segments(void)
-{
-       struct ivshmem_segment ms_tbl[RTE_MAX_MEMSEG];
-       struct ivshmem_pci_device * pci_dev;
-       struct rte_mem_config * mcfg;
-       struct ivshmem_segment * seg;
-       int fd, fd_zero;
-       unsigned i, j;
-       struct rte_memzone mz;
-       struct rte_memseg ms;
-       void * base_addr;
-       uint64_t align, len;
-       phys_addr_t ioremap_addr;
-
-       ioremap_addr = 0;
-
-       memset(ms_tbl, 0, sizeof(ms_tbl));
-       memset(&mz, 0, sizeof(struct rte_memzone));
-       memset(&ms, 0, sizeof(struct rte_memseg));
-
-       /* first, build a table of memsegs to map, to avoid failed mmaps due to
-        * overlaps
-        */
-       for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMSEG; i++) {
-               if (i == RTE_MAX_MEMSEG) {
-                       RTE_LOG(ERR, EAL, "Too many segments requested!\n");
-                       return -1;
-               }
-
-               seg = &ivshmem_config->segment[i];
-
-               /* copy segment to table */
-               memcpy(&ms_tbl[i], seg, sizeof(struct ivshmem_segment));
-
-               /* find ioremap addr */
-               for (j = 0; j < DIM(ivshmem_config->pci_devs); j++) {
-                       pci_dev = &ivshmem_config->pci_devs[j];
-                       if (!strncmp(pci_dev->path, seg->path, sizeof(pci_dev->path))) {
-                               ioremap_addr = pci_dev->ioremap_addr;
-                               break;
-                       }
-               }
-               if (ioremap_addr == 0) {
-                       RTE_LOG(ERR, EAL, "Cannot find ioremap addr!\n");
-                       return -1;
-               }
-
-               /* work out alignments */
-               align = seg->entry.mz.addr_64 -
-                               RTE_ALIGN_FLOOR(seg->entry.mz.addr_64, 0x1000);
-               len = RTE_ALIGN_CEIL(seg->entry.mz.len + align, 0x1000);
-
-               /* save original alignments */
-               ms_tbl[i].align = align;
-
-               /* create a memory zone */
-               mz.addr_64 = seg->entry.mz.addr_64 - align;
-               mz.len = len;
-               mz.hugepage_sz = seg->entry.mz.hugepage_sz;
-               mz.phys_addr = seg->entry.mz.phys_addr - align;
-
-               /* find true physical address */
-               mz.ioremap_addr = ioremap_addr + seg->entry.offset - align;
-
-               ms_tbl[i].entry.offset = seg->entry.offset - align;
-
-               memcpy(&ms_tbl[i].entry.mz, &mz, sizeof(struct rte_memzone));
-       }
-
-       /* clean up the segments */
-       memseg_idx = cleanup_segments(ms_tbl, ivshmem_config->segment_idx);
-
-       if (memseg_idx < 0)
-               return -1;
-
-       mcfg = rte_eal_get_configuration()->mem_config;
-
-       fd_zero = open("/dev/zero", O_RDWR);
-
-       if (fd_zero < 0) {
-               RTE_LOG(ERR, EAL, "Cannot open /dev/zero: %s\n", strerror(errno));
-               return -1;
-       }
-
-       /* create memsegs and put them into DPDK memory */
-       for (i = 0; i < (unsigned) memseg_idx; i++) {
-
-               seg = &ms_tbl[i];
-
-               ms.addr_64 = seg->entry.mz.addr_64;
-               ms.hugepage_sz = seg->entry.mz.hugepage_sz;
-               ms.len = seg->entry.mz.len;
-               ms.nchannel = rte_memory_get_nchannel();
-               ms.nrank = rte_memory_get_nrank();
-               ms.phys_addr = seg->entry.mz.phys_addr;
-               ms.ioremap_addr = seg->entry.mz.ioremap_addr;
-               ms.socket_id = seg->entry.mz.socket_id;
-
-               base_addr = mmap(ms.addr, ms.len,
-                               PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_zero, 0);
-
-               if (base_addr == MAP_FAILED || base_addr != ms.addr) {
-                       RTE_LOG(ERR, EAL, "Cannot map /dev/zero!\n");
-                       return -1;
-               }
-
-               fd = open(seg->path, O_RDWR);
-
-               if (fd < 0) {
-                       RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", seg->path,
-                                       strerror(errno));
-                       return -1;
-               }
-
-               munmap(ms.addr, ms.len);
-
-               base_addr = mmap(ms.addr, ms.len,
-                               PROT_READ | PROT_WRITE, MAP_SHARED, fd,
-                               seg->entry.offset);
-
-
-               if (base_addr == MAP_FAILED || base_addr != ms.addr) {
-                       RTE_LOG(ERR, EAL, "Cannot map segment into memory: "
-                                       "expected %p got %p (%s)\n", ms.addr, base_addr,
-                                       strerror(errno));
-                       return -1;
-               }
-
-               RTE_LOG(DEBUG, EAL, "Memory segment mapped: %p (len %" PRIx64 ") at "
-                               "offset 0x%" PRIx64 "\n",
-                               ms.addr, ms.len, seg->entry.offset);
-
-               /* put the pointers back into their real positions using original
-                * alignment */
-               ms.addr_64 += seg->align;
-               ms.phys_addr += seg->align;
-               ms.ioremap_addr += seg->align;
-               ms.len -= seg->align;
-
-               /* at this point, the rest of DPDK memory is not initialized, so we
-                * expect memsegs to be empty */
-               memcpy(&mcfg->memseg[i], &ms,
-                               sizeof(struct rte_memseg));
-
-               close(fd);
-
-               RTE_LOG(DEBUG, EAL, "IVSHMEM segment found, size: 0x%lx\n",
-                               ms.len);
-       }
-
-       return 0;
-}
-
-/* this happens at a later stage, after general EAL memory initialization */
-int
-rte_eal_ivshmem_obj_init(void)
-{
-       struct rte_ring_list* ring_list = NULL;
-       struct rte_mem_config * mcfg;
-       struct ivshmem_segment * seg;
-       struct rte_memzone * mz;
-       struct rte_ring * r;
-       struct rte_tailq_entry *te;
-       unsigned i, ms, idx;
-       uint64_t offset;
-
-       /* secondary process would not need any object discovery - it'll all
-        * already be in shared config */
-       if (rte_eal_process_type() != RTE_PROC_PRIMARY || ivshmem_config == NULL)
-               return 0;
-
-       /* check that we have an initialised ring tail queue */
-       ring_list = RTE_TAILQ_LOOKUP(RTE_TAILQ_RING_NAME, rte_ring_list);
-       if (ring_list == NULL) {
-               RTE_LOG(ERR, EAL, "No rte_ring tailq found!\n");
-               return -1;
-       }
-
-       mcfg = rte_eal_get_configuration()->mem_config;
-
-       /* create memzones */
-       for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMZONE; i++) {
-
-               seg = &ivshmem_config->segment[i];
-
-               /* add memzone */
-               if (mcfg->memzone_cnt == RTE_MAX_MEMZONE) {
-                       RTE_LOG(ERR, EAL, "No more memory zones available!\n");
-                       return -1;
-               }
-
-               idx = mcfg->memzone_cnt;
-
-               RTE_LOG(DEBUG, EAL, "Found memzone: '%s' at %p (len 0x%" PRIx64 ")\n",
-                               seg->entry.mz.name, seg->entry.mz.addr, seg->entry.mz.len);
-
-               memcpy(&mcfg->memzone[idx], &seg->entry.mz,
-                               sizeof(struct rte_memzone));
-
-               /* find ioremap address */
-               for (ms = 0; ms <= RTE_MAX_MEMSEG; ms++) {
-                       if (ms == RTE_MAX_MEMSEG) {
-                               RTE_LOG(ERR, EAL, "Physical address of segment not found!\n");
-                               return -1;
-                       }
-                       if (CONTAINS(mcfg->memseg[ms], mcfg->memzone[idx])) {
-                               offset = mcfg->memzone[idx].addr_64 -
-                                                               mcfg->memseg[ms].addr_64;
-                               mcfg->memzone[idx].ioremap_addr = mcfg->memseg[ms].ioremap_addr +
-                                               offset;
-                               break;
-                       }
-               }
-
-               mcfg->memzone_cnt++;
-       }
-
-       rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
-
-       /* find rings */
-       for (i = 0; i < mcfg->memzone_cnt; i++) {
-               mz = &mcfg->memzone[i];
-
-               /* check if memzone has a ring prefix */
-               if (strncmp(mz->name, RTE_RING_MZ_PREFIX,
-                               sizeof(RTE_RING_MZ_PREFIX) - 1) != 0)
-                       continue;
-
-               r = (struct rte_ring*) (mz->addr_64);
-
-               te = rte_zmalloc("RING_TAILQ_ENTRY", sizeof(*te), 0);
-               if (te == NULL) {
-                       RTE_LOG(ERR, EAL, "Cannot allocate ring tailq entry!\n");
-                       return -1;
-               }
-
-               te->data = (void *) r;
-
-               TAILQ_INSERT_TAIL(ring_list, te, next);
-
-               RTE_LOG(DEBUG, EAL, "Found ring: '%s' at %p\n", r->name, mz->addr);
-       }
-       rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
-
-#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
-       rte_memzone_dump(stdout);
-       rte_ring_list_dump(stdout);
-#endif
-
-       return 0;
-}
-
-/* initialize ivshmem structures */
-int rte_eal_ivshmem_init(void)
-{
-       struct rte_pci_device * dev;
-       struct rte_pci_resource * res;
-       int fd, ret;
-       char path[PATH_MAX];
-
-       /* initialize everything to 0 */
-       memset(path, 0, sizeof(path));
-       ivshmem_config = NULL;
-
-       pagesz = getpagesize();
-
-       RTE_LOG(DEBUG, EAL, "Searching for IVSHMEM devices...\n");
-
-       if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
-
-               if (open_shared_config() < 0) {
-                       RTE_LOG(ERR, EAL, "Could not open IVSHMEM config!\n");
-                       return -1;
-               }
-       }
-       else {
-
-               TAILQ_FOREACH(dev, &pci_device_list, next) {
-
-                       if (is_ivshmem_device(dev)) {
-
-                               /* IVSHMEM memory is always on BAR2 */
-                               res = &dev->mem_resource[2];
-
-                               /* if we don't have a BAR2 */
-                               if (res->len == 0)
-                                       continue;
-
-                               /* construct pci device path */
-                               snprintf(path, sizeof(path), IVSHMEM_RESOURCE_PATH,
-                                               dev->addr.domain, dev->addr.bus, dev->addr.devid,
-                                               dev->addr.function);
-
-                               /* try to find memseg */
-                               fd = open(path, O_RDWR);
-                               if (fd < 0) {
-                                       RTE_LOG(ERR, EAL, "Could not open %s\n", path);
-                                       return -1;
-                               }
-
-                               /* check if it's a DPDK IVSHMEM device */
-                               ret = has_ivshmem_metadata(fd, res->len);
-
-                               /* is DPDK device */
-                               if (ret == 1) {
-
-                                       /* config file creation is deferred until the first
-                                        * DPDK device is found. then, it has to be created
-                                        * only once. */
-                                       if (ivshmem_config == NULL &&
-                                                       create_shared_config() < 0) {
-                                               RTE_LOG(ERR, EAL, "Could not create IVSHMEM config!\n");
-                                               close(fd);
-                                               return -1;
-                                       }
-
-                                       if (read_metadata(path, sizeof(path), fd, res->len) < 0) {
-                                               RTE_LOG(ERR, EAL, "Could not read metadata from"
-                                                               " device %02x:%02x.%x!\n", dev->addr.bus,
-                                                               dev->addr.devid, dev->addr.function);
-                                               close(fd);
-                                               return -1;
-                                       }
-
-                                       if (ivshmem_config->pci_devs_idx == RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS) {
-                                               RTE_LOG(WARNING, EAL,
-                                                               "IVSHMEM PCI device limit exceeded. Increase "
-                                                               "CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS  in "
-                                                               "your config file.\n");
-                                               break;
-                                       }
-
-                                       RTE_LOG(INFO, EAL, "Found IVSHMEM device %02x:%02x.%x\n",
-                                                       dev->addr.bus, dev->addr.devid, dev->addr.function);
-
-                                       ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].ioremap_addr = res->phys_addr;
-                                       snprintf(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path,
-                                                       sizeof(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path),
-                                                       "%s", path);
-
-                                       ivshmem_config->pci_devs_idx++;
-                               }
-                               /* failed to read */
-                               else if (ret < 0) {
-                                       RTE_LOG(ERR, EAL, "Could not read IVSHMEM device: %s\n",
-                                                       strerror(errno));
-                                       close(fd);
-                                       return -1;
-                               }
-                               /* not a DPDK device */
-                               else
-                                       RTE_LOG(DEBUG, EAL, "Skipping non-DPDK IVSHMEM device\n");
-
-                               /* close the BAR fd */
-                               close(fd);
-                       }
-               }
-       }
-
-       /* ivshmem_config is not NULL only if config was created and/or mapped */
-       if (ivshmem_config) {
-               if (map_all_segments() < 0) {
-                       RTE_LOG(ERR, EAL, "Mapping IVSHMEM segments failed!\n");
-                       return -1;
-               }
-       }
-       else {
-               RTE_LOG(DEBUG, EAL, "No IVSHMEM configuration found! \n");
-       }
-
-       return 0;
-}
-
-#endif
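
Editorial note (not part of the commit): the deleted cleanup_segments() above only concatenated two IVSHMEM segments when adjacent() reported FULL, i.e. contiguity in the virtual, physical and ioremap address spaces at the same time; partially adjacent segments were deliberately left alone. A minimal stand-alone sketch of that test, using a simplified stand-in for the rte_memzone fields the deleted code inspected:

#include <stdint.h>
#include <stdio.h>

#define PHYS    0x1
#define VIRT    0x2
#define IOREMAP 0x4
#define FULL    (PHYS | VIRT | IOREMAP)

/* simplified stand-in for the memzone fields used by the deleted adjacent() */
struct zone {
	uint64_t addr_64;       /* virtual start  */
	uint64_t phys_addr;     /* physical start */
	uint64_t ioremap_addr;  /* ioremap start  */
	uint64_t len;
};

static int adjacent(const struct zone *a, const struct zone *b)
{
	int res = 0;

	if (a->addr_64 == b->addr_64 + b->len || b->addr_64 == a->addr_64 + a->len)
		res |= VIRT;
	if (a->phys_addr == b->phys_addr + b->len ||
	    b->phys_addr == a->phys_addr + a->len)
		res |= PHYS;
	if (a->ioremap_addr == b->ioremap_addr + b->len ||
	    b->ioremap_addr == a->ioremap_addr + a->len)
		res |= IOREMAP;
	return res;
}

int main(void)
{
	struct zone a = { 0x1000, 0x10000, 0x0,    0x1000 };
	struct zone b = { 0x2000, 0x11000, 0x1000, 0x1000 };

	/* contiguous in all three spaces -> FULL, so the two would be merged */
	printf("adjacent = %#x (FULL = %#x)\n", adjacent(&a, &b), FULL);
	return 0;
}
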
index d391100..e3a50aa 100644 (file)
@@ -97,45 +97,7 @@ rte_eal_log_init(const char *id, int facility)
 
        openlog(id, LOG_NDELAY | LOG_PID, facility);
 
-       if (rte_eal_common_log_init(log_stream) < 0)
-               return -1;
-
-       return 0;
-}
-
-/* early logs */
-
-/*
- * early log function, used before rte_eal_log_init
- */
-static ssize_t
-early_log_write(__attribute__((unused)) void *c, const char *buf, size_t size)
-{
-       ssize_t ret;
-       ret = fwrite(buf, size, 1, stdout);
-       fflush(stdout);
-       if (ret == 0)
-               return -1;
-       return ret;
-}
-
-static cookie_io_functions_t early_log_func = {
-       .write = early_log_write,
-};
-static FILE *early_log_stream;
+       eal_log_set_default(log_stream);
 
-/*
- * init the log library, called by rte_eal_init() to enable early
- * logs
- */
-int
-rte_eal_log_early_init(void)
-{
-       early_log_stream = fopencookie(NULL, "w+", early_log_func);
-       if (early_log_stream == NULL) {
-               printf("Cannot configure early_log_stream\n");
-               return -1;
-       }
-       rte_openlog_stream(early_log_stream);
        return 0;
 }
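
Editorial note (not part of the commit): the hunk above drops the fopencookie()-based early-log stream and rte_eal_log_early_init(); eal_log_set_default(log_stream) now installs the syslog-backed stream directly. An application that still wants DPDK output on its own sink can install a cookie stream itself through rte_openlog_stream(), much as the removed code did. A minimal sketch, assuming glibc's fopencookie(); the app_* names are made up for the illustration:

#define _GNU_SOURCE            /* fopencookie() is a glibc extension */
#include <stdio.h>
#include <rte_log.h>

static ssize_t
app_log_write(void *cookie, const char *buf, size_t size)
{
	(void)cookie;
	/* forward to stderr; a real application might feed its own logger here */
	return (ssize_t)fwrite(buf, 1, size, stderr);
}

static int
app_install_log_stream(void)
{
	cookie_io_functions_t funcs = { .write = app_log_write };
	FILE *f = fopencookie(NULL, "w+", funcs);

	if (f == NULL)
		return -1;
	/* from here on, DPDK log output goes through app_log_write() */
	return rte_openlog_stream(f);
}

app_install_log_stream() would typically be called once during application start-up, after which every RTE_LOG() call is routed through the cookie write function.
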
index 41e0a92..a956bb2 100644 (file)
@@ -376,25 +376,15 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
        void *vma_addr = NULL;
        size_t vma_len = 0;
 
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-       RTE_SET_USED(vma_len);
-#endif
-
        for (i = 0; i < hpi->num_pages[0]; i++) {
                uint64_t hugepage_sz = hpi->hugepage_sz;
 
                if (orig) {
                        hugepg_tbl[i].file_id = i;
                        hugepg_tbl[i].size = hugepage_sz;
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-                       eal_get_hugefile_temp_path(hugepg_tbl[i].filepath,
-                                       sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
-                                       hugepg_tbl[i].file_id);
-#else
                        eal_get_hugefile_path(hugepg_tbl[i].filepath,
                                        sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
                                        hugepg_tbl[i].file_id);
-#endif
                        hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0';
                }
 #ifndef RTE_ARCH_64
@@ -408,8 +398,6 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
                        continue;
                }
 #endif
-
-#ifndef RTE_EAL_SINGLE_FILE_SEGMENTS
                else if (vma_len == 0) {
                        unsigned j, num_pages;
 
@@ -439,10 +427,9 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
                        if (vma_addr == NULL)
                                vma_len = hugepage_sz;
                }
-#endif
 
                /* try to create hugepage file */
-               fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755);
+               fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0600);
                if (fd < 0) {
                        RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__,
                                        strerror(errno));
@@ -505,169 +492,6 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
        return i;
 }
 
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-
-/*
- * Remaps all hugepages into single file segments
- */
-static int
-remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
-{
-       int fd;
-       unsigned i = 0, j, num_pages, page_idx = 0;
-       void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL;
-       size_t vma_len = 0;
-       size_t hugepage_sz = hpi->hugepage_sz;
-       size_t total_size, offset;
-       char filepath[MAX_HUGEPAGE_PATH];
-       phys_addr_t physaddr;
-       int socket;
-
-       while (i < hpi->num_pages[0]) {
-
-#ifndef RTE_ARCH_64
-               /* for 32-bit systems, don't remap 1G pages and 16G pages,
-                * just reuse original map address as final map address.
-                */
-               if ((hugepage_sz == RTE_PGSIZE_1G)
-                       || (hugepage_sz == RTE_PGSIZE_16G)) {
-                       hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
-                       hugepg_tbl[i].orig_va = NULL;
-                       i++;
-                       continue;
-               }
-#endif
-
-               /* reserve a virtual area for next contiguous
-                * physical block: count the number of
-                * contiguous physical pages. */
-               for (j = i+1; j < hpi->num_pages[0] ; j++) {
-#ifdef RTE_ARCH_PPC_64
-                       /* The physical addresses are sorted in descending
-                        * order on PPC64 */
-                       if (hugepg_tbl[j].physaddr !=
-                               hugepg_tbl[j-1].physaddr - hugepage_sz)
-                               break;
-#else
-                       if (hugepg_tbl[j].physaddr !=
-                               hugepg_tbl[j-1].physaddr + hugepage_sz)
-                               break;
-#endif
-               }
-               num_pages = j - i;
-               vma_len = num_pages * hugepage_sz;
-
-               socket = hugepg_tbl[i].socket_id;
-
-               /* get the biggest virtual memory area up to
-                * vma_len. If it fails, vma_addr is NULL, so
-                * let the kernel provide the address. */
-               vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
-
-               /* If we can't find a big enough virtual area, work out how many pages
-                * we are going to get */
-               if (vma_addr == NULL)
-                       j = i + 1;
-               else if (vma_len != num_pages * hugepage_sz) {
-                       num_pages = vma_len / hugepage_sz;
-                       j = i + num_pages;
-
-               }
-
-               hugepg_tbl[page_idx].file_id = page_idx;
-               eal_get_hugefile_path(filepath,
-                               sizeof(filepath),
-                               hpi->hugedir,
-                               hugepg_tbl[page_idx].file_id);
-
-               /* try to create hugepage file */
-               fd = open(filepath, O_CREAT | O_RDWR, 0755);
-               if (fd < 0) {
-                       RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, strerror(errno));
-                       return -1;
-               }
-
-               total_size = 0;
-               for (;i < j; i++) {
-
-                       /* unmap current segment */
-                       if (total_size > 0)
-                               munmap(vma_addr, total_size);
-
-                       /* unmap original page */
-                       munmap(hugepg_tbl[i].orig_va, hugepage_sz);
-                       unlink(hugepg_tbl[i].filepath);
-
-                       total_size += hugepage_sz;
-
-                       old_addr = vma_addr;
-
-                       /* map new, bigger segment, and populate page tables,
-                        * the kernel fills this segment with zeros */
-                       vma_addr = mmap(vma_addr, total_size,
-                                       PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0);
-
-                       if (vma_addr == MAP_FAILED || vma_addr != old_addr) {
-                               RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno));
-                               close(fd);
-                               return -1;
-                       }
-               }
-
-               /* set shared flock on the file. */
-               if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
-                       RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n",
-                               __func__, strerror(errno));
-                       close(fd);
-                       return -1;
-               }
-
-               snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s",
-                               filepath);
-
-               physaddr = rte_mem_virt2phy(vma_addr);
-
-               if (physaddr == RTE_BAD_PHYS_ADDR)
-                       return -1;
-
-               hugepg_tbl[page_idx].final_va = vma_addr;
-
-               hugepg_tbl[page_idx].physaddr = physaddr;
-
-               hugepg_tbl[page_idx].repeated = num_pages;
-
-               hugepg_tbl[page_idx].socket_id = socket;
-
-               close(fd);
-
-               /* verify the memory segment - that is, check that every VA corresponds
-                * to the physical address we expect to see
-                */
-               for (offset = 0; offset < vma_len; offset += hugepage_sz) {
-                       uint64_t expected_physaddr;
-
-                       expected_physaddr = hugepg_tbl[page_idx].physaddr + offset;
-                       page_addr = RTE_PTR_ADD(vma_addr, offset);
-                       physaddr = rte_mem_virt2phy(page_addr);
-
-                       if (physaddr != expected_physaddr) {
-                               RTE_LOG(ERR, EAL, "Segment sanity check failed: wrong physaddr "
-                                               "at %p (offset 0x%" PRIx64 ": 0x%" PRIx64
-                                               " (expected 0x%" PRIx64 ")\n",
-                                               page_addr, offset, physaddr, expected_physaddr);
-                               return -1;
-                       }
-               }
-
-               page_idx++;
-       }
-
-       /* zero out the rest */
-       memset(&hugepg_tbl[page_idx], 0, (hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file));
-       return page_idx;
-}
-#else/* RTE_EAL_SINGLE_FILE_SEGMENTS=n */
-
 /* Unmap all hugepages from original mapping */
 static int
 unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
@@ -681,7 +505,6 @@ unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info
         }
         return 0;
 }
-#endif /* RTE_EAL_SINGLE_FILE_SEGMENTS */
 
 /*
  * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge
@@ -875,12 +698,6 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
                        for (page = 0; page < nrpages; page++) {
                                struct hugepage_file *hp = &hugepg_tbl[page];
 
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-                               /* if this page was already cleared */
-                               if (hp->final_va == NULL)
-                                       continue;
-#endif
-
                                /* find a page that matches the criteria */
                                if ((hp->size == hpi[size].hugepage_sz) &&
                                                (hp->socket_id == (int) socket)) {
@@ -889,11 +706,7 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
                                        if (pages_found == hpi[size].num_pages[socket]) {
                                                uint64_t unmap_len;
 
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-                                               unmap_len = hp->size * hp->repeated;
-#else
                                                unmap_len = hp->size;
-#endif
 
                                                /* get start addr and len of the remaining segment */
                                                munmap(hp->final_va, (size_t) unmap_len);
@@ -904,50 +717,10 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
                                                                        __func__, hp->filepath, strerror(errno));
                                                        return -1;
                                                }
-                                       }
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-                                       /* else, check how much do we need to map */
-                                       else {
-                                               int nr_pg_left =
-                                                               hpi[size].num_pages[socket] - pages_found;
-
-                                               /* if we need enough memory to fit into the segment */
-                                               if (hp->repeated <= nr_pg_left) {
-                                                       pages_found += hp->repeated;
-                                               }
-                                               /* truncate the segment */
-                                               else {
-                                                       uint64_t final_size = nr_pg_left * hp->size;
-                                                       uint64_t seg_size = hp->repeated * hp->size;
-
-                                                       void * unmap_va = RTE_PTR_ADD(hp->final_va,
-                                                                       final_size);
-                                                       int fd;
-
-                                                       munmap(unmap_va, seg_size - final_size);
-
-                                                       fd = open(hp->filepath, O_RDWR);
-                                                       if (fd < 0) {
-                                                               RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
-                                                                               hp->filepath, strerror(errno));
-                                                               return -1;
-                                                       }
-                                                       if (ftruncate(fd, final_size) < 0) {
-                                                               RTE_LOG(ERR, EAL, "Cannot truncate %s: %s\n",
-                                                                               hp->filepath, strerror(errno));
-                                                               return -1;
-                                                       }
-                                                       close(fd);
-
-                                                       pages_found += nr_pg_left;
-                                                       hp->repeated = nr_pg_left;
-                                               }
-                                       }
-#else
-                                       /* else, lock the page and skip */
-                                       else
+                                       } else {
+                                               /* lock the page and skip */
                                                pages_found++;
-#endif
+                                       }
 
                                } /* match page */
                        } /* foreach page */
@@ -1177,9 +950,6 @@ rte_eal_hugepage_init(void)
        int i, j, new_memseg;
        int nr_hugefiles, nr_hugepages = 0;
        void *addr;
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-       int new_pages_count[MAX_HUGEPAGE_SIZES];
-#endif
 
        test_proc_pagemap_readable();
 
@@ -1260,13 +1030,6 @@ rte_eal_hugepage_init(void)
                pages_old = hpi->num_pages[0];
                pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, 1);
                if (pages_new < pages_old) {
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-                       RTE_LOG(ERR, EAL,
-                               "%d not %d hugepages of size %u MB allocated\n",
-                               pages_new, pages_old,
-                               (unsigned)(hpi->hugepage_sz / 0x100000));
-                       goto fail;
-#else
                        RTE_LOG(DEBUG, EAL,
                                "%d not %d hugepages of size %u MB allocated\n",
                                pages_new, pages_old,
@@ -1278,7 +1041,6 @@ rte_eal_hugepage_init(void)
                        hpi->num_pages[0] = pages_new;
                        if (pages_new == 0)
                                continue;
-#endif
                }
 
                /* find physical addresses and sockets for each hugepage */
@@ -1297,18 +1059,6 @@ rte_eal_hugepage_init(void)
                qsort(&tmp_hp[hp_offset], hpi->num_pages[0],
                      sizeof(struct hugepage_file), cmp_physaddr);
 
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-               /* remap all hugepages into single file segments */
-               new_pages_count[i] = remap_all_hugepages(&tmp_hp[hp_offset], hpi);
-               if (new_pages_count[i] < 0){
-                       RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n",
-                                       (unsigned)(hpi->hugepage_sz / 0x100000));
-                       goto fail;
-               }
-
-               /* we have processed a num of hugepages of this size, so inc offset */
-               hp_offset += new_pages_count[i];
-#else
                /* remap all hugepages */
                if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) !=
                    hpi->num_pages[0]) {
@@ -1323,7 +1073,6 @@ rte_eal_hugepage_init(void)
 
                /* we have processed a num of hugepages of this size, so inc offset */
                hp_offset += hpi->num_pages[0];
-#endif
        }
 
        huge_recover_sigbus();
@@ -1331,14 +1080,7 @@ rte_eal_hugepage_init(void)
        if (internal_config.memory == 0 && internal_config.force_sockets == 0)
                internal_config.memory = eal_get_hugepage_mem_size();
 
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-       nr_hugefiles = 0;
-       for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
-               nr_hugefiles += new_pages_count[i];
-       }
-#else
        nr_hugefiles = nr_hugepages;
-#endif
 
 
        /* clean out the numbers of pages */
@@ -1356,12 +1098,7 @@ rte_eal_hugepage_init(void)
                for (j = 0; j < nb_hpsizes; j++) {
                        if (tmp_hp[i].size ==
                                        internal_config.hugepage_info[j].hugepage_sz) {
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-                                       internal_config.hugepage_info[j].num_pages[socket] +=
-                                               tmp_hp[i].repeated;
-#else
                                internal_config.hugepage_info[j].num_pages[socket]++;
-#endif
                        }
                }
        }
@@ -1436,15 +1173,8 @@ rte_eal_hugepage_init(void)
        free(tmp_hp);
        tmp_hp = NULL;
 
-       /* find earliest free memseg - this is needed because in case of IVSHMEM,
-        * segments might have already been initialized */
-       for (j = 0; j < RTE_MAX_MEMSEG; j++)
-               if (mcfg->memseg[j].addr == NULL) {
-                       /* move to previous segment and exit loop */
-                       j--;
-                       break;
-               }
-
+       /* first memseg index shall be 0 after incrementing it below */
+       j = -1;
        for (i = 0; i < nr_hugefiles; i++) {
                new_memseg = 0;
 
@@ -1482,11 +1212,7 @@ rte_eal_hugepage_init(void)
 
                        mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
                        mcfg->memseg[j].addr = hugepage[i].final_va;
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-                       mcfg->memseg[j].len = hugepage[i].size * hugepage[i].repeated;
-#else
                        mcfg->memseg[j].len = hugepage[i].size;
-#endif
                        mcfg->memseg[j].socket_id = hugepage[i].socket_id;
                        mcfg->memseg[j].hugepage_sz = hugepage[i].size;
                }
@@ -1552,7 +1278,8 @@ rte_eal_hugepage_attach(void)
        struct hugepage_file *hp = NULL;
        unsigned num_hp = 0;
        unsigned i, s = 0; /* s used to track the segment number */
-       off_t size;
+       unsigned max_seg = RTE_MAX_MEMSEG;
+       off_t size = 0;
        int fd, fd_zero = -1, fd_hugepage = -1;
 
        if (aslr_enabled() > 0) {
@@ -1597,15 +1324,6 @@ rte_eal_hugepage_attach(void)
                if (mcfg->memseg[s].len == 0)
                        break;
 
-#ifdef RTE_LIBRTE_IVSHMEM
-               /*
-                * if segment has ioremap address set, it's an IVSHMEM segment and
-                * doesn't need mapping as it was already mapped earlier
-                */
-               if (mcfg->memseg[s].ioremap_addr != 0)
-                       continue;
-#endif
-
                /*
                 * fdzero is mmapped to get a contiguous block of virtual
                 * addresses of the appropriate memseg size.
@@ -1615,10 +1333,21 @@ rte_eal_hugepage_attach(void)
                                 PROT_READ, MAP_PRIVATE, fd_zero, 0);
                if (base_addr == MAP_FAILED ||
                    base_addr != mcfg->memseg[s].addr) {
-                       RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
-                               "in /dev/zero to requested address [%p]: '%s'\n",
-                               (unsigned long long)mcfg->memseg[s].len,
-                               mcfg->memseg[s].addr, strerror(errno));
+                       max_seg = s;
+                       if (base_addr != MAP_FAILED) {
+                               /* errno is stale, don't use */
+                               RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
+                                       "in /dev/zero at [%p], got [%p] - "
+                                       "please use '--base-virtaddr' option\n",
+                                       (unsigned long long)mcfg->memseg[s].len,
+                                       mcfg->memseg[s].addr, base_addr);
+                               munmap(base_addr, mcfg->memseg[s].len);
+                       } else {
+                               RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
+                                       "in /dev/zero at [%p]: '%s'\n",
+                                       (unsigned long long)mcfg->memseg[s].len,
+                                       mcfg->memseg[s].addr, strerror(errno));
+                       }
                        if (aslr_enabled() > 0) {
                                RTE_LOG(ERR, EAL, "It is recommended to "
                                        "disable ASLR in the kernel "
@@ -1644,16 +1373,6 @@ rte_eal_hugepage_attach(void)
                void *addr, *base_addr;
                uintptr_t offset = 0;
                size_t mapping_size;
-#ifdef RTE_LIBRTE_IVSHMEM
-               /*
-                * if segment has ioremap address set, it's an IVSHMEM segment and
-                * doesn't need mapping as it was already mapped earlier
-                */
-               if (mcfg->memseg[s].ioremap_addr != 0) {
-                       s++;
-                       continue;
-               }
-#endif
                /*
                 * free previously mapped memory so we can map the
                 * hugepages into the space
@@ -1672,11 +1391,7 @@ rte_eal_hugepage_attach(void)
                                                hp[i].filepath);
                                        goto error;
                                }
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-                               mapping_size = hp[i].size * hp[i].repeated;
-#else
                                mapping_size = hp[i].size;
-#endif
                                addr = mmap(RTE_PTR_ADD(base_addr, offset),
                                                mapping_size, PROT_READ | PROT_WRITE,
                                                MAP_SHARED, fd, 0);
@@ -1701,11 +1416,8 @@ rte_eal_hugepage_attach(void)
        return 0;
 
 error:
-       s = 0;
-       while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0) {
-               munmap(mcfg->memseg[s].addr, mcfg->memseg[s].len);
-               s++;
-       }
+       for (i = 0; i < max_seg && mcfg->memseg[i].len > 0; i++)
+               munmap(mcfg->memseg[i].addr, mcfg->memseg[i].len);
        if (hp != NULL && hp != MAP_FAILED)
                munmap(hp, size);
        if (fd_zero >= 0)
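
For illustration only, the following standalone sketch (not part of this patch; the hint address, segment size, and hugepage file path are invented) shows the reserve-then-remap pattern that rte_eal_hugepage_attach() relies on and that the error handling above now unwinds with max_seg: a private /dev/zero mapping reserves the virtual range recorded by the primary process, the returned address is verified because the hint is not MAP_FIXED, and only then is the range released and re-mapped MAP_SHARED from the hugepage file.

    /* Sketch of the reserve-then-remap pattern; all paths and addresses
     * below are illustrative, not taken from the patch. */
    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    static int attach_one_segment(void *addr_hint, size_t len, const char *hugefile)
    {
        int fd_zero = open("/dev/zero", O_RDONLY);
        if (fd_zero < 0)
            return -1;

        /* Reserve the virtual range; the hint is not MAP_FIXED, so verify it. */
        void *base = mmap(addr_hint, len, PROT_READ, MAP_PRIVATE, fd_zero, 0);
        close(fd_zero);
        if (base == MAP_FAILED) {
            fprintf(stderr, "reserve failed: %s\n", strerror(errno));
            return -1;
        }
        if (base != addr_hint) {
            /* errno is stale here, so do not print it */
            fprintf(stderr, "got %p, wanted %p - try --base-virtaddr\n",
                    base, addr_hint);
            munmap(base, len);
            return -1;
        }

        /* Free the reservation, then map the shared hugepage file in its place. */
        munmap(base, len);
        int fd = open(hugefile, O_RDWR);
        if (fd < 0)
            return -1;
        void *seg = mmap(base, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        close(fd);
        if (seg == MAP_FAILED || seg != base)
            return -1;
        return 0;
    }

    int main(void)
    {
        /* 2 MB at an arbitrary hint; both values are illustrative only */
        return attach_one_segment((void *)0x7f0000000000, 2 * 1024 * 1024,
                                  "/mnt/huge/rtemap_0");
    }

Not using MAP_FIXED is deliberate: a stale or conflicting mapping is detected by comparing the returned address instead of being silently clobbered, which is exactly the case the new "--base-virtaddr" error message reports.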
index cd9de7c..e2fc219 100644 (file)
  * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it).
  */
 
-/* unbind kernel driver for this device */
-int
-pci_unbind_kernel_driver(struct rte_pci_device *dev)
-{
-       int n;
-       FILE *f;
-       char filename[PATH_MAX];
-       char buf[BUFSIZ];
-       struct rte_pci_addr *loc = &dev->addr;
-
-       /* open /sys/bus/pci/devices/AAAA:BB:CC.D/driver */
-       snprintf(filename, sizeof(filename),
-               "%s/" PCI_PRI_FMT "/driver/unbind", pci_get_sysfs_path(),
-               loc->domain, loc->bus, loc->devid, loc->function);
-
-       f = fopen(filename, "w");
-       if (f == NULL) /* device was not bound */
-               return 0;
-
-       n = snprintf(buf, sizeof(buf), PCI_PRI_FMT "\n",
-                    loc->domain, loc->bus, loc->devid, loc->function);
-       if ((n < 0) || (n >= (int)sizeof(buf))) {
-               RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__);
-               goto error;
-       }
-       if (fwrite(buf, n, 1, f) == 0) {
-               RTE_LOG(ERR, EAL, "%s(): could not write to %s\n", __func__,
-                               filename);
-               goto error;
-       }
-
-       fclose(f);
-       return 0;
-
-error:
-       fclose(f);
-       return -1;
-}
-
 static int
 pci_get_kernel_driver_by_path(const char *filename, char *dri_name)
 {
@@ -267,8 +228,7 @@ error:
 
 /* Scan one pci sysfs entry, and fill the devices list from it. */
 static int
-pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
-            uint8_t devid, uint8_t function)
+pci_scan_one(const char *dirname, const struct rte_pci_addr *addr)
 {
        char filename[PATH_MAX];
        unsigned long tmp;
@@ -281,10 +241,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
                return -1;
 
        memset(dev, 0, sizeof(*dev));
-       dev->addr.domain = domain;
-       dev->addr.bus = bus;
-       dev->addr.devid = devid;
-       dev->addr.function = function;
+       dev->addr = *addr;
 
        /* get vendor id */
        snprintf(filename, sizeof(filename), "%s/vendor", dirname);
@@ -350,13 +307,13 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
                 dirname);
        if (access(filename, R_OK) != 0) {
                /* if no NUMA support, set default to 0 */
-               dev->numa_node = 0;
+               dev->device.numa_node = 0;
        } else {
                if (eal_parse_sysfs_value(filename, &tmp) < 0) {
                        free(dev);
                        return -1;
                }
-               dev->numa_node = tmp;
+               dev->device.numa_node = tmp;
        }
 
        /* parse resources */
@@ -390,6 +347,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
 
        /* device is valid, add in list (sorted) */
        if (TAILQ_EMPTY(&pci_device_list)) {
+               rte_eal_device_insert(&dev->device);
                TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
        } else {
                struct rte_pci_device *dev2;
@@ -402,6 +360,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
 
                        if (ret < 0) {
                                TAILQ_INSERT_BEFORE(dev2, dev, next);
+                               rte_eal_device_insert(&dev->device);
                        } else { /* already registered */
                                dev2->kdrv = dev->kdrv;
                                dev2->max_vfs = dev->max_vfs;
@@ -411,18 +370,30 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
                        }
                        return 0;
                }
+               rte_eal_device_insert(&dev->device);
                TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
        }
 
        return 0;
 }
 
+int
+pci_update_device(const struct rte_pci_addr *addr)
+{
+       char filename[PATH_MAX];
+
+       snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT,
+                pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
+                addr->function);
+
+       return pci_scan_one(filename, addr);
+}
+
 /*
  * split up a pci address into its constituent parts.
  */
 static int
-parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain,
-               uint8_t *bus, uint8_t *devid, uint8_t *function)
+parse_pci_addr_format(const char *buf, int bufsize, struct rte_pci_addr *addr)
 {
        /* first split on ':' */
        union splitaddr {
@@ -450,10 +421,10 @@ parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain,
 
        /* now convert to int values */
        errno = 0;
-       *domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16);
-       *bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16);
-       *devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16);
-       *function = (uint8_t)strtoul(splitaddr.function, NULL, 10);
+       addr->domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16);
+       addr->bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16);
+       addr->devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16);
+       addr->function = (uint8_t)strtoul(splitaddr.function, NULL, 10);
        if (errno != 0)
                goto error;
 
@@ -474,8 +445,7 @@ rte_eal_pci_scan(void)
        struct dirent *e;
        DIR *dir;
        char dirname[PATH_MAX];
-       uint16_t domain;
-       uint8_t bus, devid, function;
+       struct rte_pci_addr addr;
 
        dir = opendir(pci_get_sysfs_path());
        if (dir == NULL) {
@@ -488,13 +458,12 @@ rte_eal_pci_scan(void)
                if (e->d_name[0] == '.')
                        continue;
 
-               if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &domain,
-                               &bus, &devid, &function) != 0)
+               if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &addr) != 0)
                        continue;
 
                snprintf(dirname, sizeof(dirname), "%s/%s",
                                pci_get_sysfs_path(), e->d_name);
-               if (pci_scan_one(dirname, domain, bus, devid, function) < 0)
+               if (pci_scan_one(dirname, &addr) < 0)
                        goto error;
        }
        closedir(dir);
@@ -743,9 +712,6 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p)
 int
 rte_eal_pci_init(void)
 {
-       TAILQ_INIT(&pci_driver_list);
-       TAILQ_INIT(&pci_device_list);
-
        /* for debug purposes, PCI can be disabled */
        if (internal_config.no_pci)
                return 0;
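
The refactor above replaces four scalar parameters (domain, bus, devid, function) with a single struct rte_pci_addr. As a rough illustration of the split-on-':' and '.' parsing that parse_pci_addr_format() performs on a sysfs entry name, here is a self-contained sketch; the struct and helper names are local to the example, not the EAL API.

    /* Standalone sketch of sysfs PCI address parsing ("0000:03:00.1" style). */
    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct pci_addr {
        uint16_t domain;
        uint8_t bus;
        uint8_t devid;
        uint8_t function;
    };

    static int parse_pci_addr(const char *name, struct pci_addr *addr)
    {
        char buf[32];
        char *bus, *devid, *function;

        if (strlen(name) >= sizeof(buf))
            return -1;
        strcpy(buf, name);

        /* split "domain:bus:devid.function" in place */
        bus = strchr(buf, ':');
        if (bus == NULL)
            return -1;
        *bus++ = '\0';
        devid = strchr(bus, ':');
        if (devid == NULL)
            return -1;
        *devid++ = '\0';
        function = strchr(devid, '.');
        if (function == NULL)
            return -1;
        *function++ = '\0';

        /* convert each field; domain/bus/devid are hex, function is decimal */
        errno = 0;
        addr->domain = (uint16_t)strtoul(buf, NULL, 16);
        addr->bus = (uint8_t)strtoul(bus, NULL, 16);
        addr->devid = (uint8_t)strtoul(devid, NULL, 16);
        addr->function = (uint8_t)strtoul(function, NULL, 10);
        return errno == 0 ? 0 : -1;
    }

    int main(void)
    {
        struct pci_addr a;
        if (parse_pci_addr("0000:03:00.1", &a) == 0)
            printf("%04x:%02x:%02x.%x\n", a.domain, a.bus, a.devid, a.function);
        return 0;
    }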
index 1786b75..3e4ffb5 100644 (file)
@@ -133,7 +133,7 @@ pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num)
        snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
        dev = makedev(major, minor);
        ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev);
-       if (f == NULL) {
+       if (ret != 0) {
                RTE_LOG(ERR, EAL, "%s(): mknod() failed %s\n",
                        __func__, strerror(errno));
                return -1;
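
The one-line hunk above fixes a copy-paste bug: the return value of mknod() was never checked (an unrelated FILE pointer "f" was tested instead of "ret"). A minimal sketch of the intended check, with illustrative device numbers, might look like this:

    /* Sketch of the intended check: test mknod()'s return value. */
    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/stat.h>
    #include <sys/sysmacros.h>
    #include <sys/types.h>

    int make_uio_node(unsigned uio_num, unsigned major, unsigned minor)
    {
        char filename[64];
        snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
        dev_t dev = makedev(major, minor);
        int ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev);
        if (ret != 0) {
            fprintf(stderr, "mknod(%s) failed: %s\n", filename, strerror(errno));
            return -1;
        }
        return 0;
    }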
index 3dacbff..d459bf4 100644 (file)
@@ -82,6 +82,7 @@ struct rte_epoll_event {
 
 /** Handle for interrupts. */
 struct rte_intr_handle {
+       RTE_STD_C11
        union {
                int vfio_dev_fd;  /**< VFIO device file descriptor */
                int uio_cfg_fd;  /**< UIO config file descriptor
index 2acdfd9..09713b0 100644 (file)
@@ -61,6 +61,9 @@
 
 #ifdef __KERNEL__
 #include <linux/if.h>
+#define RTE_STD_C11
+#else
+#include <rte_common.h>
 #endif
 
 /**
@@ -85,6 +88,7 @@ enum rte_kni_req_id {
  */
 struct rte_kni_request {
        uint32_t req_id;             /**< Request id */
+       RTE_STD_C11
        union {
                uint32_t new_mtu;    /**< New MTU */
                uint8_t if_up;       /**< 1: interface up, 0: interface down */
@@ -102,7 +106,7 @@ struct rte_kni_fifo {
        volatile unsigned read;      /**< Next position to be read */
        unsigned len;                /**< Circular buffer length */
        unsigned elem_size;          /**< Pointer size - for 32/64 bit OS */
-       void * volatile buffer[0];   /**< The buffer contains mbuf pointers */
+       void *volatile buffer[];     /**< The buffer contains mbuf pointers */
 };
 
 /*
@@ -111,7 +115,8 @@ struct rte_kni_fifo {
  */
 struct rte_kni_mbuf {
        void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE)));
-       char pad0[10];
+       uint64_t buf_physaddr;
+       char pad0[2];
        uint16_t data_off;      /**< Start address of data in segment buffer. */
        char pad1[2];
        uint8_t nb_segs;        /**< Number of segments. */
@@ -159,6 +164,7 @@ struct rte_kni_device_info {
        uint16_t group_id;            /**< Group ID */
        uint32_t core_id;             /**< core ID to bind for kernel thread */
 
+       __extension__
        uint8_t force_bind : 1;       /**< Flag for kernel thread binding */
 
        /* mbuf size */
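
Two of the hunks above modernize the shared KNI structures: rte_kni_request gains an RTE_STD_C11 anonymous union, and rte_kni_fifo replaces the GNU zero-length array "buffer[0]" with a standard C flexible array member "buffer[]". The sketch below, with names local to the example, shows how a struct ending in a flexible array member is sized and allocated; unlike the zero-length form, this is standard C99/C11 and lets compilers diagnose misuse.

    /* Sketch: allocating a struct that ends in a flexible array member. */
    #include <stdio.h>
    #include <stdlib.h>

    struct fifo {
        volatile unsigned write;   /* next position to be written */
        volatile unsigned read;    /* next position to be read */
        unsigned len;              /* circular buffer length */
        void *volatile buffer[];   /* flexible array member: not counted by sizeof */
    };

    static struct fifo *fifo_alloc(unsigned n_slots)
    {
        /* sizeof(*f) excludes buffer[], so add the slot storage explicitly */
        struct fifo *f = calloc(1, sizeof(*f) + n_slots * sizeof(void *));
        if (f != NULL)
            f->len = n_slots;
        return f;
    }

    int main(void)
    {
        struct fifo *f = fifo_alloc(1024);
        printf("header %zu bytes, total %zu bytes\n",
               sizeof(struct fifo), sizeof(*f) + 1024 * sizeof(void *));
        free(f);
        return 0;
    }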
index 647ba3c..78da08e 100644 (file)
 #endif
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
-#define sk_sleep(s) (s)->sk_sleep
+#define sk_sleep(s) ((s)->sk_sleep)
+#else
+#define HAVE_SOCKET_WQ
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+#define HAVE_STATIC_SOCK_MAP_FD
+#else
+#define kni_sock_map_fd(s) sock_map_fd(s, 0)
 #endif
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
 #define HAVE_CHANGE_CARRIER_CB
 #endif
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#define ether_addr_copy(dst, src) memcpy(dst, src, ETH_ALEN)
+#endif
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
 #define HAVE_IOV_ITER_MSGHDR
 #endif
 #define HAVE_REBUILD_HEADER
 #endif
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)
+#define HAVE_SK_ALLOC_KERN_PARAM
+#endif
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
 #define HAVE_TRANS_START_HELPER
 #endif
+
+/*
+ * KNI uses NET_NAME_UNKNOWN macro to select correct version of alloc_netdev()
+ * For old kernels just backported the commit that enables the macro
+ * (685343fc3ba6) but still uses old API, it is required to undefine macro to
+ * select correct version of API, this is safe since KNI doesn't use the value.
+ * This fix is specific to RedHat/CentOS kernels.
+ */
+#if (defined(RHEL_RELEASE_CODE) && \
+       (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8)) && \
+       (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)))
+#undef NET_NAME_UNKNOWN
+#endif
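
The compat header above gates each feature on LINUX_VERSION_CODE. As a rough userspace illustration of that pattern, the sketch below shows how KERNEL_VERSION packs major/minor/sublevel into a single comparable integer (classic definition; recent kernels clamp the sublevel at 255, which is ignored here) and how a missing helper such as ether_addr_copy can be backfilled for old versions. The pretend target version is an assumption of the example.

    /* Sketch of the version-gating pattern used throughout the compat header. */
    #include <stdio.h>
    #include <string.h>

    #define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + (c))
    #define LINUX_VERSION_CODE KERNEL_VERSION(3, 14, 0)   /* pretend build target */

    #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
    /* backfill a helper that only newer kernels provide */
    #define ether_addr_copy(dst, src) memcpy(dst, src, 6)
    #endif

    int main(void)
    {
        printf("building against 0x%06x\n", LINUX_VERSION_CODE);
    #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
        printf("HAVE_CHANGE_CARRIER_CB would be defined\n");
    #endif
        return 0;
    }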
index b8c9a13..d558af2 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 1aec75a..185ccdf 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 6095d3b..220c9a4 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index b21294e..55c8a5f 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 63b228c..d42c799 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 347cef7..35886e9 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 1e9f3e6..7e4c20a 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 57b2eb5..b8fa70d 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 4ee59ba..74319de 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 6a1b0f5..3bcdd88 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index a170039..51dfae5 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index c94b218..0627f27 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 3ef0d98..bd64429 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index bbf838c..64685d9 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 6188d00..1ce5915 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index fe62785..17bc53c 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index d1cf98e..c1ab60c 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 140a2a4..d8a77c4 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 5387c5e..db24fb0 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 0e083c5..830ec99 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index e5554ca..d077b49 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c
deleted file mode 100644 (file)
index c07f9f5..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <[email protected]>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "igb.h"
index af7e68a..d7a987d 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c
deleted file mode 100644 (file)
index 07a1ae0..0000000
+++ /dev/null
@@ -1,260 +0,0 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <[email protected]>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "igb.h"
-#include "e1000_82575.h"
-#include "e1000_hw.h"
-#ifdef IGB_HWMON
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/sysfs.h>
-#include <linux/kobject.h>
-#include <linux/device.h>
-#include <linux/netdevice.h>
-#include <linux/hwmon.h>
-#include <linux/pci.h>
-
-#ifdef HAVE_I2C_SUPPORT
-static struct i2c_board_info i350_sensor_info = {
-       I2C_BOARD_INFO("i350bb", (0Xf8 >> 1)),
-};
-#endif /* HAVE_I2C_SUPPORT */
-
-/* hwmon callback functions */
-static ssize_t igb_hwmon_show_location(struct device *dev,
-                                        struct device_attribute *attr,
-                                        char *buf)
-{
-       struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
-                                                    dev_attr);
-       return sprintf(buf, "loc%u\n",
-                      igb_attr->sensor->location);
-}
-
-static ssize_t igb_hwmon_show_temp(struct device *dev,
-                                    struct device_attribute *attr,
-                                    char *buf)
-{
-       struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
-                                                    dev_attr);
-       unsigned int value;
-
-       /* reset the temp field */
-       igb_attr->hw->mac.ops.get_thermal_sensor_data(igb_attr->hw);
-
-       value = igb_attr->sensor->temp;
-
-       /* display millidegree */
-       value *= 1000;
-
-       return sprintf(buf, "%u\n", value);
-}
-
-static ssize_t igb_hwmon_show_cautionthresh(struct device *dev,
-                                    struct device_attribute *attr,
-                                    char *buf)
-{
-       struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
-                                                    dev_attr);
-       unsigned int value = igb_attr->sensor->caution_thresh;
-
-       /* display millidegree */
-       value *= 1000;
-
-       return sprintf(buf, "%u\n", value);
-}
-
-static ssize_t igb_hwmon_show_maxopthresh(struct device *dev,
-                                    struct device_attribute *attr,
-                                    char *buf)
-{
-       struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
-                                                    dev_attr);
-       unsigned int value = igb_attr->sensor->max_op_thresh;
-
-       /* display millidegree */
-       value *= 1000;
-
-       return sprintf(buf, "%u\n", value);
-}
-
-/* igb_add_hwmon_attr - Create hwmon attr table for a hwmon sysfs file.
- * @ adapter: pointer to the adapter structure
- * @ offset: offset in the eeprom sensor data table
- * @ type: type of sensor data to display
- *
- * For each file we want in hwmon's sysfs interface we need a device_attribute
- * This is included in our hwmon_attr struct that contains the references to
- * the data structures we need to get the data to display.
- */
-static int igb_add_hwmon_attr(struct igb_adapter *adapter,
-                               unsigned int offset, int type) {
-       int rc;
-       unsigned int n_attr;
-       struct hwmon_attr *igb_attr;
-
-       n_attr = adapter->igb_hwmon_buff.n_hwmon;
-       igb_attr = &adapter->igb_hwmon_buff.hwmon_list[n_attr];
-
-       switch (type) {
-       case IGB_HWMON_TYPE_LOC:
-               igb_attr->dev_attr.show = igb_hwmon_show_location;
-               snprintf(igb_attr->name, sizeof(igb_attr->name),
-                        "temp%u_label", offset);
-               break;
-       case IGB_HWMON_TYPE_TEMP:
-               igb_attr->dev_attr.show = igb_hwmon_show_temp;
-               snprintf(igb_attr->name, sizeof(igb_attr->name),
-                        "temp%u_input", offset);
-               break;
-       case IGB_HWMON_TYPE_CAUTION:
-               igb_attr->dev_attr.show = igb_hwmon_show_cautionthresh;
-               snprintf(igb_attr->name, sizeof(igb_attr->name),
-                        "temp%u_max", offset);
-               break;
-       case IGB_HWMON_TYPE_MAX:
-               igb_attr->dev_attr.show = igb_hwmon_show_maxopthresh;
-               snprintf(igb_attr->name, sizeof(igb_attr->name),
-                        "temp%u_crit", offset);
-               break;
-       default:
-               rc = -EPERM;
-               return rc;
-       }
-
-       /* These always the same regardless of type */
-       igb_attr->sensor =
-               &adapter->hw.mac.thermal_sensor_data.sensor[offset];
-       igb_attr->hw = &adapter->hw;
-       igb_attr->dev_attr.store = NULL;
-       igb_attr->dev_attr.attr.mode = S_IRUGO;
-       igb_attr->dev_attr.attr.name = igb_attr->name;
-       sysfs_attr_init(&igb_attr->dev_attr.attr);
-       rc = device_create_file(&adapter->pdev->dev,
-                               &igb_attr->dev_attr);
-       if (rc == 0)
-               ++adapter->igb_hwmon_buff.n_hwmon;
-
-       return rc;
-}
-
-static void igb_sysfs_del_adapter(struct igb_adapter *adapter)
-{
-       int i;
-
-       if (adapter == NULL)
-               return;
-
-       for (i = 0; i < adapter->igb_hwmon_buff.n_hwmon; i++) {
-               device_remove_file(&adapter->pdev->dev,
-                          &adapter->igb_hwmon_buff.hwmon_list[i].dev_attr);
-       }
-
-       kfree(adapter->igb_hwmon_buff.hwmon_list);
-
-       if (adapter->igb_hwmon_buff.device)
-               hwmon_device_unregister(adapter->igb_hwmon_buff.device);
-}
-
-/* called from igb_main.c */
-void igb_sysfs_exit(struct igb_adapter *adapter)
-{
-       igb_sysfs_del_adapter(adapter);
-}
-
-/* called from igb_main.c */
-int igb_sysfs_init(struct igb_adapter *adapter)
-{
-       struct hwmon_buff *igb_hwmon = &adapter->igb_hwmon_buff;
-       unsigned int i;
-       int n_attrs;
-       int rc = 0;
-#ifdef HAVE_I2C_SUPPORT
-       struct i2c_client *client = NULL;
-#endif /* HAVE_I2C_SUPPORT */
-
-       /* If this method isn't defined we don't support thermals */
-       if (adapter->hw.mac.ops.init_thermal_sensor_thresh == NULL)
-               goto exit;
-
-       /* Don't create thermal hwmon interface if no sensors present */
-       rc = (adapter->hw.mac.ops.init_thermal_sensor_thresh(&adapter->hw));
-               if (rc)
-                       goto exit;
-#ifdef HAVE_I2C_SUPPORT
-       /* init i2c_client */
-       client = i2c_new_device(&adapter->i2c_adap, &i350_sensor_info);
-       if (client == NULL) {
-               dev_info(&adapter->pdev->dev,
-                       "Failed to create new i2c device..\n");
-               goto exit;
-       }
-       adapter->i2c_client = client;
-#endif /* HAVE_I2C_SUPPORT */
-
-       /* Allocation space for max attributes
-        * max num sensors * values (loc, temp, max, caution)
-        */
-       n_attrs = E1000_MAX_SENSORS * 4;
-       igb_hwmon->hwmon_list = kcalloc(n_attrs, sizeof(struct hwmon_attr),
-                                         GFP_KERNEL);
-       if (!igb_hwmon->hwmon_list) {
-               rc = -ENOMEM;
-               goto err;
-       }
-
-       igb_hwmon->device = hwmon_device_register(&adapter->pdev->dev);
-       if (IS_ERR(igb_hwmon->device)) {
-               rc = PTR_ERR(igb_hwmon->device);
-               goto err;
-       }
-
-       for (i = 0; i < E1000_MAX_SENSORS; i++) {
-
-               /* Only create hwmon sysfs entries for sensors that have
-                * meaningful data.
-                */
-               if (adapter->hw.mac.thermal_sensor_data.sensor[i].location == 0)
-                       continue;
-
-               /* Bail if any hwmon attr struct fails to initialize */
-               rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_CAUTION);
-               rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_LOC);
-               rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_TEMP);
-               rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_MAX);
-               if (rc)
-                       goto err;
-       }
-
-       goto exit;
-
-err:
-       igb_sysfs_del_adapter(adapter);
-exit:
-       return rc;
-}
-#endif /* IGB_HWMON */
index 96acec5..f4dca5a 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
@@ -76,7 +76,7 @@ static const char igb_driver_string[] =
 static const char igb_copyright[] =
                                "Copyright (c) 2007-2013 Intel Corporation.";
 
-static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
+const struct pci_device_id igb_pci_tbl[] = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) },
@@ -195,7 +195,11 @@ static void igb_process_mdd_event(struct igb_adapter *);
 #ifdef IFLA_VF_MAX
 static int igb_ndo_set_vf_mac( struct net_device *netdev, int vf, u8 *mac);
 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
+#ifdef HAVE_VF_VLAN_PROTO
+                               int vf, u16 vlan, u8 qos, __be16 vlan_proto);
+#else
                                int vf, u16 vlan, u8 qos);
+#endif
 #ifdef HAVE_VF_SPOOFCHK_CONFIGURE
 static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
                                bool setting);
@@ -1558,6 +1562,7 @@ static void igb_check_swap_media(struct igb_adapter *adapter)
        ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
        connsw = E1000_READ_REG(hw, E1000_CONNSW);
        link = igb_has_link(adapter);
+       (void) link;
 
        /* need to live swap if current media is copper and we have fiber/serdes
         * to go to.
@@ -6411,7 +6416,11 @@ static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
 }
 
 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
+#ifdef HAVE_VF_VLAN_PROTO
+                              int vf, u16 vlan, u8 qos, __be16 vlan_proto)
+#else
                               int vf, u16 vlan, u8 qos)
+#endif
 {
        int err = 0;
        struct igb_adapter *adapter = netdev_priv(netdev);
@@ -6419,6 +6428,12 @@ static int igb_ndo_set_vf_vlan(struct net_device *netdev,
        /* VLAN IDs accepted range 0-4094 */
        if ((vf >= adapter->vfs_allocated_count) || (vlan > VLAN_VID_MASK-1) || (qos > 7))
                return -EINVAL;
+
+#ifdef HAVE_VF_VLAN_PROTO
+       if (vlan_proto != htons(ETH_P_8021Q))
+               return -EPROTONOSUPPORT;
+#endif
+
        if (vlan || qos) {
                err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
                if (err)
@@ -6579,7 +6594,12 @@ static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
        if (adapter->vf_data[vf].pf_vlan)
                igb_ndo_set_vf_vlan(adapter->netdev, vf,
                                    adapter->vf_data[vf].pf_vlan,
+#ifdef HAVE_VF_VLAN_PROTO
+                                   adapter->vf_data[vf].pf_qos,
+                                   htons(ETH_P_8021Q));
+#else
                                    adapter->vf_data[vf].pf_qos);
+#endif
        else
                igb_clear_vf_vfta(adapter, vf);
 #endif
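
The HAVE_VF_VLAN_PROTO hunks above extend igb_ndo_set_vf_vlan() for kernels whose ndo callback passes a VLAN protocol, rejecting anything other than 802.1Q. The following userspace sketch, with a local helper name and no kernel context, only illustrates the validation logic (VLAN ID/QoS range plus the htons(ETH_P_8021Q) comparison); it is not the driver callback itself.

    /* Sketch of the validation added for HAVE_VF_VLAN_PROTO kernels. */
    #include <arpa/inet.h>
    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    #define ETH_P_8021Q   0x8100
    #define VLAN_VID_MASK 0x0fff

    static int check_vf_vlan(uint16_t vlan, uint8_t qos, uint16_t vlan_proto_be)
    {
        /* VLAN IDs accepted range 0-4094, QoS 0-7 */
        if (vlan > VLAN_VID_MASK - 1 || qos > 7)
            return -EINVAL;
        /* only 802.1Q is supported; 802.1ad and others are refused */
        if (vlan_proto_be != htons(ETH_P_8021Q))
            return -EPROTONOSUPPORT;
        return 0;
    }

    int main(void)
    {
        printf("%d\n", check_vf_vlan(100, 0, htons(ETH_P_8021Q)));  /* 0 */
        printf("%d\n", check_vf_vlan(100, 0, htons(0x88a8)));       /* -EPROTONOSUPPORT */
        return 0;
    }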
index f79ce7c..c922ca2 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c
deleted file mode 100644 (file)
index 66236d2..0000000
+++ /dev/null
@@ -1,363 +0,0 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <[email protected]>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "igb.h"
-#include "e1000_82575.h"
-#include "e1000_hw.h"
-
-#ifdef IGB_PROCFS
-#ifndef IGB_HWMON
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/proc_fs.h>
-#include <linux/device.h>
-#include <linux/netdevice.h>
-
-static struct proc_dir_entry *igb_top_dir = NULL;
-
-
-bool igb_thermal_present(struct igb_adapter *adapter)
-{
-       s32 status;
-       struct e1000_hw *hw;
-
-       if (adapter == NULL)
-               return false;
-       hw = &adapter->hw;
-
-       /*
-        * Only set I2C bit-bang mode if an external thermal sensor is
-        * supported on this device.
-        */
-       if (adapter->ets) {
-               status = e1000_set_i2c_bb(hw);
-               if (status != E1000_SUCCESS)
-                       return false;
-       }
-
-       status = hw->mac.ops.init_thermal_sensor_thresh(hw);
-       if (status != E1000_SUCCESS)
-               return false;
-
-       return true;
-}
-
-
-static int igb_macburn(char *page, char **start, off_t off, int count,
-                       int *eof, void *data)
-{
-       struct e1000_hw *hw;
-       struct igb_adapter *adapter = (struct igb_adapter *)data;
-       if (adapter == NULL)
-               return snprintf(page, count, "error: no adapter\n");
-
-       hw = &adapter->hw;
-       if (hw == NULL)
-               return snprintf(page, count, "error: no hw data\n");
-
-       return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n",
-                      (unsigned int)hw->mac.perm_addr[0],
-                      (unsigned int)hw->mac.perm_addr[1],
-                      (unsigned int)hw->mac.perm_addr[2],
-                      (unsigned int)hw->mac.perm_addr[3],
-                      (unsigned int)hw->mac.perm_addr[4],
-                      (unsigned int)hw->mac.perm_addr[5]);
-}
-
-static int igb_macadmn(char *page, char **start, off_t off,
-                      int count, int *eof, void *data)
-{
-       struct e1000_hw *hw;
-       struct igb_adapter *adapter = (struct igb_adapter *)data;
-       if (adapter == NULL)
-               return snprintf(page, count, "error: no adapter\n");
-
-       hw = &adapter->hw;
-       if (hw == NULL)
-               return snprintf(page, count, "error: no hw data\n");
-
-       return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n",
-                      (unsigned int)hw->mac.addr[0],
-                      (unsigned int)hw->mac.addr[1],
-                      (unsigned int)hw->mac.addr[2],
-                      (unsigned int)hw->mac.addr[3],
-                      (unsigned int)hw->mac.addr[4],
-                      (unsigned int)hw->mac.addr[5]);
-}
-
-static int igb_numeports(char *page, char **start, off_t off, int count,
-                        int *eof, void *data)
-{
-       struct e1000_hw *hw;
-       int ports;
-       struct igb_adapter *adapter = (struct igb_adapter *)data;
-       if (adapter == NULL)
-               return snprintf(page, count, "error: no adapter\n");
-
-       hw = &adapter->hw;
-       if (hw == NULL)
-               return snprintf(page, count, "error: no hw data\n");
-
-       ports = 4;
-
-       return snprintf(page, count, "%d\n", ports);
-}
-
-static int igb_porttype(char *page, char **start, off_t off, int count,
-                       int *eof, void *data)
-{
-       struct igb_adapter *adapter = (struct igb_adapter *)data;
-       if (adapter == NULL)
-               return snprintf(page, count, "error: no adapter\n");
-
-       return snprintf(page, count, "%d\n",
-                       test_bit(__IGB_DOWN, &adapter->state));
-}
-
-static int igb_therm_location(char *page, char **start, off_t off,
-                                    int count, int *eof, void *data)
-{
-       struct igb_therm_proc_data *therm_data =
-               (struct igb_therm_proc_data *)data;
-
-       if (therm_data == NULL)
-               return snprintf(page, count, "error: no therm_data\n");
-
-       return snprintf(page, count, "%d\n", therm_data->sensor_data->location);
-}
-
-static int igb_therm_maxopthresh(char *page, char **start, off_t off,
-                                   int count, int *eof, void *data)
-{
-       struct igb_therm_proc_data *therm_data =
-               (struct igb_therm_proc_data *)data;
-
-       if (therm_data == NULL)
-               return snprintf(page, count, "error: no therm_data\n");
-
-       return snprintf(page, count, "%d\n",
-                       therm_data->sensor_data->max_op_thresh);
-}
-
-static int igb_therm_cautionthresh(char *page, char **start, off_t off,
-                                     int count, int *eof, void *data)
-{
-       struct igb_therm_proc_data *therm_data =
-               (struct igb_therm_proc_data *)data;
-
-       if (therm_data == NULL)
-               return snprintf(page, count, "error: no therm_data\n");
-
-       return snprintf(page, count, "%d\n",
-                       therm_data->sensor_data->caution_thresh);
-}
-
-static int igb_therm_temp(char *page, char **start, off_t off,
-                            int count, int *eof, void *data)
-{
-       s32 status;
-       struct igb_therm_proc_data *therm_data =
-               (struct igb_therm_proc_data *)data;
-
-       if (therm_data == NULL)
-               return snprintf(page, count, "error: no therm_data\n");
-
-       status = e1000_get_thermal_sensor_data(therm_data->hw);
-       if (status != E1000_SUCCESS)
-               snprintf(page, count, "error: status %d returned\n", status);
-
-       return snprintf(page, count, "%d\n", therm_data->sensor_data->temp);
-}
-
-struct igb_proc_type{
-       char name[32];
-       int (*read)(char*, char**, off_t, int, int*, void*);
-};
-
-struct igb_proc_type igb_proc_entries[] = {
-       {"numeports", &igb_numeports},
-       {"porttype", &igb_porttype},
-       {"macburn", &igb_macburn},
-       {"macadmn", &igb_macadmn},
-       {"", NULL}
-};
-
-struct igb_proc_type igb_internal_entries[] = {
-       {"location", &igb_therm_location},
-       {"temp", &igb_therm_temp},
-       {"cautionthresh", &igb_therm_cautionthresh},
-       {"maxopthresh", &igb_therm_maxopthresh},
-       {"", NULL}
-};
-
-void igb_del_proc_entries(struct igb_adapter *adapter)
-{
-       int index, i;
-       char buf[16];   /* much larger than the sensor number will ever be */
-
-       if (igb_top_dir == NULL)
-               return;
-
-       for (i = 0; i < E1000_MAX_SENSORS; i++) {
-               if (adapter->therm_dir[i] == NULL)
-                       continue;
-
-               for (index = 0; ; index++) {
-                       if (igb_internal_entries[index].read == NULL)
-                               break;
-
-                        remove_proc_entry(igb_internal_entries[index].name,
-                                          adapter->therm_dir[i]);
-               }
-               snprintf(buf, sizeof(buf), "sensor_%d", i);
-               remove_proc_entry(buf, adapter->info_dir);
-       }
-
-       if (adapter->info_dir != NULL) {
-               for (index = 0; ; index++) {
-                       if (igb_proc_entries[index].read == NULL)
-                               break;
-                       remove_proc_entry(igb_proc_entries[index].name,
-                                         adapter->info_dir);
-               }
-               remove_proc_entry("info", adapter->eth_dir);
-       }
-
-       if (adapter->eth_dir != NULL)
-               remove_proc_entry(pci_name(adapter->pdev), igb_top_dir);
-}
-
-/* called from igb_main.c */
-void igb_procfs_exit(struct igb_adapter *adapter)
-{
-       igb_del_proc_entries(adapter);
-}
-
-int igb_procfs_topdir_init(void)
-{
-       igb_top_dir = proc_mkdir("driver/igb", NULL);
-       if (igb_top_dir == NULL)
-               return -ENOMEM;
-
-       return 0;
-}
-
-void igb_procfs_topdir_exit(void)
-{
-       remove_proc_entry("driver/igb", NULL);
-}
-
-/* called from igb_main.c */
-int igb_procfs_init(struct igb_adapter *adapter)
-{
-       int rc = 0;
-       int i;
-       int index;
-       char buf[16];   /* much larger than the sensor number will ever be */
-
-       adapter->eth_dir = NULL;
-       adapter->info_dir = NULL;
-       for (i = 0; i < E1000_MAX_SENSORS; i++)
-               adapter->therm_dir[i] = NULL;
-
-       if ( igb_top_dir == NULL ) {
-               rc = -ENOMEM;
-               goto fail;
-       }
-
-       adapter->eth_dir = proc_mkdir(pci_name(adapter->pdev), igb_top_dir);
-       if (adapter->eth_dir == NULL) {
-               rc = -ENOMEM;
-               goto fail;
-       }
-
-       adapter->info_dir = proc_mkdir("info", adapter->eth_dir);
-       if (adapter->info_dir == NULL) {
-               rc = -ENOMEM;
-               goto fail;
-       }
-       for (index = 0; ; index++) {
-               if (igb_proc_entries[index].read == NULL) {
-                       break;
-               }
-               if (!(create_proc_read_entry(igb_proc_entries[index].name,
-                                          0444,
-                                          adapter->info_dir,
-                                          igb_proc_entries[index].read,
-                                          adapter))) {
-
-                       rc = -ENOMEM;
-                       goto fail;
-               }
-       }
-       if (igb_thermal_present(adapter) == false)
-               goto exit;
-
-       for (i = 0; i < E1000_MAX_SENSORS; i++) {
-
-                if (adapter->hw.mac.thermal_sensor_data.sensor[i].location== 0)
-                       continue;
-
-               snprintf(buf, sizeof(buf), "sensor_%d", i);
-               adapter->therm_dir[i] = proc_mkdir(buf, adapter->info_dir);
-               if (adapter->therm_dir[i] == NULL) {
-                       rc = -ENOMEM;
-                       goto fail;
-               }
-               for (index = 0; ; index++) {
-                       if (igb_internal_entries[index].read == NULL)
-                               break;
-                       /*
-                        * therm_data struct contains pointer the read func
-                        * will be needing
-                        */
-                       adapter->therm_data[i].hw = &adapter->hw;
-                       adapter->therm_data[i].sensor_data =
-                               &adapter->hw.mac.thermal_sensor_data.sensor[i];
-
-                       if (!(create_proc_read_entry(
-                                          igb_internal_entries[index].name,
-                                          0444,
-                                          adapter->therm_dir[i],
-                                          igb_internal_entries[index].read,
-                                          &adapter->therm_data[i]))) {
-                               rc = -ENOMEM;
-                               goto fail;
-                       }
-               }
-       }
-       goto exit;
-
-fail:
-       igb_del_proc_entries(adapter);
-exit:
-       return rc;
-}
-
-#endif /* !IGB_HWMON */
-#endif /* IGB_PROCFS */
diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c
deleted file mode 100644 (file)
index 454b70c..0000000
+++ /dev/null
@@ -1,944 +0,0 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <[email protected]>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-/******************************************************************************
- Copyright(c) 2011 Richard Cochran <[email protected]> for some of the
- 82576 and 82580 code
-******************************************************************************/
-
-#include "igb.h"
-
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/pci.h>
-#include <linux/ptp_classify.h>
-
-#define INCVALUE_MASK          0x7fffffff
-#define ISGN                   0x80000000
-
-/*
- * The 82580 timesync updates the system timer every 8ns by 8ns,
- * and this update value cannot be reprogrammed.
- *
- * Neither the 82576 nor the 82580 offer registers wide enough to hold
- * nanoseconds time values for very long. For the 82580, SYSTIM always
- * counts nanoseconds, but the upper 24 bits are not available. The
- * frequency is adjusted by changing the 32 bit fractional nanoseconds
- * register, TIMINCA.
- *
- * For the 82576, the SYSTIM register time unit is affect by the
- * choice of the 24 bit TININCA:IV (incvalue) field. Five bits of this
- * field are needed to provide the nominal 16 nanosecond period,
- * leaving 19 bits for fractional nanoseconds.
- *
- * We scale the NIC clock cycle by a large factor so that relatively
- * small clock corrections can be added or subtracted at each clock
- * tick. The drawbacks of a large factor are a) that the clock
- * register overflows more quickly (not such a big deal) and b) that
- * the increment per tick has to fit into 24 bits.  As a result we
- * need to use a shift of 19 so we can fit a value of 16 into the
- * TIMINCA register.
- *
- *
- *             SYSTIMH            SYSTIML
- *        +--------------+   +---+---+------+
- *  82576 |      32      |   | 8 | 5 |  19  |
- *        +--------------+   +---+---+------+
- *         \________ 45 bits _______/  fract
- *
- *        +----------+---+   +--------------+
- *  82580 |    24    | 8 |   |      32      |
- *        +----------+---+   +--------------+
- *          reserved  \______ 40 bits _____/
- *
- *
- * The 45 bit 82576 SYSTIM overflows every
- *   2^45 * 10^-9 / 3600 = 9.77 hours.
- *
- * The 40 bit 82580 SYSTIM overflows every
- *   2^40 * 10^-9 /  60  = 18.3 minutes.
- */
-
-#define IGB_SYSTIM_OVERFLOW_PERIOD     (HZ * 60 * 9)
-#define IGB_PTP_TX_TIMEOUT             (HZ * 15)
-#define INCPERIOD_82576                        (1 << E1000_TIMINCA_16NS_SHIFT)
-#define INCVALUE_82576_MASK            ((1 << E1000_TIMINCA_16NS_SHIFT) - 1)
-#define INCVALUE_82576                 (16 << IGB_82576_TSYNC_SHIFT)
-#define IGB_NBITS_82580                        40
-
-/*
- * SYSTIM read access for the 82576
- */
-
-static cycle_t igb_ptp_read_82576(const struct cyclecounter *cc)
-{
-       struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc);
-       struct e1000_hw *hw = &igb->hw;
-       u64 val;
-       u32 lo, hi;
-
-       lo = E1000_READ_REG(hw, E1000_SYSTIML);
-       hi = E1000_READ_REG(hw, E1000_SYSTIMH);
-
-       val = ((u64) hi) << 32;
-       val |= lo;
-
-       return val;
-}
-
-/*
- * SYSTIM read access for the 82580
- */
-
-static cycle_t igb_ptp_read_82580(const struct cyclecounter *cc)
-{
-       struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc);
-       struct e1000_hw *hw = &igb->hw;
-       u64 val;
-       u32 lo, hi;
-
-       /* The timestamp latches on lowest register read. For the 82580
-        * the lowest register is SYSTIMR instead of SYSTIML.  However we only
-        * need to provide nanosecond resolution, so we just ignore it.
-        */
-       E1000_READ_REG(hw, E1000_SYSTIMR);
-       lo = E1000_READ_REG(hw, E1000_SYSTIML);
-       hi = E1000_READ_REG(hw, E1000_SYSTIMH);
-
-       val = ((u64) hi) << 32;
-       val |= lo;
-
-       return val;
-}
-
-/*
- * SYSTIM read access for I210/I211
- */
-
-static void igb_ptp_read_i210(struct igb_adapter *adapter, struct timespec *ts)
-{
-       struct e1000_hw *hw = &adapter->hw;
-       u32 sec, nsec;
-
-       /* The timestamp latches on lowest register read. For I210/I211, the
-        * lowest register is SYSTIMR. Since we only need to provide nanosecond
-        * resolution, we can ignore it.
-        */
-       E1000_READ_REG(hw, E1000_SYSTIMR);
-       nsec = E1000_READ_REG(hw, E1000_SYSTIML);
-       sec = E1000_READ_REG(hw, E1000_SYSTIMH);
-
-       ts->tv_sec = sec;
-       ts->tv_nsec = nsec;
-}
-
-static void igb_ptp_write_i210(struct igb_adapter *adapter,
-                              const struct timespec *ts)
-{
-       struct e1000_hw *hw = &adapter->hw;
-
-       /*
-        * Writing the SYSTIMR register is not necessary as it only provides
-        * sub-nanosecond resolution.
-        */
-       E1000_WRITE_REG(hw, E1000_SYSTIML, ts->tv_nsec);
-       E1000_WRITE_REG(hw, E1000_SYSTIMH, ts->tv_sec);
-}
-
-/**
- * igb_ptp_systim_to_hwtstamp - convert system time value to hw timestamp
- * @adapter: board private structure
- * @hwtstamps: timestamp structure to update
- * @systim: unsigned 64bit system time value.
- *
- * We need to convert the system time value stored in the RX/TXSTMP registers
- * into a hwtstamp which can be used by the upper level timestamping functions.
- *
- * The 'tmreg_lock' spinlock is used to protect the consistency of the
- * system time value. This is needed because reading the 64 bit time
- * value involves reading two (or three) 32 bit registers. The first
- * read latches the value. Ditto for writing.
- *
- * In addition, here have extended the system time with an overflow
- * counter in software.
- **/
-static void igb_ptp_systim_to_hwtstamp(struct igb_adapter *adapter,
-                                      struct skb_shared_hwtstamps *hwtstamps,
-                                      u64 systim)
-{
-       unsigned long flags;
-       u64 ns;
-
-       switch (adapter->hw.mac.type) {
-       case e1000_82576:
-       case e1000_82580:
-       case e1000_i350:
-       case e1000_i354:
-               spin_lock_irqsave(&adapter->tmreg_lock, flags);
-
-               ns = timecounter_cyc2time(&adapter->tc, systim);
-
-               spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
-
-               memset(hwtstamps, 0, sizeof(*hwtstamps));
-               hwtstamps->hwtstamp = ns_to_ktime(ns);
-               break;
-       case e1000_i210:
-       case e1000_i211:
-               memset(hwtstamps, 0, sizeof(*hwtstamps));
-               /* Upper 32 bits contain s, lower 32 bits contain ns. */
-               hwtstamps->hwtstamp = ktime_set(systim >> 32,
-                                               systim & 0xFFFFFFFF);
-               break;
-       default:
-               break;
-       }
-}
-
-/*
- * PTP clock operations
- */
-
-static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb)
-{
-       struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
-                                              ptp_caps);
-       struct e1000_hw *hw = &igb->hw;
-       int neg_adj = 0;
-       u64 rate;
-       u32 incvalue;
-
-       if (ppb < 0) {
-               neg_adj = 1;
-               ppb = -ppb;
-       }
-       rate = ppb;
-       rate <<= 14;
-       rate = div_u64(rate, 1953125);
-
-       incvalue = 16 << IGB_82576_TSYNC_SHIFT;
-
-       if (neg_adj)
-               incvalue -= rate;
-       else
-               incvalue += rate;
-
-       E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | (incvalue & INCVALUE_82576_MASK));
-
-       return 0;
-}
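The shift-and-divide above is a scaled parts-per-billion computation. The requested adjustment is ppb billionths of the nominal increment INCVALUE_82576 = 16 << IGB_82576_TSYNC_SHIFT, and since 10^9 = 2^9 * 1953125 the divide can stay in 64-bit integers:

    rate = ppb * (16 << 19) / 10^9
         = ppb * 2^23 / (2^9 * 1953125)
         = (ppb << 14) / 1953125

This assumes IGB_82576_TSYNC_SHIFT is 19; that define lives in igb.h and is not part of this hunk.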
-
-static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb)
-{
-       struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
-                                              ptp_caps);
-       struct e1000_hw *hw = &igb->hw;
-       int neg_adj = 0;
-       u64 rate;
-       u32 inca;
-
-       if (ppb < 0) {
-               neg_adj = 1;
-               ppb = -ppb;
-       }
-       rate = ppb;
-       rate <<= 26;
-       rate = div_u64(rate, 1953125);
-
-       /* At 2.5G speeds, the TIMINCA register on I354 updates the clock 2.5x
-        * as quickly. Account for this by dividing the adjustment by 2.5.
-        */
-       if (hw->mac.type == e1000_i354) {
-               u32 status = E1000_READ_REG(hw, E1000_STATUS);
-
-               if ((status & E1000_STATUS_2P5_SKU) &&
-                   !(status & E1000_STATUS_2P5_SKU_OVER)) {
-                       rate <<= 1;
-                       rate = div_u64(rate, 5);
-               }
-       }
-
-       inca = rate & INCVALUE_MASK;
-       if (neg_adj)
-               inca |= ISGN;
-
-       E1000_WRITE_REG(hw, E1000_TIMINCA, inca);
-
-       return 0;
-}
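The 2.5 Gb/s correction in the branch above is plain integer arithmetic: dividing by 2.5 is performed as rate * 2 / 5, i.e. `rate <<= 1; rate = div_u64(rate, 5);`, which avoids any fractional divide. The earlier `rate <<= 26` is the same pre-scaling trick used in the 82576 variant, just with the scaling appropriate to the 82580 TIMINCA increment field.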
-
-static int igb_ptp_adjtime_82576(struct ptp_clock_info *ptp, s64 delta)
-{
-       struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
-                                              ptp_caps);
-       unsigned long flags;
-       s64 now;
-
-       spin_lock_irqsave(&igb->tmreg_lock, flags);
-
-       now = timecounter_read(&igb->tc);
-       now += delta;
-       timecounter_init(&igb->tc, &igb->cc, now);
-
-       spin_unlock_irqrestore(&igb->tmreg_lock, flags);
-
-       return 0;
-}
-
-static int igb_ptp_adjtime_i210(struct ptp_clock_info *ptp, s64 delta)
-{
-       struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
-                                              ptp_caps);
-       unsigned long flags;
-       struct timespec now, then = ns_to_timespec(delta);
-
-       spin_lock_irqsave(&igb->tmreg_lock, flags);
-
-       igb_ptp_read_i210(igb, &now);
-       now = timespec_add(now, then);
-       igb_ptp_write_i210(igb, (const struct timespec *)&now);
-
-       spin_unlock_irqrestore(&igb->tmreg_lock, flags);
-
-       return 0;
-}
-
-static int igb_ptp_gettime_82576(struct ptp_clock_info *ptp,
-                                struct timespec *ts)
-{
-       struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
-                                              ptp_caps);
-       unsigned long flags;
-       u64 ns;
-       u32 remainder;
-
-       spin_lock_irqsave(&igb->tmreg_lock, flags);
-
-       ns = timecounter_read(&igb->tc);
-
-       spin_unlock_irqrestore(&igb->tmreg_lock, flags);
-
-       ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder);
-       ts->tv_nsec = remainder;
-
-       return 0;
-}
-
-static int igb_ptp_gettime_i210(struct ptp_clock_info *ptp,
-                               struct timespec *ts)
-{
-       struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
-                                              ptp_caps);
-       unsigned long flags;
-
-       spin_lock_irqsave(&igb->tmreg_lock, flags);
-
-       igb_ptp_read_i210(igb, ts);
-
-       spin_unlock_irqrestore(&igb->tmreg_lock, flags);
-
-       return 0;
-}
-
-static int igb_ptp_settime_82576(struct ptp_clock_info *ptp,
-                                const struct timespec *ts)
-{
-       struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
-                                              ptp_caps);
-       unsigned long flags;
-       u64 ns;
-
-       ns = ts->tv_sec * 1000000000ULL;
-       ns += ts->tv_nsec;
-
-       spin_lock_irqsave(&igb->tmreg_lock, flags);
-
-       timecounter_init(&igb->tc, &igb->cc, ns);
-
-       spin_unlock_irqrestore(&igb->tmreg_lock, flags);
-
-       return 0;
-}
-
-static int igb_ptp_settime_i210(struct ptp_clock_info *ptp,
-                               const struct timespec *ts)
-{
-       struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
-                                              ptp_caps);
-       unsigned long flags;
-
-       spin_lock_irqsave(&igb->tmreg_lock, flags);
-
-       igb_ptp_write_i210(igb, ts);
-
-       spin_unlock_irqrestore(&igb->tmreg_lock, flags);
-
-       return 0;
-}
-
-static int igb_ptp_enable(struct ptp_clock_info *ptp,
-                         struct ptp_clock_request *rq, int on)
-{
-       return -EOPNOTSUPP;
-}
-
-/**
- * igb_ptp_tx_work
- * @work: pointer to work struct
- *
- * This work function polls the TSYNCTXCTL valid bit to determine when a
- * timestamp has been taken for the currently stored skb.
- */
-void igb_ptp_tx_work(struct work_struct *work)
-{
-       struct igb_adapter *adapter = container_of(work, struct igb_adapter,
-                                                  ptp_tx_work);
-       struct e1000_hw *hw = &adapter->hw;
-       u32 tsynctxctl;
-
-       if (!adapter->ptp_tx_skb)
-               return;
-
-       if (time_is_before_jiffies(adapter->ptp_tx_start +
-                                  IGB_PTP_TX_TIMEOUT)) {
-               dev_kfree_skb_any(adapter->ptp_tx_skb);
-               adapter->ptp_tx_skb = NULL;
-               adapter->tx_hwtstamp_timeouts++;
-               dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang");
-               return;
-       }
-
-       tsynctxctl = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
-       if (tsynctxctl & E1000_TSYNCTXCTL_VALID)
-               igb_ptp_tx_hwtstamp(adapter);
-       else
-               /* reschedule to check later */
-               schedule_work(&adapter->ptp_tx_work);
-}
-
-static void igb_ptp_overflow_check(struct work_struct *work)
-{
-       struct igb_adapter *igb =
-               container_of(work, struct igb_adapter, ptp_overflow_work.work);
-       struct timespec ts;
-
-       igb->ptp_caps.gettime(&igb->ptp_caps, &ts);
-
-       pr_debug("igb overflow check at %ld.%09lu\n", ts.tv_sec, ts.tv_nsec);
-
-       schedule_delayed_work(&igb->ptp_overflow_work,
-                             IGB_SYSTIM_OVERFLOW_PERIOD);
-}
-
-/**
- * igb_ptp_rx_hang - detect error case when Rx timestamp registers latched
- * @adapter: private network adapter structure
- *
- * This watchdog task is scheduled to detect the error case where the hardware
- * has dropped a timestamped Rx packet because the ring was full. This
- * particular error is rare but leaves the device unable to timestamp any
- * future packets.
- */
-void igb_ptp_rx_hang(struct igb_adapter *adapter)
-{
-       struct e1000_hw *hw = &adapter->hw;
-       struct igb_ring *rx_ring;
-       u32 tsyncrxctl = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
-       unsigned long rx_event;
-       int n;
-
-       if (hw->mac.type != e1000_82576)
-               return;
-
-       /* If we don't have a valid timestamp in the registers, just update the
-        * timeout counter and exit
-        */
-       if (!(tsyncrxctl & E1000_TSYNCRXCTL_VALID)) {
-               adapter->last_rx_ptp_check = jiffies;
-               return;
-       }
-
-       /* Determine the most recent watchdog or rx_timestamp event */
-       rx_event = adapter->last_rx_ptp_check;
-       for (n = 0; n < adapter->num_rx_queues; n++) {
-               rx_ring = adapter->rx_ring[n];
-               if (time_after(rx_ring->last_rx_timestamp, rx_event))
-                       rx_event = rx_ring->last_rx_timestamp;
-       }
-
-       /* Only need to read the high RXSTMP register to clear the lock */
-       if (time_is_before_jiffies(rx_event + 5 * HZ)) {
-               E1000_READ_REG(hw, E1000_RXSTMPH);
-               adapter->last_rx_ptp_check = jiffies;
-               adapter->rx_hwtstamp_cleared++;
-               dev_warn(&adapter->pdev->dev, "clearing Rx timestamp hang");
-       }
-}
-
-/**
- * igb_ptp_tx_hwtstamp - utility function which checks for TX time stamp
- * @adapter: Board private structure.
- *
- * If we were asked to do hardware stamping and such a time stamp is
- * available, then it must have been for this skb here, because we
- * allow only one such packet into the queue.
- */
-void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
-{
-       struct e1000_hw *hw = &adapter->hw;
-       struct skb_shared_hwtstamps shhwtstamps;
-       u64 regval;
-
-       regval = E1000_READ_REG(hw, E1000_TXSTMPL);
-       regval |= (u64)E1000_READ_REG(hw, E1000_TXSTMPH) << 32;
-
-       igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
-       skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps);
-       dev_kfree_skb_any(adapter->ptp_tx_skb);
-       adapter->ptp_tx_skb = NULL;
-}
-
-/**
- * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp
- * @q_vector: Pointer to interrupt specific structure
- * @va: Pointer to address containing Rx buffer
- * @skb: Buffer containing timestamp and packet
- *
- * This function is meant to retrieve a timestamp from the first buffer of an
- * incoming frame.  The value is stored in little endian format starting on
- * byte 8.
- */
-void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector,
-                        unsigned char *va,
-                        struct sk_buff *skb)
-{
-       __le64 *regval = (__le64 *)va;
-
-       /*
-        * The timestamp is recorded in little endian format.
-        * DWORD: 0        1        2        3
-        * Field: Reserved Reserved SYSTIML  SYSTIMH
-        */
-       igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb),
-                                  le64_to_cpu(regval[1]));
-}
-
-/**
- * igb_ptp_rx_rgtstamp - retrieve Rx timestamp stored in register
- * @q_vector: Pointer to interrupt specific structure
- * @skb: Buffer containing timestamp and packet
- *
- * This function is meant to retrieve a timestamp from the internal registers
- * of the adapter and store it in the skb.
- */
-void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector,
-                        struct sk_buff *skb)
-{
-       struct igb_adapter *adapter = q_vector->adapter;
-       struct e1000_hw *hw = &adapter->hw;
-       u64 regval;
-
-       /*
-        * If this bit is set, then the RX registers contain the time stamp. No
-        * other packet will be time stamped until we read these registers, so
-        * read the registers to make them available again. Because only one
-        * packet can be time stamped at a time, we know that the register
-        * values must belong to this one here and therefore we don't need to
-        * compare any of the additional attributes stored for it.
-        *
-        * If nothing went wrong, then it should have a shared tx_flags that we
-        * can turn into a skb_shared_hwtstamps.
-        */
-       if (!(E1000_READ_REG(hw, E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
-               return;
-
-       regval = E1000_READ_REG(hw, E1000_RXSTMPL);
-       regval |= (u64)E1000_READ_REG(hw, E1000_RXSTMPH) << 32;
-
-       igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
-}
-
-/**
- * igb_ptp_hwtstamp_ioctl - control hardware time stamping
- * @netdev: network interface device structure
- * @ifr: interface request data holding the hwtstamp_config
- * @cmd: ioctl command
- *
- * Outgoing time stamping can be enabled and disabled. Play nice and
- * disable it when requested, although it shouldn't cause any overhead
- * when no packet needs it. At most one packet in the queue may be
- * marked for time stamping, otherwise it would be impossible to tell
- * for sure to which packet the hardware time stamp belongs.
- *
- * Incoming time stamping has to be configured via the hardware
- * filters. Not all combinations are supported, in particular event
- * type has to be specified. Matching the kind of event packet is
- * not supported, with the exception of "all V2 events regardless of
- * level 2 or 4".
- *
- **/
-int igb_ptp_hwtstamp_ioctl(struct net_device *netdev,
-                          struct ifreq *ifr, int cmd)
-{
-       struct igb_adapter *adapter = netdev_priv(netdev);
-       struct e1000_hw *hw = &adapter->hw;
-       struct hwtstamp_config config;
-       u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
-       u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
-       u32 tsync_rx_cfg = 0;
-       bool is_l4 = false;
-       bool is_l2 = false;
-       u32 regval;
-
-       if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
-               return -EFAULT;
-
-       /* reserved for future extensions */
-       if (config.flags)
-               return -EINVAL;
-
-       switch (config.tx_type) {
-       case HWTSTAMP_TX_OFF:
-               tsync_tx_ctl = 0;
-       case HWTSTAMP_TX_ON:
-               break;
-       default:
-               return -ERANGE;
-       }
-
-       switch (config.rx_filter) {
-       case HWTSTAMP_FILTER_NONE:
-               tsync_rx_ctl = 0;
-               break;
-       case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
-               tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
-               tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
-               is_l4 = true;
-               break;
-       case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
-               tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
-               tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
-               is_l4 = true;
-               break;
-       case HWTSTAMP_FILTER_PTP_V2_EVENT:
-       case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
-       case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
-       case HWTSTAMP_FILTER_PTP_V2_SYNC:
-       case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
-       case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
-       case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
-       case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
-       case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
-               tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
-               config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
-               is_l2 = true;
-               is_l4 = true;
-               break;
-       case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
-       case HWTSTAMP_FILTER_ALL:
-               /*
-                * 82576 cannot timestamp all packets, which it needs to do to
-                * support both V1 Sync and Delay_Req messages
-                */
-               if (hw->mac.type != e1000_82576) {
-                       tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
-                       config.rx_filter = HWTSTAMP_FILTER_ALL;
-                       break;
-               }
-               /* fall through */
-       default:
-               config.rx_filter = HWTSTAMP_FILTER_NONE;
-               return -ERANGE;
-       }
-
-       if (hw->mac.type == e1000_82575) {
-               if (tsync_rx_ctl | tsync_tx_ctl)
-                       return -EINVAL;
-               return 0;
-       }
-
-       /*
-        * Per-packet timestamping only works if all packets are
-        * timestamped, so enable timestamping in all packets as
-        * long as one rx filter was configured.
-        */
-       if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
-               tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
-               tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
-               config.rx_filter = HWTSTAMP_FILTER_ALL;
-               is_l2 = true;
-               is_l4 = true;
-
-               if ((hw->mac.type == e1000_i210) ||
-                   (hw->mac.type == e1000_i211)) {
-                       regval = E1000_READ_REG(hw, E1000_RXPBS);
-                       regval |= E1000_RXPBS_CFG_TS_EN;
-                       E1000_WRITE_REG(hw, E1000_RXPBS, regval);
-               }
-       }
-
-       /* enable/disable TX */
-       regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
-       regval &= ~E1000_TSYNCTXCTL_ENABLED;
-       regval |= tsync_tx_ctl;
-       E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval);
-
-       /* enable/disable RX */
-       regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
-       regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
-       regval |= tsync_rx_ctl;
-       E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval);
-
-       /* define which PTP packets are time stamped */
-       E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg);
-
-       /* define ethertype filter for timestamped packets */
-       if (is_l2)
-               E1000_WRITE_REG(hw, E1000_ETQF(3),
-                    (E1000_ETQF_FILTER_ENABLE | /* enable filter */
-                     E1000_ETQF_1588 | /* enable timestamping */
-                     ETH_P_1588));     /* 1588 eth protocol type */
-       else
-               E1000_WRITE_REG(hw, E1000_ETQF(3), 0);
-
-       /* L4 Queue Filter[3]: filter by destination port and protocol */
-       if (is_l4) {
-               u32 ftqf = (IPPROTO_UDP /* UDP */
-                       | E1000_FTQF_VF_BP /* VF not compared */
-                       | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
-                       | E1000_FTQF_MASK); /* mask all inputs */
-               ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
-
-               E1000_WRITE_REG(hw, E1000_IMIR(3), htons(PTP_EV_PORT));
-               E1000_WRITE_REG(hw, E1000_IMIREXT(3),
-                    (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
-               if (hw->mac.type == e1000_82576) {
-                       /* enable source port check */
-                       E1000_WRITE_REG(hw, E1000_SPQF(3), htons(PTP_EV_PORT));
-                       ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
-               }
-               E1000_WRITE_REG(hw, E1000_FTQF(3), ftqf);
-       } else {
-               E1000_WRITE_REG(hw, E1000_FTQF(3), E1000_FTQF_MASK);
-       }
-       E1000_WRITE_FLUSH(hw);
-
-       /* clear TX/RX time stamp registers, just to be sure */
-       regval = E1000_READ_REG(hw, E1000_TXSTMPL);
-       regval = E1000_READ_REG(hw, E1000_TXSTMPH);
-       regval = E1000_READ_REG(hw, E1000_RXSTMPL);
-       regval = E1000_READ_REG(hw, E1000_RXSTMPH);
-
-       return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
-               -EFAULT : 0;
-}
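The handler above is reached through the standard SIOCSHWTSTAMP request. A minimal user-space sketch of driving it (the interface name "eth0" is a placeholder and error handling is trimmed):

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>
    #include <linux/net_tstamp.h>
    #include <linux/sockios.h>

    int main(void)
    {
        struct hwtstamp_config cfg = { 0 };
        struct ifreq ifr = { 0 };
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        cfg.tx_type = HWTSTAMP_TX_ON;                 /* timestamp outgoing PTP packets */
        cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; /* all PTPv2 event messages */

        strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
        ifr.ifr_data = (char *)&cfg;

        if (ioctl(fd, SIOCSHWTSTAMP, &ifr) < 0)
            perror("SIOCSHWTSTAMP");
        else
            printf("rx_filter granted: %d\n", cfg.rx_filter);

        close(fd);
        return 0;
    }

Because the driver may upgrade the filter (for example to HWTSTAMP_FILTER_ALL on 82580 and newer, as the code above does), the structure should be inspected again after the ioctl returns.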
-
-void igb_ptp_init(struct igb_adapter *adapter)
-{
-       struct e1000_hw *hw = &adapter->hw;
-       struct net_device *netdev = adapter->netdev;
-
-       switch (hw->mac.type) {
-       case e1000_82576:
-               snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
-               adapter->ptp_caps.owner = THIS_MODULE;
-               adapter->ptp_caps.max_adj = 999999881;
-               adapter->ptp_caps.n_ext_ts = 0;
-               adapter->ptp_caps.pps = 0;
-               adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576;
-               adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
-               adapter->ptp_caps.gettime = igb_ptp_gettime_82576;
-               adapter->ptp_caps.settime = igb_ptp_settime_82576;
-               adapter->ptp_caps.enable = igb_ptp_enable;
-               adapter->cc.read = igb_ptp_read_82576;
-               adapter->cc.mask = CLOCKSOURCE_MASK(64);
-               adapter->cc.mult = 1;
-               adapter->cc.shift = IGB_82576_TSYNC_SHIFT;
-               /* Dial the nominal frequency. */
-               E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 |
-                                                  INCVALUE_82576);
-               break;
-       case e1000_82580:
-       case e1000_i350:
-       case e1000_i354:
-               snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
-               adapter->ptp_caps.owner = THIS_MODULE;
-               adapter->ptp_caps.max_adj = 62499999;
-               adapter->ptp_caps.n_ext_ts = 0;
-               adapter->ptp_caps.pps = 0;
-               adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580;
-               adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
-               adapter->ptp_caps.gettime = igb_ptp_gettime_82576;
-               adapter->ptp_caps.settime = igb_ptp_settime_82576;
-               adapter->ptp_caps.enable = igb_ptp_enable;
-               adapter->cc.read = igb_ptp_read_82580;
-               adapter->cc.mask = CLOCKSOURCE_MASK(IGB_NBITS_82580);
-               adapter->cc.mult = 1;
-               adapter->cc.shift = 0;
-               /* Enable the timer functions by clearing bit 31. */
-               E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0);
-               break;
-       case e1000_i210:
-       case e1000_i211:
-               snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
-               adapter->ptp_caps.owner = THIS_MODULE;
-               adapter->ptp_caps.max_adj = 62499999;
-               adapter->ptp_caps.n_ext_ts = 0;
-               adapter->ptp_caps.pps = 0;
-               adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580;
-               adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210;
-               adapter->ptp_caps.gettime = igb_ptp_gettime_i210;
-               adapter->ptp_caps.settime = igb_ptp_settime_i210;
-               adapter->ptp_caps.enable = igb_ptp_enable;
-               /* Enable the timer functions by clearing bit 31. */
-               E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0);
-               break;
-       default:
-               adapter->ptp_clock = NULL;
-               return;
-       }
-
-       E1000_WRITE_FLUSH(hw);
-
-       spin_lock_init(&adapter->tmreg_lock);
-       INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work);
-
-       /* Initialize the clock and overflow work for devices that need it. */
-       if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) {
-               struct timespec ts = ktime_to_timespec(ktime_get_real());
-
-               igb_ptp_settime_i210(&adapter->ptp_caps, &ts);
-       } else {
-               timecounter_init(&adapter->tc, &adapter->cc,
-                                ktime_to_ns(ktime_get_real()));
-
-               INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
-                                 igb_ptp_overflow_check);
-
-               schedule_delayed_work(&adapter->ptp_overflow_work,
-                                     IGB_SYSTIM_OVERFLOW_PERIOD);
-       }
-
-       /* Initialize the time sync interrupts for devices that support it. */
-       if (hw->mac.type >= e1000_82580) {
-               E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS);
-               E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS);
-       }
-
-       adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
-                                               &adapter->pdev->dev);
-       if (IS_ERR(adapter->ptp_clock)) {
-               adapter->ptp_clock = NULL;
-               dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n");
-       } else {
-               dev_info(&adapter->pdev->dev, "added PHC on %s\n",
-                        adapter->netdev->name);
-               adapter->flags |= IGB_FLAG_PTP;
-       }
-}
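Once ptp_clock_register() succeeds, the clock is exposed as a /dev/ptpN character device and can be read from user space as a dynamic POSIX clock. A minimal sketch, assuming /dev/ptp0 is the igb PHC and using the open-coded FD_TO_CLOCKID macro from the kernel's PTP test program:

    #include <stdio.h>
    #include <fcntl.h>
    #include <time.h>
    #include <unistd.h>

    #define CLOCKFD 3
    #define FD_TO_CLOCKID(fd) ((~(clockid_t)(fd) << 3) | CLOCKFD)

    int main(void)
    {
        int fd = open("/dev/ptp0", O_RDONLY);   /* placeholder device node */
        struct timespec ts;

        if (fd < 0 || clock_gettime(FD_TO_CLOCKID(fd), &ts)) {
            perror("phc");
            return 1;
        }
        printf("PHC time: %lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
        close(fd);
        return 0;
    }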
-
-/**
- * igb_ptp_stop - Disable PTP device and stop the overflow check.
- * @adapter: Board private structure.
- *
- * This function stops the PTP support and cancels the delayed work.
- **/
-void igb_ptp_stop(struct igb_adapter *adapter)
-{
-       switch (adapter->hw.mac.type) {
-       case e1000_82576:
-       case e1000_82580:
-       case e1000_i350:
-       case e1000_i354:
-               cancel_delayed_work_sync(&adapter->ptp_overflow_work);
-               break;
-       case e1000_i210:
-       case e1000_i211:
-               /* No delayed work to cancel. */
-               break;
-       default:
-               return;
-       }
-
-       cancel_work_sync(&adapter->ptp_tx_work);
-       if (adapter->ptp_tx_skb) {
-               dev_kfree_skb_any(adapter->ptp_tx_skb);
-               adapter->ptp_tx_skb = NULL;
-       }
-
-       if (adapter->ptp_clock) {
-               ptp_clock_unregister(adapter->ptp_clock);
-               dev_info(&adapter->pdev->dev, "removed PHC on %s\n",
-                        adapter->netdev->name);
-               adapter->flags &= ~IGB_FLAG_PTP;
-       }
-}
-
-/**
- * igb_ptp_reset - Re-enable the adapter for PTP following a reset.
- * @adapter: Board private structure.
- *
- * This function handles the reset work required to re-enable the PTP device.
- **/
-void igb_ptp_reset(struct igb_adapter *adapter)
-{
-       struct e1000_hw *hw = &adapter->hw;
-
-       if (!(adapter->flags & IGB_FLAG_PTP))
-               return;
-
-       switch (adapter->hw.mac.type) {
-       case e1000_82576:
-               /* Dial the nominal frequency. */
-               E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 |
-                                                  INCVALUE_82576);
-               break;
-       case e1000_82580:
-       case e1000_i350:
-       case e1000_i354:
-       case e1000_i210:
-       case e1000_i211:
-               /* Enable the timer functions and interrupts. */
-               E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0);
-               E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS);
-               E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS);
-               break;
-       default:
-               /* No work to do. */
-               return;
-       }
-
-       /* Re-initialize the timer. */
-       if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) {
-               struct timespec ts = ktime_to_timespec(ktime_get_real());
-
-               igb_ptp_settime_i210(&adapter->ptp_caps, &ts);
-       } else {
-               timecounter_init(&adapter->tc, &adapter->cc,
-                                ktime_to_ns(ktime_get_real()));
-       }
-}
index 18da64a..9d49b45 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 015c895..205da56 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index e51e7c4..c6d4c56 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c
deleted file mode 100644 (file)
index bde3a83..0000000
+++ /dev/null
@@ -1,1482 +0,0 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <[email protected]>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "igb.h"
-#include "kcompat.h"
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) )
-/* From lib/vsprintf.c */
-#include <asm/div64.h>
-
-static int skip_atoi(const char **s)
-{
-       int i=0;
-
-       while (isdigit(**s))
-               i = i*10 + *((*s)++) - '0';
-       return i;
-}
-
-#define _kc_ZEROPAD    1               /* pad with zero */
-#define _kc_SIGN       2               /* unsigned/signed long */
-#define _kc_PLUS       4               /* show plus */
-#define _kc_SPACE      8               /* space if plus */
-#define _kc_LEFT       16              /* left justified */
-#define _kc_SPECIAL    32              /* 0x */
-#define _kc_LARGE      64              /* use 'ABCDEF' instead of 'abcdef' */
-
-static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type)
-{
-       char c,sign,tmp[66];
-       const char *digits;
-       const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
-       const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
-       int i;
-
-       digits = (type & _kc_LARGE) ? large_digits : small_digits;
-       if (type & _kc_LEFT)
-               type &= ~_kc_ZEROPAD;
-       if (base < 2 || base > 36)
-               return 0;
-       c = (type & _kc_ZEROPAD) ? '0' : ' ';
-       sign = 0;
-       if (type & _kc_SIGN) {
-               if (num < 0) {
-                       sign = '-';
-                       num = -num;
-                       size--;
-               } else if (type & _kc_PLUS) {
-                       sign = '+';
-                       size--;
-               } else if (type & _kc_SPACE) {
-                       sign = ' ';
-                       size--;
-               }
-       }
-       if (type & _kc_SPECIAL) {
-               if (base == 16)
-                       size -= 2;
-               else if (base == 8)
-                       size--;
-       }
-       i = 0;
-       if (num == 0)
-               tmp[i++]='0';
-       else while (num != 0)
-               tmp[i++] = digits[do_div(num,base)];
-       if (i > precision)
-               precision = i;
-       size -= precision;
-       if (!(type&(_kc_ZEROPAD+_kc_LEFT))) {
-               while(size-->0) {
-                       if (buf <= end)
-                               *buf = ' ';
-                       ++buf;
-               }
-       }
-       if (sign) {
-               if (buf <= end)
-                       *buf = sign;
-               ++buf;
-       }
-       if (type & _kc_SPECIAL) {
-               if (base==8) {
-                       if (buf <= end)
-                               *buf = '0';
-                       ++buf;
-               } else if (base==16) {
-                       if (buf <= end)
-                               *buf = '0';
-                       ++buf;
-                       if (buf <= end)
-                               *buf = digits[33];
-                       ++buf;
-               }
-       }
-       if (!(type & _kc_LEFT)) {
-               while (size-- > 0) {
-                       if (buf <= end)
-                               *buf = c;
-                       ++buf;
-               }
-       }
-       while (i < precision--) {
-               if (buf <= end)
-                       *buf = '0';
-               ++buf;
-       }
-       while (i-- > 0) {
-               if (buf <= end)
-                       *buf = tmp[i];
-               ++buf;
-       }
-       while (size-- > 0) {
-               if (buf <= end)
-                       *buf = ' ';
-               ++buf;
-       }
-       return buf;
-}
-
-int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
-{
-       int len;
-       unsigned long long num;
-       int i, base;
-       char *str, *end, c;
-       const char *s;
-
-       int flags;              /* flags to number() */
-
-       int field_width;        /* width of output field */
-       int precision;          /* min. # of digits for integers; max
-                                  number of chars for from string */
-       int qualifier;          /* 'h', 'l', or 'L' for integer fields */
-                               /* 'z' support added 23/7/1999 S.H.    */
-                               /* 'z' changed to 'Z' --davidm 1/25/99 */
-
-       str = buf;
-       end = buf + size - 1;
-
-       if (end < buf - 1) {
-               end = ((void *) -1);
-               size = end - buf + 1;
-       }
-
-       for (; *fmt ; ++fmt) {
-               if (*fmt != '%') {
-                       if (str <= end)
-                               *str = *fmt;
-                       ++str;
-                       continue;
-               }
-
-               /* process flags */
-               flags = 0;
-               repeat:
-                       ++fmt;          /* this also skips first '%' */
-                       switch (*fmt) {
-                               case '-': flags |= _kc_LEFT; goto repeat;
-                               case '+': flags |= _kc_PLUS; goto repeat;
-                               case ' ': flags |= _kc_SPACE; goto repeat;
-                               case '#': flags |= _kc_SPECIAL; goto repeat;
-                               case '0': flags |= _kc_ZEROPAD; goto repeat;
-                       }
-
-               /* get field width */
-               field_width = -1;
-               if (isdigit(*fmt))
-                       field_width = skip_atoi(&fmt);
-               else if (*fmt == '*') {
-                       ++fmt;
-                       /* it's the next argument */
-                       field_width = va_arg(args, int);
-                       if (field_width < 0) {
-                               field_width = -field_width;
-                               flags |= _kc_LEFT;
-                       }
-               }
-
-               /* get the precision */
-               precision = -1;
-               if (*fmt == '.') {
-                       ++fmt;
-                       if (isdigit(*fmt))
-                               precision = skip_atoi(&fmt);
-                       else if (*fmt == '*') {
-                               ++fmt;
-                               /* it's the next argument */
-                               precision = va_arg(args, int);
-                       }
-                       if (precision < 0)
-                               precision = 0;
-               }
-
-               /* get the conversion qualifier */
-               qualifier = -1;
-               if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') {
-                       qualifier = *fmt;
-                       ++fmt;
-               }
-
-               /* default base */
-               base = 10;
-
-               switch (*fmt) {
-                       case 'c':
-                               if (!(flags & _kc_LEFT)) {
-                                       while (--field_width > 0) {
-                                               if (str <= end)
-                                                       *str = ' ';
-                                               ++str;
-                                       }
-                               }
-                               c = (unsigned char) va_arg(args, int);
-                               if (str <= end)
-                                       *str = c;
-                               ++str;
-                               while (--field_width > 0) {
-                                       if (str <= end)
-                                               *str = ' ';
-                                       ++str;
-                               }
-                               continue;
-
-                       case 's':
-                               s = va_arg(args, char *);
-                               if (!s)
-                                       s = "<NULL>";
-
-                               len = strnlen(s, precision);
-
-                               if (!(flags & _kc_LEFT)) {
-                                       while (len < field_width--) {
-                                               if (str <= end)
-                                                       *str = ' ';
-                                               ++str;
-                                       }
-                               }
-                               for (i = 0; i < len; ++i) {
-                                       if (str <= end)
-                                               *str = *s;
-                                       ++str; ++s;
-                               }
-                               while (len < field_width--) {
-                                       if (str <= end)
-                                               *str = ' ';
-                                       ++str;
-                               }
-                               continue;
-
-                       case 'p':
-                               if (field_width == -1) {
-                                       field_width = 2*sizeof(void *);
-                                       flags |= _kc_ZEROPAD;
-                               }
-                               str = number(str, end,
-                                               (unsigned long) va_arg(args, void *),
-                                               16, field_width, precision, flags);
-                               continue;
-
-
-                       case 'n':
-                               /* FIXME:
-                               * What does C99 say about the overflow case here? */
-                               if (qualifier == 'l') {
-                                       long * ip = va_arg(args, long *);
-                                       *ip = (str - buf);
-                               } else if (qualifier == 'Z') {
-                                       size_t * ip = va_arg(args, size_t *);
-                                       *ip = (str - buf);
-                               } else {
-                                       int * ip = va_arg(args, int *);
-                                       *ip = (str - buf);
-                               }
-                               continue;
-
-                       case '%':
-                               if (str <= end)
-                                       *str = '%';
-                               ++str;
-                               continue;
-
-                               /* integer number formats - set up the flags and "break" */
-                       case 'o':
-                               base = 8;
-                               break;
-
-                       case 'X':
-                               flags |= _kc_LARGE;
-                       case 'x':
-                               base = 16;
-                               break;
-
-                       case 'd':
-                       case 'i':
-                               flags |= _kc_SIGN;
-                       case 'u':
-                               break;
-
-                       default:
-                               if (str <= end)
-                                       *str = '%';
-                               ++str;
-                               if (*fmt) {
-                                       if (str <= end)
-                                               *str = *fmt;
-                                       ++str;
-                               } else {
-                                       --fmt;
-                               }
-                               continue;
-               }
-               if (qualifier == 'L')
-                       num = va_arg(args, long long);
-               else if (qualifier == 'l') {
-                       num = va_arg(args, unsigned long);
-                       if (flags & _kc_SIGN)
-                               num = (signed long) num;
-               } else if (qualifier == 'Z') {
-                       num = va_arg(args, size_t);
-               } else if (qualifier == 'h') {
-                       num = (unsigned short) va_arg(args, int);
-                       if (flags & _kc_SIGN)
-                               num = (signed short) num;
-               } else {
-                       num = va_arg(args, unsigned int);
-                       if (flags & _kc_SIGN)
-                               num = (signed int) num;
-               }
-               str = number(str, end, num, base,
-                               field_width, precision, flags);
-       }
-       if (str <= end)
-               *str = '\0';
-       else if (size > 0)
-               /* don't write out a null byte if the buf size is zero */
-               *end = '\0';
-       /* the trailing null byte doesn't count towards the total
-       * ++str;
-       */
-       return str-buf;
-}
-
-int _kc_snprintf(char * buf, size_t size, const char *fmt, ...)
-{
-       va_list args;
-       int i;
-
-       va_start(args, fmt);
-       i = _kc_vsnprintf(buf,size,fmt,args);
-       va_end(args);
-       return i;
-}
-#endif /* < 2.4.8 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) )
-
-/**************************************/
-/* PCI DMA MAPPING */
-
-#if defined(CONFIG_HIGHMEM)
-
-#ifndef PCI_DRAM_OFFSET
-#define PCI_DRAM_OFFSET 0
-#endif
-
-u64
-_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset,
-                 size_t size, int direction)
-{
-       return (((u64) (page - mem_map) << PAGE_SHIFT) + offset +
-               PCI_DRAM_OFFSET);
-}
-
-#else /* CONFIG_HIGHMEM */
-
-u64
-_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset,
-                 size_t size, int direction)
-{
-       return pci_map_single(dev, (void *)page_address(page) + offset, size,
-                             direction);
-}
-
-#endif /* CONFIG_HIGHMEM */
-
-void
-_kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size,
-                   int direction)
-{
-       return pci_unmap_single(dev, dma_addr, size, direction);
-}
-
-#endif /* 2.4.13 => 2.4.3 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) )
-
-/**************************************/
-/* PCI DRIVER API */
-
-int
-_kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask)
-{
-       if (!pci_dma_supported(dev, mask))
-               return -EIO;
-       dev->dma_mask = mask;
-       return 0;
-}
-
-int
-_kc_pci_request_regions(struct pci_dev *dev, char *res_name)
-{
-       int i;
-
-       for (i = 0; i < 6; i++) {
-               if (pci_resource_len(dev, i) == 0)
-                       continue;
-
-               if (pci_resource_flags(dev, i) & IORESOURCE_IO) {
-                       if (!request_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) {
-                               pci_release_regions(dev);
-                               return -EBUSY;
-                       }
-               } else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) {
-                       if (!request_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) {
-                               pci_release_regions(dev);
-                               return -EBUSY;
-                       }
-               }
-       }
-       return 0;
-}
-
-void
-_kc_pci_release_regions(struct pci_dev *dev)
-{
-       int i;
-
-       for (i = 0; i < 6; i++) {
-               if (pci_resource_len(dev, i) == 0)
-                       continue;
-
-               if (pci_resource_flags(dev, i) & IORESOURCE_IO)
-                       release_region(pci_resource_start(dev, i), pci_resource_len(dev, i));
-
-               else if (pci_resource_flags(dev, i) & IORESOURCE_MEM)
-                       release_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i));
-       }
-}
-
-/**************************************/
-/* NETWORK DRIVER API */
-
-struct net_device *
-_kc_alloc_etherdev(int sizeof_priv)
-{
-       struct net_device *dev;
-       int alloc_size;
-
-       alloc_size = sizeof(*dev) + sizeof_priv + IFNAMSIZ + 31;
-       dev = kzalloc(alloc_size, GFP_KERNEL);
-       if (!dev)
-               return NULL;
-
-       if (sizeof_priv)
-               dev->priv = (void *) (((unsigned long)(dev + 1) + 31) & ~31);
-       dev->name[0] = '\0';
-       ether_setup(dev);
-
-       return dev;
-}
-
-int
-_kc_is_valid_ether_addr(u8 *addr)
-{
-       const char zaddr[6] = { 0, };
-
-       return !(addr[0] & 1) && memcmp(addr, zaddr, 6);
-}
-
-#endif /* 2.4.3 => 2.4.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) )
-
-int
-_kc_pci_set_power_state(struct pci_dev *dev, int state)
-{
-       return 0;
-}
-
-int
-_kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable)
-{
-       return 0;
-}
-
-#endif /* 2.4.6 => 2.4.3 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
-void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page,
-                            int off, int size)
-{
-       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-       frag->page = page;
-       frag->page_offset = off;
-       frag->size = size;
-       skb_shinfo(skb)->nr_frags = i + 1;
-}
-
-/*
- * Original Copyright:
- * find_next_bit.c: fallback find next bit implementation
- *
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells ([email protected])
- */
-
-/**
- * find_next_bit - find the next set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
-                            unsigned long offset)
-{
-       const unsigned long *p = addr + BITOP_WORD(offset);
-       unsigned long result = offset & ~(BITS_PER_LONG-1);
-       unsigned long tmp;
-
-       if (offset >= size)
-               return size;
-       size -= result;
-       offset %= BITS_PER_LONG;
-       if (offset) {
-               tmp = *(p++);
-               tmp &= (~0UL << offset);
-               if (size < BITS_PER_LONG)
-                       goto found_first;
-               if (tmp)
-                       goto found_middle;
-               size -= BITS_PER_LONG;
-               result += BITS_PER_LONG;
-       }
-       while (size & ~(BITS_PER_LONG-1)) {
-               if ((tmp = *(p++)))
-                       goto found_middle;
-               result += BITS_PER_LONG;
-               size -= BITS_PER_LONG;
-       }
-       if (!size)
-               return result;
-       tmp = *p;
-
-found_first:
-       tmp &= (~0UL >> (BITS_PER_LONG - size));
-       if (tmp == 0UL)         /* Are any bits set? */
-               return result + size;   /* Nope. */
-found_middle:
-       return result + ffs(tmp);
-}
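Callers on these old kernels typically use the helper above to walk a bitmap; a small illustrative sketch (the bitmap contents are arbitrary):

    unsigned long map[2] = { 0x5UL, 0x1UL };   /* bits 0, 2 and BITS_PER_LONG set */
    unsigned long nbits = 2 * BITS_PER_LONG;
    unsigned long bit;

    for (bit = find_next_bit(map, nbits, 0);
         bit < nbits;
         bit = find_next_bit(map, nbits, bit + 1))
        printk(KERN_DEBUG "bit %lu is set\n", bit);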
-
-size_t _kc_strlcpy(char *dest, const char *src, size_t size)
-{
-       size_t ret = strlen(src);
-
-       if (size) {
-               size_t len = (ret >= size) ? size - 1 : ret;
-               memcpy(dest, src, len);
-               dest[len] = '\0';
-       }
-       return ret;
-}
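Unlike strncpy(), the backport above always NUL-terminates the destination and returns strlen(src), so truncation can be detected by comparing the return value against the buffer size:

    char name[8];

    if (_kc_strlcpy(name, "very-long-interface-name", sizeof(name)) >= sizeof(name))
        /* copy was cut to 7 characters plus the terminating NUL */
        printk(KERN_DEBUG "name truncated to \"%s\"\n", name);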
-
-#ifndef do_div
-#if BITS_PER_LONG == 32
-uint32_t __attribute__((weak)) _kc__div64_32(uint64_t *n, uint32_t base)
-{
-       uint64_t rem = *n;
-       uint64_t b = base;
-       uint64_t res, d = 1;
-       uint32_t high = rem >> 32;
-
-       /* Reduce the thing a bit first */
-       res = 0;
-       if (high >= base) {
-               high /= base;
-               res = (uint64_t) high << 32;
-               rem -= (uint64_t) (high*base) << 32;
-       }
-
-       while ((int64_t)b > 0 && b < rem) {
-               b = b+b;
-               d = d+d;
-       }
-
-       do {
-               if (rem >= b) {
-                       rem -= b;
-                       res += d;
-               }
-               b >>= 1;
-               d >>= 1;
-       } while (d);
-
-       *n = res;
-       return rem;
-}
-#endif /* BITS_PER_LONG == 32 */
-#endif /* do_div */
-#endif /* 2.6.0 => 2.4.6 */
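The routine above backs the kernel's do_div() macro on 32-bit builds: the macro divides its 64-bit first argument in place and evaluates to the 32-bit remainder, for example:

    u64 ns = 3123456789ULL;            /* e.g. a nanosecond count */
    u32 rem = do_div(ns, 1000000000);  /* ns becomes 3, rem becomes 123456789 */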
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
-int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...)
-{
-       va_list args;
-       int i;
-
-       va_start(args, fmt);
-       i = vsnprintf(buf, size, fmt, args);
-       va_end(args);
-       return (i >= size) ? (size - 1) : i;
-}
-#endif /* < 2.6.4 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) )
-DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES) = {1};
-#endif /* < 2.6.10 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) )
-char *_kc_kstrdup(const char *s, unsigned int gfp)
-{
-       size_t len;
-       char *buf;
-
-       if (!s)
-               return NULL;
-
-       len = strlen(s) + 1;
-       buf = kmalloc(len, gfp);
-       if (buf)
-               memcpy(buf, s, len);
-       return buf;
-}
-#endif /* < 2.6.13 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) )
-void *_kc_kzalloc(size_t size, int flags)
-{
-       void *ret = kmalloc(size, flags);
-       if (ret)
-               memset(ret, 0, size);
-       return ret;
-}
-#endif /* <= 2.6.13 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) )
-int _kc_skb_pad(struct sk_buff *skb, int pad)
-{
-       int ntail;
-
-        /* If the skbuff is non-linear, tailroom is always zero. */
-        if(!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
-               memset(skb->data+skb->len, 0, pad);
-               return 0;
-        }
-
-       ntail = skb->data_len + pad - (skb->end - skb->tail);
-       if (likely(skb_cloned(skb) || ntail > 0)) {
-               if (pskb_expand_head(skb, 0, ntail, GFP_ATOMIC))
-                       goto free_skb;
-       }
-
-#ifdef MAX_SKB_FRAGS
-       if (skb_is_nonlinear(skb) &&
-           !__pskb_pull_tail(skb, skb->data_len))
-               goto free_skb;
-
-#endif
-       memset(skb->data + skb->len, 0, pad);
-        return 0;
-
-free_skb:
-       kfree_skb(skb);
-       return -ENOMEM;
-}
-
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))
-int _kc_pci_save_state(struct pci_dev *pdev)
-{
-       struct net_device *netdev = pci_get_drvdata(pdev);
-       struct adapter_struct *adapter = netdev_priv(netdev);
-       int size = PCI_CONFIG_SPACE_LEN, i;
-       u16 pcie_cap_offset, pcie_link_status;
-
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) )
-       /* no ->dev for 2.4 kernels */
-       WARN_ON(pdev->dev.driver_data == NULL);
-#endif
-       pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
-       if (pcie_cap_offset) {
-               if (!pci_read_config_word(pdev,
-                                         pcie_cap_offset + PCIE_LINK_STATUS,
-                                         &pcie_link_status))
-                       size = PCIE_CONFIG_SPACE_LEN;
-       }
-       pci_config_space_ich8lan();
-#ifdef HAVE_PCI_ERS
-       if (adapter->config_space == NULL)
-#else
-       WARN_ON(adapter->config_space != NULL);
-#endif
-               adapter->config_space = kmalloc(size, GFP_KERNEL);
-       if (!adapter->config_space) {
-               printk(KERN_ERR "Out of memory in pci_save_state\n");
-               return -ENOMEM;
-       }
-       for (i = 0; i < (size / 4); i++)
-               pci_read_config_dword(pdev, i * 4, &adapter->config_space[i]);
-       return 0;
-}
-
-void _kc_pci_restore_state(struct pci_dev *pdev)
-{
-       struct net_device *netdev = pci_get_drvdata(pdev);
-       struct adapter_struct *adapter = netdev_priv(netdev);
-       int size = PCI_CONFIG_SPACE_LEN, i;
-       u16 pcie_cap_offset;
-       u16 pcie_link_status;
-
-       if (adapter->config_space != NULL) {
-               pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
-               if (pcie_cap_offset &&
-                   !pci_read_config_word(pdev,
-                                         pcie_cap_offset + PCIE_LINK_STATUS,
-                                         &pcie_link_status))
-                       size = PCIE_CONFIG_SPACE_LEN;
-
-               pci_config_space_ich8lan();
-               for (i = 0; i < (size / 4); i++)
-                       pci_write_config_dword(pdev, i * 4, adapter->config_space[i]);
-#ifndef HAVE_PCI_ERS
-               kfree(adapter->config_space);
-               adapter->config_space = NULL;
-#endif
-       }
-}
-#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */
-
-#ifdef HAVE_PCI_ERS
-void _kc_free_netdev(struct net_device *netdev)
-{
-       struct adapter_struct *adapter = netdev_priv(netdev);
-
-       if (adapter->config_space != NULL)
-               kfree(adapter->config_space);
-#ifdef CONFIG_SYSFS
-       if (netdev->reg_state == NETREG_UNINITIALIZED) {
-               kfree((char *)netdev - netdev->padded);
-       } else {
-               BUG_ON(netdev->reg_state != NETREG_UNREGISTERED);
-               netdev->reg_state = NETREG_RELEASED;
-               class_device_put(&netdev->class_dev);
-       }
-#else
-       kfree((char *)netdev - netdev->padded);
-#endif
-}
-#endif
-
-void *_kc_kmemdup(const void *src, size_t len, unsigned gfp)
-{
-       void *p;
-
-       p = kzalloc(len, gfp);
-       if (p)
-               memcpy(p, src, len);
-       return p;
-}
-#endif /* <= 2.6.19 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) )
-struct pci_dev *_kc_netdev_to_pdev(struct net_device *netdev)
-{
-       return ((struct adapter_struct *)netdev_priv(netdev))->pdev;
-}
-#endif /* < 2.6.21 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
-/* hexdump code taken from lib/hexdump.c */
-static void _kc_hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
-                       int groupsize, unsigned char *linebuf,
-                       size_t linebuflen, bool ascii)
-{
-       const u8 *ptr = buf;
-       u8 ch;
-       int j, lx = 0;
-       int ascii_column;
-
-       if (rowsize != 16 && rowsize != 32)
-               rowsize = 16;
-
-       if (!len)
-               goto nil;
-       if (len > rowsize)              /* limit to one line at a time */
-               len = rowsize;
-       if ((len % groupsize) != 0)     /* no mixed size output */
-               groupsize = 1;
-
-       switch (groupsize) {
-       case 8: {
-               const u64 *ptr8 = buf;
-               int ngroups = len / groupsize;
-
-               for (j = 0; j < ngroups; j++)
-                       lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
-                               "%s%16.16llx", j ? " " : "",
-                               (unsigned long long)*(ptr8 + j));
-               ascii_column = 17 * ngroups + 2;
-               break;
-       }
-
-       case 4: {
-               const u32 *ptr4 = buf;
-               int ngroups = len / groupsize;
-
-               for (j = 0; j < ngroups; j++)
-                       lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
-                               "%s%8.8x", j ? " " : "", *(ptr4 + j));
-               ascii_column = 9 * ngroups + 2;
-               break;
-       }
-
-       case 2: {
-               const u16 *ptr2 = buf;
-               int ngroups = len / groupsize;
-
-               for (j = 0; j < ngroups; j++)
-                       lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
-                               "%s%4.4x", j ? " " : "", *(ptr2 + j));
-               ascii_column = 5 * ngroups + 2;
-               break;
-       }
-
-       default:
-               for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) {
-                       ch = ptr[j];
-                       linebuf[lx++] = hex_asc(ch >> 4);
-                       linebuf[lx++] = hex_asc(ch & 0x0f);
-                       linebuf[lx++] = ' ';
-               }
-               if (j)
-                       lx--;
-
-               ascii_column = 3 * rowsize + 2;
-               break;
-       }
-       if (!ascii)
-               goto nil;
-
-       while (lx < (linebuflen - 1) && lx < (ascii_column - 1))
-               linebuf[lx++] = ' ';
-       for (j = 0; (j < len) && (lx + 2) < linebuflen; j++)
-               linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j]
-                               : '.';
-nil:
-       linebuf[lx++] = '\0';
-}
-
-void _kc_print_hex_dump(const char *level,
-                       const char *prefix_str, int prefix_type,
-                       int rowsize, int groupsize,
-                       const void *buf, size_t len, bool ascii)
-{
-       const u8 *ptr = buf;
-       int i, linelen, remaining = len;
-       unsigned char linebuf[200];
-
-       if (rowsize != 16 && rowsize != 32)
-               rowsize = 16;
-
-       for (i = 0; i < len; i += rowsize) {
-               linelen = min(remaining, rowsize);
-               remaining -= rowsize;
-               _kc_hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
-                               linebuf, sizeof(linebuf), ascii);
-
-               switch (prefix_type) {
-               case DUMP_PREFIX_ADDRESS:
-                       printk("%s%s%*p: %s\n", level, prefix_str,
-                               (int)(2 * sizeof(void *)), ptr + i, linebuf);
-                       break;
-               case DUMP_PREFIX_OFFSET:
-                       printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf);
-                       break;
-               default:
-                       printk("%s%s%s\n", level, prefix_str, linebuf);
-                       break;
-               }
-       }
-}
-
-#ifdef HAVE_I2C_SUPPORT
-struct i2c_client *
-_kc_i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
-{
-       struct i2c_client       *client;
-       int                     status;
-
-       client = kzalloc(sizeof *client, GFP_KERNEL);
-       if (!client)
-               return NULL;
-
-       client->adapter = adap;
-
-       client->dev.platform_data = info->platform_data;
-
-       client->flags = info->flags;
-       client->addr = info->addr;
-
-       strlcpy(client->name, info->type, sizeof(client->name));
-
-       /* Check for address business */
-       status = i2c_check_addr(adap, client->addr);
-       if (status)
-               goto out_err;
-
-       client->dev.parent = &client->adapter->dev;
-       client->dev.bus = &i2c_bus_type;
-
-       status = i2c_attach_client(client);
-       if (status)
-               goto out_err;
-
-       dev_dbg(&adap->dev, "client [%s] registered with bus id %s\n",
-               client->name, dev_name(&client->dev));
-
-       return client;
-
-out_err:
-       dev_err(&adap->dev, "Failed to register i2c client %s at 0x%02x "
-               "(%d)\n", client->name, client->addr, status);
-       kfree(client);
-       return NULL;
-}
-#endif /* HAVE_I2C_SUPPORT */
-#endif /* < 2.6.22 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) )
-#ifdef NAPI
-struct net_device *napi_to_poll_dev(const struct napi_struct *napi)
-{
-       struct adapter_q_vector *q_vector = container_of(napi,
-                                                       struct adapter_q_vector,
-                                                       napi);
-       return &q_vector->poll_dev;
-}
-
-int __kc_adapter_clean(struct net_device *netdev, int *budget)
-{
-       int work_done;
-       int work_to_do = min(*budget, netdev->quota);
-       /* kcompat.h netif_napi_add puts napi struct in "fake netdev->priv" */
-       struct napi_struct *napi = netdev->priv;
-       work_done = napi->poll(napi, work_to_do);
-       *budget -= work_done;
-       netdev->quota -= work_done;
-       return (work_done >= work_to_do) ? 1 : 0;
-}
-#endif /* NAPI */
-#endif /* < 2.6.24 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) )
-void _kc_pci_disable_link_state(struct pci_dev *pdev, int state)
-{
-       struct pci_dev *parent = pdev->bus->self;
-       u16 link_state;
-       int pos;
-
-       if (!parent)
-               return;
-
-       pos = pci_find_capability(parent, PCI_CAP_ID_EXP);
-       if (pos) {
-               pci_read_config_word(parent, pos + PCI_EXP_LNKCTL, &link_state);
-               link_state &= ~state;
-               pci_write_config_word(parent, pos + PCI_EXP_LNKCTL, link_state);
-       }
-}
-#endif /* < 2.6.26 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) )
-#ifdef HAVE_TX_MQ
-void _kc_netif_tx_stop_all_queues(struct net_device *netdev)
-{
-       struct adapter_struct *adapter = netdev_priv(netdev);
-       int i;
-
-       netif_stop_queue(netdev);
-       if (netif_is_multiqueue(netdev))
-               for (i = 0; i < adapter->num_tx_queues; i++)
-                       netif_stop_subqueue(netdev, i);
-}
-void _kc_netif_tx_wake_all_queues(struct net_device *netdev)
-{
-       struct adapter_struct *adapter = netdev_priv(netdev);
-       int i;
-
-       netif_wake_queue(netdev);
-       if (netif_is_multiqueue(netdev))
-               for (i = 0; i < adapter->num_tx_queues; i++)
-                       netif_wake_subqueue(netdev, i);
-}
-void _kc_netif_tx_start_all_queues(struct net_device *netdev)
-{
-       struct adapter_struct *adapter = netdev_priv(netdev);
-       int i;
-
-       netif_start_queue(netdev);
-       if (netif_is_multiqueue(netdev))
-               for (i = 0; i < adapter->num_tx_queues; i++)
-                       netif_start_subqueue(netdev, i);
-}
-#endif /* HAVE_TX_MQ */
-
-#ifndef __WARN_printf
-void __kc_warn_slowpath(const char *file, int line, const char *fmt, ...)
-{
-       va_list args;
-
-       printk(KERN_WARNING "------------[ cut here ]------------\n");
-       printk(KERN_WARNING "WARNING: at %s:%d\n", file, line);
-       va_start(args, fmt);
-       vprintk(fmt, args);
-       va_end(args);
-
-       dump_stack();
-}
-#endif /* __WARN_printf */
-#endif /* < 2.6.27 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) )
-
-int
-_kc_pci_prepare_to_sleep(struct pci_dev *dev)
-{
-       pci_power_t target_state;
-       int error;
-
-       target_state = pci_choose_state(dev, PMSG_SUSPEND);
-
-       pci_enable_wake(dev, target_state, true);
-
-       error = pci_set_power_state(dev, target_state);
-
-       if (error)
-               pci_enable_wake(dev, target_state, false);
-
-       return error;
-}
-
-int
-_kc_pci_wake_from_d3(struct pci_dev *dev, bool enable)
-{
-       int err;
-
-       err = pci_enable_wake(dev, PCI_D3cold, enable);
-       if (err)
-               goto out;
-
-       err = pci_enable_wake(dev, PCI_D3hot, enable);
-
-out:
-       return err;
-}
-#endif /* < 2.6.28 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) )
-static void __kc_pci_set_master(struct pci_dev *pdev, bool enable)
-{
-       u16 old_cmd, cmd;
-
-       pci_read_config_word(pdev, PCI_COMMAND, &old_cmd);
-       if (enable)
-               cmd = old_cmd | PCI_COMMAND_MASTER;
-       else
-               cmd = old_cmd & ~PCI_COMMAND_MASTER;
-       if (cmd != old_cmd) {
-               dev_dbg(pci_dev_to_dev(pdev), "%s bus mastering\n",
-                       enable ? "enabling" : "disabling");
-               pci_write_config_word(pdev, PCI_COMMAND, cmd);
-       }
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,7) )
-       pdev->is_busmaster = enable;
-#endif
-}
-
-void _kc_pci_clear_master(struct pci_dev *dev)
-{
-       __kc_pci_set_master(dev, false);
-}
-#endif /* < 2.6.29 */
-
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) )
-#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))
-int _kc_pci_num_vf(struct pci_dev *dev)
-{
-       int num_vf = 0;
-#ifdef CONFIG_PCI_IOV
-       struct pci_dev *vfdev;
-
-       /* loop through all ethernet devices starting at PF dev */
-       vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, NULL);
-       while (vfdev) {
-               if (vfdev->is_virtfn && vfdev->physfn == dev)
-                       num_vf++;
-
-               vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, vfdev);
-       }
-
-#endif
-       return num_vf;
-}
-#endif /* RHEL_RELEASE_CODE */
-#endif /* < 2.6.34 */
-
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) )
-#ifdef HAVE_TX_MQ
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)))
-#ifndef CONFIG_NETDEVICES_MULTIQUEUE
-void _kc_netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
-{
-       unsigned int real_num = dev->real_num_tx_queues;
-       struct Qdisc *qdisc;
-       int i;
-
-       if (unlikely(txq > dev->num_tx_queues))
-               return;
-       if (txq > real_num)
-               dev->real_num_tx_queues = txq;
-       else if (txq < real_num) {
-               dev->real_num_tx_queues = txq;
-               for (i = txq; i < dev->num_tx_queues; i++) {
-                       qdisc = netdev_get_tx_queue(dev, i)->qdisc;
-                       if (qdisc) {
-                               spin_lock_bh(qdisc_lock(qdisc));
-                               qdisc_reset(qdisc);
-                               spin_unlock_bh(qdisc_lock(qdisc));
-                       }
-               }
-       }
-}
-#endif /* CONFIG_NETDEVICES_MULTIQUEUE */
-#endif /* !(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) */
-#endif /* HAVE_TX_MQ */
-
-ssize_t _kc_simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
-                                  const void __user *from, size_t count)
-{
-        loff_t pos = *ppos;
-        size_t res;
-
-        if (pos < 0)
-                return -EINVAL;
-        if (pos >= available || !count)
-                return 0;
-        if (count > available - pos)
-                count = available - pos;
-        res = copy_from_user(to + pos, from, count);
-        if (res == count)
-                return -EFAULT;
-        count -= res;
-        *ppos = pos + count;
-        return count;
-}
-
-#endif /* < 2.6.35 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) )
-static const u32 _kc_flags_dup_features =
-       (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH);
-
-u32 _kc_ethtool_op_get_flags(struct net_device *dev)
-{
-       return dev->features & _kc_flags_dup_features;
-}
-
-int _kc_ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
-{
-       if (data & ~supported)
-               return -EINVAL;
-
-       dev->features = ((dev->features & ~_kc_flags_dup_features) |
-                        (data & _kc_flags_dup_features));
-       return 0;
-}
-#endif /* < 2.6.36 */
-
-/******************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) )
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)))
-
-
-
-#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */
-#endif /* < 2.6.39 */
-
-/******************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) )
-void _kc_skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page,
-                        int off, int size, unsigned int truesize)
-{
-       skb_fill_page_desc(skb, i, page, off, size);
-       skb->len += size;
-       skb->data_len += size;
-       skb->truesize += truesize;
-}
-
-int _kc_simple_open(struct inode *inode, struct file *file)
-{
-        if (inode->i_private)
-                file->private_data = inode->i_private;
-
-        return 0;
-}
-
-#endif /* < 3.4.0 */
-
-/******************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) )
-#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \
-    !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5))
-static inline int __kc_pcie_cap_version(struct pci_dev *dev)
-{
-       int pos;
-       u16 reg16;
-
-       pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
-       if (!pos)
-               return 0;
-       pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &reg16);
-       return reg16 & PCI_EXP_FLAGS_VERS;
-}
-
-static inline bool __kc_pcie_cap_has_devctl(const struct pci_dev __always_unused *dev)
-{
-       return true;
-}
-
-static inline bool __kc_pcie_cap_has_lnkctl(struct pci_dev *dev)
-{
-       int type = pci_pcie_type(dev);
-
-       return __kc_pcie_cap_version(dev) > 1 ||
-              type == PCI_EXP_TYPE_ROOT_PORT ||
-              type == PCI_EXP_TYPE_ENDPOINT ||
-              type == PCI_EXP_TYPE_LEG_END;
-}
-
-static inline bool __kc_pcie_cap_has_sltctl(struct pci_dev *dev)
-{
-       int type = pci_pcie_type(dev);
-       int pos;
-       u16 pcie_flags_reg;
-
-       pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
-       if (!pos)
-               return false;
-       pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &pcie_flags_reg);
-
-       return __kc_pcie_cap_version(dev) > 1 ||
-              type == PCI_EXP_TYPE_ROOT_PORT ||
-              (type == PCI_EXP_TYPE_DOWNSTREAM &&
-               pcie_flags_reg & PCI_EXP_FLAGS_SLOT);
-}
-
-static inline bool __kc_pcie_cap_has_rtctl(struct pci_dev *dev)
-{
-       int type = pci_pcie_type(dev);
-
-       return __kc_pcie_cap_version(dev) > 1 ||
-              type == PCI_EXP_TYPE_ROOT_PORT ||
-              type == PCI_EXP_TYPE_RC_EC;
-}
-
-static bool __kc_pcie_capability_reg_implemented(struct pci_dev *dev, int pos)
-{
-       if (!pci_is_pcie(dev))
-               return false;
-
-       switch (pos) {
-       case PCI_EXP_FLAGS_TYPE:
-               return true;
-       case PCI_EXP_DEVCAP:
-       case PCI_EXP_DEVCTL:
-       case PCI_EXP_DEVSTA:
-               return __kc_pcie_cap_has_devctl(dev);
-       case PCI_EXP_LNKCAP:
-       case PCI_EXP_LNKCTL:
-       case PCI_EXP_LNKSTA:
-               return __kc_pcie_cap_has_lnkctl(dev);
-       case PCI_EXP_SLTCAP:
-       case PCI_EXP_SLTCTL:
-       case PCI_EXP_SLTSTA:
-               return __kc_pcie_cap_has_sltctl(dev);
-       case PCI_EXP_RTCTL:
-       case PCI_EXP_RTCAP:
-       case PCI_EXP_RTSTA:
-               return __kc_pcie_cap_has_rtctl(dev);
-       case PCI_EXP_DEVCAP2:
-       case PCI_EXP_DEVCTL2:
-       case PCI_EXP_LNKCAP2:
-       case PCI_EXP_LNKCTL2:
-       case PCI_EXP_LNKSTA2:
-               return __kc_pcie_cap_version(dev) > 1;
-       default:
-               return false;
-       }
-}
-
-/*
- * Note that these accessor functions are only for the "PCI Express
- * Capability" (see PCIe spec r3.0, sec 7.8).  They do not apply to the
- * other "PCI Express Extended Capabilities" (AER, VC, ACS, MFVC, etc.)
- */
-int __kc_pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val)
-{
-       int ret;
-
-       *val = 0;
-       if (pos & 1)
-               return -EINVAL;
-
-       if (__kc_pcie_capability_reg_implemented(dev, pos)) {
-               ret = pci_read_config_word(dev, pci_pcie_cap(dev) + pos, val);
-               /*
-                * Reset *val to 0 if pci_read_config_word() fails, it may
-                * have been written as 0xFFFF if hardware error happens
-                * during pci_read_config_word().
-                */
-               if (ret)
-                       *val = 0;
-               return ret;
-       }
-
-       /*
-        * For Functions that do not implement the Slot Capabilities,
-        * Slot Status, and Slot Control registers, these spaces must
-        * be hardwired to 0b, with the exception of the Presence Detect
-        * State bit in the Slot Status register of Downstream Ports,
-        * which must be hardwired to 1b.  (PCIe Base Spec 3.0, sec 7.8)
-        */
-       if (pci_is_pcie(dev) && pos == PCI_EXP_SLTSTA &&
-           pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) {
-               *val = PCI_EXP_SLTSTA_PDS;
-       }
-
-       return 0;
-}
-
-int __kc_pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val)
-{
-       if (pos & 1)
-               return -EINVAL;
-
-       if (!__kc_pcie_capability_reg_implemented(dev, pos))
-               return 0;
-
-       return pci_write_config_word(dev, pci_pcie_cap(dev) + pos, val);
-}
-
-int __kc_pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos,
-                                           u16 clear, u16 set)
-{
-       int ret;
-       u16 val;
-
-       ret = __kc_pcie_capability_read_word(dev, pos, &val);
-       if (!ret) {
-               val &= ~clear;
-               val |= set;
-               ret = __kc_pcie_capability_write_word(dev, pos, val);
-       }
-
-       return ret;
-}
-#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \
-          !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) */
-#endif /* < 3.7.0 */
-
-/******************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) )
-#endif /* 3.9.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
-#ifdef CONFIG_PCI_IOV
-int __kc_pci_vfs_assigned(struct pci_dev *dev)
-{
-       unsigned int vfs_assigned = 0;
-#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
-       int pos;
-       struct pci_dev *vfdev;
-       unsigned short dev_id;
-
-       /* only search if we are a PF */
-       if (!dev->is_physfn)
-               return 0;
-
-       /* find SR-IOV capability */
-       pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
-       if (!pos)
-               return 0;
-
-       /*
-        * determine the device ID for the VFs, the vendor ID will be the
-        * same as the PF so there is no need to check for that one
-        */
-       pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &dev_id);
-
-       /* loop through all the VFs to see if we own any that are assigned */
-       vfdev = pci_get_device(dev->vendor, dev_id, NULL);
-       while (vfdev) {
-               /*
-                * It is considered assigned if it is a virtual function with
-                * our dev as the physical function and the assigned bit is set
-                */
-               if (vfdev->is_virtfn && (vfdev->physfn == dev) &&
-                   (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED))
-                       vfs_assigned++;
-
-               vfdev = pci_get_device(dev->vendor, dev_id, vfdev);
-       }
-
-#endif /* HAVE_PCI_DEV_FLAGS_ASSIGNED */
-       return vfs_assigned;
-}
-
-#endif /* CONFIG_PCI_IOV */
-#endif /* 3.10.0 */
index e2cf71e..84826b2 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
@@ -3891,7 +3891,7 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
 #if (( LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) ) \
     || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
 #define HAVE_NDO_DFLT_BRIDGE_ADD_MASK
-#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
+#if ( RHEL_RELEASE_CODE != RHEL_RELEASE_VERSION(7,2) )
 #define HAVE_NDO_FDB_ADD_VID
 #endif /* !RHEL 7.2 */
 #endif /* >= 3.19.0 */
@@ -3901,12 +3901,13 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
 /* vlan_tx_xx functions got renamed to skb_vlan */
 #define vlan_tx_tag_get skb_vlan_tag_get
 #define vlan_tx_tag_present skb_vlan_tag_present
-#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
+#if ( RHEL_RELEASE_CODE != RHEL_RELEASE_VERSION(7,2) )
 #define HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS
 #endif /* !RHEL 7.2 */
 #endif /* 4.0.0 */
 
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) )
+#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) ) \
+    || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,3) ))
 /* ndo_bridge_getlink adds new nlflags parameter */
 #define HAVE_NDO_BRIDGE_GETLINK_NLFLAGS
 #endif /* >= 4.1.0 */
@@ -3915,4 +3916,21 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
 /* ndo_bridge_getlink adds new filter_mask and vlan_fill parameters */
 #define HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL
 #endif /* >= 4.2.0 */
+
+/*
+ * The vlan_tx_tag_* macros were renamed to skb_vlan_tag_* (Linux commit df8a39defad4).
+ * Older kernels that backported this commit need the old names mapped to the
+ * renamed functions. This fix is specific to RedHat/CentOS kernels.
+ */
+#if (defined(RHEL_RELEASE_CODE) && \
+       (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8)) && \
+       (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)))
+#define vlan_tx_tag_get skb_vlan_tag_get
+#define vlan_tx_tag_present skb_vlan_tag_present
+#endif
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,9,0) )
+#define HAVE_VF_VLAN_PROTO
+#endif /* >= 4.9.0 */
+
 #endif /* _KCOMPAT_H_ */
diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c
deleted file mode 100644 (file)
index e1a8938..0000000
+++ /dev/null
@@ -1,1171 +0,0 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <[email protected]>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-/*
- * net/core/ethtool.c - Ethtool ioctl handler
- * Copyright (c) 2003 Matthew Wilcox <[email protected]>
- *
- * This file is where we call all the ethtool_ops commands to get
- * the information ethtool needs.  We fall back to calling do_ioctl()
- * for drivers which haven't been converted to ethtool_ops yet.
- *
- * It's GPL, stupid.
- *
- * Modification by [email protected] to work as backward compat
- * solution for pre-ethtool_ops kernels.
- *     - copied struct ethtool_ops from ethtool.h
- *     - defined SET_ETHTOOL_OPS
- *     - put in some #ifndef NETIF_F_xxx wrappers
- *     - changes refs to dev->ethtool_ops to ethtool_ops
- *     - changed dev_ethtool to ethtool_ioctl
- *      - remove EXPORT_SYMBOL()s
- *      - added _kc_ prefix in built-in ethtool_op_xxx ops.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/mii.h>
-#include <linux/ethtool.h>
-#include <linux/netdevice.h>
-#include <asm/uaccess.h>
-
-#include "kcompat.h"
-
-#undef SUPPORTED_10000baseT_Full
-#define SUPPORTED_10000baseT_Full      (1 << 12)
-#undef ADVERTISED_10000baseT_Full
-#define ADVERTISED_10000baseT_Full     (1 << 12)
-#undef SPEED_10000
-#define SPEED_10000            10000
-
-#undef ethtool_ops
-#define ethtool_ops _kc_ethtool_ops
-
-struct _kc_ethtool_ops {
-       int  (*get_settings)(struct net_device *, struct ethtool_cmd *);
-       int  (*set_settings)(struct net_device *, struct ethtool_cmd *);
-       void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *);
-       int  (*get_regs_len)(struct net_device *);
-       void (*get_regs)(struct net_device *, struct ethtool_regs *, void *);
-       void (*get_wol)(struct net_device *, struct ethtool_wolinfo *);
-       int  (*set_wol)(struct net_device *, struct ethtool_wolinfo *);
-       u32  (*get_msglevel)(struct net_device *);
-       void (*set_msglevel)(struct net_device *, u32);
-       int  (*nway_reset)(struct net_device *);
-       u32  (*get_link)(struct net_device *);
-       int  (*get_eeprom_len)(struct net_device *);
-       int  (*get_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *);
-       int  (*set_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *);
-       int  (*get_coalesce)(struct net_device *, struct ethtool_coalesce *);
-       int  (*set_coalesce)(struct net_device *, struct ethtool_coalesce *);
-       void (*get_ringparam)(struct net_device *, struct ethtool_ringparam *);
-       int  (*set_ringparam)(struct net_device *, struct ethtool_ringparam *);
-       void (*get_pauseparam)(struct net_device *,
-                              struct ethtool_pauseparam*);
-       int  (*set_pauseparam)(struct net_device *,
-                              struct ethtool_pauseparam*);
-       u32  (*get_rx_csum)(struct net_device *);
-       int  (*set_rx_csum)(struct net_device *, u32);
-       u32  (*get_tx_csum)(struct net_device *);
-       int  (*set_tx_csum)(struct net_device *, u32);
-       u32  (*get_sg)(struct net_device *);
-       int  (*set_sg)(struct net_device *, u32);
-       u32  (*get_tso)(struct net_device *);
-       int  (*set_tso)(struct net_device *, u32);
-       int  (*self_test_count)(struct net_device *);
-       void (*self_test)(struct net_device *, struct ethtool_test *, u64 *);
-       void (*get_strings)(struct net_device *, u32 stringset, u8 *);
-       int  (*phys_id)(struct net_device *, u32);
-       int  (*get_stats_count)(struct net_device *);
-       void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *,
-                                 u64 *);
-} *ethtool_ops = NULL;
-
-#undef SET_ETHTOOL_OPS
-#define SET_ETHTOOL_OPS(netdev, ops) (ethtool_ops = (ops))
-
-/*
- * Some useful ethtool_ops methods that are device independent. If we find that
- * all drivers want to do the same thing here, we can turn these into dev_()
- * function calls.
- */
-
-#undef ethtool_op_get_link
-#define ethtool_op_get_link _kc_ethtool_op_get_link
-u32 _kc_ethtool_op_get_link(struct net_device *dev)
-{
-       return netif_carrier_ok(dev) ? 1 : 0;
-}
-
-#undef ethtool_op_get_tx_csum
-#define ethtool_op_get_tx_csum _kc_ethtool_op_get_tx_csum
-u32 _kc_ethtool_op_get_tx_csum(struct net_device *dev)
-{
-#ifdef NETIF_F_IP_CSUM
-       return (dev->features & NETIF_F_IP_CSUM) != 0;
-#else
-       return 0;
-#endif
-}
-
-#undef ethtool_op_set_tx_csum
-#define ethtool_op_set_tx_csum _kc_ethtool_op_set_tx_csum
-int _kc_ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
-{
-#ifdef NETIF_F_IP_CSUM
-       if (data)
-#ifdef NETIF_F_IPV6_CSUM
-               dev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
-       else
-               dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
-#else
-               dev->features |= NETIF_F_IP_CSUM;
-       else
-               dev->features &= ~NETIF_F_IP_CSUM;
-#endif
-#endif
-
-       return 0;
-}
-
-#undef ethtool_op_get_sg
-#define ethtool_op_get_sg _kc_ethtool_op_get_sg
-u32 _kc_ethtool_op_get_sg(struct net_device *dev)
-{
-#ifdef NETIF_F_SG
-       return (dev->features & NETIF_F_SG) != 0;
-#else
-       return 0;
-#endif
-}
-
-#undef ethtool_op_set_sg
-#define ethtool_op_set_sg _kc_ethtool_op_set_sg
-int _kc_ethtool_op_set_sg(struct net_device *dev, u32 data)
-{
-#ifdef NETIF_F_SG
-       if (data)
-               dev->features |= NETIF_F_SG;
-       else
-               dev->features &= ~NETIF_F_SG;
-#endif
-
-       return 0;
-}
-
-#undef ethtool_op_get_tso
-#define ethtool_op_get_tso _kc_ethtool_op_get_tso
-u32 _kc_ethtool_op_get_tso(struct net_device *dev)
-{
-#ifdef NETIF_F_TSO
-       return (dev->features & NETIF_F_TSO) != 0;
-#else
-       return 0;
-#endif
-}
-
-#undef ethtool_op_set_tso
-#define ethtool_op_set_tso _kc_ethtool_op_set_tso
-int _kc_ethtool_op_set_tso(struct net_device *dev, u32 data)
-{
-#ifdef NETIF_F_TSO
-       if (data)
-               dev->features |= NETIF_F_TSO;
-       else
-               dev->features &= ~NETIF_F_TSO;
-#endif
-
-       return 0;
-}
-
-/* Handlers for each ethtool command */
-
-static int ethtool_get_settings(struct net_device *dev, void *useraddr)
-{
-       struct ethtool_cmd cmd = { ETHTOOL_GSET };
-       int err;
-
-       if (!ethtool_ops->get_settings)
-               return -EOPNOTSUPP;
-
-       err = ethtool_ops->get_settings(dev, &cmd);
-       if (err < 0)
-               return err;
-
-       if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
-               return -EFAULT;
-       return 0;
-}
-
-static int ethtool_set_settings(struct net_device *dev, void *useraddr)
-{
-       struct ethtool_cmd cmd;
-
-       if (!ethtool_ops->set_settings)
-               return -EOPNOTSUPP;
-
-       if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
-               return -EFAULT;
-
-       return ethtool_ops->set_settings(dev, &cmd);
-}
-
-static int ethtool_get_drvinfo(struct net_device *dev, void *useraddr)
-{
-       struct ethtool_drvinfo info;
-       struct ethtool_ops *ops = ethtool_ops;
-
-       if (!ops->get_drvinfo)
-               return -EOPNOTSUPP;
-
-       memset(&info, 0, sizeof(info));
-       info.cmd = ETHTOOL_GDRVINFO;
-       ops->get_drvinfo(dev, &info);
-
-       if (ops->self_test_count)
-               info.testinfo_len = ops->self_test_count(dev);
-       if (ops->get_stats_count)
-               info.n_stats = ops->get_stats_count(dev);
-       if (ops->get_regs_len)
-               info.regdump_len = ops->get_regs_len(dev);
-       if (ops->get_eeprom_len)
-               info.eedump_len = ops->get_eeprom_len(dev);
-
-       if (copy_to_user(useraddr, &info, sizeof(info)))
-               return -EFAULT;
-       return 0;
-}
-
-static int ethtool_get_regs(struct net_device *dev, char *useraddr)
-{
-       struct ethtool_regs regs;
-       struct ethtool_ops *ops = ethtool_ops;
-       void *regbuf;
-       int reglen, ret;
-
-       if (!ops->get_regs || !ops->get_regs_len)
-               return -EOPNOTSUPP;
-
-       if (copy_from_user(&regs, useraddr, sizeof(regs)))
-               return -EFAULT;
-
-       reglen = ops->get_regs_len(dev);
-       if (regs.len > reglen)
-               regs.len = reglen;
-
-       regbuf = kmalloc(reglen, GFP_USER);
-       if (!regbuf)
-               return -ENOMEM;
-
-       ops->get_regs(dev, &regs, regbuf);
-
-       ret = -EFAULT;
-       if (copy_to_user(useraddr, &regs, sizeof(regs)))
-               goto out;
-       useraddr += offsetof(struct ethtool_regs, data);
-       if (copy_to_user(useraddr, regbuf, reglen))
-               goto out;
-       ret = 0;
-
-out:
-       kfree(regbuf);
-       return ret;
-}
-
-static int ethtool_get_wol(struct net_device *dev, char *useraddr)
-{
-       struct ethtool_wolinfo wol = { ETHTOOL_GWOL };
-
-       if (!ethtool_ops->get_wol)
-               return -EOPNOTSUPP;
-
-       ethtool_ops->get_wol(dev, &wol);
-
-       if (copy_to_user(useraddr, &wol, sizeof(wol)))
-               return -EFAULT;
-       return 0;
-}
-
-static int ethtool_set_wol(struct net_device *dev, char *useraddr)
-{
-       struct ethtool_wolinfo wol;
-
-       if (!ethtool_ops->set_wol)
-               return -EOPNOTSUPP;
-
-       if (copy_from_user(&wol, useraddr, sizeof(wol)))
-               return -EFAULT;
-
-       return ethtool_ops->set_wol(dev, &wol);
-}
-
-static int ethtool_get_msglevel(struct net_device *dev, char *useraddr)
-{
-       struct ethtool_value edata = { ETHTOOL_GMSGLVL };
-
-       if (!ethtool_ops->get_msglevel)
-               return -EOPNOTSUPP;
-
-       edata.data = ethtool_ops->get_msglevel(dev);
-
-       if (copy_to_user(useraddr, &edata, sizeof(edata)))
-               return -EFAULT;
-       return 0;
-}
-
-static int ethtool_set_msglevel(struct net_device *dev, char *useraddr)
-{
-       struct ethtool_value edata;
-
-       if (!ethtool_ops->set_msglevel)
-               return -EOPNOTSUPP;
-
-       if (copy_from_user(&edata, useraddr, sizeof(edata)))
-               return -EFAULT;
-
-       ethtool_ops->set_msglevel(dev, edata.data);
-       return 0;
-}
-
-static int ethtool_nway_reset(struct net_device *dev)
-{
-       if (!ethtool_ops->nway_reset)
-               return -EOPNOTSUPP;
-
-       return ethtool_ops->nway_reset(dev);
-}
-
-static int ethtool_get_link(struct net_device *dev, void *useraddr)
-{
-       struct ethtool_value edata = { ETHTOOL_GLINK };
-
-       if (!ethtool_ops->get_link)
-               return -EOPNOTSUPP;
-
-       edata.data = ethtool_ops->get_link(dev);
-
-       if (copy_to_user(useraddr, &edata, sizeof(edata)))
-               return -EFAULT;
-       return 0;
-}
-
-static int ethtool_get_eeprom(struct net_device *dev, void *useraddr)
-{
-       struct ethtool_eeprom eeprom;
-       struct ethtool_ops *ops = ethtool_ops;
-       u8 *data;
-       int ret;
-
-       if (!ops->get_eeprom || !ops->get_eeprom_len)
-               return -EOPNOTSUPP;
-
-       if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
-               return -EFAULT;
-
-       /* Check for wrap and zero */
-       if (eeprom.offset + eeprom.len <= eeprom.offset)
-               return -EINVAL;
-
-       /* Check for exceeding total eeprom len */
-       if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
-               return -EINVAL;
-
-       data = kmalloc(eeprom.len, GFP_USER);
-       if (!data)
-               return -ENOMEM;
-
-       ret = -EFAULT;
-       if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
-               goto out;
-
-       ret = ops->get_eeprom(dev, &eeprom, data);
-       if (ret)
-               goto out;
-
-       ret = -EFAULT;
-       if (copy_to_user(useraddr, &eeprom, sizeof(eeprom)))
-               goto out;
-       if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
-               goto out;
-       ret = 0;
-
-out:
-       kfree(data);
-       return ret;
-}
-
-static int ethtool_set_eeprom(struct net_device *dev, void *useraddr)
-{
-       struct ethtool_eeprom eeprom;
-       struct ethtool_ops *ops = ethtool_ops;
-       u8 *data;
-       int ret;
-
-       if (!ops->set_eeprom || !ops->get_eeprom_len)
-               return -EOPNOTSUPP;
-
-       if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
-               return -EFAULT;
-
-       /* Check for wrap and zero */
-       if (eeprom.offset + eeprom.len <= eeprom.offset)
-               return -EINVAL;
-
-       /* Check for exceeding total eeprom len */
-       if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
-               return -EINVAL;
-
-       data = kmalloc(eeprom.len, GFP_USER);
-       if (!data)
-               return -ENOMEM;
-
-       ret = -EFAULT;
-       if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
-               goto out;
-
-       ret = ops->set_eeprom(dev, &eeprom, data);
-       if (ret)
-               goto out;
-
-       if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
-               ret = -EFAULT;
-
-out:
-       kfree(data);
-       return ret;
-}
-
-static int ethtool_get_coalesce(struct net_device *dev, void *useraddr)
-{
-       struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE };
-
-       if (!ethtool_ops->get_coalesce)
-               return -EOPNOTSUPP;
-
-       ethtool_ops->get_coalesce(dev, &coalesce);
-
-       if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
-               return -EFAULT;
-       return 0;
-}
-
-static int ethtool_set_coalesce(struct net_device *dev, void *useraddr)
-{
-       struct ethtool_coalesce coalesce;
-
-       if (!ethtool_ops->set_coalesce)
-               return -EOPNOTSUPP;
-
-       if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
-               return -EFAULT;
-
-       return ethtool_ops->set_coalesce(dev, &coalesce);
-}
-
-static int ethtool_get_ringparam(struct net_device *dev, void *useraddr)
-{
-       struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM };
-
-       if (!ethtool_ops->get_ringparam)
-               return -EOPNOTSUPP;
-
-       ethtool_ops->get_ringparam(dev, &ringparam);
-
-       if (copy_to_user(useraddr, &ringparam, sizeof(ringparam)))
-               return -EFAULT;
-       return 0;
-}
-
-static int ethtool_set_ringparam(struct net_device *dev, void *useraddr)
-{
-       struct ethtool_ringparam ringparam;
-
-       if (!ethtool_ops->set_ringparam)
-               return -EOPNOTSUPP;
-
-       if (copy_from_user(&ringparam, useraddr, sizeof(ringparam)))
-               return -EFAULT;
-
-       return ethtool_ops->set_ringparam(dev, &ringparam);
-}
-
-static int ethtool_get_pauseparam(struct net_device *dev, void *useraddr)
-{
-       struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM };
-
-       if (!ethtool_ops->get_pauseparam)
-               return -EOPNOTSUPP;
-
-       ethtool_ops->get_pauseparam(dev, &pauseparam);
-
-       if (copy_to_user(useraddr, &pauseparam, sizeof(pauseparam)))
-               return -EFAULT;
-       return 0;
-}
-
-static int ethtool_set_pauseparam(struct net_device *dev, void *useraddr)
-{
-       struct ethtool_pauseparam pauseparam;
-
-       if (!ethtool_ops->set_pauseparam)
-               return -EOPNOTSUPP;
-
-       if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam)))
-               return -EFAULT;
-
-       return ethtool_ops->set_pauseparam(dev, &pauseparam);
-}
-
-static int ethtool_get_rx_csum(struct net_device *dev, char *useraddr)
-{
-       struct ethtool_value edata = { ETHTOOL_GRXCSUM };
-
-       if (!ethtool_ops->get_rx_csum)
-               return -EOPNOTSUPP;
-
-       edata.data = ethtool_ops->get_rx_csum(dev);
-
-       if (copy_to_user(useraddr, &edata, sizeof(edata)))
-               return -EFAULT;
-       return 0;
-}
-
-static int ethtool_set_rx_csum(struct net_device *dev, char *useraddr)
-{
-       struct ethtool_value edata;
-
-       if (!ethtool_ops->set_rx_csum)
-               return -EOPNOTSUPP;
-
-       if (copy_from_user(&edata, useraddr, sizeof(edata)))
-               return -EFAULT;
-
-       ethtool_ops->set_rx_csum(dev, edata.data);
-       return 0;
-}
-
-static int ethtool_get_tx_csum(struct net_device *dev, char *useraddr)
-{
-       struct ethtool_value edata = { ETHTOOL_GTXCSUM };
-
-       if (!ethtool_ops->get_tx_csum)
-               return -EOPNOTSUPP;
-
-       edata.data = ethtool_ops->get_tx_csum(dev);
-
-       if (copy_to_user(useraddr, &edata, sizeof(edata)))
-               return -EFAULT;
-       return 0;
-}
-
-static int ethtool_set_tx_csum(struct net_device *dev, char *useraddr)
-{
-       struct ethtool_value edata;
-
-       if (!ethtool_ops->set_tx_csum)
-               return -EOPNOTSUPP;
-
-       if (copy_from_user(&edata, useraddr, sizeof(edata)))
-               return -EFAULT;
-
-       return ethtool_ops->set_tx_csum(dev, edata.data);
-}
-
-static int ethtool_get_sg(struct net_device *dev, char *useraddr)
-{
-       struct ethtool_value edata = { ETHTOOL_GSG };
-
-       if (!ethtool_ops->get_sg)
-               return -EOPNOTSUPP;
-
-       edata.data = ethtool_ops->get_sg(dev);
-
-       if (copy_to_user(useraddr, &edata, sizeof(edata)))
-               return -EFAULT;
-       return 0;
-}
-
-static int ethtool_set_sg(struct net_device *dev, char *useraddr)
-{
-       struct ethtool_value edata;
-
-       if (!ethtool_ops->set_sg)
-               return -EOPNOTSUPP;
-
-       if (copy_from_user(&edata, useraddr, sizeof(edata)))
-               return -EFAULT;
-
-       return ethtool_ops->set_sg(dev, edata.data);
-}
-
-static int ethtool_get_tso(struct net_device *dev, char *useraddr)
-{
-       struct ethtool_value edata = { ETHTOOL_GTSO };
-
-       if (!ethtool_ops->get_tso)
-               return -EOPNOTSUPP;
-
-       edata.data = ethtool_ops->get_tso(dev);
-
-       if (copy_to_user(useraddr, &edata, sizeof(edata)))
-               return -EFAULT;
-       return 0;
-}
-
-static int ethtool_set_tso(struct net_device *dev, char *useraddr)
-{
-       struct ethtool_value edata;
-
-       if (!ethtool_ops->set_tso)
-               return -EOPNOTSUPP;
-
-       if (copy_from_user(&edata, useraddr, sizeof(edata)))
-               return -EFAULT;
-
-       return ethtool_ops->set_tso(dev, edata.data);
-}
-
-static int ethtool_self_test(struct net_device *dev, char *useraddr)
-{
-       struct ethtool_test test;
-       struct ethtool_ops *ops = ethtool_ops;
-       u64 *data;
-       int ret;
-
-       if (!ops->self_test || !ops->self_test_count)
-               return -EOPNOTSUPP;
-
-       if (copy_from_user(&test, useraddr, sizeof(test)))
-               return -EFAULT;
-
-       test.len = ops->self_test_count(dev);
-       data = kmalloc(test.len * sizeof(u64), GFP_USER);
-       if (!data)
-               return -ENOMEM;
-
-       ops->self_test(dev, &test, data);
-
-       ret = -EFAULT;
-       if (copy_to_user(useraddr, &test, sizeof(test)))
-               goto out;
-       useraddr += sizeof(test);
-       if (copy_to_user(useraddr, data, test.len * sizeof(u64)))
-               goto out;
-       ret = 0;
-
-out:
-       kfree(data);
-       return ret;
-}
-
-static int ethtool_get_strings(struct net_device *dev, void *useraddr)
-{
-       struct ethtool_gstrings gstrings;
-       struct ethtool_ops *ops = ethtool_ops;
-       u8 *data;
-       int ret;
-
-       if (!ops->get_strings)
-               return -EOPNOTSUPP;
-
-       if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
-               return -EFAULT;
-
-       switch (gstrings.string_set) {
-       case ETH_SS_TEST:
-               if (!ops->self_test_count)
-                       return -EOPNOTSUPP;
-               gstrings.len = ops->self_test_count(dev);
-               break;
-       case ETH_SS_STATS:
-               if (!ops->get_stats_count)
-                       return -EOPNOTSUPP;
-               gstrings.len = ops->get_stats_count(dev);
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
-       if (!data)
-               return -ENOMEM;
-
-       ops->get_strings(dev, gstrings.string_set, data);
-
-       ret = -EFAULT;
-       if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
-               goto out;
-       useraddr += sizeof(gstrings);
-       if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
-               goto out;
-       ret = 0;
-
-out:
-       kfree(data);
-       return ret;
-}
-
-static int ethtool_phys_id(struct net_device *dev, void *useraddr)
-{
-       struct ethtool_value id;
-
-       if (!ethtool_ops->phys_id)
-               return -EOPNOTSUPP;
-
-       if (copy_from_user(&id, useraddr, sizeof(id)))
-               return -EFAULT;
-
-       return ethtool_ops->phys_id(dev, id.data);
-}
-
-static int ethtool_get_stats(struct net_device *dev, void *useraddr)
-{
-       struct ethtool_stats stats;
-       struct ethtool_ops *ops = ethtool_ops;
-       u64 *data;
-       int ret;
-
-       if (!ops->get_ethtool_stats || !ops->get_stats_count)
-               return -EOPNOTSUPP;
-
-       if (copy_from_user(&stats, useraddr, sizeof(stats)))
-               return -EFAULT;
-
-       stats.n_stats = ops->get_stats_count(dev);
-       data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER);
-       if (!data)
-               return -ENOMEM;
-
-       ops->get_ethtool_stats(dev, &stats, data);
-
-       ret = -EFAULT;
-       if (copy_to_user(useraddr, &stats, sizeof(stats)))
-               goto out;
-       useraddr += sizeof(stats);
-       if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64)))
-               goto out;
-       ret = 0;
-
-out:
-       kfree(data);
-       return ret;
-}
-
-/* The main entry point in this file.  Called from net/core/dev.c */
-
-#define ETHTOOL_OPS_COMPAT
-int ethtool_ioctl(struct ifreq *ifr)
-{
-       struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
-       void *useraddr = (void *) ifr->ifr_data;
-       u32 ethcmd;
-
-       /*
-        * XXX: This can be pushed down into the ethtool_* handlers that
-        * need it.  Keep existing behavior for the moment.
-        */
-       if (!capable(CAP_NET_ADMIN))
-               return -EPERM;
-
-       if (!dev || !netif_device_present(dev))
-               return -ENODEV;
-
-       if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
-               return -EFAULT;
-
-       switch (ethcmd) {
-       case ETHTOOL_GSET:
-               return ethtool_get_settings(dev, useraddr);
-       case ETHTOOL_SSET:
-               return ethtool_set_settings(dev, useraddr);
-       case ETHTOOL_GDRVINFO:
-               return ethtool_get_drvinfo(dev, useraddr);
-       case ETHTOOL_GREGS:
-               return ethtool_get_regs(dev, useraddr);
-       case ETHTOOL_GWOL:
-               return ethtool_get_wol(dev, useraddr);
-       case ETHTOOL_SWOL:
-               return ethtool_set_wol(dev, useraddr);
-       case ETHTOOL_GMSGLVL:
-               return ethtool_get_msglevel(dev, useraddr);
-       case ETHTOOL_SMSGLVL:
-               return ethtool_set_msglevel(dev, useraddr);
-       case ETHTOOL_NWAY_RST:
-               return ethtool_nway_reset(dev);
-       case ETHTOOL_GLINK:
-               return ethtool_get_link(dev, useraddr);
-       case ETHTOOL_GEEPROM:
-               return ethtool_get_eeprom(dev, useraddr);
-       case ETHTOOL_SEEPROM:
-               return ethtool_set_eeprom(dev, useraddr);
-       case ETHTOOL_GCOALESCE:
-               return ethtool_get_coalesce(dev, useraddr);
-       case ETHTOOL_SCOALESCE:
-               return ethtool_set_coalesce(dev, useraddr);
-       case ETHTOOL_GRINGPARAM:
-               return ethtool_get_ringparam(dev, useraddr);
-       case ETHTOOL_SRINGPARAM:
-               return ethtool_set_ringparam(dev, useraddr);
-       case ETHTOOL_GPAUSEPARAM:
-               return ethtool_get_pauseparam(dev, useraddr);
-       case ETHTOOL_SPAUSEPARAM:
-               return ethtool_set_pauseparam(dev, useraddr);
-       case ETHTOOL_GRXCSUM:
-               return ethtool_get_rx_csum(dev, useraddr);
-       case ETHTOOL_SRXCSUM:
-               return ethtool_set_rx_csum(dev, useraddr);
-       case ETHTOOL_GTXCSUM:
-               return ethtool_get_tx_csum(dev, useraddr);
-       case ETHTOOL_STXCSUM:
-               return ethtool_set_tx_csum(dev, useraddr);
-       case ETHTOOL_GSG:
-               return ethtool_get_sg(dev, useraddr);
-       case ETHTOOL_SSG:
-               return ethtool_set_sg(dev, useraddr);
-       case ETHTOOL_GTSO:
-               return ethtool_get_tso(dev, useraddr);
-       case ETHTOOL_STSO:
-               return ethtool_set_tso(dev, useraddr);
-       case ETHTOOL_TEST:
-               return ethtool_self_test(dev, useraddr);
-       case ETHTOOL_GSTRINGS:
-               return ethtool_get_strings(dev, useraddr);
-       case ETHTOOL_PHYS_ID:
-               return ethtool_phys_id(dev, useraddr);
-       case ETHTOOL_GSTATS:
-               return ethtool_get_stats(dev, useraddr);
-       default:
-               return -EOPNOTSUPP;
-       }
-
-       return -EOPNOTSUPP;
-}
-
-#define mii_if_info _kc_mii_if_info
-struct _kc_mii_if_info {
-       int phy_id;
-       int advertising;
-       int phy_id_mask;
-       int reg_num_mask;
-
-       unsigned int full_duplex : 1;   /* is full duplex? */
-       unsigned int force_media : 1;   /* is autoneg. disabled? */
-
-       struct net_device *dev;
-       int (*mdio_read) (struct net_device *dev, int phy_id, int location);
-       void (*mdio_write) (struct net_device *dev, int phy_id, int location, int val);
-};
-
-struct ethtool_cmd;
-struct mii_ioctl_data;
-
-#undef mii_link_ok
-#define mii_link_ok _kc_mii_link_ok
-#undef mii_nway_restart
-#define mii_nway_restart _kc_mii_nway_restart
-#undef mii_ethtool_gset
-#define mii_ethtool_gset _kc_mii_ethtool_gset
-#undef mii_ethtool_sset
-#define mii_ethtool_sset _kc_mii_ethtool_sset
-#undef mii_check_link
-#define mii_check_link _kc_mii_check_link
-extern int _kc_mii_link_ok (struct mii_if_info *mii);
-extern int _kc_mii_nway_restart (struct mii_if_info *mii);
-extern int _kc_mii_ethtool_gset(struct mii_if_info *mii,
-                                struct ethtool_cmd *ecmd);
-extern int _kc_mii_ethtool_sset(struct mii_if_info *mii,
-                                struct ethtool_cmd *ecmd);
-extern void _kc_mii_check_link (struct mii_if_info *mii);
-#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) )
-#undef generic_mii_ioctl
-#define generic_mii_ioctl _kc_generic_mii_ioctl
-extern int _kc_generic_mii_ioctl(struct mii_if_info *mii_if,
-                                 struct mii_ioctl_data *mii_data, int cmd,
-                                 unsigned int *duplex_changed);
-#endif /* > 2.4.6 */
-
-
-struct _kc_pci_dev_ext {
-       struct pci_dev *dev;
-       void *pci_drvdata;
-       struct pci_driver *driver;
-};
-
-struct _kc_net_dev_ext {
-       struct net_device *dev;
-       unsigned int carrier;
-};
-
-
-/**************************************/
-/* mii support */
-
-int _kc_mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
-{
-       struct net_device *dev = mii->dev;
-       u32 advert, bmcr, lpa, nego;
-
-       ecmd->supported =
-           (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
-            SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
-            SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII);
-
-       /* only supports twisted-pair */
-       ecmd->port = PORT_MII;
-
-       /* only supports internal transceiver */
-       ecmd->transceiver = XCVR_INTERNAL;
-
-       /* this isn't fully supported at higher layers */
-       ecmd->phy_address = mii->phy_id;
-
-       ecmd->advertising = ADVERTISED_TP | ADVERTISED_MII;
-       advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE);
-       if (advert & ADVERTISE_10HALF)
-               ecmd->advertising |= ADVERTISED_10baseT_Half;
-       if (advert & ADVERTISE_10FULL)
-               ecmd->advertising |= ADVERTISED_10baseT_Full;
-       if (advert & ADVERTISE_100HALF)
-               ecmd->advertising |= ADVERTISED_100baseT_Half;
-       if (advert & ADVERTISE_100FULL)
-               ecmd->advertising |= ADVERTISED_100baseT_Full;
-
-       bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
-       lpa = mii->mdio_read(dev, mii->phy_id, MII_LPA);
-       if (bmcr & BMCR_ANENABLE) {
-               ecmd->advertising |= ADVERTISED_Autoneg;
-               ecmd->autoneg = AUTONEG_ENABLE;
-
-               nego = mii_nway_result(advert & lpa);
-               if (nego == LPA_100FULL || nego == LPA_100HALF)
-                       ecmd->speed = SPEED_100;
-               else
-                       ecmd->speed = SPEED_10;
-               if (nego == LPA_100FULL || nego == LPA_10FULL) {
-                       ecmd->duplex = DUPLEX_FULL;
-                       mii->full_duplex = 1;
-               } else {
-                       ecmd->duplex = DUPLEX_HALF;
-                       mii->full_duplex = 0;
-               }
-       } else {
-               ecmd->autoneg = AUTONEG_DISABLE;
-
-               ecmd->speed = (bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10;
-               ecmd->duplex = (bmcr & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF;
-       }
-
-       /* ignore maxtxpkt, maxrxpkt for now */
-
-       return 0;
-}
-
-int _kc_mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
-{
-       struct net_device *dev = mii->dev;
-
-       if (ecmd->speed != SPEED_10 && ecmd->speed != SPEED_100)
-               return -EINVAL;
-       if (ecmd->duplex != DUPLEX_HALF && ecmd->duplex != DUPLEX_FULL)
-               return -EINVAL;
-       if (ecmd->port != PORT_MII)
-               return -EINVAL;
-       if (ecmd->transceiver != XCVR_INTERNAL)
-               return -EINVAL;
-       if (ecmd->phy_address != mii->phy_id)
-               return -EINVAL;
-       if (ecmd->autoneg != AUTONEG_DISABLE && ecmd->autoneg != AUTONEG_ENABLE)
-               return -EINVAL;
-
-       /* ignore supported, maxtxpkt, maxrxpkt */
-
-       if (ecmd->autoneg == AUTONEG_ENABLE) {
-               u32 bmcr, advert, tmp;
-
-               if ((ecmd->advertising & (ADVERTISED_10baseT_Half |
-                                         ADVERTISED_10baseT_Full |
-                                         ADVERTISED_100baseT_Half |
-                                         ADVERTISED_100baseT_Full)) == 0)
-                       return -EINVAL;
-
-               /* advertise only what has been requested */
-               advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE);
-               tmp = advert & ~(ADVERTISE_ALL | ADVERTISE_100BASE4);
-               if (ecmd->advertising & ADVERTISED_10baseT_Half)
-                       tmp |= ADVERTISE_10HALF;
-               if (ecmd->advertising & ADVERTISED_10baseT_Full)
-                       tmp |= ADVERTISE_10FULL;
-               if (ecmd->advertising & ADVERTISED_100baseT_Half)
-                       tmp |= ADVERTISE_100HALF;
-               if (ecmd->advertising & ADVERTISED_100baseT_Full)
-                       tmp |= ADVERTISE_100FULL;
-               if (advert != tmp) {
-                       mii->mdio_write(dev, mii->phy_id, MII_ADVERTISE, tmp);
-                       mii->advertising = tmp;
-               }
-
-               /* turn on autonegotiation, and force a renegotiate */
-               bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
-               bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART);
-               mii->mdio_write(dev, mii->phy_id, MII_BMCR, bmcr);
-
-               mii->force_media = 0;
-       } else {
-               u32 bmcr, tmp;
-
-               /* turn off auto negotiation, set speed and duplexity */
-               bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
-               tmp = bmcr & ~(BMCR_ANENABLE | BMCR_SPEED100 | BMCR_FULLDPLX);
-               if (ecmd->speed == SPEED_100)
-                       tmp |= BMCR_SPEED100;
-               if (ecmd->duplex == DUPLEX_FULL) {
-                       tmp |= BMCR_FULLDPLX;
-                       mii->full_duplex = 1;
-               } else
-                       mii->full_duplex = 0;
-               if (bmcr != tmp)
-                       mii->mdio_write(dev, mii->phy_id, MII_BMCR, tmp);
-
-               mii->force_media = 1;
-       }
-       return 0;
-}
-
-int _kc_mii_link_ok (struct mii_if_info *mii)
-{
-       /* first, a dummy read, needed to latch some MII phys */
-       mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR);
-       if (mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR) & BMSR_LSTATUS)
-               return 1;
-       return 0;
-}
-
-int _kc_mii_nway_restart (struct mii_if_info *mii)
-{
-       int bmcr;
-       int r = -EINVAL;
-
-       /* if autoneg is off, it's an error */
-       bmcr = mii->mdio_read(mii->dev, mii->phy_id, MII_BMCR);
-
-       if (bmcr & BMCR_ANENABLE) {
-               bmcr |= BMCR_ANRESTART;
-               mii->mdio_write(mii->dev, mii->phy_id, MII_BMCR, bmcr);
-               r = 0;
-       }
-
-       return r;
-}
-
-void _kc_mii_check_link (struct mii_if_info *mii)
-{
-       int cur_link = mii_link_ok(mii);
-       int prev_link = netif_carrier_ok(mii->dev);
-
-       if (cur_link && !prev_link)
-               netif_carrier_on(mii->dev);
-       else if (prev_link && !cur_link)
-               netif_carrier_off(mii->dev);
-}
-
-#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) )
-int _kc_generic_mii_ioctl(struct mii_if_info *mii_if,
-                          struct mii_ioctl_data *mii_data, int cmd,
-                          unsigned int *duplex_chg_out)
-{
-       int rc = 0;
-       unsigned int duplex_changed = 0;
-
-       if (duplex_chg_out)
-               *duplex_chg_out = 0;
-
-       mii_data->phy_id &= mii_if->phy_id_mask;
-       mii_data->reg_num &= mii_if->reg_num_mask;
-
-       switch(cmd) {
-       case SIOCDEVPRIVATE:    /* binary compat, remove in 2.5 */
-       case SIOCGMIIPHY:
-               mii_data->phy_id = mii_if->phy_id;
-               /* fall through */
-
-       case SIOCDEVPRIVATE + 1:/* binary compat, remove in 2.5 */
-       case SIOCGMIIREG:
-               mii_data->val_out =
-                       mii_if->mdio_read(mii_if->dev, mii_data->phy_id,
-                                         mii_data->reg_num);
-               break;
-
-       case SIOCDEVPRIVATE + 2:/* binary compat, remove in 2.5 */
-       case SIOCSMIIREG: {
-               u16 val = mii_data->val_in;
-
-               if (!capable(CAP_NET_ADMIN))
-                       return -EPERM;
-
-               if (mii_data->phy_id == mii_if->phy_id) {
-                       switch(mii_data->reg_num) {
-                       case MII_BMCR: {
-                               unsigned int new_duplex = 0;
-                               if (val & (BMCR_RESET|BMCR_ANENABLE))
-                                       mii_if->force_media = 0;
-                               else
-                                       mii_if->force_media = 1;
-                               if (mii_if->force_media &&
-                                   (val & BMCR_FULLDPLX))
-                                       new_duplex = 1;
-                               if (mii_if->full_duplex != new_duplex) {
-                                       duplex_changed = 1;
-                                       mii_if->full_duplex = new_duplex;
-                               }
-                               break;
-                       }
-                       case MII_ADVERTISE:
-                               mii_if->advertising = val;
-                               break;
-                       default:
-                               /* do nothing */
-                               break;
-                       }
-               }
-
-               mii_if->mdio_write(mii_if->dev, mii_data->phy_id,
-                                  mii_data->reg_num, val);
-               break;
-       }
-
-       default:
-               rc = -EOPNOTSUPP;
-               break;
-       }
-
-       if ((rc == 0) && (duplex_chg_out) && (duplex_changed))
-               *duplex_chg_out = 1;
-
-       return rc;
-}
-#endif /* > 2.4.6 */
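A note on the removed _kc_mii_ethtool_sset() above: despite the "advertise only what has been requested" comment, the autoneg branch tests the ADVERTISED_* constants themselves (which are always non-zero) rather than the bits in ecmd->advertising, so every 10/100 mode ends up advertised no matter what was asked for. A minimal sketch of the apparent intent, assuming the same mii_if_info helpers and ADVERTISE_* definitions used above; this is not the shipped code:

/* Sketch only: mirror the *requested* 10/100 modes into MII_ADVERTISE. */
static void example_set_advertising(struct mii_if_info *mii,
                                    struct net_device *dev, u32 requested)
{
        u32 advert, tmp;

        advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE);
        tmp = advert & ~(ADVERTISE_ALL | ADVERTISE_100BASE4);
        if (requested & ADVERTISED_10baseT_Half)
                tmp |= ADVERTISE_10HALF;
        if (requested & ADVERTISED_10baseT_Full)
                tmp |= ADVERTISE_10FULL;
        if (requested & ADVERTISED_100baseT_Half)
                tmp |= ADVERTISE_100HALF;
        if (requested & ADVERTISED_100baseT_Full)
                tmp |= ADVERTISE_100FULL;
        if (advert != tmp) {
                mii->mdio_write(dev, mii->phy_id, MII_ADVERTISE, tmp);
                mii->advertising = tmp;
        }
}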
index 222c2c7..5941546 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 2401584..e17b7f1 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index c6abb02..00a584f 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index c6f4130..30de47e 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 02be92a..4102440 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index ef7ce62..f00fe79 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index a6ab30d..98b7400 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 93659ca..88b33fa 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 9bd6f53..6ae5926 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index a669045..5e6f9ac 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 11472bd..bc3cb2f 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index cad2862..48f7dcf 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 92fc9fc..d26016c 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
@@ -86,7 +86,7 @@ const char ixgbe_driver_version[] = DRV_VERSION;
  * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
  *   Class, Class Mask, private data (not used) }
  */
-DEFINE_PCI_DEVICE_TABLE(ixgbe_pci_tbl) = {
+const struct pci_device_id ixgbe_pci_tbl[] = {
        {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598)},
        {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT)},
        {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_SINGLE_PORT)},
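This table is what the reworked kni_misc.c matches against later in this commit (see the pci_match_id() calls on ixgbe_pci_tbl/igb_pci_tbl below): DEFINE_PCI_DEVICE_TABLE is gone from newer kernels, and a plain const struct pci_device_id array can be both exported to the KNI core and used for matching. A hedged sketch of that matching pattern, with illustrative names:

/* Sketch: deciding whether a pci_dev is covered by an exported ID table. */
#include <linux/pci.h>

extern const struct pci_device_id example_pci_tbl[];

static bool example_is_supported(struct pci_dev *pdev)
{
        return pci_match_id(example_pci_tbl, pdev) != NULL;
}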
index 124f00d..5ced84f 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index d161600..c6f8e21 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index e3f5275..234fa63 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index bbe5a9e..5ae171a 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
diff --git a/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h b/src/dpdk/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h
deleted file mode 100644 (file)
index 5e3559f..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2012 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <[email protected]>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-
-#ifndef _IXGBE_SRIOV_H_
-#define _IXGBE_SRIOV_H_
-
-int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter,
-                           int entries, u16 *hash_list, u32 vf);
-void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter);
-int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid, u32 vf);
-void ixgbe_set_vmolr(struct ixgbe_hw *hw, u32 vf, bool aupe);
-void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf);
-void ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf);
-void ixgbe_msg_task(struct ixgbe_adapter *adapter);
-int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter,
-                    int vf, unsigned char *mac_addr);
-void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter);
-void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter);
-#ifdef IFLA_VF_MAX
-int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac);
-int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan,
-                         u8 qos);
-int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
-#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
-int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting);
-#endif
-int ixgbe_ndo_get_vf_config(struct net_device *netdev,
-                           int vf, struct ifla_vf_info *ivi);
-#endif
-void ixgbe_disable_sriov(struct ixgbe_adapter *adapter);
-#ifdef CONFIG_PCI_IOV
-int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask);
-void ixgbe_enable_sriov(struct ixgbe_adapter *adapter);
-#endif
-int ixgbe_check_vf_assignment(struct ixgbe_adapter *adapter);
-#ifdef IFLA_VF_MAX
-void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter);
-#endif /* IFLA_VF_MAX */
-void ixgbe_dump_registers(struct ixgbe_adapter *adapter);
-
-/*
- * These are defined in ixgbe_type.h on behalf of the VF driver
- * but we need them here unwrapped for the PF driver.
- */
-#define IXGBE_DEV_ID_82599_VF                  0x10ED
-#define IXGBE_DEV_ID_X540_VF                   0x1515
-
-#endif /* _IXGBE_SRIOV_H_ */
index 6b21c87..bda61fa 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index b99d9e8..2affe24 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 77e8952..38bcc87 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index 5f2523e..d84c7cc 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
index bf27579..4c7a640 100644 (file)
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <[email protected]>
@@ -3140,4 +3140,16 @@ static inline int __kc_pci_vfs_assigned(struct pci_dev *dev)
 #define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops))
 #endif /* >= 3.16.0 */
 
+/*
+ * vlan_tx_tag_* macros renamed to skb_vlan_tag_* (Linux commit: df8a39defad4)
+ * For older kernels backported this commit, need to use renamed functions.
+ * This fix is specific to RedHat/CentOS kernels.
+ */
+#if (defined(RHEL_RELEASE_CODE) && \
+       RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8) && \
+       LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34))
+#define vlan_tx_tag_get skb_vlan_tag_get
+#define vlan_tx_tag_present skb_vlan_tag_present
+#endif
+
 #endif /* _KCOMPAT_H_ */
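With this compat mapping in place, driver code that still calls the pre-rename helpers keeps building on RHEL/CentOS 6.8+ kernels whose base version predates 2.6.34 but which backported the skb_vlan_tag_* rename. A hedged usage sketch, with an illustrative function name:

/* Sketch: typical TX-path use of the compat-mapped helpers. */
static u16 example_tx_vlan_tag(struct sk_buff *skb)
{
        if (vlan_tx_tag_present(skb))        /* resolves to skb_vlan_tag_present(skb) */
                return vlan_tx_tag_get(skb); /* resolves to skb_vlan_tag_get(skb)     */
        return 0;
}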
index a0e5cb6..58cbadd 100644 (file)
 #ifndef _KNI_DEV_H_
 #define _KNI_DEV_H_
 
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/if.h>
 #include <linux/wait.h>
 #include <linux/sched.h>
 #include <exec-env/rte_kni_common.h>
 #define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */
 
+#define MBUF_BURST_SZ 32
+
 /**
  * A structure describing the private information for a kni device.
  */
-
 struct kni_dev {
        /* kni list */
        struct list_head list;
@@ -50,7 +56,7 @@ struct kni_dev {
        struct net_device_stats stats;
        int status;
        uint16_t group_id;           /* Group ID of a group of KNI devices */
-       unsigned core_id;            /* Core ID to bind */
+       uint32_t core_id;            /* Core ID to bind */
        char name[RTE_KNI_NAMESIZE]; /* Network device name */
        struct task_struct *pthread;
 
@@ -84,38 +90,36 @@ struct kni_dev {
        /* response queue */
        void *resp_q;
 
-       void * sync_kva;
+       void *sync_kva;
        void *sync_va;
 
        void *mbuf_kva;
        void *mbuf_va;
 
        /* mbuf size */
-       unsigned mbuf_size;
+       uint32_t mbuf_size;
 
        /* synchro for request processing */
        unsigned long synchro;
 
 #ifdef RTE_KNI_VHOST
-       struct kni_vhost_queue* vhost_queue;
+       struct kni_vhost_queue *vhost_queue;
+
        volatile enum {
                BE_STOP = 0x1,
                BE_START = 0x2,
                BE_FINISH = 0x4,
-       }vq_status;
+       } vq_status;
 #endif
+       /* buffers */
+       void *pa[MBUF_BURST_SZ];
+       void *va[MBUF_BURST_SZ];
+       void *alloc_pa[MBUF_BURST_SZ];
+       void *alloc_va[MBUF_BURST_SZ];
 };
 
-#define KNI_ERR(args...) printk(KERN_DEBUG "KNI: Error: " args)
-#define KNI_PRINT(args...) printk(KERN_DEBUG "KNI: " args)
-#ifdef RTE_KNI_KO_DEBUG
-       #define KNI_DBG(args...) printk(KERN_DEBUG "KNI: " args)
-#else
-       #define KNI_DBG(args...)
-#endif
-
 #ifdef RTE_KNI_VHOST
-unsigned int
+uint32_t
 kni_poll(struct file *file, struct socket *sock, poll_table * wait);
 int kni_chk_vhost_rx(struct kni_dev *kni);
 int kni_vhost_init(struct kni_dev *kni);
@@ -127,23 +131,22 @@ struct kni_vhost_queue {
        int vnet_hdr_sz;
        struct kni_dev *kni;
        int sockfd;
-       unsigned int flags;
-       struct sk_buff* cache;
-       struct rte_kni_fifo* fifo;
+       uint32_t flags;
+       struct sk_buff *cache;
+       struct rte_kni_fifo *fifo;
 };
 
 #endif
 
-#ifdef RTE_KNI_VHOST_DEBUG_RX
-       #define KNI_DBG_RX(args...) printk(KERN_DEBUG "KNI RX: " args)
-#else
-       #define KNI_DBG_RX(args...)
-#endif
+void kni_net_rx(struct kni_dev *kni);
+void kni_net_init(struct net_device *dev);
+void kni_net_config_lo_mode(char *lo_str);
+void kni_net_poll_resp(struct kni_dev *kni);
+void kni_set_ethtool_ops(struct net_device *netdev);
 
-#ifdef RTE_KNI_VHOST_DEBUG_TX
-       #define KNI_DBG_TX(args...) printk(KERN_DEBUG "KNI TX: " args)
-#else
-       #define KNI_DBG_TX(args...)
-#endif
+int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
+void ixgbe_kni_remove(struct pci_dev *pdev);
+int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
+void igb_kni_remove(struct pci_dev *pdev);
 
 #endif
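Placing the pr_fmt define ahead of the first include means every pr_err()/pr_info()/pr_debug() call in the module is automatically prefixed with its name, which is what lets the old KNI_ERR/KNI_PRINT/KNI_DBG printk wrappers be dropped. A small sketch of the effect, assuming the module is built as rte_kni.ko so that KBUILD_MODNAME expands to "rte_kni":

/* Sketch: pr_fmt must be defined before <linux/printk.h> is pulled in. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/printk.h>

/* pr_err("KNI name %s duplicated\n", name);
 * would then print roughly: "rte_kni: KNI name vEth0 duplicated" */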
index 06b6d46..0c88589 100644 (file)
@@ -31,6 +31,7 @@ static int
 kni_check_if_running(struct net_device *dev)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        if (priv->lad_dev)
                return 0;
        else
@@ -41,6 +42,7 @@ static void
 kni_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        priv->lad_dev->ethtool_ops->get_drvinfo(priv->lad_dev, info);
 }
 
@@ -48,6 +50,7 @@ static int
 kni_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        return priv->lad_dev->ethtool_ops->get_settings(priv->lad_dev, ecmd);
 }
 
@@ -55,6 +58,7 @@ static int
 kni_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        return priv->lad_dev->ethtool_ops->set_settings(priv->lad_dev, ecmd);
 }
 
@@ -62,6 +66,7 @@ static void
 kni_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        priv->lad_dev->ethtool_ops->get_wol(priv->lad_dev, wol);
 }
 
@@ -69,6 +74,7 @@ static int
 kni_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        return priv->lad_dev->ethtool_ops->set_wol(priv->lad_dev, wol);
 }
 
@@ -76,6 +82,7 @@ static int
 kni_nway_reset(struct net_device *dev)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        return priv->lad_dev->ethtool_ops->nway_reset(priv->lad_dev);
 }
 
@@ -83,6 +90,7 @@ static int
 kni_get_eeprom_len(struct net_device *dev)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        return priv->lad_dev->ethtool_ops->get_eeprom_len(priv->lad_dev);
 }
 
@@ -91,6 +99,7 @@ kni_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
                                                        u8 *bytes)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        return priv->lad_dev->ethtool_ops->get_eeprom(priv->lad_dev, eeprom,
                                                                bytes);
 }
@@ -100,6 +109,7 @@ kni_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
                                                        u8 *bytes)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        return priv->lad_dev->ethtool_ops->set_eeprom(priv->lad_dev, eeprom,
                                                                bytes);
 }
@@ -108,6 +118,7 @@ static void
 kni_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        priv->lad_dev->ethtool_ops->get_ringparam(priv->lad_dev, ring);
 }
 
@@ -115,6 +126,7 @@ static int
 kni_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        return priv->lad_dev->ethtool_ops->set_ringparam(priv->lad_dev, ring);
 }
 
@@ -122,6 +134,7 @@ static void
 kni_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        priv->lad_dev->ethtool_ops->get_pauseparam(priv->lad_dev, pause);
 }
 
@@ -129,6 +142,7 @@ static int
 kni_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        return priv->lad_dev->ethtool_ops->set_pauseparam(priv->lad_dev,
                                                                pause);
 }
@@ -137,6 +151,7 @@ static u32
 kni_get_msglevel(struct net_device *dev)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        return priv->lad_dev->ethtool_ops->get_msglevel(priv->lad_dev);
 }
 
@@ -144,6 +159,7 @@ static void
 kni_set_msglevel(struct net_device *dev, u32 data)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        priv->lad_dev->ethtool_ops->set_msglevel(priv->lad_dev, data);
 }
 
@@ -151,6 +167,7 @@ static int
 kni_get_regs_len(struct net_device *dev)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        return priv->lad_dev->ethtool_ops->get_regs_len(priv->lad_dev);
 }
 
@@ -158,6 +175,7 @@ static void
 kni_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *p)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        priv->lad_dev->ethtool_ops->get_regs(priv->lad_dev, regs, p);
 }
 
@@ -165,6 +183,7 @@ static void
 kni_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        priv->lad_dev->ethtool_ops->get_strings(priv->lad_dev, stringset,
                                                                data);
 }
@@ -173,6 +192,7 @@ static int
 kni_get_sset_count(struct net_device *dev, int sset)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        return priv->lad_dev->ethtool_ops->get_sset_count(priv->lad_dev, sset);
 }
 
@@ -181,24 +201,25 @@ kni_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats,
                                                                u64 *data)
 {
        struct kni_dev *priv = netdev_priv(dev);
+
        priv->lad_dev->ethtool_ops->get_ethtool_stats(priv->lad_dev, stats,
                                                                data);
 }
 
 struct ethtool_ops kni_ethtool_ops = {
-       .begin                          = kni_check_if_running,
+       .begin                  = kni_check_if_running,
        .get_drvinfo            = kni_get_drvinfo,
        .get_settings           = kni_get_settings,
        .set_settings           = kni_set_settings,
        .get_regs_len           = kni_get_regs_len,
-       .get_regs                       = kni_get_regs,
-       .get_wol                        = kni_get_wol,
-       .set_wol                        = kni_set_wol,
-       .nway_reset                     = kni_nway_reset,
-       .get_link                       = ethtool_op_get_link,
+       .get_regs               = kni_get_regs,
+       .get_wol                = kni_get_wol,
+       .set_wol                = kni_set_wol,
+       .nway_reset             = kni_nway_reset,
+       .get_link               = ethtool_op_get_link,
        .get_eeprom_len         = kni_get_eeprom_len,
-       .get_eeprom                     = kni_get_eeprom,
-       .set_eeprom                     = kni_set_eeprom,
+       .get_eeprom             = kni_get_eeprom,
+       .set_eeprom             = kni_set_eeprom,
        .get_ringparam          = kni_get_ringparam,
        .set_ringparam          = kni_set_ringparam,
        .get_pauseparam         = kni_get_pauseparam,
@@ -207,7 +228,7 @@ struct ethtool_ops kni_ethtool_ops = {
        .set_msglevel           = kni_set_msglevel,
        .get_strings            = kni_get_strings,
        .get_sset_count         = kni_get_sset_count,
-       .get_ethtool_stats  = kni_get_ethtool_stats,
+       .get_ethtool_stats      = kni_get_ethtool_stats,
 };
 
 void
index 3ea750e..025ec1c 100644 (file)
 /**
  * Adds num elements into the fifo. Return the number actually written
  */
-static inline unsigned
-kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num)
+static inline uint32_t
+kni_fifo_put(struct rte_kni_fifo *fifo, void **data, uint32_t num)
 {
-       unsigned i = 0;
-       unsigned fifo_write = fifo->write;
-       unsigned fifo_read = fifo->read;
-       unsigned new_write = fifo_write;
+       uint32_t i = 0;
+       uint32_t fifo_write = fifo->write;
+       uint32_t fifo_read = fifo->read;
+       uint32_t new_write = fifo_write;
 
        for (i = 0; i < num; i++) {
                new_write = (new_write + 1) & (fifo->len - 1);
@@ -54,12 +54,12 @@ kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num)
 /**
  * Get up to num elements from the fifo. Return the number actully read
  */
-static inline unsigned
-kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num)
+static inline uint32_t
+kni_fifo_get(struct rte_kni_fifo *fifo, void **data, uint32_t num)
 {
-       unsigned i = 0;
-       unsigned new_read = fifo->read;
-       unsigned fifo_write = fifo->write;
+       uint32_t i = 0;
+       uint32_t new_read = fifo->read;
+       uint32_t fifo_write = fifo->write;
 
        for (i = 0; i < num; i++) {
                if (new_read == fifo_write)
@@ -76,16 +76,16 @@ kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num)
 /**
  * Get the num of elements in the fifo
  */
-static inline unsigned
+static inline uint32_t
 kni_fifo_count(struct rte_kni_fifo *fifo)
 {
-       return (fifo->len + fifo->write - fifo->read) & ( fifo->len - 1);
+       return (fifo->len + fifo->write - fifo->read) & (fifo->len - 1);
 }
 
 /**
  * Get the num of available elements in the fifo
  */
-static inline unsigned
+static inline uint32_t
 kni_fifo_free_count(struct rte_kni_fifo *fifo)
 {
        return (fifo->read - fifo->write - 1) & (fifo->len - 1);
@@ -96,7 +96,7 @@ kni_fifo_free_count(struct rte_kni_fifo *fifo)
  * Initializes the kni fifo structure
  */
 static inline void
-kni_fifo_init(struct rte_kni_fifo *fifo, unsigned size)
+kni_fifo_init(struct rte_kni_fifo *fifo, uint32_t size)
 {
        fifo->write = 0;
        fifo->read = 0;
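The count/free-count arithmetic in this header relies on fifo->len being a power of two: the read/write indices are always kept in [0, len), and masking with (len - 1) replaces a modulo while still giving the right answer when write sits below read. One slot is deliberately left unused so a full ring is distinguishable from an empty one. A self-contained sketch of the same invariant (LEN and the function names are illustrative):

/* Sketch: power-of-two ring occupancy, mirroring kni_fifo_count()/_free_count(). */
#include <linux/types.h>

#define LEN 8u  /* must be a power of two */

static uint32_t ring_count(uint32_t write, uint32_t read)
{
        return (LEN + write - read) & (LEN - 1);  /* entries ready to read */
}

static uint32_t ring_free(uint32_t write, uint32_t read)
{
        return (read - write - 1) & (LEN - 1);    /* slots free to write */
}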
index 59d15ca..33b61f2 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/pci.h>
 #include <linux/kthread.h>
 #include <linux/rwsem.h>
+#include <linux/mutex.h>
 #include <linux/nsproxy.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
@@ -47,52 +48,15 @@ MODULE_DESCRIPTION("Kernel Module for managing kni devices");
 
 #define KNI_MAX_DEVICES 32
 
-extern void kni_net_rx(struct kni_dev *kni);
-extern void kni_net_init(struct net_device *dev);
-extern void kni_net_config_lo_mode(char *lo_str);
-extern void kni_net_poll_resp(struct kni_dev *kni);
-extern void kni_set_ethtool_ops(struct net_device *netdev);
-
-extern int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
-extern void ixgbe_kni_remove(struct pci_dev *pdev);
-extern int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
-extern void igb_kni_remove(struct pci_dev *pdev);
-
-static int kni_open(struct inode *inode, struct file *file);
-static int kni_release(struct inode *inode, struct file *file);
-static int kni_ioctl(struct inode *inode, unsigned int ioctl_num,
-                                       unsigned long ioctl_param);
-static int kni_compat_ioctl(struct inode *inode, unsigned int ioctl_num,
-                                               unsigned long ioctl_param);
-static int kni_dev_remove(struct kni_dev *dev);
-
-static int __init kni_parse_kthread_mode(void);
-
-/* KNI processing for single kernel thread mode */
-static int kni_thread_single(void *unused);
-/* KNI processing for multiple kernel thread mode */
-static int kni_thread_multiple(void *param);
-
-static struct file_operations kni_fops = {
-       .owner = THIS_MODULE,
-       .open = kni_open,
-       .release = kni_release,
-       .unlocked_ioctl = (void *)kni_ioctl,
-       .compat_ioctl = (void *)kni_compat_ioctl,
-};
-
-static struct miscdevice kni_misc = {
-       .minor = MISC_DYNAMIC_MINOR,
-       .name = KNI_DEVICE,
-       .fops = &kni_fops,
-};
+extern const struct pci_device_id ixgbe_pci_tbl[];
+extern const struct pci_device_id igb_pci_tbl[];
 
 /* loopback mode */
-static char *lo_mode = NULL;
+static char *lo_mode;
 
 /* Kernel thread mode */
-static char *kthread_mode = NULL;
-static unsigned multiple_kthread_on = 0;
+static char *kthread_mode;
+static uint32_t multiple_kthread_on;
 
 #define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */
 
@@ -100,20 +64,24 @@ static int kni_net_id;
 
 struct kni_net {
        unsigned long device_in_use; /* device in use flag */
+       struct mutex kni_kthread_lock;
        struct task_struct *kni_kthread;
        struct rw_semaphore kni_list_lock;
        struct list_head kni_list_head;
 };
 
-static int __net_init kni_init_net(struct net *net)
+static int __net_init
+kni_init_net(struct net *net)
 {
 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
        struct kni_net *knet = net_generic(net, kni_net_id);
+
+       memset(knet, 0, sizeof(*knet));
 #else
        struct kni_net *knet;
        int ret;
 
-       knet = kmalloc(sizeof(struct kni_net), GFP_KERNEL);
+       knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL);
        if (!knet) {
                ret = -ENOMEM;
                return ret;
@@ -123,6 +91,8 @@ static int __net_init kni_init_net(struct net *net)
        /* Clear the bit of device in use */
        clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
 
+       mutex_init(&knet->kni_kthread_lock);
+
        init_rwsem(&knet->kni_list_lock);
        INIT_LIST_HEAD(&knet->kni_list_head);
 
@@ -137,11 +107,15 @@ static int __net_init kni_init_net(struct net *net)
 #endif
 }
 
-static void __net_exit kni_exit_net(struct net *net)
+static void __net_exit
+kni_exit_net(struct net *net)
 {
-#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
-       struct kni_net *knet = net_generic(net, kni_net_id);
+       struct kni_net *knet __maybe_unused;
+
+       knet = net_generic(net, kni_net_id);
+       mutex_destroy(&knet->kni_kthread_lock);
 
+#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
        kfree(knet);
 #endif
 }
@@ -155,72 +129,56 @@ static struct pernet_operations kni_net_ops = {
 #endif
 };
 
-static int __init
-kni_init(void)
+static int
+kni_thread_single(void *data)
 {
-       int rc;
-
-       KNI_PRINT("######## DPDK kni module loading ########\n");
-
-       if (kni_parse_kthread_mode() < 0) {
-               KNI_ERR("Invalid parameter for kthread_mode\n");
-               return -EINVAL;
-       }
+       struct kni_net *knet = data;
+       int j;
+       struct kni_dev *dev;
 
-#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
-       rc = register_pernet_subsys(&kni_net_ops);
+       while (!kthread_should_stop()) {
+               down_read(&knet->kni_list_lock);
+               for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
+                       list_for_each_entry(dev, &knet->kni_list_head, list) {
+#ifdef RTE_KNI_VHOST
+                               kni_chk_vhost_rx(dev);
 #else
-       rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
+                               kni_net_rx(dev);
+#endif
+                               kni_net_poll_resp(dev);
+                       }
+               }
+               up_read(&knet->kni_list_lock);
+#ifdef RTE_KNI_PREEMPT_DEFAULT
+               /* reschedule out for a while */
+               schedule_timeout_interruptible(
+                       usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
 #endif
-       if (rc)
-               return -EPERM;
-
-       rc = misc_register(&kni_misc);
-       if (rc != 0) {
-               KNI_ERR("Misc registration failed\n");
-               goto out;
        }
 
-       /* Configure the lo mode according to the input parameter */
-       kni_net_config_lo_mode(lo_mode);
-
-       KNI_PRINT("######## DPDK kni module loaded  ########\n");
-
        return 0;
-
-out:
-#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
-       unregister_pernet_subsys(&kni_net_ops);
-#else
-       register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
-#endif
-       return rc;
 }
 
-static void __exit
-kni_exit(void)
+static int
+kni_thread_multiple(void *param)
 {
-       misc_deregister(&kni_misc);
-#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
-       unregister_pernet_subsys(&kni_net_ops);
+       int j;
+       struct kni_dev *dev = (struct kni_dev *)param;
+
+       while (!kthread_should_stop()) {
+               for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
+#ifdef RTE_KNI_VHOST
+                       kni_chk_vhost_rx(dev);
 #else
-       register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
+                       kni_net_rx(dev);
 #endif
-       KNI_PRINT("####### DPDK kni module unloaded  #######\n");
-}
-
-static int __init
-kni_parse_kthread_mode(void)
-{
-       if (!kthread_mode)
-               return 0;
-
-       if (strcmp(kthread_mode, "single") == 0)
-               return 0;
-       else if (strcmp(kthread_mode, "multiple") == 0)
-               multiple_kthread_on = 1;
-       else
-               return -1;
+                       kni_net_poll_resp(dev);
+               }
+#ifdef RTE_KNI_PREEMPT_DEFAULT
+               schedule_timeout_interruptible(
+                       usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
+#endif
+       }
 
        return 0;
 }
@@ -235,21 +193,31 @@ kni_open(struct inode *inode, struct file *file)
        if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
                return -EBUSY;
 
-       /* Create kernel thread for single mode */
-       if (multiple_kthread_on == 0) {
-               KNI_PRINT("Single kernel thread for all KNI devices\n");
-               /* Create kernel thread for RX */
-               knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet,
-                                               "kni_single");
-               if (IS_ERR(knet->kni_kthread)) {
-                       KNI_ERR("Unable to create kernel threaed\n");
-                       return PTR_ERR(knet->kni_kthread);
-               }
-       } else
-               KNI_PRINT("Multiple kernel thread mode enabled\n");
-
        file->private_data = get_net(net);
-       KNI_PRINT("/dev/kni opened\n");
+       pr_debug("/dev/kni opened\n");
+
+       return 0;
+}
+
+static int
+kni_dev_remove(struct kni_dev *dev)
+{
+       if (!dev)
+               return -ENODEV;
+
+#ifdef CONFIG_RTE_KNI_KMOD_ETHTOOL
+       if (dev->pci_dev) {
+               if (pci_match_id(ixgbe_pci_tbl, dev->pci_dev))
+                       ixgbe_kni_remove(dev->pci_dev);
+               else if (pci_match_id(igb_pci_tbl, dev->pci_dev))
+                       igb_kni_remove(dev->pci_dev);
+       }
+#endif
+
+       if (dev->net_dev) {
+               unregister_netdev(dev->net_dev);
+               free_netdev(dev->net_dev);
+       }
 
        return 0;
 }
@@ -263,9 +231,13 @@ kni_release(struct inode *inode, struct file *file)
 
        /* Stop kernel thread for single mode */
        if (multiple_kthread_on == 0) {
+               mutex_lock(&knet->kni_kthread_lock);
                /* Stop kernel thread */
-               kthread_stop(knet->kni_kthread);
-               knet->kni_kthread = NULL;
+               if (knet->kni_kthread != NULL) {
+                       kthread_stop(knet->kni_kthread);
+                       knet->kni_kthread = NULL;
+               }
+               mutex_unlock(&knet->kni_kthread_lock);
        }
 
        down_write(&knet->kni_list_lock);
@@ -288,121 +260,78 @@ kni_release(struct inode *inode, struct file *file)
        clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
 
        put_net(net);
-       KNI_PRINT("/dev/kni closed\n");
+       pr_debug("/dev/kni closed\n");
 
        return 0;
 }
 
 static int
-kni_thread_single(void *data)
+kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
 {
-       struct kni_net *knet = data;
-       int j;
-       struct kni_dev *dev;
+       if (!kni || !dev)
+               return -1;
 
-       while (!kthread_should_stop()) {
-               down_read(&knet->kni_list_lock);
-               for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
-                       list_for_each_entry(dev, &knet->kni_list_head, list) {
-#ifdef RTE_KNI_VHOST
-                               kni_chk_vhost_rx(dev);
-#else
-                               kni_net_rx(dev);
-#endif
-                               kni_net_poll_resp(dev);
-                       }
-               }
-               up_read(&knet->kni_list_lock);
-#ifdef RTE_KNI_PREEMPT_DEFAULT
-               /* reschedule out for a while */
-               schedule_timeout_interruptible(usecs_to_jiffies( \
-                               KNI_KTHREAD_RESCHEDULE_INTERVAL));
-#endif
+       /* Check if network name has been used */
+       if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) {
+               pr_err("KNI name %s duplicated\n", dev->name);
+               return -1;
        }
 
        return 0;
 }
 
 static int
-kni_thread_multiple(void *param)
+kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind)
 {
-       int j;
-       struct kni_dev *dev = (struct kni_dev *)param;
-
-       while (!kthread_should_stop()) {
-               for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
-#ifdef RTE_KNI_VHOST
-                       kni_chk_vhost_rx(dev);
-#else
-                       kni_net_rx(dev);
-#endif
-                       kni_net_poll_resp(dev);
+       /**
+        * Create a new kernel thread for multiple mode, set its core affinity,
+        * and finally wake it up.
+        */
+       if (multiple_kthread_on) {
+               kni->pthread = kthread_create(kni_thread_multiple,
+                       (void *)kni, "kni_%s", kni->name);
+               if (IS_ERR(kni->pthread)) {
+                       kni_dev_remove(kni);
+                       return -ECANCELED;
                }
-#ifdef RTE_KNI_PREEMPT_DEFAULT
-               schedule_timeout_interruptible(usecs_to_jiffies( \
-                               KNI_KTHREAD_RESCHEDULE_INTERVAL));
-#endif
-       }
-
-       return 0;
-}
-
-static int
-kni_dev_remove(struct kni_dev *dev)
-{
-       if (!dev)
-               return -ENODEV;
-
-       switch (dev->device_id) {
-       #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
-       #include <rte_pci_dev_ids.h>
-               igb_kni_remove(dev->pci_dev);
-               break;
-       #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) case (dev):
-       #include <rte_pci_dev_ids.h>
-               ixgbe_kni_remove(dev->pci_dev);
-               break;
-       default:
-               break;
-       }
-
-       if (dev->net_dev) {
-               unregister_netdev(dev->net_dev);
-               free_netdev(dev->net_dev);
-       }
 
-       return 0;
-}
+               if (force_bind)
+                       kthread_bind(kni->pthread, kni->core_id);
+               wake_up_process(kni->pthread);
+       } else {
+               mutex_lock(&knet->kni_kthread_lock);
+
+               if (knet->kni_kthread == NULL) {
+                       knet->kni_kthread = kthread_create(kni_thread_single,
+                               (void *)knet, "kni_single");
+                       if (IS_ERR(knet->kni_kthread)) {
+                               mutex_unlock(&knet->kni_kthread_lock);
+                               kni_dev_remove(kni);
+                               return -ECANCELED;
+                       }
 
-static int
-kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
-{
-       if (!kni || !dev)
-               return -1;
+                       if (force_bind)
+                               kthread_bind(knet->kni_kthread, kni->core_id);
+                       wake_up_process(knet->kni_kthread);
+               }
 
-       /* Check if network name has been used */
-       if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) {
-               KNI_ERR("KNI name %s duplicated\n", dev->name);
-               return -1;
+               mutex_unlock(&knet->kni_kthread_lock);
        }
 
        return 0;
 }
 
 static int
-kni_ioctl_create(struct net *net,
-               unsigned int ioctl_num, unsigned long ioctl_param)
+kni_ioctl_create(struct net *net, uint32_t ioctl_num,
+               unsigned long ioctl_param)
 {
        struct kni_net *knet = net_generic(net, kni_net_id);
        int ret;
        struct rte_kni_device_info dev_info;
-       struct pci_dev *pci = NULL;
-       struct pci_dev *found_pci = NULL;
        struct net_device *net_dev = NULL;
-       struct net_device *lad_dev = NULL;
        struct kni_dev *kni, *dev, *n;
 
-       printk(KERN_INFO "KNI: Creating kni...\n");
+       pr_info("Creating kni...\n");
        /* Check the buffer size, to avoid warning */
        if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
                return -EINVAL;
@@ -410,17 +339,21 @@ kni_ioctl_create(struct net *net,
        /* Copy kni info from user space */
        ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
        if (ret) {
-               KNI_ERR("copy_from_user in kni_ioctl_create");
+               pr_err("copy_from_user in kni_ioctl_create");
                return -EIO;
        }
 
+       /* Check if name is zero-ended */
+       if (strnlen(dev_info.name, sizeof(dev_info.name)) == sizeof(dev_info.name)) {
+               pr_err("kni.name not zero-terminated");
+               return -EINVAL;
+       }
+
        /**
-        * Check if the cpu core id is valid for binding,
-        * for multiple kernel thread mode.
+        * Check if the cpu core id is valid for binding.
         */
-       if (multiple_kthread_on && dev_info.force_bind &&
-                               !cpu_online(dev_info.core_id)) {
-               KNI_ERR("cpu %u is not online\n", dev_info.core_id);
+       if (dev_info.force_bind && !cpu_online(dev_info.core_id)) {
+               pr_err("cpu %u is not online\n", dev_info.core_id);
                return -EINVAL;
        }
 
@@ -435,12 +368,12 @@ kni_ioctl_create(struct net *net,
        up_read(&knet->kni_list_lock);
 
        net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
-#ifdef NET_NAME_UNKNOWN
-                                                       NET_NAME_UNKNOWN,
+#ifdef NET_NAME_USER
+                                                       NET_NAME_USER,
 #endif
                                                        kni_net_init);
        if (net_dev == NULL) {
-               KNI_ERR("error allocating device \"%s\"\n", dev_info.name);
+               pr_err("error allocating device \"%s\"\n", dev_info.name);
                return -EBUSY;
        }
 
@@ -464,44 +397,43 @@ kni_ioctl_create(struct net *net,
        kni->sync_va = dev_info.sync_va;
        kni->sync_kva = phys_to_virt(dev_info.sync_phys);
 
-       kni->mbuf_kva = phys_to_virt(dev_info.mbuf_phys);
-       kni->mbuf_va = dev_info.mbuf_va;
-
 #ifdef RTE_KNI_VHOST
        kni->vhost_queue = NULL;
        kni->vq_status = BE_STOP;
 #endif
        kni->mbuf_size = dev_info.mbuf_size;
 
-       KNI_PRINT("tx_phys:      0x%016llx, tx_q addr:      0x%p\n",
+       pr_debug("tx_phys:      0x%016llx, tx_q addr:      0x%p\n",
                (unsigned long long) dev_info.tx_phys, kni->tx_q);
-       KNI_PRINT("rx_phys:      0x%016llx, rx_q addr:      0x%p\n",
+       pr_debug("rx_phys:      0x%016llx, rx_q addr:      0x%p\n",
                (unsigned long long) dev_info.rx_phys, kni->rx_q);
-       KNI_PRINT("alloc_phys:   0x%016llx, alloc_q addr:   0x%p\n",
+       pr_debug("alloc_phys:   0x%016llx, alloc_q addr:   0x%p\n",
                (unsigned long long) dev_info.alloc_phys, kni->alloc_q);
-       KNI_PRINT("free_phys:    0x%016llx, free_q addr:    0x%p\n",
+       pr_debug("free_phys:    0x%016llx, free_q addr:    0x%p\n",
                (unsigned long long) dev_info.free_phys, kni->free_q);
-       KNI_PRINT("req_phys:     0x%016llx, req_q addr:     0x%p\n",
+       pr_debug("req_phys:     0x%016llx, req_q addr:     0x%p\n",
                (unsigned long long) dev_info.req_phys, kni->req_q);
-       KNI_PRINT("resp_phys:    0x%016llx, resp_q addr:    0x%p\n",
+       pr_debug("resp_phys:    0x%016llx, resp_q addr:    0x%p\n",
                (unsigned long long) dev_info.resp_phys, kni->resp_q);
-       KNI_PRINT("mbuf_phys:    0x%016llx, mbuf_kva:       0x%p\n",
-               (unsigned long long) dev_info.mbuf_phys, kni->mbuf_kva);
-       KNI_PRINT("mbuf_va:      0x%p\n", dev_info.mbuf_va);
-       KNI_PRINT("mbuf_size:    %u\n", kni->mbuf_size);
+       pr_debug("mbuf_size:    %u\n", kni->mbuf_size);
 
-       KNI_DBG("PCI: %02x:%02x.%02x %04x:%04x\n",
+       pr_debug("PCI: %02x:%02x.%02x %04x:%04x\n",
                                        dev_info.bus,
                                        dev_info.devid,
                                        dev_info.function,
                                        dev_info.vendor_id,
                                        dev_info.device_id);
 
+#ifdef CONFIG_RTE_KNI_KMOD_ETHTOOL
+       struct pci_dev *found_pci = NULL;
+       struct net_device *lad_dev = NULL;
+       struct pci_dev *pci = NULL;
+
        pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL);
 
        /* Support Ethtool */
        while (pci) {
-               KNI_PRINT("pci_bus: %02x:%02x:%02x \n",
+               pr_debug("pci_bus: %02x:%02x:%02x\n",
                                        pci->bus->number,
                                        PCI_SLOT(pci->devfn),
                                        PCI_FUNC(pci->devfn));
@@ -510,28 +442,21 @@ kni_ioctl_create(struct net *net,
                        (PCI_SLOT(pci->devfn) == dev_info.devid) &&
                        (PCI_FUNC(pci->devfn) == dev_info.function)) {
                        found_pci = pci;
-                       switch (dev_info.device_id) {
-                       #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
-                       #include <rte_pci_dev_ids.h>
-                               ret = igb_kni_probe(found_pci, &lad_dev);
-                               break;
-                       #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) \
-                                                       case (dev):
-                       #include <rte_pci_dev_ids.h>
+
+                       if (pci_match_id(ixgbe_pci_tbl, found_pci))
                                ret = ixgbe_kni_probe(found_pci, &lad_dev);
-                               break;
-                       default:
+                       else if (pci_match_id(igb_pci_tbl, found_pci))
+                               ret = igb_kni_probe(found_pci, &lad_dev);
+                       else
                                ret = -1;
-                               break;
-                       }
 
-                       KNI_DBG("PCI found: pci=0x%p, lad_dev=0x%p\n",
+                       pr_debug("PCI found: pci=0x%p, lad_dev=0x%p\n",
                                                        pci, lad_dev);
                        if (ret == 0) {
                                kni->lad_dev = lad_dev;
                                kni_set_ethtool_ops(kni->net_dev);
                        } else {
-                               KNI_ERR("Device not supported by ethtool");
+                               pr_err("Device not supported by ethtool");
                                kni->lad_dev = NULL;
                        }
 
@@ -544,9 +469,10 @@ kni_ioctl_create(struct net *net,
        }
        if (pci)
                pci_dev_put(pci);
+#endif
 
        if (kni->lad_dev)
-               memcpy(net_dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
+               ether_addr_copy(net_dev->dev_addr, kni->lad_dev->dev_addr);
        else
                /*
                 * Generate random mac address. eth_random_addr() is the newer
@@ -556,9 +482,11 @@ kni_ioctl_create(struct net *net,
 
        ret = register_netdev(net_dev);
        if (ret) {
-               KNI_ERR("error %i registering device \"%s\"\n",
+               pr_err("error %i registering device \"%s\"\n",
                                        ret, dev_info.name);
+               kni->net_dev = NULL;
                kni_dev_remove(kni);
+               free_netdev(net_dev);
                return -ENODEV;
        }
 
@@ -566,22 +494,9 @@ kni_ioctl_create(struct net *net,
        kni_vhost_init(kni);
 #endif
 
-       /**
-        * Create a new kernel thread for multiple mode, set its core affinity,
-        * and finally wake it up.
-        */
-       if (multiple_kthread_on) {
-               kni->pthread = kthread_create(kni_thread_multiple,
-                                             (void *)kni,
-                                             "kni_%s", kni->name);
-               if (IS_ERR(kni->pthread)) {
-                       kni_dev_remove(kni);
-                       return -ECANCELED;
-               }
-               if (dev_info.force_bind)
-                       kthread_bind(kni->pthread, kni->core_id);
-               wake_up_process(kni->pthread);
-       }
+       ret = kni_run_thread(knet, kni, dev_info.force_bind);
+       if (ret != 0)
+               return ret;
 
        down_write(&knet->kni_list_lock);
        list_add(&kni->list, &knet->kni_list_head);
@@ -591,8 +506,8 @@ kni_ioctl_create(struct net *net,
 }
 
 static int
-kni_ioctl_release(struct net *net,
-               unsigned int ioctl_num, unsigned long ioctl_param)
+kni_ioctl_release(struct net *net, uint32_t ioctl_num,
+               unsigned long ioctl_param)
 {
        struct kni_net *knet = net_generic(net, kni_net_id);
        int ret = -EINVAL;
@@ -600,11 +515,11 @@ kni_ioctl_release(struct net *net,
        struct rte_kni_device_info dev_info;
 
        if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
-                       return -EINVAL;
+               return -EINVAL;
 
        ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
        if (ret) {
-               KNI_ERR("copy_from_user in kni_ioctl_release");
+               pr_err("copy_from_user in kni_ioctl_release");
                return -EIO;
        }
 
@@ -631,21 +546,19 @@ kni_ioctl_release(struct net *net,
                break;
        }
        up_write(&knet->kni_list_lock);
-       printk(KERN_INFO "KNI: %s release kni named %s\n",
+       pr_info("%s release kni named %s\n",
                (ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name);
 
        return ret;
 }
 
 static int
-kni_ioctl(struct inode *inode,
-       unsigned int ioctl_num,
-       unsigned long ioctl_param)
+kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param)
 {
        int ret = -EINVAL;
        struct net *net = current->nsproxy->net_ns;
 
-       KNI_DBG("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);
+       pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);
 
        /*
         * Switch according to the ioctl called
@@ -661,7 +574,7 @@ kni_ioctl(struct inode *inode,
                ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
                break;
        default:
-               KNI_DBG("IOCTL default\n");
+               pr_debug("IOCTL default\n");
                break;
        }
 
@@ -669,16 +582,99 @@ kni_ioctl(struct inode *inode,
 }
 
 static int
-kni_compat_ioctl(struct inode *inode,
-               unsigned int ioctl_num,
+kni_compat_ioctl(struct inode *inode, uint32_t ioctl_num,
                unsigned long ioctl_param)
 {
        /* 32 bits app on 64 bits OS to be supported later */
-       KNI_PRINT("Not implemented.\n");
+       pr_debug("Not implemented.\n");
 
        return -EINVAL;
 }
 
+static const struct file_operations kni_fops = {
+       .owner = THIS_MODULE,
+       .open = kni_open,
+       .release = kni_release,
+       .unlocked_ioctl = (void *)kni_ioctl,
+       .compat_ioctl = (void *)kni_compat_ioctl,
+};
+
+static struct miscdevice kni_misc = {
+       .minor = MISC_DYNAMIC_MINOR,
+       .name = KNI_DEVICE,
+       .fops = &kni_fops,
+};
+
+static int __init
+kni_parse_kthread_mode(void)
+{
+       if (!kthread_mode)
+               return 0;
+
+       if (strcmp(kthread_mode, "single") == 0)
+               return 0;
+       else if (strcmp(kthread_mode, "multiple") == 0)
+               multiple_kthread_on = 1;
+       else
+               return -1;
+
+       return 0;
+}
+
+static int __init
+kni_init(void)
+{
+       int rc;
+
+       if (kni_parse_kthread_mode() < 0) {
+               pr_err("Invalid parameter for kthread_mode\n");
+               return -EINVAL;
+       }
+
+       if (multiple_kthread_on == 0)
+               pr_debug("Single kernel thread for all KNI devices\n");
+       else
+               pr_debug("Multiple kernel thread mode enabled\n");
+
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+       rc = register_pernet_subsys(&kni_net_ops);
+#else
+       rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
+#endif
+       if (rc)
+               return -EPERM;
+
+       rc = misc_register(&kni_misc);
+       if (rc != 0) {
+               pr_err("Misc registration failed\n");
+               goto out;
+       }
+
+       /* Configure the lo mode according to the input parameter */
+       kni_net_config_lo_mode(lo_mode);
+
+       return 0;
+
+out:
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+       unregister_pernet_subsys(&kni_net_ops);
+#else
+       unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
+#endif
+       return rc;
+}
+
+static void __exit
+kni_exit(void)
+{
+       misc_deregister(&kni_misc);
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+       unregister_pernet_subsys(&kni_net_ops);
+#else
+       unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
+#endif
+}
+
 module_init(kni_init);
 module_exit(kni_exit);
 
index fc82193..4ac99cf 100644 (file)
 
 #define WD_TIMEOUT 5 /*jiffies */
 
-#define MBUF_BURST_SZ 32
-
 #define KNI_WAIT_RESPONSE_TIMEOUT 300 /* 3 seconds */
 
 /* typedef for rx function */
 typedef void (*kni_net_rx_t)(struct kni_dev *kni);
 
-static int kni_net_tx(struct sk_buff *skb, struct net_device *dev);
 static void kni_net_rx_normal(struct kni_dev *kni);
-static void kni_net_rx_lo_fifo(struct kni_dev *kni);
-static void kni_net_rx_lo_fifo_skb(struct kni_dev *kni);
-static int kni_net_process_request(struct kni_dev *kni,
-                       struct rte_kni_request *req);
 
 /* kni rx function pointer, with default to normal rx */
 static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal;
 
+/* physical address to kernel virtual address */
+static void *
+pa2kva(void *pa)
+{
+       return phys_to_virt((unsigned long)pa);
+}
+
+/* physical address to virtual address */
+static void *
+pa2va(void *pa, struct rte_kni_mbuf *m)
+{
+       void *va;
+
+       va = (void *)((unsigned long)pa +
+                       (unsigned long)m->buf_addr -
+                       (unsigned long)m->buf_physaddr);
+       return va;
+}
+
+/* mbuf data kernel virtual address from mbuf kernel virtual address */
+static void *
+kva2data_kva(struct rte_kni_mbuf *m)
+{
+       return phys_to_virt(m->buf_physaddr + m->data_off);
+}
+
+/* virtual address to physical address */
+static void *
+va2pa(void *va, struct rte_kni_mbuf *m)
+{
+       void *pa;
+
+       pa = (void *)((unsigned long)va -
+                       ((unsigned long)m->buf_addr -
+                        (unsigned long)m->buf_physaddr));
+       return pa;
+}
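
These four helpers replace the old mbuf_va/mbuf_kva arithmetic: FIFOs now carry mbuf physical addresses, and the kernel converts them to its own mapping or back to the DPDK process's virtual address as needed. An illustrative sketch (not part of the patch) of how they compose for an address taken off a KNI FIFO:

    /* Sketch only: pa is an mbuf physical address dequeued from a FIFO. */
    static void kni_addr_example(void *pa)
    {
            struct rte_kni_mbuf *kva = pa2kva(pa);  /* kernel view of the mbuf   */
            void *data = kva2data_kva(kva);         /* kernel view of its data   */
            void *va   = pa2va(pa, kva);            /* address the DPDK app sees */

            /* pa2va() and va2pa() are inverses for the same mbuf, so a chain
             * can be walked with: kva = pa2kva(va2pa(kva->next, kva)); */
            (void)data; (void)va;
    }
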
+
+/*
+ * It can be called to process the request.
+ */
+static int
+kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
+{
+       int ret = -1;
+       void *resp_va;
+       uint32_t num;
+       int ret_val;
+
+       if (!kni || !req) {
+               pr_err("No kni instance or request\n");
+               return -EINVAL;
+       }
+
+       mutex_lock(&kni->sync_lock);
+
+       /* Construct data */
+       memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request));
+       num = kni_fifo_put(kni->req_q, &kni->sync_va, 1);
+       if (num < 1) {
+               pr_err("Cannot send to req_q\n");
+               ret = -EBUSY;
+               goto fail;
+       }
+
+       ret_val = wait_event_interruptible_timeout(kni->wq,
+                       kni_fifo_count(kni->resp_q), 3 * HZ);
+       if (signal_pending(current) || ret_val <= 0) {
+               ret = -ETIME;
+               goto fail;
+       }
+       num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1);
+       if (num != 1 || resp_va != kni->sync_va) {
+               /* This should never happen */
+               pr_err("No data in resp_q\n");
+               ret = -ENODATA;
+               goto fail;
+       }
+
+       memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request));
+       ret = 0;
+
+fail:
+       mutex_unlock(&kni->sync_lock);
+       return ret;
+}
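
kni_net_process_request() implements the synchronous kernel-to-userspace handshake: the request is copied into the shared sync buffer, its pointer is posted on req_q, and the caller sleeps up to 3 seconds until the DPDK process echoes the same pointer back on resp_q with the response filled in. A caller sketch (it mirrors kni_net_change_mtu() further down in this patch; the new_mtu and result field names are assumed from rte_kni_common.h):

    struct rte_kni_request req;
    int ret;

    memset(&req, 0, sizeof(req));
    req.req_id = RTE_KNI_REQ_CHANGE_MTU;
    req.new_mtu = new_mtu;
    ret = kni_net_process_request(kni, &req);  /* blocks up to 3 * HZ */
    if (ret == 0)
            ret = req.result;                  /* filled in by the DPDK process */
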
+
 /*
  * Open and close
  */
@@ -115,19 +195,113 @@ kni_net_config(struct net_device *dev, struct ifmap *map)
        return 0;
 }
 
+/*
+ * Transmit a packet (called by the kernel)
+ */
+#ifdef RTE_KNI_VHOST
+static int
+kni_net_tx(struct sk_buff *skb, struct net_device *dev)
+{
+       struct kni_dev *kni = netdev_priv(dev);
+
+       dev_kfree_skb(skb);
+       kni->stats.tx_dropped++;
+
+       return NETDEV_TX_OK;
+}
+#else
+static int
+kni_net_tx(struct sk_buff *skb, struct net_device *dev)
+{
+       int len = 0;
+       uint32_t ret;
+       struct kni_dev *kni = netdev_priv(dev);
+       struct rte_kni_mbuf *pkt_kva = NULL;
+       void *pkt_pa = NULL;
+       void *pkt_va = NULL;
+
+       /* save the timestamp */
+#ifdef HAVE_TRANS_START_HELPER
+       netif_trans_update(dev);
+#else
+       dev->trans_start = jiffies;
+#endif
+
+       /* Check if the length of skb is less than mbuf size */
+       if (skb->len > kni->mbuf_size)
+               goto drop;
+
+       /**
+        * Check if it has at least one free entry in tx_q and
+        * one entry in alloc_q.
+        */
+       if (kni_fifo_free_count(kni->tx_q) == 0 ||
+                       kni_fifo_count(kni->alloc_q) == 0) {
+               /**
+                * If no free entry in tx_q or no entry in alloc_q,
+                * drops skb and goes out.
+                */
+               goto drop;
+       }
+
+       /* dequeue a mbuf from alloc_q */
+       ret = kni_fifo_get(kni->alloc_q, &pkt_pa, 1);
+       if (likely(ret == 1)) {
+               void *data_kva;
+
+               pkt_kva = pa2kva(pkt_pa);
+               data_kva = kva2data_kva(pkt_kva);
+               pkt_va = pa2va(pkt_pa, pkt_kva);
+
+               len = skb->len;
+               memcpy(data_kva, skb->data, len);
+               if (unlikely(len < ETH_ZLEN)) {
+                       memset(data_kva + len, 0, ETH_ZLEN - len);
+                       len = ETH_ZLEN;
+               }
+               pkt_kva->pkt_len = len;
+               pkt_kva->data_len = len;
+
+               /* enqueue mbuf into tx_q */
+               ret = kni_fifo_put(kni->tx_q, &pkt_va, 1);
+               if (unlikely(ret != 1)) {
+                       /* Failing should not happen */
+                       pr_err("Fail to enqueue mbuf into tx_q\n");
+                       goto drop;
+               }
+       } else {
+               /* Failing should not happen */
+               pr_err("Fail to dequeue mbuf from alloc_q\n");
+               goto drop;
+       }
+
+       /* Free skb and update statistics */
+       dev_kfree_skb(skb);
+       kni->stats.tx_bytes += len;
+       kni->stats.tx_packets++;
+
+       return NETDEV_TX_OK;
+
+drop:
+       /* Free skb and update statistics */
+       dev_kfree_skb(skb);
+       kni->stats.tx_dropped++;
+
+       return NETDEV_TX_OK;
+}
+#endif
+
 /*
  * RX: normal working mode
  */
 static void
 kni_net_rx_normal(struct kni_dev *kni)
 {
-       unsigned ret;
+       uint32_t ret;
        uint32_t len;
-       unsigned i, num_rx, num_fq;
+       uint32_t i, num_rx, num_fq;
        struct rte_kni_mbuf *kva;
-       struct rte_kni_mbuf *va[MBUF_BURST_SZ];
-       void * data_kva;
-
+       void *data_kva;
        struct sk_buff *skb;
        struct net_device *dev = kni->net_dev;
 
@@ -139,24 +313,22 @@ kni_net_rx_normal(struct kni_dev *kni)
        }
 
        /* Calculate the number of entries to dequeue from rx_q */
-       num_rx = min(num_fq, (unsigned)MBUF_BURST_SZ);
+       num_rx = min_t(uint32_t, num_fq, MBUF_BURST_SZ);
 
        /* Burst dequeue from rx_q */
-       num_rx = kni_fifo_get(kni->rx_q, (void **)va, num_rx);
+       num_rx = kni_fifo_get(kni->rx_q, kni->pa, num_rx);
        if (num_rx == 0)
                return;
 
        /* Transfer received packets to netif */
        for (i = 0; i < num_rx; i++) {
-               kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva;
+               kva = pa2kva(kni->pa[i]);
                len = kva->pkt_len;
-
-               data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va
-                               + kni->mbuf_kva;
+               data_kva = kva2data_kva(kva);
+               kni->va[i] = pa2va(kni->pa[i], kva);
 
                skb = dev_alloc_skb(len + 2);
                if (!skb) {
-                       KNI_ERR("Out of mem, dropping pkts\n");
                        /* Update statistics */
                        kni->stats.rx_dropped++;
                        continue;
@@ -178,9 +350,8 @@ kni_net_rx_normal(struct kni_dev *kni)
                                if (!kva->next)
                                        break;
 
-                               kva = kva->next - kni->mbuf_va + kni->mbuf_kva;
-                               data_kva = kva->buf_addr + kva->data_off
-                                       - kni->mbuf_va + kni->mbuf_kva;
+                               kva = pa2kva(va2pa(kva->next, kva));
+                               data_kva = kva2data_kva(kva);
                        }
                }
 
@@ -197,10 +368,10 @@ kni_net_rx_normal(struct kni_dev *kni)
        }
 
        /* Burst enqueue mbufs into free_q */
-       ret = kni_fifo_put(kni->free_q, (void **)va, num_rx);
+       ret = kni_fifo_put(kni->free_q, kni->va, num_rx);
        if (ret != num_rx)
                /* Failing should not happen */
-               KNI_ERR("Fail to enqueue entries into free_q\n");
+               pr_err("Fail to enqueue entries into free_q\n");
 }
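
For reference, the per-mbuf handoff that the elided hunk context in kni_net_rx_normal() performs can be sketched in one place (single-segment case, error handling omitted; a sketch of the existing flow, not new behaviour):

    skb = dev_alloc_skb(len + 2);
    skb_reserve(skb, 2);                        /* align IP header on 16B   */
    memcpy(skb_put(skb, len), data_kva, len);   /* copy mbuf data into skb  */
    skb->dev = dev;
    skb->protocol = eth_type_trans(skb, dev);
    skb->ip_summed = CHECKSUM_UNNECESSARY;
    netif_rx(skb);                              /* hand the packet to the stack */
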
 
 /*
@@ -209,15 +380,12 @@ kni_net_rx_normal(struct kni_dev *kni)
 static void
 kni_net_rx_lo_fifo(struct kni_dev *kni)
 {
-       unsigned ret;
+       uint32_t ret;
        uint32_t len;
-       unsigned i, num, num_rq, num_tq, num_aq, num_fq;
+       uint32_t i, num, num_rq, num_tq, num_aq, num_fq;
        struct rte_kni_mbuf *kva;
-       struct rte_kni_mbuf *va[MBUF_BURST_SZ];
-       void * data_kva;
-
+       void *data_kva;
        struct rte_kni_mbuf *alloc_kva;
-       struct rte_kni_mbuf *alloc_va[MBUF_BURST_SZ];
        void *alloc_data_kva;
 
        /* Get the number of entries in rx_q */
@@ -236,33 +404,32 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
        num = min(num_rq, num_tq);
        num = min(num, num_aq);
        num = min(num, num_fq);
-       num = min(num, (unsigned)MBUF_BURST_SZ);
+       num = min_t(uint32_t, num, MBUF_BURST_SZ);
 
        /* Return if no entry to dequeue from rx_q */
        if (num == 0)
                return;
 
        /* Burst dequeue from rx_q */
-       ret = kni_fifo_get(kni->rx_q, (void **)va, num);
+       ret = kni_fifo_get(kni->rx_q, kni->pa, num);
        if (ret == 0)
                return; /* Failing should not happen */
 
        /* Dequeue entries from alloc_q */
-       ret = kni_fifo_get(kni->alloc_q, (void **)alloc_va, num);
+       ret = kni_fifo_get(kni->alloc_q, kni->alloc_pa, num);
        if (ret) {
                num = ret;
                /* Copy mbufs */
                for (i = 0; i < num; i++) {
-                       kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva;
+                       kva = pa2kva(kni->pa[i]);
                        len = kva->pkt_len;
-                       data_kva = kva->buf_addr + kva->data_off -
-                                       kni->mbuf_va + kni->mbuf_kva;
-
-                       alloc_kva = (void *)alloc_va[i] - kni->mbuf_va +
-                                                       kni->mbuf_kva;
-                       alloc_data_kva = alloc_kva->buf_addr +
-                                       alloc_kva->data_off - kni->mbuf_va +
-                                                       kni->mbuf_kva;
+                       data_kva = kva2data_kva(kva);
+                       kni->va[i] = pa2va(kni->pa[i], kva);
+
+                       alloc_kva = pa2kva(kni->alloc_pa[i]);
+                       alloc_data_kva = kva2data_kva(alloc_kva);
+                       kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva);
+
                        memcpy(alloc_data_kva, data_kva, len);
                        alloc_kva->pkt_len = len;
                        alloc_kva->data_len = len;
@@ -272,17 +439,17 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
                }
 
                /* Burst enqueue mbufs into tx_q */
-               ret = kni_fifo_put(kni->tx_q, (void **)alloc_va, num);
+               ret = kni_fifo_put(kni->tx_q, kni->alloc_va, num);
                if (ret != num)
                        /* Failing should not happen */
-                       KNI_ERR("Fail to enqueue mbufs into tx_q\n");
+                       pr_err("Fail to enqueue mbufs into tx_q\n");
        }
 
        /* Burst enqueue mbufs into free_q */
-       ret = kni_fifo_put(kni->free_q, (void **)va, num);
+       ret = kni_fifo_put(kni->free_q, kni->va, num);
        if (ret != num)
                /* Failing should not happen */
-               KNI_ERR("Fail to enqueue mbufs into free_q\n");
+               pr_err("Fail to enqueue mbufs into free_q\n");
 
        /**
         * Update statistic, and enqueue/dequeue failure is impossible,
@@ -298,13 +465,11 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
 static void
 kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
 {
-       unsigned ret;
+       uint32_t ret;
        uint32_t len;
-       unsigned i, num_rq, num_fq, num;
+       uint32_t i, num_rq, num_fq, num;
        struct rte_kni_mbuf *kva;
-       struct rte_kni_mbuf *va[MBUF_BURST_SZ];
-       void * data_kva;
-
+       void *data_kva;
        struct sk_buff *skb;
        struct net_device *dev = kni->net_dev;
 
@@ -316,28 +481,26 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
 
        /* Calculate the number of entries to dequeue from rx_q */
        num = min(num_rq, num_fq);
-       num = min(num, (unsigned)MBUF_BURST_SZ);
+       num = min_t(uint32_t, num, MBUF_BURST_SZ);
 
        /* Return if no entry to dequeue from rx_q */
        if (num == 0)
                return;
 
        /* Burst dequeue mbufs from rx_q */
-       ret = kni_fifo_get(kni->rx_q, (void **)va, num);
+       ret = kni_fifo_get(kni->rx_q, kni->pa, num);
        if (ret == 0)
                return;
 
        /* Copy mbufs to sk buffer and then call tx interface */
        for (i = 0; i < num; i++) {
-               kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva;
+               kva = pa2kva(kni->pa[i]);
                len = kva->pkt_len;
-               data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va +
-                               kni->mbuf_kva;
+               data_kva = kva2data_kva(kva);
+               kni->va[i] = pa2va(kni->pa[i], kva);
 
                skb = dev_alloc_skb(len + 2);
-               if (skb == NULL)
-                       KNI_ERR("Out of mem, dropping pkts\n");
-               else {
+               if (skb) {
                        /* Align IP on 16B boundary */
                        skb_reserve(skb, 2);
                        memcpy(skb_put(skb, len), data_kva, len);
@@ -349,7 +512,6 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
                /* Simulate real usage, allocate/copy skb twice */
                skb = dev_alloc_skb(len + 2);
                if (skb == NULL) {
-                       KNI_ERR("Out of mem, dropping pkts\n");
                        kni->stats.rx_dropped++;
                        continue;
                }
@@ -370,9 +532,8 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
                                if (!kva->next)
                                        break;
 
-                               kva = kva->next - kni->mbuf_va + kni->mbuf_kva;
-                               data_kva = kva->buf_addr + kva->data_off
-                                       - kni->mbuf_va + kni->mbuf_kva;
+                               kva = pa2kva(va2pa(kva->next, kva));
+                               data_kva = kva2data_kva(kva);
                        }
                }
 
@@ -387,10 +548,10 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
        }
 
        /* enqueue all the mbufs from rx_q into free_q */
-       ret = kni_fifo_put(kni->free_q, (void **)&va, num);
+       ret = kni_fifo_put(kni->free_q, kni->va, num);
        if (ret != num)
                /* Failing should not happen */
-               KNI_ERR("Fail to enqueue mbufs into free_q\n");
+               pr_err("Fail to enqueue mbufs into free_q\n");
 }
 
 /* rx interface */
@@ -404,115 +565,19 @@ kni_net_rx(struct kni_dev *kni)
        (*kni_net_rx_func)(kni);
 }
 
-/*
- * Transmit a packet (called by the kernel)
- */
-#ifdef RTE_KNI_VHOST
-static int
-kni_net_tx(struct sk_buff *skb, struct net_device *dev)
-{
-       struct kni_dev *kni = netdev_priv(dev);
-
-       dev_kfree_skb(skb);
-       kni->stats.tx_dropped++;
-
-       return NETDEV_TX_OK;
-}
-#else
-static int
-kni_net_tx(struct sk_buff *skb, struct net_device *dev)
-{
-       int len = 0;
-       unsigned ret;
-       struct kni_dev *kni = netdev_priv(dev);
-       struct rte_kni_mbuf *pkt_kva = NULL;
-       struct rte_kni_mbuf *pkt_va = NULL;
-
-       /* save the timestamp */
-#ifdef HAVE_TRANS_START_HELPER
-       netif_trans_update(dev);
-#else
-       dev->trans_start = jiffies;
-#endif
-
-       /* Check if the length of skb is less than mbuf size */
-       if (skb->len > kni->mbuf_size)
-               goto drop;
-
-       /**
-        * Check if it has at least one free entry in tx_q and
-        * one entry in alloc_q.
-        */
-       if (kni_fifo_free_count(kni->tx_q) == 0 ||
-                       kni_fifo_count(kni->alloc_q) == 0) {
-               /**
-                * If no free entry in tx_q or no entry in alloc_q,
-                * drops skb and goes out.
-                */
-               goto drop;
-       }
-
-       /* dequeue a mbuf from alloc_q */
-       ret = kni_fifo_get(kni->alloc_q, (void **)&pkt_va, 1);
-       if (likely(ret == 1)) {
-               void *data_kva;
-
-               pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva;
-               data_kva = pkt_kva->buf_addr + pkt_kva->data_off - kni->mbuf_va
-                               + kni->mbuf_kva;
-
-               len = skb->len;
-               memcpy(data_kva, skb->data, len);
-               if (unlikely(len < ETH_ZLEN)) {
-                       memset(data_kva + len, 0, ETH_ZLEN - len);
-                       len = ETH_ZLEN;
-               }
-               pkt_kva->pkt_len = len;
-               pkt_kva->data_len = len;
-
-               /* enqueue mbuf into tx_q */
-               ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1);
-               if (unlikely(ret != 1)) {
-                       /* Failing should not happen */
-                       KNI_ERR("Fail to enqueue mbuf into tx_q\n");
-                       goto drop;
-               }
-       } else {
-               /* Failing should not happen */
-               KNI_ERR("Fail to dequeue mbuf from alloc_q\n");
-               goto drop;
-       }
-
-       /* Free skb and update statistics */
-       dev_kfree_skb(skb);
-       kni->stats.tx_bytes += len;
-       kni->stats.tx_packets++;
-
-       return NETDEV_TX_OK;
-
-drop:
-       /* Free skb and update statistics */
-       dev_kfree_skb(skb);
-       kni->stats.tx_dropped++;
-
-       return NETDEV_TX_OK;
-}
-#endif
-
 /*
  * Deal with a transmit timeout.
  */
 static void
-kni_net_tx_timeout (struct net_device *dev)
+kni_net_tx_timeout(struct net_device *dev)
 {
        struct kni_dev *kni = netdev_priv(dev);
 
-       KNI_DBG("Transmit timeout at %ld, latency %ld\n", jiffies,
-                       jiffies - dev->trans_start);
+       pr_debug("Transmit timeout at %ld, latency %ld\n", jiffies,
+                       jiffies - dev_trans_start(dev));
 
        kni->stats.tx_errors++;
        netif_wake_queue(dev);
-       return;
 }
 
 /*
@@ -521,8 +586,8 @@ kni_net_tx_timeout (struct net_device *dev)
 static int
 kni_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
-       KNI_DBG("kni_net_ioctl %d\n",
-               ((struct kni_dev *)netdev_priv(dev))->group_id);
+       pr_debug("kni_net_ioctl group:%d cmd:%d\n",
+               ((struct kni_dev *)netdev_priv(dev))->group_id, cmd);
 
        return 0;
 }
@@ -539,7 +604,7 @@ kni_net_change_mtu(struct net_device *dev, int new_mtu)
        struct rte_kni_request req;
        struct kni_dev *kni = netdev_priv(dev);
 
-       KNI_DBG("kni_net_change_mtu new mtu %d to be set\n", new_mtu);
+       pr_debug("kni_net_change_mtu new mtu %d to be set\n", new_mtu);
 
        memset(&req, 0, sizeof(req));
        req.req_id = RTE_KNI_REQ_CHANGE_MTU;
@@ -561,55 +626,6 @@ kni_net_poll_resp(struct kni_dev *kni)
                wake_up_interruptible(&kni->wq);
 }
 
-/*
- * It can be called to process the request.
- */
-static int
-kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
-{
-       int ret = -1;
-       void *resp_va;
-       unsigned num;
-       int ret_val;
-
-       if (!kni || !req) {
-               KNI_ERR("No kni instance or request\n");
-               return -EINVAL;
-       }
-
-       mutex_lock(&kni->sync_lock);
-
-       /* Construct data */
-       memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request));
-       num = kni_fifo_put(kni->req_q, &kni->sync_va, 1);
-       if (num < 1) {
-               KNI_ERR("Cannot send to req_q\n");
-               ret = -EBUSY;
-               goto fail;
-       }
-
-       ret_val = wait_event_interruptible_timeout(kni->wq,
-                       kni_fifo_count(kni->resp_q), 3 * HZ);
-       if (signal_pending(current) || ret_val <= 0) {
-               ret = -ETIME;
-               goto fail;
-       }
-       num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1);
-       if (num != 1 || resp_va != kni->sync_va) {
-               /* This should never happen */
-               KNI_ERR("No data in resp_q\n");
-               ret = -ENODATA;
-               goto fail;
-       }
-
-       memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request));
-       ret = 0;
-
-fail:
-       mutex_unlock(&kni->sync_lock);
-       return ret;
-}
-
 /*
  * Return statistics to the caller
  */
@@ -617,6 +633,7 @@ static struct net_device_stats *
 kni_net_stats(struct net_device *dev)
 {
        struct kni_dev *kni = netdev_priv(dev);
+
        return &kni->stats;
 }
 
@@ -626,7 +643,7 @@ kni_net_stats(struct net_device *dev)
 static int
 kni_net_header(struct sk_buff *skb, struct net_device *dev,
                unsigned short type, const void *daddr,
-               const void *saddr, unsigned int len)
+               const void *saddr, uint32_t len)
 {
        struct ethhdr *eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
 
@@ -637,7 +654,6 @@ kni_net_header(struct sk_buff *skb, struct net_device *dev,
        return dev->hard_header_len;
 }
 
-
 /*
  * Re-fill the eth header
  */
@@ -662,9 +678,11 @@ kni_net_rebuild_header(struct sk_buff *skb)
  *
  * Returns 0 on success, negative on failure
  **/
-static int kni_net_set_mac(struct net_device *netdev, void *p)
+static int
+kni_net_set_mac(struct net_device *netdev, void *p)
 {
        struct sockaddr *addr = p;
+
        if (!is_valid_ether_addr((unsigned char *)(addr->sa_data)))
                return -EADDRNOTAVAIL;
        memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
@@ -672,7 +690,8 @@ static int kni_net_set_mac(struct net_device *netdev, void *p)
 }
 
 #ifdef HAVE_CHANGE_CARRIER_CB
-static int kni_net_change_carrier(struct net_device *dev, bool new_carrier)
+static int
+kni_net_change_carrier(struct net_device *dev, bool new_carrier)
 {
        if (new_carrier)
                netif_carrier_on(dev);
@@ -711,8 +730,6 @@ kni_net_init(struct net_device *dev)
 {
        struct kni_dev *kni = netdev_priv(dev);
 
-       KNI_DBG("kni_net_init\n");
-
        init_waitqueue_head(&kni->wq);
        mutex_init(&kni->sync_lock);
 
@@ -726,18 +743,18 @@ void
 kni_net_config_lo_mode(char *lo_str)
 {
        if (!lo_str) {
-               KNI_PRINT("loopback disabled");
+               pr_debug("loopback disabled");
                return;
        }
 
        if (!strcmp(lo_str, "lo_mode_none"))
-               KNI_PRINT("loopback disabled");
+               pr_debug("loopback disabled");
        else if (!strcmp(lo_str, "lo_mode_fifo")) {
-               KNI_PRINT("loopback mode=lo_mode_fifo enabled");
+               pr_debug("loopback mode=lo_mode_fifo enabled");
                kni_net_rx_func = kni_net_rx_lo_fifo;
        } else if (!strcmp(lo_str, "lo_mode_fifo_skb")) {
-               KNI_PRINT("loopback mode=lo_mode_fifo_skb enabled");
+               pr_debug("loopback mode=lo_mode_fifo_skb enabled");
                kni_net_rx_func = kni_net_rx_lo_fifo_skb;
        } else
-               KNI_PRINT("Incognizant parameter, loopback disabled");
+               pr_debug("Incognizant parameter, loopback disabled");
 }
index a3ca849..f54c34b 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/sched.h>
 #include <linux/if_tun.h>
 #include <linux/version.h>
+#include <linux/file.h>
 
 #include "compat.h"
 #include "kni_dev.h"
 
 #define RX_BURST_SZ 4
 
-extern void put_unused_fd(unsigned int fd);
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0)
-extern struct file*
-sock_alloc_file(struct socket *sock,
-               int flags, const char *dname);
-
-extern int get_unused_fd_flags(unsigned flags);
-
-extern void fd_install(unsigned int fd, struct file *file);
-
+#ifdef HAVE_STATIC_SOCK_MAP_FD
 static int kni_sock_map_fd(struct socket *sock)
 {
        struct file *file;
        int fd = get_unused_fd_flags(0);
+
        if (fd < 0)
                return fd;
 
@@ -65,8 +57,6 @@ static int kni_sock_map_fd(struct socket *sock)
        fd_install(fd, file);
        return fd;
 }
-#else
-#define kni_sock_map_fd(s)             sock_map_fd(s, 0)
 #endif
 
 static struct proto kni_raw_proto = {
@@ -77,13 +67,13 @@ static struct proto kni_raw_proto = {
 
 static inline int
 kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m,
-                unsigned offset, unsigned len)
+                uint32_t offset, uint32_t len)
 {
        struct rte_kni_mbuf *pkt_kva = NULL;
        struct rte_kni_mbuf *pkt_va = NULL;
        int ret;
 
-       KNI_DBG_TX("tx offset=%d, len=%d, iovlen=%d\n",
+       pr_debug("tx offset=%d, len=%d, iovlen=%d\n",
 #ifdef HAVE_IOV_ITER_MSGHDR
                   offset, len, (int)m->msg_iter.iov->iov_len);
 #else
@@ -110,7 +100,7 @@ kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m,
 
                pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva;
                data_kva = pkt_kva->buf_addr + pkt_kva->data_off
-                          - kni->mbuf_va + kni->mbuf_kva;
+                       - kni->mbuf_va + kni->mbuf_kva;
 
 #ifdef HAVE_IOV_ITER_MSGHDR
                copy_from_iter(data_kva, len, &m->msg_iter);
@@ -129,12 +119,12 @@ kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m,
                ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1);
                if (unlikely(ret != 1)) {
                        /* Failing should not happen */
-                       KNI_ERR("Fail to enqueue mbuf into tx_q\n");
+                       pr_err("Fail to enqueue mbuf into tx_q\n");
                        goto drop;
                }
        } else {
                /* Failing should not happen */
-               KNI_ERR("Fail to dequeue mbuf from alloc_q\n");
+               pr_err("Fail to dequeue mbuf from alloc_q\n");
                goto drop;
        }
 
@@ -153,12 +143,12 @@ drop:
 
 static inline int
 kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m,
-                unsigned offset, unsigned len)
+                uint32_t offset, uint32_t len)
 {
        uint32_t pkt_len;
        struct rte_kni_mbuf *kva;
        struct rte_kni_mbuf *va;
-       void * data_kva;
+       void *data_kva;
        struct sk_buff *skb;
        struct kni_vhost_queue *q = kni->vhost_queue;
 
@@ -173,19 +163,19 @@ kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m,
        if (unlikely(skb == NULL))
                return 0;
 
-       kva = (struct rte_kni_mbuf*)skb->data;
+       kva = (struct rte_kni_mbuf *)skb->data;
 
        /* free skb to cache */
        skb->data = NULL;
-       if (unlikely(1 != kni_fifo_put(q->fifo, (void **)&skb, 1)))
+       if (unlikely(kni_fifo_put(q->fifo, (void **)&skb, 1) != 1))
                /* Failing should not happen */
-               KNI_ERR("Fail to enqueue entries into rx cache fifo\n");
+               pr_err("Fail to enqueue entries into rx cache fifo\n");
 
        pkt_len = kva->data_len;
        if (unlikely(pkt_len > len))
                goto drop;
 
-       KNI_DBG_RX("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n",
+       pr_debug("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n",
 #ifdef HAVE_IOV_ITER_MSGHDR
                   offset, len, pkt_len, (int)m->msg_iter.iov->iov_len);
 #else
@@ -205,12 +195,12 @@ kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m,
        kni->stats.rx_packets++;
 
        /* enqueue mbufs into free_q */
-       va = (void*)kva - kni->mbuf_kva + kni->mbuf_va;
-       if (unlikely(1 != kni_fifo_put(kni->free_q, (void **)&va, 1)))
+       va = (void *)kva - kni->mbuf_kva + kni->mbuf_va;
+       if (unlikely(kni_fifo_put(kni->free_q, (void **)&va, 1) != 1))
                /* Failing should not happen */
-               KNI_ERR("Fail to enqueue entries into free_q\n");
+               pr_err("Fail to enqueue entries into free_q\n");
 
-       KNI_DBG_RX("receive done %d\n", pkt_len);
+       pr_debug("receive done %d\n", pkt_len);
 
        return pkt_len;
 
@@ -221,29 +211,25 @@ drop:
        return 0;
 }
 
-static unsigned int
-kni_sock_poll(struct file *file, struct socket *sock, poll_table * wait)
+static uint32_t
+kni_sock_poll(struct file *file, struct socket *sock, poll_table *wait)
 {
        struct kni_vhost_queue *q =
                container_of(sock->sk, struct kni_vhost_queue, sk);
        struct kni_dev *kni;
-       unsigned int mask = 0;
+       uint32_t mask = 0;
 
        if (unlikely(q == NULL || q->kni == NULL))
                return POLLERR;
 
        kni = q->kni;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
-       KNI_DBG("start kni_poll on group %d, wq 0x%16llx\n",
+#ifdef HAVE_SOCKET_WQ
+       pr_debug("start kni_poll on group %d, wq 0x%16llx\n",
                  kni->group_id, (uint64_t)sock->wq);
-#else
-       KNI_DBG("start kni_poll on group %d, wait at 0x%16llx\n",
-                 kni->group_id, (uint64_t)&sock->wait);
-#endif
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
        poll_wait(file, &sock->wq->wait, wait);
 #else
+       pr_debug("start kni_poll on group %d, wait at 0x%16llx\n",
+                 kni->group_id, (uint64_t)&sock->wait);
        poll_wait(file, &sock->wait, wait);
 #endif
 
@@ -252,11 +238,12 @@ kni_sock_poll(struct file *file, struct socket *sock, poll_table * wait)
 
        if (sock_writeable(&q->sk) ||
 #ifdef SOCKWQ_ASYNC_NOSPACE
-           (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) &&
+               (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) &&
+                       sock_writeable(&q->sk)))
 #else
-           (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) &&
+               (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) &&
+                       sock_writeable(&q->sk)))
 #endif
-            sock_writeable(&q->sk)))
                mask |= POLLOUT | POLLWRNORM;
 
        return mask;
@@ -269,7 +256,7 @@ kni_vhost_enqueue(struct kni_dev *kni, struct kni_vhost_queue *q,
        struct rte_kni_mbuf *kva;
 
        kva = (void *)(va) - kni->mbuf_va + kni->mbuf_kva;
-       (skb)->data = (unsigned char*)kva;
+       (skb)->data = (unsigned char *)kva;
        (skb)->len = kva->data_len;
        skb_queue_tail(&q->sk.sk_receive_queue, skb);
 }
@@ -279,6 +266,7 @@ kni_vhost_enqueue_burst(struct kni_dev *kni, struct kni_vhost_queue *q,
          struct sk_buff **skb, struct rte_kni_mbuf **va)
 {
        int i;
+
        for (i = 0; i < RX_BURST_SZ; skb++, va++, i++)
                kni_vhost_enqueue(kni, q, *skb, *va);
 }
@@ -287,9 +275,9 @@ int
 kni_chk_vhost_rx(struct kni_dev *kni)
 {
        struct kni_vhost_queue *q = kni->vhost_queue;
-       unsigned nb_in, nb_mbuf, nb_skb;
-       const unsigned BURST_MASK = RX_BURST_SZ - 1;
-       unsigned nb_burst, nb_backlog, i;
+       uint32_t nb_in, nb_mbuf, nb_skb;
+       const uint32_t BURST_MASK = RX_BURST_SZ - 1;
+       uint32_t nb_burst, nb_backlog, i;
        struct sk_buff *skb[RX_BURST_SZ];
        struct rte_kni_mbuf *va[RX_BURST_SZ];
 
@@ -305,20 +293,18 @@ kni_chk_vhost_rx(struct kni_dev *kni)
        nb_mbuf = kni_fifo_count(kni->rx_q);
 
        nb_in = min(nb_mbuf, nb_skb);
-       nb_in = min(nb_in, (unsigned)RX_BURST_SZ);
+       nb_in = min_t(uint32_t, nb_in, RX_BURST_SZ);
        nb_burst   = (nb_in & ~BURST_MASK);
        nb_backlog = (nb_in & BURST_MASK);
 
        /* enqueue skb_queue per BURST_SIZE bulk */
-       if (0 != nb_burst) {
-               if (unlikely(RX_BURST_SZ != kni_fifo_get(
-                                    kni->rx_q, (void **)&va,
-                                    RX_BURST_SZ)))
+       if (nb_burst != 0) {
+               if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, RX_BURST_SZ)
+                               != RX_BURST_SZ))
                        goto except;
 
-               if (unlikely(RX_BURST_SZ != kni_fifo_get(
-                                    q->fifo, (void **)&skb,
-                                    RX_BURST_SZ)))
+               if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, RX_BURST_SZ)
+                               != RX_BURST_SZ))
                        goto except;
 
                kni_vhost_enqueue_burst(kni, q, skb, va);
@@ -326,12 +312,10 @@ kni_chk_vhost_rx(struct kni_dev *kni)
 
        /* all leftover, do one by one */
        for (i = 0; i < nb_backlog; ++i) {
-               if (unlikely(1 != kni_fifo_get(
-                                    kni->rx_q,(void **)&va, 1)))
+               if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, 1) != 1))
                        goto except;
 
-               if (unlikely(1 != kni_fifo_get(
-                                    q->fifo, (void **)&skb, 1)))
+               if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, 1) != 1))
                        goto except;
 
                kni_vhost_enqueue(kni, q, *skb, *va);
@@ -342,7 +326,7 @@ kni_chk_vhost_rx(struct kni_dev *kni)
            ((nb_mbuf < RX_BURST_SZ) && (nb_mbuf != 0))) {
                wake_up_interruptible_poll(sk_sleep(&q->sk),
                                   POLLIN | POLLRDNORM | POLLRDBAND);
-               KNI_DBG_RX("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n",
+               pr_debug("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n",
                           nb_mbuf, nb_skb, nb_in);
        }
 
@@ -350,7 +334,7 @@ kni_chk_vhost_rx(struct kni_dev *kni)
 
 except:
        /* Failing should not happen */
-       KNI_ERR("Fail to enqueue fifo, it shouldn't happen \n");
+       pr_err("Fail to enqueue fifo, it shouldn't happen\n");
        BUG_ON(1);
 
        return 0;
@@ -373,7 +357,7 @@ kni_sock_sndmsg(struct socket *sock,
        if (unlikely(q == NULL || q->kni == NULL))
                return 0;
 
-       KNI_DBG_TX("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n",
+       pr_debug("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n",
 #ifdef HAVE_IOV_ITER_MSGHDR
                   len, q->flags, (int)m->msg_iter.iov->iov_len);
 #else
@@ -420,13 +404,14 @@ kni_sock_rcvmsg(struct socket *sock,
 #ifdef RTE_KNI_VHOST_VNET_HDR_EN
        if (likely(q->flags & IFF_VNET_HDR)) {
                vnet_hdr_len = q->vnet_hdr_sz;
-               if ((len -= vnet_hdr_len) < 0)
+               len -= vnet_hdr_len;
+               if (len < 0)
                        return -EINVAL;
        }
 #endif
 
-       if (unlikely(0 == (pkt_len = kni_vhost_net_rx(q->kni,
-               m, vnet_hdr_len, len))))
+       pkt_len = kni_vhost_net_rx(q->kni, m, vnet_hdr_len, len);
+       if (unlikely(pkt_len == 0))
                return 0;
 
 #ifdef RTE_KNI_VHOST_VNET_HDR_EN
@@ -440,7 +425,7 @@ kni_sock_rcvmsg(struct socket *sock,
 #endif /* HAVE_IOV_ITER_MSGHDR */
                return -EFAULT;
 #endif /* RTE_KNI_VHOST_VNET_HDR_EN */
-       KNI_DBG_RX("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n",
+       pr_debug("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n",
                   (unsigned long)len, q->flags, pkt_len);
 
        return pkt_len + vnet_hdr_len;
@@ -448,25 +433,24 @@ kni_sock_rcvmsg(struct socket *sock,
 
 /* dummy tap like ioctl */
 static int
-kni_sock_ioctl(struct socket *sock, unsigned int cmd,
-             unsigned long arg)
+kni_sock_ioctl(struct socket *sock, uint32_t cmd, unsigned long arg)
 {
        void __user *argp = (void __user *)arg;
        struct ifreq __user *ifr = argp;
-       unsigned int __user *up = argp;
+       uint32_t __user *up = argp;
        struct kni_vhost_queue *q =
                container_of(sock->sk, struct kni_vhost_queue, sk);
        struct kni_dev *kni;
-       unsigned int u;
+       uint32_t u;
        int __user *sp = argp;
        int s;
        int ret;
 
-       KNI_DBG("tap ioctl cmd 0x%08x\n", cmd);
+       pr_debug("tap ioctl cmd 0x%08x\n", cmd);
 
        switch (cmd) {
        case TUNSETIFF:
-               KNI_DBG("TUNSETIFF\n");
+               pr_debug("TUNSETIFF\n");
                /* ignore the name, just look at flags */
                if (get_user(u, &ifr->ifr_flags))
                        return -EFAULT;
@@ -480,7 +464,7 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd,
                return ret;
 
        case TUNGETIFF:
-               KNI_DBG("TUNGETIFF\n");
+               pr_debug("TUNGETIFF\n");
                rcu_read_lock_bh();
                kni = rcu_dereference_bh(q->kni);
                if (kni)
@@ -491,14 +475,14 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd,
                        return -ENOLINK;
 
                ret = 0;
-               if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ) ||
-                   put_user(q->flags, &ifr->ifr_flags))
+               if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ)
+                               || put_user(q->flags, &ifr->ifr_flags))
                        ret = -EFAULT;
                dev_put(kni->net_dev);
                return ret;
 
        case TUNGETFEATURES:
-               KNI_DBG("TUNGETFEATURES\n");
+               pr_debug("TUNGETFEATURES\n");
                u = IFF_TAP | IFF_NO_PI;
 #ifdef RTE_KNI_VHOST_VNET_HDR_EN
                u |= IFF_VNET_HDR;
@@ -508,7 +492,7 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd,
                return 0;
 
        case TUNSETSNDBUF:
-               KNI_DBG("TUNSETSNDBUF\n");
+               pr_debug("TUNSETSNDBUF\n");
                if (get_user(u, up))
                        return -EFAULT;
 
@@ -519,7 +503,7 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd,
                s = q->vnet_hdr_sz;
                if (put_user(s, sp))
                        return -EFAULT;
-               KNI_DBG("TUNGETVNETHDRSZ %d\n", s);
+               pr_debug("TUNGETVNETHDRSZ %d\n", s);
                return 0;
 
        case TUNSETVNETHDRSZ:
@@ -528,12 +512,12 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd,
                if (s < (int)sizeof(struct virtio_net_hdr))
                        return -EINVAL;
 
-               KNI_DBG("TUNSETVNETHDRSZ %d\n", s);
+               pr_debug("TUNSETVNETHDRSZ %d\n", s);
                q->vnet_hdr_sz = s;
                return 0;
 
        case TUNSETOFFLOAD:
-               KNI_DBG("TUNSETOFFLOAD %lx\n", arg);
+               pr_debug("TUNSETOFFLOAD %lx\n", arg);
 #ifdef RTE_KNI_VHOST_VNET_HDR_EN
                /* not support any offload yet */
                if (!(q->flags & IFF_VNET_HDR))
@@ -545,26 +529,26 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd,
 #endif
 
        default:
-               KNI_DBG("NOT SUPPORT\n");
+               pr_debug("NOT SUPPORT\n");
                return -EINVAL;
        }
 }
 
 static int
-kni_sock_compat_ioctl(struct socket *sock, unsigned int cmd,
+kni_sock_compat_ioctl(struct socket *sock, uint32_t cmd,
                     unsigned long arg)
 {
        /* 32 bits app on 64 bits OS to be supported later */
-       KNI_PRINT("Not implemented.\n");
+       pr_debug("Not implemented.\n");
 
        return -EINVAL;
 }
 
 #define KNI_VHOST_WAIT_WQ_SAFE()                        \
-do {                                                   \
+do {                                                   \
        while ((BE_FINISH | BE_STOP) == kni->vq_status) \
-               msleep(1);                              \
-}while(0)                                               \
+               msleep(1);                              \
+} while (0)                                            \
 
 
 static int
@@ -577,7 +561,8 @@ kni_sock_release(struct socket *sock)
        if (q == NULL)
                return 0;
 
-       if (NULL != (kni = q->kni)) {
+       kni = q->kni;
+       if (kni != NULL) {
                kni->vq_status = BE_STOP;
                KNI_VHOST_WAIT_WQ_SAFE();
                kni->vhost_queue = NULL;
@@ -592,18 +577,17 @@ kni_sock_release(struct socket *sock)
 
        sock_put(&q->sk);
 
-       KNI_DBG("dummy sock release done\n");
+       pr_debug("dummy sock release done\n");
 
        return 0;
 }
 
 int
-kni_sock_getname (struct socket *sock,
-                 struct sockaddr *addr,
-                 int *sockaddr_len, int peer)
+kni_sock_getname(struct socket *sock, struct sockaddr *addr,
+               int *sockaddr_len, int peer)
 {
-       KNI_DBG("dummy sock getname\n");
-       ((struct sockaddr_ll*)addr)->sll_family = AF_PACKET;
+       pr_debug("dummy sock getname\n");
+       ((struct sockaddr_ll *)addr)->sll_family = AF_PACKET;
        return 0;
 }
 
@@ -646,7 +630,7 @@ kni_sk_destruct(struct sock *sk)
 
        /* make sure there's no packet in buffer */
        while (skb_dequeue(&sk->sk_receive_queue) != NULL)
-              ;
+               ;
 
        mb();
 
@@ -673,7 +657,7 @@ kni_vhost_backend_init(struct kni_dev *kni)
        if (kni->vhost_queue != NULL)
                return -1;
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)
+#ifdef HAVE_SK_ALLOC_KERN_PARAM
        q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
                        &kni_raw_proto, 0);
 #else
@@ -694,8 +678,9 @@ kni_vhost_backend_init(struct kni_dev *kni)
        }
 
        /* cache init */
-       q->cache = kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff),
-                          GFP_KERNEL);
+       q->cache = kzalloc(
+               RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff),
+               GFP_KERNEL);
        if (!q->cache)
                goto free_fd;
 
@@ -708,7 +693,7 @@ kni_vhost_backend_init(struct kni_dev *kni)
 
        for (i = 0; i < RTE_KNI_VHOST_MAX_CACHE_SIZE; i++) {
                elem = &q->cache[i];
-               kni_fifo_put(fifo, (void**)&elem, 1);
+               kni_fifo_put(fifo, (void **)&elem, 1);
        }
        q->fifo = fifo;
 
@@ -738,14 +723,12 @@ kni_vhost_backend_init(struct kni_dev *kni)
 
        kni->vq_status = BE_START;
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
-       KNI_DBG("backend init sockfd=%d, sock->wq=0x%16llx,"
-                 "sk->sk_wq=0x%16llx",
+#ifdef HAVE_SOCKET_WQ
+       pr_debug("backend init sockfd=%d, sock->wq=0x%16llx,sk->sk_wq=0x%16llx",
                  q->sockfd, (uint64_t)q->sock->wq,
                  (uint64_t)q->sk.sk_wq);
 #else
-       KNI_DBG("backend init sockfd=%d, sock->wait at 0x%16llx,"
-                 "sk->sk_sleep=0x%16llx",
+       pr_debug("backend init sockfd=%d, sock->wait at 0x%16llx,sk->sk_sleep=0x%16llx",
                  q->sockfd, (uint64_t)&q->sock->wait,
                  (uint64_t)q->sk.sk_sleep);
 #endif
@@ -768,7 +751,7 @@ free_sock:
        q->sock = NULL;
 
 free_sk:
-       sk_free((struct sock*)q);
+       sk_free((struct sock *)q);
 
        return err;
 }
@@ -781,6 +764,7 @@ show_sock_fd(struct device *dev, struct device_attribute *attr,
        struct net_device *net_dev = container_of(dev, struct net_device, dev);
        struct kni_dev *kni = netdev_priv(net_dev);
        int sockfd = -1;
+
        if (kni->vhost_queue != NULL)
                sockfd = kni->vhost_queue->sockfd;
        return snprintf(buf, 10, "%d\n", sockfd);
@@ -792,6 +776,7 @@ show_sock_en(struct device *dev, struct device_attribute *attr,
 {
        struct net_device *net_dev = container_of(dev, struct net_device, dev);
        struct kni_dev *kni = netdev_priv(net_dev);
+
        return snprintf(buf, 10, "%u\n", (kni->vhost_queue == NULL ? 0 : 1));
 }
 
@@ -804,7 +789,7 @@ set_sock_en(struct device *dev, struct device_attribute *attr,
        unsigned long en;
        int err = 0;
 
-       if (0 != kstrtoul(buf, 0, &en))
+       if (kstrtoul(buf, 0, &en) != 0)
                return -EINVAL;
 
        if (en)
@@ -818,7 +803,7 @@ static DEVICE_ATTR(sock_en, S_IRUGO | S_IWUSR, show_sock_en, set_sock_en);
 static struct attribute *dev_attrs[] = {
        &dev_attr_sock_fd.attr,
        &dev_attr_sock_en.attr,
-        NULL,
+       NULL,
 };
 
 static const struct attribute_group dev_attr_grp = {
@@ -836,7 +821,7 @@ kni_vhost_backend_release(struct kni_dev *kni)
        /* dettach from kni */
        /* detach from kni */
        q->kni = NULL;
 
-       KNI_DBG("release backend done\n");
+       pr_debug("release backend done\n");
 
        return 0;
 }
@@ -851,7 +836,7 @@ kni_vhost_init(struct kni_dev *kni)
 
        kni->vq_status = BE_STOP;
 
-       KNI_DBG("kni_vhost_init done\n");
+       pr_debug("kni_vhost_init done\n");
 
        return 0;
 }
index 574683d..aab6d1a 100644 (file)
@@ -34,6 +34,8 @@
 #ifndef _RTE_DEV_INFO_H_
 #define _RTE_DEV_INFO_H_
 
+#include <stdint.h>
+
 /*
  * Placeholder for accessing device registers
  */
index 563e80f..8386904 100644 (file)
 #ifndef _RTE_ETH_CTRL_H_
 #define _RTE_ETH_CTRL_H_
 
+#include <stdint.h>
+#include <rte_common.h>
+#include "rte_ether.h"
+
 /**
  * @file
  *
@@ -95,6 +99,7 @@ enum rte_filter_type {
        RTE_ETH_FILTER_FDIR,
        RTE_ETH_FILTER_HASH,
        RTE_ETH_FILTER_L2_TUNNEL,
+       RTE_ETH_FILTER_GENERIC,
        RTE_ETH_FILTER_MAX
 };
 
@@ -420,8 +425,6 @@ struct rte_eth_l2_flow {
 struct rte_eth_ipv4_flow {
        uint32_t src_ip;      /**< IPv4 source address in big endian. */
        uint32_t dst_ip;      /**< IPv4 destination address in big endian. */
-       // TREX_PATCH (ip_id)
-    uint16_t ip_id;       /**< IPv4 IP ID to match */
        uint8_t  tos;         /**< Type of service to match. */
        uint8_t  ttl;         /**< Time to live to match. */
        uint8_t  proto;       /**< Protocol, next header in big endian. */
@@ -464,8 +467,6 @@ struct rte_eth_ipv6_flow {
        uint8_t  tc;             /**< Traffic class to match. */
        uint8_t  proto;          /**< Protocol, next header to match. */
        uint8_t  hop_limits;     /**< Hop limits to match. */
-       // TREX_PATCH (flow_label)
-       uint32_t flow_label;     /**<flow label to match. */
 };
 
 /**
@@ -594,9 +595,6 @@ struct rte_eth_fdir_action {
        /**< If report_status is RTE_ETH_FDIR_REPORT_ID_FLEX_4 or
             RTE_ETH_FDIR_REPORT_FLEX_8, flex_off specifies where the reported
             flex bytes start from in flexible payload. */
-    // TREX_PATCH
-    // Index for statistics counter that will count FDIR matches.
-    uint16_t stat_count_index;
 };
 
 /**
index e7bc9d6..eb0a94a 100644 (file)
@@ -58,7 +58,6 @@
 #include <rte_atomic.h>
 #include <rte_branch_prediction.h>
 #include <rte_common.h>
-#include <rte_ring.h>
 #include <rte_mempool.h>
 #include <rte_malloc.h>
 #include <rte_mbuf.h>
@@ -72,6 +71,7 @@
 static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
 struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
 static struct rte_eth_dev_data *rte_eth_dev_data;
+static uint8_t eth_dev_last_created_port;
 static uint8_t nb_ports;
 
 /* spinlock for eth device callbacks */
@@ -189,8 +189,23 @@ rte_eth_dev_find_free_port(void)
        return RTE_MAX_ETHPORTS;
 }
 
+static struct rte_eth_dev *
+eth_dev_get(uint8_t port_id)
+{
+       struct rte_eth_dev *eth_dev = &rte_eth_devices[port_id];
+
+       eth_dev->data = &rte_eth_dev_data[port_id];
+       eth_dev->attached = DEV_ATTACHED;
+       TAILQ_INIT(&(eth_dev->link_intr_cbs));
+
+       eth_dev_last_created_port = port_id;
+       nb_ports++;
+
+       return eth_dev;
+}
+
 struct rte_eth_dev *
-rte_eth_dev_allocate(const char *name, enum rte_eth_dev_type type)
+rte_eth_dev_allocate(const char *name)
 {
        uint8_t port_id;
        struct rte_eth_dev *eth_dev;
@@ -210,28 +225,44 @@ rte_eth_dev_allocate(const char *name, enum rte_eth_dev_type type)
                return NULL;
        }
 
-       eth_dev = &rte_eth_devices[port_id];
-       eth_dev->data = &rte_eth_dev_data[port_id];
+       memset(&rte_eth_dev_data[port_id], 0, sizeof(struct rte_eth_dev_data));
+       eth_dev = eth_dev_get(port_id);
        snprintf(eth_dev->data->name, sizeof(eth_dev->data->name), "%s", name);
        eth_dev->data->port_id = port_id;
-       eth_dev->attached = DEV_ATTACHED;
-       eth_dev->dev_type = type;
-       nb_ports++;
+       eth_dev->data->mtu = ETHER_MTU;
+
        return eth_dev;
 }
 
-static int
-rte_eth_dev_create_unique_device_name(char *name, size_t size,
-               struct rte_pci_device *pci_dev)
+/*
+ * Attach to a port already registered by the primary process, which
+ * makes sure that the same device would have the same port id both
+ * in the primary and secondary process.
+ */
+static struct rte_eth_dev *
+eth_dev_attach_secondary(const char *name)
 {
-       int ret;
+       uint8_t i;
+       struct rte_eth_dev *eth_dev;
 
-       ret = snprintf(name, size, "%d:%d.%d",
-                       pci_dev->addr.bus, pci_dev->addr.devid,
-                       pci_dev->addr.function);
-       if (ret < 0)
-               return ret;
-       return 0;
+       if (rte_eth_dev_data == NULL)
+               rte_eth_dev_data_alloc();
+
+       for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+               if (strcmp(rte_eth_dev_data[i].name, name) == 0)
+                       break;
+       }
+       if (i == RTE_MAX_ETHPORTS) {
+               RTE_PMD_DEBUG_TRACE(
+                       "device %s is not driven by the primary process\n",
+                       name);
+               return NULL;
+       }
+
+       eth_dev = eth_dev_get(i);
+       RTE_ASSERT(eth_dev->data->port_id == i);
+
+       return eth_dev;
 }
 
 int
@@ -245,9 +276,9 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
        return 0;
 }
 
-static int
-rte_eth_dev_init(struct rte_pci_driver *pci_drv,
-                struct rte_pci_device *pci_dev)
+int
+rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv,
+                     struct rte_pci_device *pci_dev)
 {
        struct eth_driver    *eth_drv;
        struct rte_eth_dev *eth_dev;
@@ -257,40 +288,43 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
 
        eth_drv = (struct eth_driver *)pci_drv;
 
-       /* Create unique Ethernet device name using PCI address */
-       rte_eth_dev_create_unique_device_name(ethdev_name,
-                       sizeof(ethdev_name), pci_dev);
-
-       eth_dev = rte_eth_dev_allocate(ethdev_name, RTE_ETH_DEV_PCI);
-       if (eth_dev == NULL)
-               return -ENOMEM;
+       rte_eal_pci_device_name(&pci_dev->addr, ethdev_name,
+                       sizeof(ethdev_name));
 
        if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+               eth_dev = rte_eth_dev_allocate(ethdev_name);
+               if (eth_dev == NULL)
+                       return -ENOMEM;
+
                eth_dev->data->dev_private = rte_zmalloc("ethdev private structure",
                                  eth_drv->dev_private_size,
                                  RTE_CACHE_LINE_SIZE);
                if (eth_dev->data->dev_private == NULL)
                        rte_panic("Cannot allocate memzone for private port data\n");
+       } else {
+               eth_dev = eth_dev_attach_secondary(ethdev_name);
+               if (eth_dev == NULL) {
+                       /*
+                        * if we failed to attach a device, it means the
+                        * device is skipped in primary process, due to
+                        * some errors. If so, we return a positive value,
+                        * to let EAL skip it for the secondary process
+                        * as well.
+                        */
+                       return 1;
+               }
        }
-       eth_dev->pci_dev = pci_dev;
+       eth_dev->device = &pci_dev->device;
+       eth_dev->intr_handle = &pci_dev->intr_handle;
        eth_dev->driver = eth_drv;
-       eth_dev->data->rx_mbuf_alloc_failed = 0;
-
-       /* init user callbacks */
-       TAILQ_INIT(&(eth_dev->link_intr_cbs));
-
-       /*
-        * Set the default MTU.
-        */
-       eth_dev->data->mtu = ETHER_MTU;
 
        /* Invoke PMD device initialization function */
        diag = (*eth_drv->eth_dev_init)(eth_dev);
        if (diag == 0)
                return 0;
 
-       RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(vendor_id=0x%u device_id=0x%x) failed\n",
-                       pci_drv->name,
+       RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(vendor_id=0x%x device_id=0x%x) failed\n",
+                       pci_drv->driver.name,
                        (unsigned) pci_dev->id.vendor_id,
                        (unsigned) pci_dev->id.device_id);
        if (rte_eal_process_type() == RTE_PROC_PRIMARY)
@@ -299,8 +333,8 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
        return diag;
 }
 
-static int
-rte_eth_dev_uninit(struct rte_pci_device *pci_dev)
+int
+rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev)
 {
        const struct eth_driver *eth_drv;
        struct rte_eth_dev *eth_dev;
@@ -310,9 +344,8 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev)
        if (pci_dev == NULL)
                return -EINVAL;
 
-       /* Create unique Ethernet device name using PCI address */
-       rte_eth_dev_create_unique_device_name(ethdev_name,
-                       sizeof(ethdev_name), pci_dev);
+       rte_eal_pci_device_name(&pci_dev->addr, ethdev_name,
+                       sizeof(ethdev_name));
 
        eth_dev = rte_eth_dev_allocated(ethdev_name);
        if (eth_dev == NULL)
@@ -333,35 +366,13 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev)
        if (rte_eal_process_type() == RTE_PROC_PRIMARY)
                rte_free(eth_dev->data->dev_private);
 
-       eth_dev->pci_dev = NULL;
+       eth_dev->device = NULL;
        eth_dev->driver = NULL;
        eth_dev->data = NULL;
 
        return 0;
 }
 
-/**
- * Register an Ethernet [Poll Mode] driver.
- *
- * Function invoked by the initialization function of an Ethernet driver
- * to simultaneously register itself as a PCI driver and as an Ethernet
- * Poll Mode Driver.
- * Invokes the rte_eal_pci_register() function to register the *pci_drv*
- * structure embedded in the *eth_drv* structure, after having stored the
- * address of the rte_eth_dev_init() function in the *devinit* field of
- * the *pci_drv* structure.
- * During the PCI probing phase, the rte_eth_dev_init() function is
- * invoked for each PCI [Ethernet device] matching the embedded PCI
- * identifiers provided by the driver.
- */
-void
-rte_eth_driver_register(struct eth_driver *eth_drv)
-{
-       eth_drv->pci_drv.devinit = rte_eth_dev_init;
-       eth_drv->pci_drv.devuninit = rte_eth_dev_uninit;
-       rte_eal_pci_register(&eth_drv->pci_drv);
-}
-
 int
 rte_eth_dev_is_valid_port(uint8_t port_id)
 {
@@ -385,27 +396,6 @@ rte_eth_dev_count(void)
        return nb_ports;
 }
 
-static enum rte_eth_dev_type
-rte_eth_dev_get_device_type(uint8_t port_id)
-{
-       RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, RTE_ETH_DEV_UNKNOWN);
-       return rte_eth_devices[port_id].dev_type;
-}
-
-static int
-rte_eth_dev_get_addr_by_port(uint8_t port_id, struct rte_pci_addr *addr)
-{
-       RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
-
-       if (addr == NULL) {
-               RTE_PMD_DEBUG_TRACE("Null pointer is specified\n");
-               return -EINVAL;
-       }
-
-       *addr = rte_eth_devices[port_id].pci_dev->addr;
-       return 0;
-}
-
 int
 rte_eth_dev_get_name_by_port(uint8_t port_id, char *name)
 {
@@ -435,6 +425,9 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id)
                return -EINVAL;
        }
 
+       if (!nb_ports)
+               return -ENODEV;
+
        *port_id = RTE_MAX_ETHPORTS;
 
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
@@ -450,35 +443,6 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id)
        return -ENODEV;
 }
 
-/* TREX_PATCH removed "static" */
-int
-rte_eth_dev_get_port_by_addr(const struct rte_pci_addr *addr, uint8_t *port_id)
-{
-       int i;
-       struct rte_pci_device *pci_dev = NULL;
-
-       if (addr == NULL) {
-               RTE_PMD_DEBUG_TRACE("Null pointer is specified\n");
-               return -EINVAL;
-       }
-
-       *port_id = RTE_MAX_ETHPORTS;
-
-       for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
-
-               pci_dev = rte_eth_devices[i].pci_dev;
-
-               if (pci_dev &&
-                       !rte_eal_compare_pci_addr(&pci_dev->addr, addr)) {
-
-                       *port_id = i;
-
-                       return 0;
-               }
-       }
-       return -ENODEV;
-}
-
 static int
 rte_eth_dev_is_detachable(uint8_t port_id)
 {
@@ -504,127 +468,49 @@ rte_eth_dev_is_detachable(uint8_t port_id)
                return 1;
 }
 
-/* attach the new physical device, then store port_id of the device */
-static int
-rte_eth_dev_attach_pdev(struct rte_pci_addr *addr, uint8_t *port_id)
+/* attach the new device, then store port_id of the device */
+int
+rte_eth_dev_attach(const char *devargs, uint8_t *port_id)
 {
-       /* re-construct pci_device_list */
-       if (rte_eal_pci_scan())
-               goto err;
-       /* Invoke probe func of the driver can handle the new device. */
-       if (rte_eal_pci_probe_one(addr))
-               goto err;
+       int ret = -1;
+       int current = rte_eth_dev_count();
+       char *name = NULL;
+       char *args = NULL;
 
-       if (rte_eth_dev_get_port_by_addr(addr, port_id))
+       if ((devargs == NULL) || (port_id == NULL)) {
+               ret = -EINVAL;
                goto err;
+       }
 
-       return 0;
-err:
-       return -1;
-}
-
-/* detach the new physical device, then store pci_addr of the device */
-static int
-rte_eth_dev_detach_pdev(uint8_t port_id, struct rte_pci_addr *addr)
-{
-       struct rte_pci_addr freed_addr;
-       struct rte_pci_addr vp;
-
-       /* get pci address by port id */
-       if (rte_eth_dev_get_addr_by_port(port_id, &freed_addr))
+       /* parse devargs, then retrieve device name and args */
+       if (rte_eal_parse_devargs_str(devargs, &name, &args))
                goto err;
 
-       /* Zeroed pci addr means the port comes from virtual device */
-       vp.domain = vp.bus = vp.devid = vp.function = 0;
-       if (rte_eal_compare_pci_addr(&vp, &freed_addr) == 0)
+       ret = rte_eal_dev_attach(name, args);
+       if (ret < 0)
                goto err;
 
-       /* invoke devuninit func of the pci driver,
-        * also remove the device from pci_device_list */
-       if (rte_eal_pci_detach(&freed_addr))
+       /* no point looking at the port count if no port exists */
+       if (!rte_eth_dev_count()) {
+               RTE_LOG(ERR, EAL, "No port found for device (%s)\n", name);
+               ret = -1;
                goto err;
+       }
 
-       *addr = freed_addr;
-       return 0;
-err:
-       return -1;
-}
-
-/* attach the new virtual device, then store port_id of the device */
-static int
-rte_eth_dev_attach_vdev(const char *vdevargs, uint8_t *port_id)
-{
-       char *name = NULL, *args = NULL;
-       int ret = -1;
-
-       /* parse vdevargs, then retrieve device name and args */
-       if (rte_eal_parse_devargs_str(vdevargs, &name, &args))
-               goto end;
-
-       /* walk around dev_driver_list to find the driver of the device,
-        * then invoke probe function of the driver.
-        * rte_eal_vdev_init() updates port_id allocated after
-        * initialization.
+       /* if nothing happened, there is a bug here, since some driver told us
+        * it did attach a device, but did not create a port.
         */
-       if (rte_eal_vdev_init(name, args))
-               goto end;
-
-       if (rte_eth_dev_get_port_by_name(name, port_id))
-               goto end;
-
-       ret = 0;
-end:
-       free(name);
-       free(args);
-
-       return ret;
-}
-
-/* detach the new virtual device, then store the name of the device */
-static int
-rte_eth_dev_detach_vdev(uint8_t port_id, char *vdevname)
-{
-       char name[RTE_ETH_NAME_MAX_LEN];
-
-       /* get device name by port id */
-       if (rte_eth_dev_get_name_by_port(port_id, name))
-               goto err;
-       /* walk around dev_driver_list to find the driver of the device,
-        * then invoke uninit function of the driver */
-       if (rte_eal_vdev_uninit(name))
-               goto err;
-
-       strncpy(vdevname, name, sizeof(name));
-       return 0;
-err:
-       return -1;
-}
-
-/* attach the new device, then store port_id of the device */
-int
-rte_eth_dev_attach(const char *devargs, uint8_t *port_id)
-{
-       struct rte_pci_addr addr;
-       int ret = -1;
-
-       if ((devargs == NULL) || (port_id == NULL)) {
-               ret = -EINVAL;
+       if (current == rte_eth_dev_count()) {
+               ret = -1;
                goto err;
        }
 
-       if (eal_parse_pci_DomBDF(devargs, &addr) == 0) {
-               ret = rte_eth_dev_attach_pdev(&addr, port_id);
-               if (ret < 0)
-                       goto err;
-       } else {
-               ret = rte_eth_dev_attach_vdev(devargs, port_id);
-               if (ret < 0)
-                       goto err;
-       }
+       *port_id = eth_dev_last_created_port;
+       ret = 0;
 
-       return 0;
 err:
-       RTE_LOG(ERR, EAL, "Driver, cannot attach the device\n");
+       free(name);
+       free(args);
        return ret;
 }
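For illustration, a minimal caller-side sketch of the reworked rte_eth_dev_attach() flow; the devargs string, helper name and log messages are hypothetical and not part of this patch:

#include <rte_ethdev.h>
#include <rte_log.h>

/* Hypothetical helper: hot-plug a pcap vdev and report the port it creates.
 * The devargs string is only an example; a PCI address works the same way. */
static int
example_attach_vdev(void)
{
        uint8_t port_id;

        if (rte_eth_dev_attach("net_pcap0,iface=eth0", &port_id) < 0) {
                RTE_LOG(ERR, USER1, "attach failed\n");
                return -1;
        }
        RTE_LOG(INFO, USER1, "device attached as port %u\n", port_id);
        return 0;
}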
 
@@ -632,7 +518,6 @@ err:
 int
 rte_eth_dev_detach(uint8_t port_id, char *name)
 {
-       struct rte_pci_addr addr;
        int ret = -1;
 
        if (name == NULL) {
@@ -640,33 +525,19 @@ rte_eth_dev_detach(uint8_t port_id, char *name)
                goto err;
        }
 
-       /* check whether the driver supports detach feature, or not */
+       /* FIXME: move this to eal, once device flags are relocated there */
        if (rte_eth_dev_is_detachable(port_id))
                goto err;
 
-       if (rte_eth_dev_get_device_type(port_id) == RTE_ETH_DEV_PCI) {
-               ret = rte_eth_dev_get_addr_by_port(port_id, &addr);
-               if (ret < 0)
-                       goto err;
-
-               ret = rte_eth_dev_detach_pdev(port_id, &addr);
-               if (ret < 0)
-                       goto err;
-
-               snprintf(name, RTE_ETH_NAME_MAX_LEN,
-                       "%04x:%02x:%02x.%d",
-                       addr.domain, addr.bus,
-                       addr.devid, addr.function);
-       } else {
-               ret = rte_eth_dev_detach_vdev(port_id, name);
-               if (ret < 0)
-                       goto err;
-       }
+       snprintf(name, sizeof(rte_eth_devices[port_id].data->name),
+                "%s", rte_eth_devices[port_id].data->name);
+       ret = rte_eal_dev_detach(name);
+       if (ret < 0)
+               goto err;
 
        return 0;
 
 err:
-       RTE_LOG(ERR, EAL, "Driver, cannot detach the device\n");
        return ret;
 }
 
@@ -712,6 +583,9 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
 
                for (i = nb_queues; i < old_nb_queues; i++)
                        (*dev->dev_ops->rx_queue_release)(rxq[i]);
+
+               rte_free(dev->data->rx_queues);
+               dev->data->rx_queues = NULL;
        }
        dev->data->nb_rx_queues = nb_queues;
        return 0;
@@ -863,6 +737,9 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
 
                for (i = nb_queues; i < old_nb_queues; i++)
                        (*dev->dev_ops->tx_queue_release)(txq[i]);
+
+               rte_free(dev->data->tx_queues);
+               dev->data->tx_queues = NULL;
        }
        dev->data->nb_tx_queues = nb_queues;
        return 0;
@@ -1033,39 +910,61 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
        return 0;
 }
 
+void
+_rte_eth_dev_reset(struct rte_eth_dev *dev)
+{
+       if (dev->data->dev_started) {
+               RTE_PMD_DEBUG_TRACE(
+                       "port %d must be stopped to allow reset\n",
+                       dev->data->port_id);
+               return;
+       }
+
+       rte_eth_dev_rx_queue_config(dev, 0);
+       rte_eth_dev_tx_queue_config(dev, 0);
+
+       memset(&dev->data->dev_conf, 0, sizeof(dev->data->dev_conf));
+}
+
 static void
 rte_eth_dev_config_restore(uint8_t port_id)
 {
        struct rte_eth_dev *dev;
        struct rte_eth_dev_info dev_info;
-       struct ether_addr addr;
+       struct ether_addr *addr;
        uint16_t i;
        uint32_t pool = 0;
+       uint64_t pool_mask;
 
        dev = &rte_eth_devices[port_id];
 
        rte_eth_dev_info_get(port_id, &dev_info);
 
-       if (RTE_ETH_DEV_SRIOV(dev).active)
-               pool = RTE_ETH_DEV_SRIOV(dev).def_vmdq_idx;
-
-       /* replay MAC address configuration */
-       for (i = 0; i < dev_info.max_mac_addrs; i++) {
-               addr = dev->data->mac_addrs[i];
-
-               /* skip zero address */
-               if (is_zero_ether_addr(&addr))
-                       continue;
-
-               /* add address to the hardware */
-               if  (*dev->dev_ops->mac_addr_add &&
-                       (dev->data->mac_pool_sel[i] & (1ULL << pool)))
-                       (*dev->dev_ops->mac_addr_add)(dev, &addr, i, pool);
-               else {
-                       RTE_PMD_DEBUG_TRACE("port %d: MAC address array not supported\n",
-                                       port_id);
-                       /* exit the loop but not return an error */
-                       break;
+       /* replay MAC address configuration including default MAC */
+       addr = &dev->data->mac_addrs[0];
+       if (*dev->dev_ops->mac_addr_set != NULL)
+               (*dev->dev_ops->mac_addr_set)(dev, addr);
+       else if (*dev->dev_ops->mac_addr_add != NULL)
+               (*dev->dev_ops->mac_addr_add)(dev, addr, 0, pool);
+
+       if (*dev->dev_ops->mac_addr_add != NULL) {
+               for (i = 1; i < dev_info.max_mac_addrs; i++) {
+                       addr = &dev->data->mac_addrs[i];
+
+                       /* skip zero address */
+                       if (is_zero_ether_addr(addr))
+                               continue;
+
+                       pool = 0;
+                       pool_mask = dev->data->mac_pool_sel[i];
+
+                       do {
+                               if (pool_mask & 1ULL)
+                                       (*dev->dev_ops->mac_addr_add)(dev,
+                                               addr, i, pool);
+                               pool_mask >>= 1;
+                               pool++;
+                       } while (pool_mask);
                }
        }
 
@@ -1191,6 +1090,7 @@ rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id,
        uint32_t mbp_buf_size;
        struct rte_eth_dev *dev;
        struct rte_eth_dev_info dev_info;
+       void **rxq;
 
        RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
 
@@ -1249,6 +1149,14 @@ rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id,
                return -EINVAL;
        }
 
+       rxq = dev->data->rx_queues;
+       if (rxq[rx_queue_id]) {
+               RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release,
+                                       -ENOTSUP);
+               (*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]);
+               rxq[rx_queue_id] = NULL;
+       }
+
        if (rx_conf == NULL)
                rx_conf = &dev_info.default_rxconf;
 
@@ -1270,6 +1178,7 @@ rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id,
 {
        struct rte_eth_dev *dev;
        struct rte_eth_dev_info dev_info;
+       void **txq;
 
        RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
 
@@ -1302,6 +1211,14 @@ rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id,
                return -EINVAL;
        }
 
+       txq = dev->data->tx_queues;
+       if (txq[tx_queue_id]) {
+               RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release,
+                                       -ENOTSUP);
+               (*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]);
+               txq[tx_queue_id] = NULL;
+       }
+
        if (tx_conf == NULL)
                tx_conf = &dev_info.default_txconf;
 
@@ -1480,54 +1397,6 @@ rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *eth_link)
        }
 }
 
-// TREX_PATCH
-// return in stats, statistics starting from start, for len counters.
-int
-rte_eth_fdir_stats_get(uint8_t port_id, uint32_t *stats, uint32_t start, uint32_t len)
-{
-       struct rte_eth_dev *dev;
-
-       RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
-
-       dev = &rte_eth_devices[port_id];
-
-    // Only xl710 support this
-    i40e_trex_fdir_stats_get(dev, stats, start, len);
-
-    return 0;
-}
-
-// TREX_PATCH
-// zero statistics counters, starting from start, for len counters.
-int
-rte_eth_fdir_stats_reset(uint8_t port_id, uint32_t *stats, uint32_t start, uint32_t len)
-{
-       struct rte_eth_dev *dev;
-
-       RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
-
-       dev = &rte_eth_devices[port_id];
-
-    // Only xl710 support this
-    i40e_trex_fdir_stats_reset(dev, stats, start, len);
-
-    return 0;
-}
-
-// TREX_PATCH
-int
-rte_eth_get_fw_ver(int port_id, uint32_t *version)
-{
-       struct rte_eth_dev *dev;
-
-       RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
-
-       dev = &rte_eth_devices[port_id];
-
-    // Only xl710 support this
-    return i40e_trex_get_fw_ver(dev, version);
-}
-
 int
 rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats)
 {
@@ -1572,8 +1441,10 @@ get_xstats_count(uint8_t port_id)
        } else
                count = 0;
        count += RTE_NB_STATS;
-       count += dev->data->nb_rx_queues * RTE_NB_RXQ_STATS;
-       count += dev->data->nb_tx_queues * RTE_NB_TXQ_STATS;
+       count += RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS) *
+                RTE_NB_RXQ_STATS;
+       count += RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS) *
+                RTE_NB_TXQ_STATS;
        return count;
 }
 
@@ -1587,6 +1458,7 @@ rte_eth_xstats_get_names(uint8_t port_id,
        int cnt_expected_entries;
        int cnt_driver_entries;
        uint32_t idx, id_queue;
+       uint16_t num_q;
 
        cnt_expected_entries = get_xstats_count(port_id);
        if (xstats_names == NULL || cnt_expected_entries < 0 ||
@@ -1603,7 +1475,8 @@ rte_eth_xstats_get_names(uint8_t port_id,
                        "%s", rte_stats_strings[idx].name);
                cnt_used_entries++;
        }
-       for (id_queue = 0; id_queue < dev->data->nb_rx_queues; id_queue++) {
+       num_q = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS);
+       for (id_queue = 0; id_queue < num_q; id_queue++) {
                for (idx = 0; idx < RTE_NB_RXQ_STATS; idx++) {
                        snprintf(xstats_names[cnt_used_entries].name,
                                sizeof(xstats_names[0].name),
@@ -1613,7 +1486,8 @@ rte_eth_xstats_get_names(uint8_t port_id,
                }
 
        }
-       for (id_queue = 0; id_queue < dev->data->nb_tx_queues; id_queue++) {
+       num_q = RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS);
+       for (id_queue = 0; id_queue < num_q; id_queue++) {
                for (idx = 0; idx < RTE_NB_TXQ_STATS; idx++) {
                        snprintf(xstats_names[cnt_used_entries].name,
                                sizeof(xstats_names[0].name),
@@ -1649,14 +1523,18 @@ rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats,
        unsigned count = 0, i, q;
        signed xcount = 0;
        uint64_t val, *stats_ptr;
+       uint16_t nb_rxqs, nb_txqs;
 
        RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
 
        dev = &rte_eth_devices[port_id];
 
+       nb_rxqs = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS);
+       nb_txqs = RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS);
+
        /* Return generic statistics */
-       count = RTE_NB_STATS + (dev->data->nb_rx_queues * RTE_NB_RXQ_STATS) +
-               (dev->data->nb_tx_queues * RTE_NB_TXQ_STATS);
+       count = RTE_NB_STATS + (nb_rxqs * RTE_NB_RXQ_STATS) +
+               (nb_txqs * RTE_NB_TXQ_STATS);
 
        /* implemented by the driver */
        if (dev->dev_ops->xstats_get != NULL) {
@@ -1687,7 +1565,7 @@ rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats,
        }
 
        /* per-rxq stats */
-       for (q = 0; q < dev->data->nb_rx_queues; q++) {
+       for (q = 0; q < nb_rxqs; q++) {
                for (i = 0; i < RTE_NB_RXQ_STATS; i++) {
                        stats_ptr = RTE_PTR_ADD(&eth_stats,
                                        rte_rxq_stats_strings[i].offset +
@@ -1698,7 +1576,7 @@ rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats,
        }
 
        /* per-txq stats */
-       for (q = 0; q < dev->data->nb_tx_queues; q++) {
+       for (q = 0; q < nb_txqs; q++) {
                for (i = 0; i < RTE_NB_TXQ_STATS; i++) {
                        stats_ptr = RTE_PTR_ADD(&eth_stats,
                                        rte_txq_stats_strings[i].offset +
@@ -1708,8 +1586,11 @@ rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats,
                }
        }
 
-       for (i = 0; i < count + xcount; i++)
+       for (i = 0; i < count; i++)
                xstats[i].id = i;
+       /* add an offset to driver-specific stats */
+       for ( ; i < count + xcount; i++)
+               xstats[i].id += count;
 
        return count + xcount;
 }
@@ -1766,6 +1647,18 @@ rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id, uint16_t rx_queue_id,
                        STAT_QMAP_RX);
 }
 
+int
+rte_eth_dev_fw_version_get(uint8_t port_id, char *fw_version, size_t fw_size)
+{
+       struct rte_eth_dev *dev;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+       dev = &rte_eth_devices[port_id];
+
+       RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->fw_version_get, -ENOTSUP);
+       return (*dev->dev_ops->fw_version_get)(dev, fw_version, fw_size);
+}
+
 void
 rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info)
 {
@@ -1785,7 +1678,6 @@ rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info)
 
        RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get);
        (*dev->dev_ops->dev_infos_get)(dev, dev_info);
-       dev_info->pci_dev = dev->pci_dev;
        dev_info->driver_name = dev->data->drv_name;
        dev_info->nb_rx_queues = dev->data->nb_rx_queues;
        dev_info->nb_tx_queues = dev->data->nb_tx_queues;
@@ -2354,32 +2246,6 @@ rte_eth_dev_default_mac_addr_set(uint8_t port_id, struct ether_addr *addr)
        return 0;
 }
 
-int
-rte_eth_dev_set_vf_rxmode(uint8_t port_id,  uint16_t vf,
-                               uint16_t rx_mode, uint8_t on)
-{
-       uint16_t num_vfs;
-       struct rte_eth_dev *dev;
-       struct rte_eth_dev_info dev_info;
-
-       RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
-
-       dev = &rte_eth_devices[port_id];
-       rte_eth_dev_info_get(port_id, &dev_info);
-
-       num_vfs = dev_info.max_vfs;
-       if (vf > num_vfs) {
-               RTE_PMD_DEBUG_TRACE("set VF RX mode:invalid VF id %d\n", vf);
-               return -EINVAL;
-       }
-
-       if (rx_mode == 0) {
-               RTE_PMD_DEBUG_TRACE("set VF RX mode:mode mask ca not be zero\n");
-               return -EINVAL;
-       }
-       RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rx_mode, -ENOTSUP);
-       return (*dev->dev_ops->set_vf_rx_mode)(dev, vf, rx_mode, on);
-}
 
 /*
  * Returns index into MAC address array of addr. Use 00:00:00:00:00:00 to find
@@ -2469,76 +2335,6 @@ rte_eth_dev_uc_all_hash_table_set(uint8_t port_id, uint8_t on)
        return (*dev->dev_ops->uc_all_hash_table_set)(dev, on);
 }
 
-int
-rte_eth_dev_set_vf_rx(uint8_t port_id, uint16_t vf, uint8_t on)
-{
-       uint16_t num_vfs;
-       struct rte_eth_dev *dev;
-       struct rte_eth_dev_info dev_info;
-
-       RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
-
-       dev = &rte_eth_devices[port_id];
-       rte_eth_dev_info_get(port_id, &dev_info);
-
-       num_vfs = dev_info.max_vfs;
-       if (vf > num_vfs) {
-               RTE_PMD_DEBUG_TRACE("port %d: invalid vf id\n", port_id);
-               return -EINVAL;
-       }
-
-       RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rx, -ENOTSUP);
-       return (*dev->dev_ops->set_vf_rx)(dev, vf, on);
-}
-
-int
-rte_eth_dev_set_vf_tx(uint8_t port_id, uint16_t vf, uint8_t on)
-{
-       uint16_t num_vfs;
-       struct rte_eth_dev *dev;
-       struct rte_eth_dev_info dev_info;
-
-       RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
-
-       dev = &rte_eth_devices[port_id];
-       rte_eth_dev_info_get(port_id, &dev_info);
-
-       num_vfs = dev_info.max_vfs;
-       if (vf > num_vfs) {
-               RTE_PMD_DEBUG_TRACE("set pool tx:invalid pool id=%d\n", vf);
-               return -EINVAL;
-       }
-
-       RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_tx, -ENOTSUP);
-       return (*dev->dev_ops->set_vf_tx)(dev, vf, on);
-}
-
-int
-rte_eth_dev_set_vf_vlan_filter(uint8_t port_id, uint16_t vlan_id,
-                              uint64_t vf_mask, uint8_t vlan_on)
-{
-       struct rte_eth_dev *dev;
-
-       RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
-
-       dev = &rte_eth_devices[port_id];
-
-       if (vlan_id > ETHER_MAX_VLAN_ID) {
-               RTE_PMD_DEBUG_TRACE("VF VLAN filter:invalid VLAN id=%d\n",
-                       vlan_id);
-               return -EINVAL;
-       }
-
-       if (vf_mask == 0) {
-               RTE_PMD_DEBUG_TRACE("VF VLAN filter:pool_mask can not be 0\n");
-               return -EINVAL;
-       }
-
-       RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_vlan_filter, -ENOTSUP);
-       return (*dev->dev_ops->set_vf_vlan_filter)(dev, vlan_id,
-                                                  vf_mask, vlan_on);
-}
-
 int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx,
                                        uint16_t tx_rate)
 {
@@ -2569,45 +2365,12 @@ int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx,
        return (*dev->dev_ops->set_queue_rate_limit)(dev, queue_idx, tx_rate);
 }
 
-int rte_eth_set_vf_rate_limit(uint8_t port_id, uint16_t vf, uint16_t tx_rate,
-                               uint64_t q_msk)
-{
-       struct rte_eth_dev *dev;
-       struct rte_eth_dev_info dev_info;
-       struct rte_eth_link link;
-
-       if (q_msk == 0)
-               return 0;
-
-       RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
-
-       dev = &rte_eth_devices[port_id];
-       rte_eth_dev_info_get(port_id, &dev_info);
-       link = dev->data->dev_link;
-
-       if (vf > dev_info.max_vfs) {
-               RTE_PMD_DEBUG_TRACE("set VF rate limit:port %d: "
-                               "invalid vf id=%d\n", port_id, vf);
-               return -EINVAL;
-       }
-
-       if (tx_rate > link.link_speed) {
-               RTE_PMD_DEBUG_TRACE("set VF rate limit:invalid tx_rate=%d, "
-                               "bigger than link speed= %d\n",
-                               tx_rate, link.link_speed);
-               return -EINVAL;
-       }
-
-       RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rate_limit, -ENOTSUP);
-       return (*dev->dev_ops->set_vf_rate_limit)(dev, vf, tx_rate, q_msk);
-}
-
 int
 rte_eth_mirror_rule_set(uint8_t port_id,
                        struct rte_eth_mirror_conf *mirror_conf,
                        uint8_t rule_id, uint8_t on)
 {
-       struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+       struct rte_eth_dev *dev;
 
        RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
        if (mirror_conf->rule_type == 0) {
@@ -2643,7 +2406,7 @@ rte_eth_mirror_rule_set(uint8_t port_id,
 int
 rte_eth_mirror_rule_reset(uint8_t port_id, uint8_t rule_id)
 {
-       struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+       struct rte_eth_dev *dev;
 
        RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
 
@@ -2678,14 +2441,15 @@ rte_eth_dev_callback_register(uint8_t port_id,
        }
 
        /* create a new callback. */
-       if (user_cb == NULL)
+       if (user_cb == NULL) {
                user_cb = rte_zmalloc("INTR_USER_CALLBACK",
                                        sizeof(struct rte_eth_dev_callback), 0);
-       if (user_cb != NULL) {
-               user_cb->cb_fn = cb_fn;
-               user_cb->cb_arg = cb_arg;
-               user_cb->event = event;
-               TAILQ_INSERT_TAIL(&(dev->link_intr_cbs), user_cb, next);
+               if (user_cb != NULL) {
+                       user_cb->cb_fn = cb_fn;
+                       user_cb->cb_arg = cb_arg;
+                       user_cb->event = event;
+                       TAILQ_INSERT_TAIL(&(dev->link_intr_cbs), user_cb, next);
+               }
        }
 
        rte_spinlock_unlock(&rte_eth_dev_cb_lock);
@@ -2737,7 +2501,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id,
 
 void
 _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
-       enum rte_eth_event_type event)
+       enum rte_eth_event_type event, void *cb_arg)
 {
        struct rte_eth_dev_callback *cb_lst;
        struct rte_eth_dev_callback dev_cb;
@@ -2748,6 +2512,9 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
                        continue;
                dev_cb = *cb_lst;
                cb_lst->active = 1;
+               if (cb_arg != NULL)
+                       dev_cb.cb_arg = (void *) cb_arg;
+
                rte_spinlock_unlock(&rte_eth_dev_cb_lock);
                dev_cb.cb_fn(dev->data->port_id, dev_cb.event,
                                                dev_cb.cb_arg);
@@ -2769,7 +2536,13 @@ rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data)
        RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
 
        dev = &rte_eth_devices[port_id];
-       intr_handle = &dev->pci_dev->intr_handle;
+
+       if (!dev->intr_handle) {
+               RTE_PMD_DEBUG_TRACE("RX Intr handle unset\n");
+               return -ENOTSUP;
+       }
+
+       intr_handle = dev->intr_handle;
        if (!intr_handle->intr_vec) {
                RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n");
                return -EPERM;
@@ -2797,7 +2570,7 @@ rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name,
        const struct rte_memzone *mz;
 
        snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
-                dev->driver->pci_drv.name, ring_name,
+                dev->data->drv_name, ring_name,
                 dev->data->port_id, queue_id);
 
        mz = rte_memzone_lookup(z_name);
@@ -2829,7 +2602,12 @@ rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id,
                return -EINVAL;
        }
 
-       intr_handle = &dev->pci_dev->intr_handle;
+       if (!dev->intr_handle) {
+               RTE_PMD_DEBUG_TRACE("RX Intr handle unset\n");
+               return -ENOTSUP;
+       }
+
+       intr_handle = dev->intr_handle;
        if (!intr_handle->intr_vec) {
                RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n");
                return -EPERM;
@@ -3431,15 +3209,15 @@ rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, struct rte_pci_device *pci_de
                return;
        }
 
+       eth_dev->intr_handle = &pci_dev->intr_handle;
+
        eth_dev->data->dev_flags = 0;
        if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
                eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
-       if (pci_dev->driver->drv_flags & RTE_PCI_DRV_DETACHABLE)
-               eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
 
        eth_dev->data->kdrv = pci_dev->kdrv;
-       eth_dev->data->numa_node = pci_dev->numa_node;
-       eth_dev->data->drv_name = pci_dev->driver->name;
+       eth_dev->data->numa_node = pci_dev->device.numa_node;
+       eth_dev->data->drv_name = pci_dev->driver->driver.name;
 }
 
 int
index 5339d3b..c17bbda 100644 (file)
@@ -182,6 +182,7 @@ extern "C" {
 #include <rte_pci.h>
 #include <rte_dev.h>
 #include <rte_devargs.h>
+#include <rte_errno.h>
 #include "rte_ether.h"
 #include "rte_eth_ctrl.h"
 #include "rte_dev_info.h"
@@ -190,6 +191,9 @@ struct rte_mbuf;
 
 /**
  * A structure used to retrieve statistics for an Ethernet port.
+ * Not all statistics fields in struct rte_eth_stats are supported
+ * by every type of network interface card (NIC). If a statistics
+ * field is not supported, its value is 0.
  */
 struct rte_eth_stats {
        uint64_t ipackets;  /**< Total number of successfully received packets. */
@@ -198,7 +202,7 @@ struct rte_eth_stats {
        uint64_t obytes;    /**< Total number of successfully transmitted bytes. */
        uint64_t imissed;
        /**< Total of RX packets dropped by the HW,
-        * because there are no available mbufs (i.e. RX queues are full).
+        * because there are no available buffers (i.e. RX queues are full).
         */
        uint64_t ierrors;   /**< Total number of erroneous received packets. */
        uint64_t oerrors;   /**< Total number of failed transmitted packets. */
@@ -255,6 +259,7 @@ struct rte_eth_stats {
 /**
  * A structure used to retrieve link-level information of an Ethernet port.
  */
+__extension__
 struct rte_eth_link {
        uint32_t link_speed;        /**< ETH_SPEED_NUM_ */
        uint16_t link_duplex  : 1;  /**< ETH_LINK_[HALF/FULL]_DUPLEX */
@@ -346,6 +351,7 @@ struct rte_eth_rxmode {
        enum rte_eth_rx_mq_mode mq_mode;
        uint32_t max_rx_pkt_len;  /**< Only used if jumbo_frame enabled. */
        uint16_t split_hdr_size;  /**< hdr buf size (header_split enabled).*/
+       __extension__
        uint16_t header_split : 1, /**< Header Split enable. */
                hw_ip_checksum   : 1, /**< IP/UDP/TCP checksum offload enable. */
                hw_vlan_filter   : 1, /**< VLAN filter enable. */
@@ -645,6 +651,7 @@ struct rte_eth_txmode {
 
        /* For i40e specifically */
        uint16_t pvid;
+       __extension__
        uint8_t hw_vlan_reject_tagged : 1,
                /**< If set, reject sending out tagged pkts */
                hw_vlan_reject_untagged : 1,
@@ -696,6 +703,29 @@ struct rte_eth_desc_lim {
        uint16_t nb_max;   /**< Max allowed number of descriptors. */
        uint16_t nb_min;   /**< Min allowed number of descriptors. */
        uint16_t nb_align; /**< Number of descriptors should be aligned to. */
+
+       /**
+        * Max allowed number of segments per whole packet.
+        *
+        * - For TSO packet this is the total number of data descriptors allowed
+        *   by device.
+        *
+        * @see nb_mtu_seg_max
+        */
+       uint16_t nb_seg_max;
+
+       /**
+        * Max number of segments per one MTU.
+        *
+        * - For non-TSO packet, this is the maximum allowed number of segments
+        *   in a single transmit packet.
+        *
+        * - For TSO packet each segment within the TSO may span up to this
+        *   value.
+        *
+        * @see nb_seg_max
+        */
+       uint16_t nb_mtu_seg_max;
 };
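As a hedged illustration, an application might consult these new limits before enqueueing a multi-segment packet; the helper below is a sketch, not part of this patch:

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Illustrative check: a non-TSO mbuf chain should not exceed the per-MTU
 * segment limit advertised by the device in tx_desc_lim. */
static int
example_seg_limit_ok(uint8_t port_id, const struct rte_mbuf *m)
{
        struct rte_eth_dev_info info;

        rte_eth_dev_info_get(port_id, &info);
        if (info.tx_desc_lim.nb_mtu_seg_max == 0)
                return 1; /* driver does not report a limit */
        return m->nb_segs <= info.tx_desc_lim.nb_mtu_seg_max;
}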
 
 /**
@@ -767,8 +797,6 @@ struct rte_fdir_conf {
        struct rte_eth_fdir_masks mask;
        struct rte_eth_fdir_flex_conf flex_conf;
        /**< Flex payload configuration. */
-    // TREX_PATCH
-    uint8_t flexbytes_offset;
 };
 
 /**
@@ -853,6 +881,7 @@ struct rte_eth_conf {
 #define DEV_RX_OFFLOAD_TCP_LRO     0x00000010
 #define DEV_RX_OFFLOAD_QINQ_STRIP  0x00000020
 #define DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000040
+#define DEV_RX_OFFLOAD_MACSEC_STRIP     0x00000080
 
 /**
  * TX offload capabilities of a device.
@@ -866,6 +895,11 @@ struct rte_eth_conf {
 #define DEV_TX_OFFLOAD_UDP_TSO     0x00000040
 #define DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000080 /**< Used for tunneling packet. */
 #define DEV_TX_OFFLOAD_QINQ_INSERT 0x00000100
+#define DEV_TX_OFFLOAD_VXLAN_TNL_TSO    0x00000200    /**< Used for tunneling packet. */
+#define DEV_TX_OFFLOAD_GRE_TNL_TSO      0x00000400    /**< Used for tunneling packet. */
+#define DEV_TX_OFFLOAD_IPIP_TNL_TSO     0x00000800    /**< Used for tunneling packet. */
+#define DEV_TX_OFFLOAD_GENEVE_TNL_TSO   0x00001000    /**< Used for tunneling packet. */
+#define DEV_TX_OFFLOAD_MACSEC_INSERT    0x00002000
 
 /**
  * Ethernet device information
@@ -930,23 +964,26 @@ struct rte_eth_txq_info {
 /**
  * An Ethernet device extended statistic structure
  *
- * This structure is used by ethdev->eth_xstats_get() to provide
- * statistics that are not provided in the generic rte_eth_stats
+ * This structure is used by rte_eth_xstats_get() to provide
+ * statistics that are not provided in the generic *rte_eth_stats*
  * structure.
+ * It maps a name id, corresponding to an index in the array returned
+ * by rte_eth_xstats_get_names(), to a statistic value.
  */
 struct rte_eth_xstat {
-       uint64_t id;
-       uint64_t value;
+       uint64_t id;        /**< The index in xstats name array. */
+       uint64_t value;     /**< The statistic counter value. */
 };
 
 /**
- * A name-key lookup element for extended statistics.
+ * A name element for extended statistics.
  *
- * This structure is used to map between names and ID numbers
- * for extended ethernet statistics.
+ * An array of this structure is returned by rte_eth_xstats_get_names().
+ * It lists the names of extended statistics for a PMD. The *rte_eth_xstat*
+ * structure references these names by their array index.
  */
 struct rte_eth_xstat_name {
-       char name[RTE_ETH_XSTATS_NAME_SIZE];
+       char name[RTE_ETH_XSTATS_NAME_SIZE]; /**< The statistic name. */
 };
 
 #define ETH_DCB_NUM_TCS    8
@@ -1142,6 +1179,10 @@ typedef uint32_t (*eth_rx_queue_count_t)(struct rte_eth_dev *dev,
 typedef int (*eth_rx_descriptor_done_t)(void *rxq, uint16_t offset);
 /**< @internal Check DD bit of specific RX descriptor */
 
+typedef int (*eth_fw_version_get_t)(struct rte_eth_dev *dev,
+                                    char *fw_version, size_t fw_size);
+/**< @internal Get firmware information of an Ethernet device. */
+
 typedef void (*eth_rxq_info_get_t)(struct rte_eth_dev *dev,
        uint16_t rx_queue_id, struct rte_eth_rxq_info *qinfo);
 
@@ -1183,6 +1224,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq,
                                   uint16_t nb_pkts);
 /**< @internal Send output packets on a transmit queue of an Ethernet device. */
 
+typedef uint16_t (*eth_tx_prep_t)(void *txq,
+                                  struct rte_mbuf **tx_pkts,
+                                  uint16_t nb_pkts);
+/**< @internal Prepare output packets on a transmit queue of an Ethernet device. */
+
 typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
                               struct rte_eth_fc_conf *fc_conf);
 /**< @internal Get current flow control parameter on an Ethernet device */
@@ -1241,39 +1287,11 @@ typedef int (*eth_uc_all_hash_table_set_t)(struct rte_eth_dev *dev,
                                  uint8_t on);
 /**< @internal Set all Unicast Hash bitmap */
 
-typedef int (*eth_set_vf_rx_mode_t)(struct rte_eth_dev *dev,
-                                 uint16_t vf,
-                                 uint16_t rx_mode,
-                                 uint8_t on);
-/**< @internal Set a VF receive mode */
-
-typedef int (*eth_set_vf_rx_t)(struct rte_eth_dev *dev,
-                               uint16_t vf,
-                               uint8_t on);
-/**< @internal Set a VF receive  mode */
-
-typedef int (*eth_set_vf_tx_t)(struct rte_eth_dev *dev,
-                               uint16_t vf,
-                               uint8_t on);
-/**< @internal Enable or disable a VF transmit   */
-
-typedef int (*eth_set_vf_vlan_filter_t)(struct rte_eth_dev *dev,
-                                 uint16_t vlan,
-                                 uint64_t vf_mask,
-                                 uint8_t vlan_on);
-/**< @internal Set VF VLAN pool filter */
-
 typedef int (*eth_set_queue_rate_limit_t)(struct rte_eth_dev *dev,
                                uint16_t queue_idx,
                                uint16_t tx_rate);
 /**< @internal Set queue TX rate */
 
-typedef int (*eth_set_vf_rate_limit_t)(struct rte_eth_dev *dev,
-                               uint16_t vf,
-                               uint16_t tx_rate,
-                               uint64_t q_msk);
-/**< @internal Set VF TX rate */
-
 typedef int (*eth_mirror_rule_set_t)(struct rte_eth_dev *dev,
                                  struct rte_eth_mirror_conf *mirror_conf,
                                  uint8_t rule_id,
@@ -1423,11 +1441,18 @@ struct eth_dev_ops {
        eth_dev_set_link_up_t      dev_set_link_up;   /**< Device link up. */
        eth_dev_set_link_down_t    dev_set_link_down; /**< Device link down. */
        eth_dev_close_t            dev_close;     /**< Close device. */
+       eth_link_update_t          link_update;   /**< Get device link state. */
+
        eth_promiscuous_enable_t   promiscuous_enable; /**< Promiscuous ON. */
        eth_promiscuous_disable_t  promiscuous_disable;/**< Promiscuous OFF. */
        eth_allmulticast_enable_t  allmulticast_enable;/**< RX multicast ON. */
        eth_allmulticast_disable_t allmulticast_disable;/**< RX multicast OF. */
-       eth_link_update_t          link_update;   /**< Get device link state. */
+       eth_mac_addr_remove_t      mac_addr_remove; /**< Remove MAC address. */
+       eth_mac_addr_add_t         mac_addr_add;  /**< Add a MAC address. */
+       eth_mac_addr_set_t         mac_addr_set;  /**< Set a MAC address. */
+       eth_set_mc_addr_list_t     set_mc_addr_list; /**< set list of mcast addrs. */
+       mtu_set_t                  mtu_set;       /**< Set MTU. */
+
        eth_stats_get_t            stats_get;     /**< Get generic device statistics. */
        eth_stats_reset_t          stats_reset;   /**< Reset generic device statistics. */
        eth_xstats_get_t           xstats_get;    /**< Get extended device statistics. */
@@ -1436,109 +1461,93 @@ struct eth_dev_ops {
        /**< Get names of extended statistics. */
        eth_queue_stats_mapping_set_t queue_stats_mapping_set;
        /**< Configure per queue stat counter mapping. */
+
        eth_dev_infos_get_t        dev_infos_get; /**< Get device info. */
+       eth_rxq_info_get_t         rxq_info_get; /**< retrieve RX queue information. */
+       eth_txq_info_get_t         txq_info_get; /**< retrieve TX queue information. */
+       eth_fw_version_get_t       fw_version_get; /**< Get firmware version. */
        eth_dev_supported_ptypes_get_t dev_supported_ptypes_get;
-       /**< Get packet types supported and identified by device*/
-       mtu_set_t                  mtu_set; /**< Set MTU. */
-       vlan_filter_set_t          vlan_filter_set;  /**< Filter VLAN Setup. */
-       vlan_tpid_set_t            vlan_tpid_set;      /**< Outer/Inner VLAN TPID Setup. */
+       /**< Get packet types supported and identified by device*/
+
+       vlan_filter_set_t          vlan_filter_set; /**< Filter VLAN Setup. */
+       vlan_tpid_set_t            vlan_tpid_set; /**< Outer/Inner VLAN TPID Setup. */
        vlan_strip_queue_set_t     vlan_strip_queue_set; /**< VLAN Stripping on queue. */
        vlan_offload_set_t         vlan_offload_set; /**< Set VLAN Offload. */
-       vlan_pvid_set_t            vlan_pvid_set; /**< Set port based TX VLAN insertion */
-       eth_queue_start_t          rx_queue_start;/**< Start RX for a queue.*/
-       eth_queue_stop_t           rx_queue_stop;/**< Stop RX for a queue.*/
-       eth_queue_start_t          tx_queue_start;/**< Start TX for a queue.*/
-       eth_queue_stop_t           tx_queue_stop;/**< Stop TX for a queue.*/
-       eth_rx_queue_setup_t       rx_queue_setup;/**< Set up device RX queue.*/
-       eth_queue_release_t        rx_queue_release;/**< Release RX queue.*/
-       eth_rx_queue_count_t       rx_queue_count; /**< Get Rx queue count. */
-       eth_rx_descriptor_done_t   rx_descriptor_done;  /**< Check rxd DD bit */
-       /**< Enable Rx queue interrupt. */
-       eth_rx_enable_intr_t       rx_queue_intr_enable;
-       /**< Disable Rx queue interrupt.*/
-       eth_rx_disable_intr_t      rx_queue_intr_disable;
-       eth_tx_queue_setup_t       tx_queue_setup;/**< Set up device TX queue.*/
-       eth_queue_release_t        tx_queue_release;/**< Release TX queue.*/
+       vlan_pvid_set_t            vlan_pvid_set; /**< Set port based TX VLAN insertion. */
+
+       eth_queue_start_t          rx_queue_start;/**< Start RX for a queue. */
+       eth_queue_stop_t           rx_queue_stop; /**< Stop RX for a queue. */
+       eth_queue_start_t          tx_queue_start;/**< Start TX for a queue. */
+       eth_queue_stop_t           tx_queue_stop; /**< Stop TX for a queue. */
+       eth_rx_queue_setup_t       rx_queue_setup;/**< Set up device RX queue. */
+       eth_queue_release_t        rx_queue_release; /**< Release RX queue. */
+       eth_rx_queue_count_t       rx_queue_count;/**< Get Rx queue count. */
+       eth_rx_descriptor_done_t   rx_descriptor_done; /**< Check rxd DD bit. */
+       eth_rx_enable_intr_t       rx_queue_intr_enable;  /**< Enable Rx queue interrupt. */
+       eth_rx_disable_intr_t      rx_queue_intr_disable; /**< Disable Rx queue interrupt. */
+       eth_tx_queue_setup_t       tx_queue_setup;/**< Set up device TX queue. */
+       eth_queue_release_t        tx_queue_release; /**< Release TX queue. */
+
        eth_dev_led_on_t           dev_led_on;    /**< Turn on LED. */
        eth_dev_led_off_t          dev_led_off;   /**< Turn off LED. */
+
        flow_ctrl_get_t            flow_ctrl_get; /**< Get flow control. */
        flow_ctrl_set_t            flow_ctrl_set; /**< Setup flow control. */
-       priority_flow_ctrl_set_t   priority_flow_ctrl_set; /**< Setup priority flow control.*/
-       eth_mac_addr_remove_t      mac_addr_remove; /**< Remove MAC address */
-       eth_mac_addr_add_t         mac_addr_add;  /**< Add a MAC address */
-       eth_mac_addr_set_t         mac_addr_set;  /**< Set a MAC address */
-       eth_uc_hash_table_set_t    uc_hash_table_set;  /**< Set Unicast Table Array */
-       eth_uc_all_hash_table_set_t uc_all_hash_table_set;  /**< Set Unicast hash bitmap */
-       eth_mirror_rule_set_t      mirror_rule_set;  /**< Add a traffic mirror rule.*/
-       eth_mirror_rule_reset_t    mirror_rule_reset;  /**< reset a traffic mirror rule.*/
-       eth_set_vf_rx_mode_t       set_vf_rx_mode;   /**< Set VF RX mode */
-       eth_set_vf_rx_t            set_vf_rx;  /**< enable/disable a VF receive */
-       eth_set_vf_tx_t            set_vf_tx;  /**< enable/disable a VF transmit */
-       eth_set_vf_vlan_filter_t   set_vf_vlan_filter;  /**< Set VF VLAN filter */
-       /** Add UDP tunnel port. */
-       eth_udp_tunnel_port_add_t udp_tunnel_port_add;
-       /** Del UDP tunnel port. */
-       eth_udp_tunnel_port_del_t udp_tunnel_port_del;
-       eth_set_queue_rate_limit_t set_queue_rate_limit;   /**< Set queue rate limit */
-       eth_set_vf_rate_limit_t    set_vf_rate_limit;   /**< Set VF rate limit */
-       /** Update redirection table. */
-       reta_update_t reta_update;
-       /** Query redirection table. */
-       reta_query_t reta_query;
-
-       eth_get_reg_t get_reg;
-       /**< Get registers */
-       eth_get_eeprom_length_t get_eeprom_length;
-       /**< Get eeprom length */
-       eth_get_eeprom_t get_eeprom;
-       /**< Get eeprom data */
-       eth_set_eeprom_t set_eeprom;
-       /**< Set eeprom */
-  /* bypass control */
+       priority_flow_ctrl_set_t   priority_flow_ctrl_set; /**< Setup priority flow control. */
+
+       eth_uc_hash_table_set_t    uc_hash_table_set; /**< Set Unicast Table Array. */
+       eth_uc_all_hash_table_set_t uc_all_hash_table_set; /**< Set Unicast hash bitmap. */
+
+       eth_mirror_rule_set_t      mirror_rule_set; /**< Add a traffic mirror rule. */
+       eth_mirror_rule_reset_t    mirror_rule_reset; /**< reset a traffic mirror rule. */
+
+       eth_udp_tunnel_port_add_t  udp_tunnel_port_add; /** Add UDP tunnel port. */
+       eth_udp_tunnel_port_del_t  udp_tunnel_port_del; /** Del UDP tunnel port. */
+       eth_l2_tunnel_eth_type_conf_t l2_tunnel_eth_type_conf;
+       /** Config ether type of l2 tunnel. */
+       eth_l2_tunnel_offload_set_t   l2_tunnel_offload_set;
+       /** Enable/disable l2 tunnel offload functions. */
+
+       eth_set_queue_rate_limit_t set_queue_rate_limit; /**< Set queue rate limit. */
+
+       rss_hash_update_t          rss_hash_update; /** Configure RSS hash protocols. */
+       rss_hash_conf_get_t        rss_hash_conf_get; /** Get current RSS hash configuration. */
+       reta_update_t              reta_update;   /** Update redirection table. */
+       reta_query_t               reta_query;    /** Query redirection table. */
+
+       eth_get_reg_t              get_reg;           /**< Get registers. */
+       eth_get_eeprom_length_t    get_eeprom_length; /**< Get eeprom length. */
+       eth_get_eeprom_t           get_eeprom;        /**< Get eeprom data. */
+       eth_set_eeprom_t           set_eeprom;        /**< Set eeprom. */
+
+       /* bypass control */
 #ifdef RTE_NIC_BYPASS
-  bypass_init_t bypass_init;
-  bypass_state_set_t bypass_state_set;
-  bypass_state_show_t bypass_state_show;
-  bypass_event_set_t bypass_event_set;
-  bypass_event_show_t bypass_event_show;
-  bypass_wd_timeout_set_t bypass_wd_timeout_set;
-  bypass_wd_timeout_show_t bypass_wd_timeout_show;
-  bypass_ver_show_t bypass_ver_show;
-  bypass_wd_reset_t bypass_wd_reset;
+       bypass_init_t              bypass_init;
+       bypass_state_set_t         bypass_state_set;
+       bypass_state_show_t        bypass_state_show;
+       bypass_event_set_t         bypass_event_set;
+       bypass_event_show_t        bypass_event_show;
+       bypass_wd_timeout_set_t    bypass_wd_timeout_set;
+       bypass_wd_timeout_show_t   bypass_wd_timeout_show;
+       bypass_ver_show_t          bypass_ver_show;
+       bypass_wd_reset_t          bypass_wd_reset;
 #endif
 
-       /** Configure RSS hash protocols. */
-       rss_hash_update_t rss_hash_update;
-       /** Get current RSS hash configuration. */
-       rss_hash_conf_get_t rss_hash_conf_get;
-       eth_filter_ctrl_t              filter_ctrl;
-       /**< common filter control. */
-       eth_set_mc_addr_list_t set_mc_addr_list; /**< set list of mcast addrs */
-       eth_rxq_info_get_t rxq_info_get;
-       /**< retrieve RX queue information. */
-       eth_txq_info_get_t txq_info_get;
-       /**< retrieve TX queue information. */
+       eth_filter_ctrl_t          filter_ctrl; /**< common filter control. */
+
+       eth_get_dcb_info           get_dcb_info; /** Get DCB information. */
+
+       eth_timesync_enable_t      timesync_enable;
        /** Turn IEEE1588/802.1AS timestamping on. */
-       eth_timesync_enable_t timesync_enable;
+       eth_timesync_disable_t     timesync_disable;
        /** Turn IEEE1588/802.1AS timestamping off. */
-       eth_timesync_disable_t timesync_disable;
-       /** Read the IEEE1588/802.1AS RX timestamp. */
        eth_timesync_read_rx_timestamp_t timesync_read_rx_timestamp;
-       /** Read the IEEE1588/802.1AS TX timestamp. */
+       /** Read the IEEE1588/802.1AS RX timestamp. */
        eth_timesync_read_tx_timestamp_t timesync_read_tx_timestamp;
-
-       /** Get DCB information */
-       eth_get_dcb_info get_dcb_info;
-       /** Adjust the device clock.*/
-       eth_timesync_adjust_time timesync_adjust_time;
-       /** Get the device clock time. */
-       eth_timesync_read_time timesync_read_time;
-       /** Set the device clock time. */
-       eth_timesync_write_time timesync_write_time;
-       /** Config ether type of l2 tunnel */
-       eth_l2_tunnel_eth_type_conf_t l2_tunnel_eth_type_conf;
-       /** Enable/disable l2 tunnel offload functions */
-       eth_l2_tunnel_offload_set_t l2_tunnel_offload_set;
+       /** Read the IEEE1588/802.1AS TX timestamp. */
+       eth_timesync_adjust_time   timesync_adjust_time; /** Adjust the device clock. */
+       eth_timesync_read_time     timesync_read_time; /** Get the device clock time. */
+       eth_timesync_write_time    timesync_write_time; /** Set the device clock time. */
 };
 
 /**
@@ -1604,17 +1613,6 @@ struct rte_eth_rxtx_callback {
        void *param;
 };
 
-/**
- * The eth device type.
- */
-enum rte_eth_dev_type {
-       RTE_ETH_DEV_UNKNOWN,    /**< unknown device type */
-       RTE_ETH_DEV_PCI,
-               /**< Physical function and Virtual function of PCI devices */
-       RTE_ETH_DEV_VIRTUAL,    /**< non hardware device */
-       RTE_ETH_DEV_MAX         /**< max value of this enum */
-};
-
 /**
  * @internal
  * The generic data structure associated with each ethernet device.
@@ -1628,10 +1626,12 @@ enum rte_eth_dev_type {
 struct rte_eth_dev {
        eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
        eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
+       eth_tx_prep_t tx_pkt_prepare; /**< Pointer to PMD transmit prepare function. */
        struct rte_eth_dev_data *data;  /**< Pointer to device data */
        const struct eth_driver *driver;/**< Driver for this device */
        const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
-       struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
+       struct rte_device *device; /**< Backing device */
+       struct rte_intr_handle *intr_handle; /**< Device interrupt handle */
        /** User application callbacks for NIC interrupts */
        struct rte_eth_dev_cb_list link_intr_cbs;
        /**
@@ -1645,7 +1645,6 @@ struct rte_eth_dev {
         */
        struct rte_eth_rxtx_callback *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT];
        uint8_t attached; /**< Flag indicating the port is attached */
-       enum rte_eth_dev_type dev_type; /**< Flag indicating the device type */
 } __rte_cache_aligned;
 
 struct rte_eth_dev_sriov {
@@ -1693,6 +1692,7 @@ struct rte_eth_dev_data {
        struct ether_addr* hash_mac_addrs;
        /** Device Ethernet MAC addresses of hash filtering. */
        uint8_t port_id;           /**< Device [external] port identifier. */
+       __extension__
        uint8_t promiscuous   : 1, /**< RX promiscuous mode ON(1) / OFF(0). */
                scattered_rx : 1,  /**< RX of scattered packets is ON(1) / OFF(0) */
                all_multicast : 1, /**< RX all multicast mode ON(1) / OFF(0). */
@@ -1758,8 +1758,7 @@ struct rte_eth_dev *rte_eth_dev_allocated(const char *name);
  * @return
  *   - Slot in the rte_dev_devices array for a new device;
  */
-struct rte_eth_dev *rte_eth_dev_allocate(const char *name,
-               enum rte_eth_dev_type type);
+struct rte_eth_dev *rte_eth_dev_allocate(const char *name);
 
 /**
  * @internal
@@ -1778,7 +1777,7 @@ int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev);
  * @param devargs
  *  A pointer to a strings array describing the new device
  *  to be attached. The strings should be a pci address like
- *  '0000:01:00.0' or virtual device name like 'eth_pcap0'.
+ *  '0000:01:00.0' or virtual device name like 'net_pcap0'.
  * @param port_id
  *  A pointer to a port identifier actually attached.
  * @return
@@ -1872,18 +1871,6 @@ struct eth_driver {
        unsigned int dev_private_size;    /**< Size of device private data. */
 };
 
-/**
- * @internal
- * A function invoked by the initialization function of an Ethernet driver
- * to simultaneously register itself as a PCI driver and as an Ethernet
- * Poll Mode Driver (PMD).
- *
- * @param eth_drv
- *   The pointer to the *eth_driver* structure associated with
- *   the Ethernet driver.
- */
-void rte_eth_driver_register(struct eth_driver *eth_drv);
-
 /**
  * Convert a numerical speed in Mbps to a bitmap flag that can be used in
  * the bitmap link_speeds of the struct rte_eth_conf
@@ -1929,6 +1916,19 @@ uint32_t rte_eth_speed_bitflag(uint32_t speed, int duplex);
 int rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_queue,
                uint16_t nb_tx_queue, const struct rte_eth_conf *eth_conf);
 
+/**
+ * @internal
+ * Release device queues and clear its configuration to force the user
+ * application to reconfigure it. It is for internal use only.
+ *
+ * @param dev
+ *  Pointer to struct rte_eth_dev.
+ *
+ * @return
+ *  void
+ */
+void _rte_eth_dev_reset(struct rte_eth_dev *dev);
+
 /**
  * Allocate and set up a receive queue for an Ethernet device.
  *
@@ -2288,18 +2288,19 @@ void rte_eth_stats_reset(uint8_t port_id);
  * @param port_id
  *   The port identifier of the Ethernet device.
  * @param xstats_names
- *  Block of memory to insert names into. Must be at least size in capacity.
- *  If set to NULL, function returns required capacity.
+ *   An rte_eth_xstat_name array of at least *size* elements to
+ *   be filled. If set to NULL, the function returns the required number
+ *   of elements.
  * @param size
- *  Capacity of xstats_names (number of names).
+ *   The size of the xstats_names array (number of elements).
  * @return
- *   - positive value lower or equal to size: success. The return value
+ *   - positive value lower or equal to size: success. The return value
  *     is the number of entries filled in the stats table.
- *   - positive value higher than size: error, the given statistics table
+ *   - positive value higher than size: error, the given statistics table
  *     is too small. The return value corresponds to the size that should
  *     be given to succeed. The entries in the table are not valid and
  *     shall not be used by the caller.
- *   - negative value on error (invalid port id)
+ *   - A negative value on error (invalid port id).
  */
 int rte_eth_xstats_get_names(uint8_t port_id,
                struct rte_eth_xstat_name *xstats_names,
@@ -2312,19 +2313,20 @@ int rte_eth_xstats_get_names(uint8_t port_id,
  *   The port identifier of the Ethernet device.
  * @param xstats
  *   A pointer to a table of structure of type *rte_eth_xstat*
- *   to be filled with device statistics ids and values.
+ *   to be filled with device statistics ids and values: id is the
+ *   index of the name string in xstats_names (see rte_eth_xstats_get_names()),
+ *   and value is the statistic counter.
  *   This parameter can be set to NULL if n is 0.
  * @param n
- *   The size of the stats table, which should be large enough to store
- *   all the statistics of the device.
+ *   The size of the xstats array (number of elements).
  * @return
- *   - positive value lower or equal to n: success. The return value
+ *   - positive value lower or equal to n: success. The return value
  *     is the number of entries filled in the stats table.
- *   - positive value higher than n: error, the given statistics table
+ *   - positive value higher than n: error, the given statistics table
  *     is too small. The return value corresponds to the size that should
  *     be given to succeed. The entries in the table are not valid and
  *     shall not be used by the caller.
- *   - negative value on error (invalid port id)
+ *   - A negative value on error (invalid port id).
  */
 int rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats,
                unsigned n);
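A hedged usage sketch combining rte_eth_xstats_get_names() and rte_eth_xstats_get() as documented above; the sizing pass, allocation and printing are illustrative only:

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <rte_ethdev.h>

/* Illustrative: print every extended statistic of a port by mapping each
 * xstats[i].id back to its entry in the name array. */
static void
example_dump_xstats(uint8_t port_id)
{
        struct rte_eth_xstat_name *names = NULL;
        struct rte_eth_xstat *vals = NULL;
        int i, n, ret;

        n = rte_eth_xstats_get_names(port_id, NULL, 0);
        if (n <= 0)
                return;
        names = calloc(n, sizeof(*names));
        vals = calloc(n, sizeof(*vals));
        if (names == NULL || vals == NULL)
                goto out;
        if (rte_eth_xstats_get_names(port_id, names, n) != n)
                goto out;
        ret = rte_eth_xstats_get(port_id, vals, n);
        if (ret < 0 || ret > n)
                goto out;
        for (i = 0; i < ret; i++)
                printf("%s: %" PRIu64 "\n",
                       names[vals[i].id].name, vals[i].value);
out:
        free(names);
        free(vals);
}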
@@ -2400,6 +2402,27 @@ void rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr);
  */
 void rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info);
 
+/**
+ * Retrieve the firmware version of a device.
+ *
+ * @param port_id
+ *   The port identifier of the device.
+ * @param fw_version
+ *   A pointer to a string array storing the firmware version of a device,
+ *   A pointer to a character array in which to store the firmware version of
+ *   the device; the string includes the terminating null. The buffer is
+ *   allocated by the caller.
+ *   The size of the string array pointed by fw_version, which should be
+ *   large enough to store firmware version of the device.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if operation is not supported.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (>0) if *fw_size* is not large enough to store the firmware version;
+ *          the return value is the size of the non-truncated string.
+ */
+int rte_eth_dev_fw_version_get(uint8_t port_id,
+                              char *fw_version, size_t fw_size);
+
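A minimal, hedged caller sketch for rte_eth_dev_fw_version_get(); the 64-byte buffer and the helper name are arbitrary choices for the example:

#include <stdio.h>
#include <rte_ethdev.h>

static void
example_print_fw_version(uint8_t port_id)
{
        char fw[64]; /* arbitrary size for the example */
        int ret = rte_eth_dev_fw_version_get(port_id, fw, sizeof(fw));

        if (ret == 0)
                printf("port %u firmware: %s\n", port_id, fw);
        else if (ret > 0)
                printf("port %u: firmware string needs %d bytes\n",
                       port_id, ret);
        else
                printf("port %u: firmware version not available (%d)\n",
                       port_id, ret);
}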
 /**
  * Retrieve the supported packet types of an Ethernet device.
  *
@@ -2835,6 +2858,115 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
        return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
 }
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Process a burst of output packets on a transmit queue of an Ethernet device.
+ *
+ * The rte_eth_tx_prepare() function is invoked to prepare output packets to be
+ * transmitted on the output queue *queue_id* of the Ethernet device designated
+ * by its *port_id*.
+ * The *nb_pkts* parameter is the number of packets to be prepared which are
+ * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them
+ * allocated from a pool created with rte_pktmbuf_pool_create().
+ * For each packet to send, the rte_eth_tx_prepare() function performs
+ * the following operations:
+ *
+ * - Check that the packet meets the device's requirements for TX offloads.
+ *
+ * - Check the limits on the number of segments.
+ *
+ * - Check additional requirements when debug is enabled.
+ *
+ * - Update and/or reset required checksums when tx offload is set for packet.
+ *
+ * Since this function can modify packet data, provided mbufs must be safely
+ * writable (e.g. modified data cannot be in shared segment).
+ *
+ * The rte_eth_tx_prepare() function returns the number of packets ready to be
+ * sent. A return value equal to *nb_pkts* means that all packets are valid and
+ * ready to be sent; otherwise, processing stops at the first invalid packet
+ * and the remaining packets are left untouched.
+ *
+ * When this functionality is not implemented in the driver, all packets
+ * are returned untouched.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ *   The value must be a valid port id.
+ * @param queue_id
+ *   The index of the transmit queue through which output packets must be
+ *   sent.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param tx_pkts
+ *   The address of an array of *nb_pkts* pointers to *rte_mbuf* structures
+ *   which contain the output packets.
+ * @param nb_pkts
+ *   The maximum number of packets to process.
+ * @return
+ *   The number of packets that are correct and ready to be sent. The return
+ *   value can be less than the value of the *nb_pkts* parameter when some
+ *   packet does not meet the device's requirements; rte_errno is then set
+ *   appropriately:
+ *   - -EINVAL: offload flags are not correctly set
+ *   - -ENOTSUP: the offload feature is not supported by the hardware
+ *
+ */
+
+#ifndef RTE_ETHDEV_TX_PREPARE_NOOP
+
+static inline uint16_t
+rte_eth_tx_prepare(uint8_t port_id, uint16_t queue_id,
+               struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+       struct rte_eth_dev *dev;
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+       if (!rte_eth_dev_is_valid_port(port_id)) {
+               RTE_PMD_DEBUG_TRACE("Invalid TX port_id=%d\n", port_id);
+               rte_errno = -EINVAL;
+               return 0;
+       }
+#endif
+
+       dev = &rte_eth_devices[port_id];
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+       if (queue_id >= dev->data->nb_tx_queues) {
+               RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
+               rte_errno = -EINVAL;
+               return 0;
+       }
+#endif
+
+       if (!dev->tx_pkt_prepare)
+               return nb_pkts;
+
+       return (*dev->tx_pkt_prepare)(dev->data->tx_queues[queue_id],
+                       tx_pkts, nb_pkts);
+}
+
+#else
+
+/*
+ * Native NOOP operation for compilation targets which do not require any
+ * preparation steps, and where a functional NOOP may introduce an unnecessary
+ * performance drop.
+ *
+ * Generally it is not a good idea to turn this on globally, and it should not
+ * be used if the behavior of tx_prepare can change.
+ */
+
+static inline uint16_t
+rte_eth_tx_prepare(__rte_unused uint8_t port_id, __rte_unused uint16_t queue_id,
+               __rte_unused struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+       return nb_pkts;
+}
+
+#endif
+
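A minimal sketch of pairing rte_eth_tx_prepare() with rte_eth_tx_burst(),
assuming a configured and started port and an mbuf array filled by the caller:

#include <stdio.h>
#include <rte_ethdev.h>
#include <rte_errno.h>
#include <rte_mbuf.h>

static uint16_t
send_burst(uint8_t port_id, uint16_t queue_id,
           struct rte_mbuf **pkts, uint16_t nb_pkts)
{
        uint16_t nb_prep, nb_sent;

        /* Validate/fix offload metadata; stops at the first invalid mbuf. */
        nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
        if (nb_prep != nb_pkts)
                printf("tx_prepare rejected packet %u (rte_errno=%d)\n",
                       (unsigned)nb_prep, rte_errno);

        /* Transmit only the packets that passed preparation. */
        nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
        return nb_sent;
}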
 typedef void (*buffer_tx_error_fn)(struct rte_mbuf **unsent, uint16_t count,
                void *userdata);
 
@@ -3049,6 +3181,8 @@ enum rte_eth_event_type {
                                /**< queue state event (enabled/disabled) */
        RTE_ETH_EVENT_INTR_RESET,
                        /**< reset interrupt event, sent to VF on PF reset */
+       RTE_ETH_EVENT_VF_MBOX,  /**< message from the VF received by PF */
+       RTE_ETH_EVENT_MACSEC,   /**< MACsec offload related event */
        RTE_ETH_EVENT_MAX       /**< max value of this enum */
 };
 
@@ -3070,6 +3204,11 @@ typedef void (*rte_eth_dev_cb_fn)(uint8_t port_id, \
  * @param cb_arg
  *  Pointer to the parameters for the registered callback.
  *
+ *  The user data is overwritten in the case of RTE_ETH_EVENT_VF_MBOX.
+ *     This event occurs when a message from the VF is received by the PF.
+ *     The user data is overwritten with struct rte_pmd_ixgbe_mb_event_param.
+ *     This struct is defined in rte_pmd_ixgbe.h.
+ *
  * @return
  *  - On success, zero.
  *  - On failure, a negative value.
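For illustration, a hypothetical application-side handler for the two new event
types (registration shown as a comment; a valid port_id is assumed):

#include <stdio.h>
#include <rte_ethdev.h>

static void
event_cb(uint8_t port_id, enum rte_eth_event_type event, void *cb_arg)
{
        if (event == RTE_ETH_EVENT_VF_MBOX) {
                /* For ixgbe, cb_arg now points to a
                 * struct rte_pmd_ixgbe_mb_event_param (see rte_pmd_ixgbe.h). */
                printf("port %u: VF mailbox message\n", (unsigned)port_id);
        } else if (event == RTE_ETH_EVENT_MACSEC) {
                printf("port %u: MACsec event\n", (unsigned)port_id);
        }
        (void)cb_arg;
}

/* rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_VF_MBOX, event_cb, NULL); */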
@@ -3108,12 +3247,16 @@ int rte_eth_dev_callback_unregister(uint8_t port_id,
  *  Pointer to struct rte_eth_dev.
  * @param event
  *  Eth device interrupt event type.
+ * @param cb_arg
+ *  Update callback parameter to pass data back to user application.
+ *  This allows the user application to decide if a particular function
+ *  is permitted or not.
  *
  * @return
  *  void
  */
 void _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
-                               enum rte_eth_event_type event);
+                               enum rte_eth_event_type event, void *cb_arg);
 
 /**
  * When there is no rx packet coming in Rx Queue for a long time, we can
@@ -3409,93 +3552,6 @@ int rte_eth_dev_uc_hash_table_set(uint8_t port,struct ether_addr *addr,
  */
 int rte_eth_dev_uc_all_hash_table_set(uint8_t port,uint8_t on);
 
- /**
- * Set RX L2 Filtering mode of a VF of an Ethernet device.
- *
- * @param port
- *   The port identifier of the Ethernet device.
- * @param vf
- *   VF id.
- * @param rx_mode
- *    The RX mode mask, which  is one or more of  accepting Untagged Packets,
- *    packets that match the PFUTA table, Broadcast and Multicast Promiscuous.
- *    ETH_VMDQ_ACCEPT_UNTAG,ETH_VMDQ_ACCEPT_HASH_UC,
- *    ETH_VMDQ_ACCEPT_BROADCAST and ETH_VMDQ_ACCEPT_MULTICAST will be used
- *    in rx_mode.
- * @param on
- *    1 - Enable a VF RX mode.
- *    0 - Disable a VF RX mode.
- * @return
- *   - (0) if successful.
- *   - (-ENOTSUP) if hardware doesn't support.
- *   - (-ENOTSUP) if hardware doesn't support.
- *   - (-EINVAL) if bad parameter.
- */
-int rte_eth_dev_set_vf_rxmode(uint8_t port, uint16_t vf, uint16_t rx_mode,
-                               uint8_t on);
-
-/**
-* Enable or disable a VF traffic transmit of the Ethernet device.
-*
-* @param port
-*   The port identifier of the Ethernet device.
-* @param vf
-*   VF id.
-* @param on
-*    1 - Enable a VF traffic transmit.
-*    0 - Disable a VF traffic transmit.
-* @return
-*   - (0) if successful.
-*   - (-ENODEV) if *port_id* invalid.
-*   - (-ENOTSUP) if hardware doesn't support.
-*   - (-EINVAL) if bad parameter.
-*/
-int
-rte_eth_dev_set_vf_tx(uint8_t port,uint16_t vf, uint8_t on);
-
-/**
-* Enable or disable a VF traffic receive of an Ethernet device.
-*
-* @param port
-*   The port identifier of the Ethernet device.
-* @param vf
-*   VF id.
-* @param on
-*    1 - Enable a VF traffic receive.
-*    0 - Disable a VF traffic receive.
-* @return
-*   - (0) if successful.
-*   - (-ENOTSUP) if hardware doesn't support.
-*   - (-ENODEV) if *port_id* invalid.
-*   - (-EINVAL) if bad parameter.
-*/
-int
-rte_eth_dev_set_vf_rx(uint8_t port,uint16_t vf, uint8_t on);
-
-/**
-* Enable/Disable hardware VF VLAN filtering by an Ethernet device of
-* received VLAN packets tagged with a given VLAN Tag Identifier.
-*
-* @param port id
-*   The port identifier of the Ethernet device.
-* @param vlan_id
-*   The VLAN Tag Identifier whose filtering must be enabled or disabled.
-* @param vf_mask
-*    Bitmap listing which VFs participate in the VLAN filtering.
-* @param vlan_on
-*    1 - Enable VFs VLAN filtering.
-*    0 - Disable VFs VLAN filtering.
-* @return
-*   - (0) if successful.
-*   - (-ENOTSUP) if hardware doesn't support.
-*   - (-ENODEV) if *port_id* invalid.
-*   - (-EINVAL) if bad parameter.
-*/
-int
-rte_eth_dev_set_vf_vlan_filter(uint8_t port, uint16_t vlan_id,
-                               uint64_t vf_mask,
-                               uint8_t vlan_on);
-
 /**
  * Set a traffic mirroring rule on an Ethernet device
  *
@@ -3556,26 +3612,6 @@ int rte_eth_mirror_rule_reset(uint8_t port_id,
 int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx,
                        uint16_t tx_rate);
 
-/**
- * Set the rate limitation for a vf on an Ethernet device.
- *
- * @param port_id
- *   The port identifier of the Ethernet device.
- * @param vf
- *   VF id.
- * @param tx_rate
- *   The tx rate allocated from the total link speed for this VF id.
- * @param q_msk
- *   The queue mask which need to set the rate.
- * @return
- *   - (0) if successful.
- *   - (-ENOTSUP) if hardware doesn't support this feature.
- *   - (-ENODEV) if *port_id* invalid.
- *   - (-EINVAL) if bad parameter.
- */
-int rte_eth_set_vf_rate_limit(uint8_t port_id, uint16_t vf,
-                       uint16_t tx_rate, uint64_t q_msk);
-
 /**
  * Initialize bypass logic. This function needs to be called before
  * executing any other bypass API.
@@ -4343,7 +4379,7 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
 
 /**
 * Get the port id from PCI address or device name
-* Ex: 0000:2:00.0 or vdev name eth_pcap0
+* Ex: 0000:2:00.0 or vdev name net_pcap0
 *
 * @param name
 *  pci address or name of the device
@@ -4370,6 +4406,21 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id);
 int
 rte_eth_dev_get_name_by_port(uint8_t port_id, char *name);
 
+/**
+ * @internal
+ * Wrapper for use by PCI drivers as a .probe function to attach to an ethdev
+ * interface.
+ */
+int rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv,
+                         struct rte_pci_device *pci_dev);
+
+/**
+ * @internal
+ * Wrapper for use by PCI drivers as a .remove function to detach an ethdev
+ * interface.
+ */
+int rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/dpdk/lib/librte_ether/rte_flow.c b/src/dpdk/lib/librte_ether/rte_flow.c
new file mode 100644 (file)
index 0000000..aaa70d6
--- /dev/null
@@ -0,0 +1,159 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2016 6WIND S.A.
+ *   Copyright 2016 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+#include <rte_errno.h>
+#include <rte_branch_prediction.h>
+#include "rte_ethdev.h"
+#include "rte_flow_driver.h"
+#include "rte_flow.h"
+
+/* Get generic flow operations structure from a port. */
+const struct rte_flow_ops *
+rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error)
+{
+       struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+       const struct rte_flow_ops *ops;
+       int code;
+
+       if (unlikely(!rte_eth_dev_is_valid_port(port_id)))
+               code = ENODEV;
+       else if (unlikely(!dev->dev_ops->filter_ctrl ||
+                         dev->dev_ops->filter_ctrl(dev,
+                                                   RTE_ETH_FILTER_GENERIC,
+                                                   RTE_ETH_FILTER_GET,
+                                                   &ops) ||
+                         !ops))
+               code = ENOSYS;
+       else
+               return ops;
+       rte_flow_error_set(error, code, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                          NULL, rte_strerror(code));
+       return NULL;
+}
+
+/* Check whether a flow rule can be created on a given port. */
+int
+rte_flow_validate(uint8_t port_id,
+                 const struct rte_flow_attr *attr,
+                 const struct rte_flow_item pattern[],
+                 const struct rte_flow_action actions[],
+                 struct rte_flow_error *error)
+{
+       const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error);
+       struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+       if (unlikely(!ops))
+               return -rte_errno;
+       if (likely(!!ops->validate))
+               return ops->validate(dev, attr, pattern, actions, error);
+       return -rte_flow_error_set(error, ENOSYS,
+                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                  NULL, rte_strerror(ENOSYS));
+}
+
+/* Create a flow rule on a given port. */
+struct rte_flow *
+rte_flow_create(uint8_t port_id,
+               const struct rte_flow_attr *attr,
+               const struct rte_flow_item pattern[],
+               const struct rte_flow_action actions[],
+               struct rte_flow_error *error)
+{
+       struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+       const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error);
+
+       if (unlikely(!ops))
+               return NULL;
+       if (likely(!!ops->create))
+               return ops->create(dev, attr, pattern, actions, error);
+       rte_flow_error_set(error, ENOSYS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                          NULL, rte_strerror(ENOSYS));
+       return NULL;
+}
+
+/* Destroy a flow rule on a given port. */
+int
+rte_flow_destroy(uint8_t port_id,
+                struct rte_flow *flow,
+                struct rte_flow_error *error)
+{
+       struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+       const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error);
+
+       if (unlikely(!ops))
+               return -rte_errno;
+       if (likely(!!ops->destroy))
+               return ops->destroy(dev, flow, error);
+       return -rte_flow_error_set(error, ENOSYS,
+                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                  NULL, rte_strerror(ENOSYS));
+}
+
+/* Destroy all flow rules associated with a port. */
+int
+rte_flow_flush(uint8_t port_id,
+              struct rte_flow_error *error)
+{
+       struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+       const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error);
+
+       if (unlikely(!ops))
+               return -rte_errno;
+       if (likely(!!ops->flush))
+               return ops->flush(dev, error);
+       return -rte_flow_error_set(error, ENOSYS,
+                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                  NULL, rte_strerror(ENOSYS));
+}
+
+/* Query an existing flow rule. */
+int
+rte_flow_query(uint8_t port_id,
+              struct rte_flow *flow,
+              enum rte_flow_action_type action,
+              void *data,
+              struct rte_flow_error *error)
+{
+       struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+       const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error);
+
+       if (!ops)
+               return -rte_errno;
+       if (likely(!!ops->query))
+               return ops->query(dev, flow, action, data, error);
+       return -rte_flow_error_set(error, ENOSYS,
+                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                  NULL, rte_strerror(ENOSYS));
+}
diff --git a/src/dpdk/lib/librte_ether/rte_flow.h b/src/dpdk/lib/librte_ether/rte_flow.h
new file mode 100644 (file)
index 0000000..171a569
--- /dev/null
@@ -0,0 +1,1090 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2016 6WIND S.A.
+ *   Copyright 2016 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_FLOW_H_
+#define RTE_FLOW_H_
+
+/**
+ * @file
+ * RTE generic flow API
+ *
+ * This interface provides the ability to program packet matching and
+ * associated actions in hardware through flow rules.
+ */
+
+#include <rte_arp.h>
+#include <rte_ether.h>
+#include <rte_icmp.h>
+#include <rte_ip.h>
+#include <rte_sctp.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Flow rule attributes.
+ *
+ * Priorities are set on two levels: per group and per rule within groups.
+ *
+ * Lower values denote higher priority, the highest priority for both levels
+ * is 0, so that a rule with priority 0 in group 8 is always matched after a
+ * rule with priority 8 in group 0.
+ *
+ * Although optional, applications are encouraged to group similar rules as
+ * much as possible to fully take advantage of hardware capabilities
+ * (e.g. optimized matching) and work around limitations (e.g. a single
+ * pattern type possibly allowed in a given group).
+ *
+ * Group and priority levels are arbitrary and up to the application, they
+ * do not need to be contiguous nor start from 0, however the maximum number
+ * varies between devices and may be affected by existing flow rules.
+ *
+ * If a packet is matched by several rules of a given group for a given
+ * priority level, the outcome is undefined. It can take any path, may be
+ * duplicated or even cause unrecoverable errors.
+ *
+ * Note that support for more than a single group and priority level is not
+ * guaranteed.
+ *
+ * Flow rules can apply to inbound and/or outbound traffic (ingress/egress).
+ *
+ * Several pattern items and actions are valid and can be used in both
+ * directions. Those valid for only one direction are described as such.
+ *
+ * At least one direction must be specified.
+ *
+ * Specifying both directions at once for a given rule is not recommended
+ * but may be valid in a few cases (e.g. shared counter).
+ */
+struct rte_flow_attr {
+       uint32_t group; /**< Priority group. */
+       uint32_t priority; /**< Priority level within group. */
+       uint32_t ingress:1; /**< Rule applies to ingress traffic. */
+       uint32_t egress:1; /**< Rule applies to egress traffic. */
+       uint32_t reserved:30; /**< Reserved, must be zero. */
+};
+
+/**
+ * Matching pattern item types.
+ *
+ * Pattern items fall in two categories:
+ *
+ * - Matching protocol headers and packet data (ANY, RAW, ETH, VLAN, IPV4,
+ *   IPV6, ICMP, UDP, TCP, SCTP, VXLAN and so on), usually associated with a
+ *   specification structure. These must be stacked in the same order as the
+ *   protocol layers to match, starting from the lowest.
+ *
+ * - Matching meta-data or affecting pattern processing (END, VOID, INVERT,
+ *   PF, VF, PORT and so on), often without a specification structure. Since
+ *   they do not match packet contents, these can be specified anywhere
+ *   within item lists without affecting others.
+ *
+ * See the description of individual types for more information. Those
+ * marked with [META] fall into the second category.
+ */
+enum rte_flow_item_type {
+       /**
+        * [META]
+        *
+        * End marker for item lists. Prevents further processing of items,
+        * thereby ending the pattern.
+        *
+        * No associated specification structure.
+        */
+       RTE_FLOW_ITEM_TYPE_END,
+
+       /**
+        * [META]
+        *
+        * Used as a placeholder for convenience. It is ignored and simply
+        * discarded by PMDs.
+        *
+        * No associated specification structure.
+        */
+       RTE_FLOW_ITEM_TYPE_VOID,
+
+       /**
+        * [META]
+        *
+        * Inverted matching, i.e. process packets that do not match the
+        * pattern.
+        *
+        * No associated specification structure.
+        */
+       RTE_FLOW_ITEM_TYPE_INVERT,
+
+       /**
+        * Matches any protocol in place of the current layer, a single ANY
+        * may also stand for several protocol layers.
+        *
+        * See struct rte_flow_item_any.
+        */
+       RTE_FLOW_ITEM_TYPE_ANY,
+
+       /**
+        * [META]
+        *
+        * Matches packets addressed to the physical function of the device.
+        *
+        * If the underlying device function differs from the one that would
+        * normally receive the matched traffic, specifying this item
+        * prevents it from reaching that device unless the flow rule
+        * contains a PF action. Packets are not duplicated between device
+        * instances by default.
+        *
+        * No associated specification structure.
+        */
+       RTE_FLOW_ITEM_TYPE_PF,
+
+       /**
+        * [META]
+        *
+        * Matches packets addressed to a virtual function ID of the device.
+        *
+        * If the underlying device function differs from the one that would
+        * normally receive the matched traffic, specifying this item
+        * prevents it from reaching that device unless the flow rule
+        * contains a VF action. Packets are not duplicated between device
+        * instances by default.
+        *
+        * See struct rte_flow_item_vf.
+        */
+       RTE_FLOW_ITEM_TYPE_VF,
+
+       /**
+        * [META]
+        *
+        * Matches packets coming from the specified physical port of the
+        * underlying device.
+        *
+        * The first PORT item overrides the physical port normally
+        * associated with the specified DPDK input port (port_id). This
+        * item can be provided several times to match additional physical
+        * ports.
+        *
+        * See struct rte_flow_item_port.
+        */
+       RTE_FLOW_ITEM_TYPE_PORT,
+
+       /**
+        * Matches a byte string of a given length at a given offset.
+        *
+        * See struct rte_flow_item_raw.
+        */
+       RTE_FLOW_ITEM_TYPE_RAW,
+
+       /**
+        * Matches an Ethernet header.
+        *
+        * See struct rte_flow_item_eth.
+        */
+       RTE_FLOW_ITEM_TYPE_ETH,
+
+       /**
+        * Matches an 802.1Q/ad VLAN tag.
+        *
+        * See struct rte_flow_item_vlan.
+        */
+       RTE_FLOW_ITEM_TYPE_VLAN,
+
+       /**
+        * Matches an IPv4 header.
+        *
+        * See struct rte_flow_item_ipv4.
+        */
+       RTE_FLOW_ITEM_TYPE_IPV4,
+
+       /**
+        * Matches an IPv6 header.
+        *
+        * See struct rte_flow_item_ipv6.
+        */
+       RTE_FLOW_ITEM_TYPE_IPV6,
+
+       /**
+        * Matches an ICMP header.
+        *
+        * See struct rte_flow_item_icmp.
+        */
+       RTE_FLOW_ITEM_TYPE_ICMP,
+
+       /**
+        * Matches a UDP header.
+        *
+        * See struct rte_flow_item_udp.
+        */
+       RTE_FLOW_ITEM_TYPE_UDP,
+
+       /**
+        * Matches a TCP header.
+        *
+        * See struct rte_flow_item_tcp.
+        */
+       RTE_FLOW_ITEM_TYPE_TCP,
+
+       /**
+        * Matches a SCTP header.
+        *
+        * See struct rte_flow_item_sctp.
+        */
+       RTE_FLOW_ITEM_TYPE_SCTP,
+
+       /**
+        * Matches a VXLAN header.
+        *
+        * See struct rte_flow_item_vxlan.
+        */
+       RTE_FLOW_ITEM_TYPE_VXLAN,
+
+       /**
+        * Matches an E_TAG header.
+        *
+        * See struct rte_flow_item_e_tag.
+        */
+       RTE_FLOW_ITEM_TYPE_E_TAG,
+
+       /**
+        * Matches an NVGRE header.
+        *
+        * See struct rte_flow_item_nvgre.
+        */
+       RTE_FLOW_ITEM_TYPE_NVGRE,
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_ANY
+ *
+ * Matches any protocol in place of the current layer, a single ANY may also
+ * stand for several protocol layers.
+ *
+ * This is usually specified as the first pattern item when looking for a
+ * protocol anywhere in a packet.
+ *
+ * A zeroed mask stands for any number of layers.
+ */
+struct rte_flow_item_any {
+       uint32_t num; /**< Number of layers covered. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_ANY. */
+static const struct rte_flow_item_any rte_flow_item_any_mask = {
+       .num = 0x00000000,
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_VF
+ *
+ * Matches packets addressed to a virtual function ID of the device.
+ *
+ * If the underlying device function differs from the one that would
+ * normally receive the matched traffic, specifying this item prevents it
+ * from reaching that device unless the flow rule contains a VF
+ * action. Packets are not duplicated between device instances by default.
+ *
+ * - Likely to return an error or never match any traffic if this causes a
+ *   VF device to match traffic addressed to a different VF.
+ * - Can be specified multiple times to match traffic addressed to several
+ *   VF IDs.
+ * - Can be combined with a PF item to match both PF and VF traffic.
+ *
+ * A zeroed mask can be used to match any VF ID.
+ */
+struct rte_flow_item_vf {
+       uint32_t id; /**< Destination VF ID. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_VF. */
+static const struct rte_flow_item_vf rte_flow_item_vf_mask = {
+       .id = 0x00000000,
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_PORT
+ *
+ * Matches packets coming from the specified physical port of the underlying
+ * device.
+ *
+ * The first PORT item overrides the physical port normally associated with
+ * the specified DPDK input port (port_id). This item can be provided
+ * several times to match additional physical ports.
+ *
+ * Note that physical ports are not necessarily tied to DPDK input ports
+ * (port_id) when those are not under DPDK control. Possible values are
+ * specific to each device, they are not necessarily indexed from zero and
+ * may not be contiguous.
+ *
+ * As a device property, the list of allowed values as well as the value
+ * associated with a port_id should be retrieved by other means.
+ *
+ * A zeroed mask can be used to match any port index.
+ */
+struct rte_flow_item_port {
+       uint32_t index; /**< Physical port index. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_PORT. */
+static const struct rte_flow_item_port rte_flow_item_port_mask = {
+       .index = 0x00000000,
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_RAW
+ *
+ * Matches a byte string of a given length at a given offset.
+ *
+ * Offset is either absolute (using the start of the packet) or relative to
+ * the end of the previous matched item in the stack, in which case negative
+ * values are allowed.
+ *
+ * If search is enabled, offset is used as the starting point. The search
+ * area can be delimited by setting limit to a nonzero value, which is the
+ * maximum number of bytes after offset where the pattern may start.
+ *
+ * Matching a zero-length pattern is allowed, doing so resets the relative
+ * offset for subsequent items.
+ *
+ * This type does not support ranges (struct rte_flow_item.last).
+ */
+struct rte_flow_item_raw {
+       uint32_t relative:1; /**< Look for pattern after the previous item. */
+       uint32_t search:1; /**< Search pattern from offset (see also limit). */
+       uint32_t reserved:30; /**< Reserved, must be set to zero. */
+       int32_t offset; /**< Absolute or relative offset for pattern. */
+       uint16_t limit; /**< Search area limit for start of pattern. */
+       uint16_t length; /**< Pattern length. */
+       uint8_t pattern[]; /**< Byte string to look for. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_RAW. */
+static const struct rte_flow_item_raw rte_flow_item_raw_mask = {
+       .relative = 1,
+       .search = 1,
+       .reserved = 0x3fffffff,
+       .offset = 0xffffffff,
+       .limit = 0xffff,
+       .length = 0xffff,
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_ETH
+ *
+ * Matches an Ethernet header.
+ */
+struct rte_flow_item_eth {
+       struct ether_addr dst; /**< Destination MAC. */
+       struct ether_addr src; /**< Source MAC. */
+       uint16_t type; /**< EtherType. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_ETH. */
+static const struct rte_flow_item_eth rte_flow_item_eth_mask = {
+       .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+       .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+       .type = 0x0000,
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_VLAN
+ *
+ * Matches an 802.1Q/ad VLAN tag.
+ *
+ * This type normally follows either RTE_FLOW_ITEM_TYPE_ETH or
+ * RTE_FLOW_ITEM_TYPE_VLAN.
+ */
+struct rte_flow_item_vlan {
+       uint16_t tpid; /**< Tag protocol identifier. */
+       uint16_t tci; /**< Tag control information. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_VLAN. */
+static const struct rte_flow_item_vlan rte_flow_item_vlan_mask = {
+       .tpid = 0x0000,
+       .tci = 0xffff,
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_IPV4
+ *
+ * Matches an IPv4 header.
+ *
+ * Note: IPv4 options are handled by dedicated pattern items.
+ */
+struct rte_flow_item_ipv4 {
+       struct ipv4_hdr hdr; /**< IPv4 header definition. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_IPV4. */
+static const struct rte_flow_item_ipv4 rte_flow_item_ipv4_mask = {
+       .hdr = {
+               .src_addr = 0xffffffff,
+               .dst_addr = 0xffffffff,
+       },
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_IPV6.
+ *
+ * Matches an IPv6 header.
+ *
+ * Note: IPv6 options are handled by dedicated pattern items.
+ */
+struct rte_flow_item_ipv6 {
+       struct ipv6_hdr hdr; /**< IPv6 header definition. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_IPV6. */
+static const struct rte_flow_item_ipv6 rte_flow_item_ipv6_mask = {
+       .hdr = {
+               .src_addr =
+                       "\xff\xff\xff\xff\xff\xff\xff\xff"
+                       "\xff\xff\xff\xff\xff\xff\xff\xff",
+               .dst_addr =
+                       "\xff\xff\xff\xff\xff\xff\xff\xff"
+                       "\xff\xff\xff\xff\xff\xff\xff\xff",
+       },
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_ICMP.
+ *
+ * Matches an ICMP header.
+ */
+struct rte_flow_item_icmp {
+       struct icmp_hdr hdr; /**< ICMP header definition. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP. */
+static const struct rte_flow_item_icmp rte_flow_item_icmp_mask = {
+       .hdr = {
+               .icmp_type = 0xff,
+               .icmp_code = 0xff,
+       },
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_UDP.
+ *
+ * Matches a UDP header.
+ */
+struct rte_flow_item_udp {
+       struct udp_hdr hdr; /**< UDP header definition. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_UDP. */
+static const struct rte_flow_item_udp rte_flow_item_udp_mask = {
+       .hdr = {
+               .src_port = 0xffff,
+               .dst_port = 0xffff,
+       },
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_TCP.
+ *
+ * Matches a TCP header.
+ */
+struct rte_flow_item_tcp {
+       struct tcp_hdr hdr; /**< TCP header definition. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_TCP. */
+static const struct rte_flow_item_tcp rte_flow_item_tcp_mask = {
+       .hdr = {
+               .src_port = 0xffff,
+               .dst_port = 0xffff,
+       },
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_SCTP.
+ *
+ * Matches a SCTP header.
+ */
+struct rte_flow_item_sctp {
+       struct sctp_hdr hdr; /**< SCTP header definition. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_SCTP. */
+static const struct rte_flow_item_sctp rte_flow_item_sctp_mask = {
+       .hdr = {
+               .src_port = 0xffff,
+               .dst_port = 0xffff,
+       },
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_VXLAN.
+ *
+ * Matches a VXLAN header (RFC 7348).
+ */
+struct rte_flow_item_vxlan {
+       uint8_t flags; /**< Normally 0x08 (I flag). */
+       uint8_t rsvd0[3]; /**< Reserved, normally 0x000000. */
+       uint8_t vni[3]; /**< VXLAN identifier. */
+       uint8_t rsvd1; /**< Reserved, normally 0x00. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_VXLAN. */
+static const struct rte_flow_item_vxlan rte_flow_item_vxlan_mask = {
+       .vni = "\xff\xff\xff",
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_E_TAG.
+ *
+ * Matches an E-tag header.
+ */
+struct rte_flow_item_e_tag {
+       uint16_t tpid; /**< Tag protocol identifier (0x893F). */
+       /**
+        * E-Tag control information (E-TCI).
+        * E-PCP (3b), E-DEI (1b), ingress E-CID base (12b).
+        */
+       uint16_t epcp_edei_in_ecid_b;
+       /** Reserved (2b), GRP (2b), E-CID base (12b). */
+       uint16_t rsvd_grp_ecid_b;
+       uint8_t in_ecid_e; /**< Ingress E-CID ext. */
+       uint8_t ecid_e; /**< E-CID ext. */
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_NVGRE.
+ *
+ * Matches an NVGRE header.
+ */
+struct rte_flow_item_nvgre {
+       /**
+        * Checksum (1b), undefined (1b), key bit (1b), sequence number (1b),
+        * reserved 0 (9b), version (3b).
+        *
+        * c_k_s_rsvd0_ver must have value 0x2000 according to RFC 7637.
+        */
+       uint16_t c_k_s_rsvd0_ver;
+       uint16_t protocol; /**< Protocol type (0x6558). */
+       uint8_t tni[3]; /**< Virtual subnet ID. */
+       uint8_t flow_id; /**< Flow ID. */
+};
+
+/**
+ * Matching pattern item definition.
+ *
+ * A pattern is formed by stacking items starting from the lowest protocol
+ * layer to match. This stacking restriction does not apply to meta items
+ * which can be placed anywhere in the stack without affecting the meaning
+ * of the resulting pattern.
+ *
+ * Patterns are terminated by END items.
+ *
+ * The spec field should be a valid pointer to a structure of the related
+ * item type. It may remain unspecified (NULL) in many cases to request
+ * broad (nonspecific) matching. In such cases, last and mask must also be
+ * set to NULL.
+ *
+ * Optionally, last can point to a structure of the same type to define an
+ * inclusive range. This is mostly supported by integer and address fields,
+ * may cause errors otherwise. Fields that do not support ranges must be set
+ * to 0 or to the same value as the corresponding fields in spec.
+ *
+ * Only the fields defined to nonzero values in the default masks (see
+ * rte_flow_item_{name}_mask constants) are considered relevant by
+ * default. This can be overridden by providing a mask structure of the
+ * same type with applicable bits set to one. It can also be used to
+ * partially filter out specific fields (e.g. as an alternate means to match
+ * ranges of IP addresses).
+ *
+ * Mask is a simple bit-mask applied before interpreting the contents of
+ * spec and last, which may yield unexpected results if not used
+ * carefully. For example, if for an IPv4 address field, spec provides
+ * 10.1.2.3, last provides 10.3.4.5 and mask provides 255.255.0.0, the
+ * effective range becomes 10.1.0.0 to 10.3.255.255.
+ */
+struct rte_flow_item {
+       enum rte_flow_item_type type; /**< Item type. */
+       const void *spec; /**< Pointer to item specification structure. */
+       const void *last; /**< Defines an inclusive range (spec to last). */
+       const void *mask; /**< Bit-mask applied to spec and last. */
+};
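A small sketch of building such a pattern stack (ETH / IPV4 / END); the
10.0.0.1 destination is a placeholder, and only the destination address is
matched thanks to the explicit mask:

#include <string.h>
#include <rte_ip.h>
#include <rte_byteorder.h>
#include <rte_flow.h>

static void
build_ipv4_pattern(struct rte_flow_item pattern[3],
                   struct rte_flow_item_ipv4 *spec,
                   struct rte_flow_item_ipv4 *mask)
{
        memset(spec, 0, sizeof(*spec));
        memset(mask, 0, sizeof(*mask));
        spec->hdr.dst_addr = rte_cpu_to_be_32(IPv4(10, 0, 0, 1));
        mask->hdr.dst_addr = rte_cpu_to_be_32(0xffffffff);

        memset(pattern, 0, 3 * sizeof(pattern[0]));
        pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH;  /* spec == NULL: match any */
        pattern[1].type = RTE_FLOW_ITEM_TYPE_IPV4;
        pattern[1].spec = spec;
        pattern[1].mask = mask;
        pattern[2].type = RTE_FLOW_ITEM_TYPE_END;
}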
+
+/**
+ * Action types.
+ *
+ * Each possible action is represented by a type. Some have associated
+ * configuration structures. Several actions combined in a list can be
+ * affected to a flow rule. That list is not ordered.
+ *
+ * They fall in three categories:
+ *
+ * - Terminating actions (such as QUEUE, DROP, RSS, PF, VF) that prevent
+ *   matched packets from being processed by subsequent flow rules, unless
+ *   overridden with PASSTHRU.
+ *
+ * - Non terminating actions (PASSTHRU, DUP) that leave matched packets up
+ *   for additional processing by subsequent flow rules.
+ *
+ * - Other non terminating meta actions that do not affect the fate of
+ *   packets (END, VOID, MARK, FLAG, COUNT).
+ *
+ * When several actions are combined in a flow rule, they should all have
+ * different types (e.g. dropping a packet twice is not possible).
+ *
+ * Only the last action of a given type is taken into account. PMDs still
+ * perform error checking on the entire list.
+ *
+ * Note that PASSTHRU is the only action able to override a terminating
+ * rule.
+ */
+enum rte_flow_action_type {
+       /**
+        * [META]
+        *
+        * End marker for action lists. Prevents further processing of
+        * actions, thereby ending the list.
+        *
+        * No associated configuration structure.
+        */
+       RTE_FLOW_ACTION_TYPE_END,
+
+       /**
+        * [META]
+        *
+        * Used as a placeholder for convenience. It is ignored and simply
+        * discarded by PMDs.
+        *
+        * No associated configuration structure.
+        */
+       RTE_FLOW_ACTION_TYPE_VOID,
+
+       /**
+        * Leaves packets up for additional processing by subsequent flow
+        * rules. This is the default when a rule does not contain a
+        * terminating action, but can be specified to force a rule to
+        * become non-terminating.
+        *
+        * No associated configuration structure.
+        */
+       RTE_FLOW_ACTION_TYPE_PASSTHRU,
+
+       /**
+        * [META]
+        *
+        * Attaches an integer value to packets and sets PKT_RX_FDIR and
+        * PKT_RX_FDIR_ID mbuf flags.
+        *
+        * See struct rte_flow_action_mark.
+        */
+       RTE_FLOW_ACTION_TYPE_MARK,
+
+       /**
+        * [META]
+        *
+        * Flags packets. Similar to MARK without a specific value; only
+        * sets the PKT_RX_FDIR mbuf flag.
+        *
+        * No associated configuration structure.
+        */
+       RTE_FLOW_ACTION_TYPE_FLAG,
+
+       /**
+        * Assigns packets to a given queue index.
+        *
+        * See struct rte_flow_action_queue.
+        */
+       RTE_FLOW_ACTION_TYPE_QUEUE,
+
+       /**
+        * Drops packets.
+        *
+        * PASSTHRU overrides this action if both are specified.
+        *
+        * No associated configuration structure.
+        */
+       RTE_FLOW_ACTION_TYPE_DROP,
+
+       /**
+        * [META]
+        *
+        * Enables counters for this rule.
+        *
+        * These counters can be retrieved and reset through rte_flow_query(),
+        * see struct rte_flow_query_count.
+        *
+        * No associated configuration structure.
+        */
+       RTE_FLOW_ACTION_TYPE_COUNT,
+
+       /**
+        * Duplicates packets to a given queue index.
+        *
+        * This is normally combined with QUEUE, however when used alone, it
+        * is actually similar to QUEUE + PASSTHRU.
+        *
+        * See struct rte_flow_action_dup.
+        */
+       RTE_FLOW_ACTION_TYPE_DUP,
+
+       /**
+        * Similar to QUEUE, except RSS is additionally performed on packets
+        * to spread them among several queues according to the provided
+        * parameters.
+        *
+        * See struct rte_flow_action_rss.
+        */
+       RTE_FLOW_ACTION_TYPE_RSS,
+
+       /**
+        * Redirects packets to the physical function (PF) of the current
+        * device.
+        *
+        * No associated configuration structure.
+        */
+       RTE_FLOW_ACTION_TYPE_PF,
+
+       /**
+        * Redirects packets to the virtual function (VF) of the current
+        * device with the specified ID.
+        *
+        * See struct rte_flow_action_vf.
+        */
+       RTE_FLOW_ACTION_TYPE_VF,
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_MARK
+ *
+ * Attaches an integer value to packets and sets PKT_RX_FDIR and
+ * PKT_RX_FDIR_ID mbuf flags.
+ *
+ * This value is arbitrary and application-defined. Maximum allowed value
+ * depends on the underlying implementation. It is returned in the
+ * hash.fdir.hi mbuf field.
+ */
+struct rte_flow_action_mark {
+       uint32_t id; /**< Integer value to return with packets. */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_QUEUE
+ *
+ * Assign packets to a given queue index.
+ *
+ * Terminating by default.
+ */
+struct rte_flow_action_queue {
+       uint16_t index; /**< Queue index to use. */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_COUNT (query)
+ *
+ * Query structure to retrieve and reset flow rule counters.
+ */
+struct rte_flow_query_count {
+       uint32_t reset:1; /**< Reset counters after query [in]. */
+       uint32_t hits_set:1; /**< hits field is set [out]. */
+       uint32_t bytes_set:1; /**< bytes field is set [out]. */
+       uint32_t reserved:29; /**< Reserved, must be zero [in, out]. */
+       uint64_t hits; /**< Number of hits for this rule [out]. */
+       uint64_t bytes; /**< Number of bytes through this rule [out]. */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_DUP
+ *
+ * Duplicates packets to a given queue index.
+ *
+ * This is normally combined with QUEUE, however when used alone, it is
+ * actually similar to QUEUE + PASSTHRU.
+ *
+ * Non-terminating by default.
+ */
+struct rte_flow_action_dup {
+       uint16_t index; /**< Queue index to duplicate packets to. */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_RSS
+ *
+ * Similar to QUEUE, except RSS is additionally performed on packets to
+ * spread them among several queues according to the provided parameters.
+ *
+ * Note: RSS hash result is stored in the hash.rss mbuf field which overlaps
+ * hash.fdir.lo. Since the MARK action sets the hash.fdir.hi field only,
+ * both can be requested simultaneously.
+ *
+ * Terminating by default.
+ */
+struct rte_flow_action_rss {
+       const struct rte_eth_rss_conf *rss_conf; /**< RSS parameters. */
+       uint16_t num; /**< Number of entries in queue[]. */
+       uint16_t queue[]; /**< Queue indices to use. */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_VF
+ *
+ * Redirects packets to a virtual function (VF) of the current device.
+ *
+ * Packets matched by a VF pattern item can be redirected to their original
+ * VF ID instead of the specified one. This parameter may not be available
+ * and is not guaranteed to work properly if the VF part is matched by a
+ * prior flow rule or if packets are not addressed to a VF in the first
+ * place.
+ *
+ * Terminating by default.
+ */
+struct rte_flow_action_vf {
+       uint32_t original:1; /**< Use original VF ID if possible. */
+       uint32_t reserved:31; /**< Reserved, must be zero. */
+       uint32_t id; /**< VF ID to redirect packets to. */
+};
+
+/**
+ * Definition of a single action.
+ *
+ * A list of actions is terminated by a END action.
+ *
+ * For simple actions without a configuration structure, conf remains NULL.
+ */
+struct rte_flow_action {
+       enum rte_flow_action_type type; /**< Action type. */
+       const void *conf; /**< Pointer to action configuration structure. */
+};
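Building on the definitions above, a sketch of an action list; the mark value
and queue index are arbitrary placeholders:

#include <rte_flow.h>

/* Mark matched packets and steer them to RX queue 3; END closes the list. */
static const struct rte_flow_action_mark mark = { .id = 0xbeef };
static const struct rte_flow_action_queue queue = { .index = 3 };

static const struct rte_flow_action actions[] = {
        { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
        { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
        { .type = RTE_FLOW_ACTION_TYPE_END },
};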
+
+/**
+ * Opaque type returned after successfully creating a flow.
+ *
+ * This handle can be used to manage and query the related flow (e.g. to
+ * destroy it or retrieve counters).
+ */
+struct rte_flow;
+
+/**
+ * Verbose error types.
+ *
+ * Most of them provide the type of the object referenced by struct
+ * rte_flow_error.cause.
+ */
+enum rte_flow_error_type {
+       RTE_FLOW_ERROR_TYPE_NONE, /**< No error. */
+       RTE_FLOW_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. */
+       RTE_FLOW_ERROR_TYPE_HANDLE, /**< Flow rule (handle). */
+       RTE_FLOW_ERROR_TYPE_ATTR_GROUP, /**< Group field. */
+       RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, /**< Priority field. */
+       RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, /**< Ingress field. */
+       RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, /**< Egress field. */
+       RTE_FLOW_ERROR_TYPE_ATTR, /**< Attributes structure. */
+       RTE_FLOW_ERROR_TYPE_ITEM_NUM, /**< Pattern length. */
+       RTE_FLOW_ERROR_TYPE_ITEM, /**< Specific pattern item. */
+       RTE_FLOW_ERROR_TYPE_ACTION_NUM, /**< Number of actions. */
+       RTE_FLOW_ERROR_TYPE_ACTION, /**< Specific action. */
+};
+
+/**
+ * Verbose error structure definition.
+ *
+ * This object is normally allocated by applications and set by PMDs, the
+ * message points to a constant string which does not need to be freed by
+ * the application, however its pointer can be considered valid only as long
+ * as its associated DPDK port remains configured. Closing the underlying
+ * device or unloading the PMD invalidates it.
+ *
+ * Both cause and message may be NULL regardless of the error type.
+ */
+struct rte_flow_error {
+       enum rte_flow_error_type type; /**< Cause field and error types. */
+       const void *cause; /**< Object responsible for the error. */
+       const char *message; /**< Human-readable error message. */
+};
+
+/**
+ * Check whether a flow rule can be created on a given port.
+ *
+ * While this function has no effect on the target device, the flow rule is
+ * validated against its current configuration state and the returned value
+ * should be considered valid by the caller for that state only.
+ *
+ * The returned value is guaranteed to remain valid only as long as no
+ * successful calls to rte_flow_create() or rte_flow_destroy() are made in
+ * the meantime and no device parameter affecting flow rules in any way are
+ * modified, due to possible collisions or resource limitations (although in
+ * such cases EINVAL should not be returned).
+ *
+ * @param port_id
+ *   Port identifier of Ethernet device.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] pattern
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 if flow rule is valid and can be created. A negative errno value
+ *   otherwise (rte_errno is also set), the following errors are defined:
+ *
+ *   -ENOSYS: underlying device does not support this functionality.
+ *
+ *   -EINVAL: unknown or invalid rule specification.
+ *
+ *   -ENOTSUP: valid but unsupported rule specification (e.g. partial
+ *   bit-masks are unsupported).
+ *
+ *   -EEXIST: collision with an existing rule.
+ *
+ *   -ENOMEM: not enough resources.
+ *
+ *   -EBUSY: action cannot be performed due to busy device resources, may
+ *   succeed if the affected queues or even the entire port are in a stopped
+ *   state (see rte_eth_dev_rx_queue_stop() and rte_eth_dev_stop()).
+ */
+int
+rte_flow_validate(uint8_t port_id,
+                 const struct rte_flow_attr *attr,
+                 const struct rte_flow_item pattern[],
+                 const struct rte_flow_action actions[],
+                 struct rte_flow_error *error);
+
+/**
+ * Create a flow rule on a given port.
+ *
+ * @param port_id
+ *   Port identifier of Ethernet device.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] pattern
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   A valid handle in case of success, NULL otherwise and rte_errno is set
+ *   to the positive version of one of the error codes defined for
+ *   rte_flow_validate().
+ */
+struct rte_flow *
+rte_flow_create(uint8_t port_id,
+               const struct rte_flow_attr *attr,
+               const struct rte_flow_item pattern[],
+               const struct rte_flow_action actions[],
+               struct rte_flow_error *error);
+
+/**
+ * Destroy a flow rule on a given port.
+ *
+ * Failure to destroy a flow rule handle may occur when other flow rules
+ * depend on it, and destroying it would result in an inconsistent state.
+ *
+ * This function is only guaranteed to succeed if handles are destroyed in
+ * reverse order of their creation.
+ *
+ * @param port_id
+ *   Port identifier of Ethernet device.
+ * @param flow
+ *   Flow rule handle to destroy.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+rte_flow_destroy(uint8_t port_id,
+                struct rte_flow *flow,
+                struct rte_flow_error *error);
+
+/**
+ * Destroy all flow rules associated with a port.
+ *
+ * In the unlikely event of failure, handles are still considered destroyed
+ * and no longer valid but the port must be assumed to be in an inconsistent
+ * state.
+ *
+ * @param port_id
+ *   Port identifier of Ethernet device.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+rte_flow_flush(uint8_t port_id,
+              struct rte_flow_error *error);
+
+/**
+ * Query an existing flow rule.
+ *
+ * This function allows retrieving flow-specific data such as counters.
+ * Data is gathered by special actions which must be present in the flow
+ * rule definition.
+ *
+ * \see RTE_FLOW_ACTION_TYPE_COUNT
+ *
+ * @param port_id
+ *   Port identifier of Ethernet device.
+ * @param flow
+ *   Flow rule handle to query.
+ * @param action
+ *   Action type to query.
+ * @param[in, out] data
+ *   Pointer to storage for the associated query data type.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+rte_flow_query(uint8_t port_id,
+              struct rte_flow *flow,
+              enum rte_flow_action_type action,
+              void *data,
+              struct rte_flow_error *error);
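A hedged end-to-end sketch tying the calls above together; pattern[] and
actions[] are assumed to be built as in the earlier sketches, with a COUNT
action included so the query has something to report:

#include <stdio.h>
#include <inttypes.h>
#include <rte_errno.h>
#include <rte_flow.h>

static int
flow_demo(uint8_t port_id,
          const struct rte_flow_item pattern[],
          const struct rte_flow_action actions[])
{
        struct rte_flow_attr attr = { .ingress = 1 };
        struct rte_flow_query_count count = { .reset = 1 };
        struct rte_flow_error err;
        struct rte_flow *flow;

        if (rte_flow_validate(port_id, &attr, pattern, actions, &err) != 0) {
                printf("rule rejected: %s (rte_errno=%d)\n",
                       err.message ? err.message : "unspecified", rte_errno);
                return -1;
        }
        flow = rte_flow_create(port_id, &attr, pattern, actions, &err);
        if (flow == NULL)
                return -1;
        if (rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
                           &count, &err) == 0 && count.hits_set)
                printf("hits so far: %" PRIu64 "\n", count.hits);
        return rte_flow_destroy(port_id, flow, &err);
}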
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_FLOW_H_ */
diff --git a/src/dpdk/lib/librte_ether/rte_flow_driver.h b/src/dpdk/lib/librte_ether/rte_flow_driver.h
new file mode 100644 (file)
index 0000000..da5749d
--- /dev/null
@@ -0,0 +1,182 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2016 6WIND S.A.
+ *   Copyright 2016 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_FLOW_DRIVER_H_
+#define RTE_FLOW_DRIVER_H_
+
+/**
+ * @file
+ * RTE generic flow API (driver side)
+ *
+ * This file provides implementation helpers for internal use by PMDs, they
+ * are not intended to be exposed to applications and are not subject to ABI
+ * versioning.
+ */
+
+#include <stdint.h>
+
+#include <rte_errno.h>
+#include "rte_ethdev.h"
+#include "rte_flow.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Generic flow operations structure implemented and returned by PMDs.
+ *
+ * To implement this API, PMDs must handle the RTE_ETH_FILTER_GENERIC filter
+ * type in their .filter_ctrl callback function (struct eth_dev_ops) as well
+ * as the RTE_ETH_FILTER_GET filter operation.
+ *
+ * If successful, this operation must result in a pointer to a PMD-specific
+ * struct rte_flow_ops written to the argument address as described below:
+ *
+ * \code
+ *
+ * // PMD filter_ctrl callback
+ *
+ * static const struct rte_flow_ops pmd_flow_ops = { ... };
+ *
+ * switch (filter_type) {
+ * case RTE_ETH_FILTER_GENERIC:
+ *     if (filter_op != RTE_ETH_FILTER_GET)
+ *         return -EINVAL;
+ *     *(const void **)arg = &pmd_flow_ops;
+ *     return 0;
+ * }
+ *
+ * \endcode
+ *
+ * See also rte_flow_ops_get().
+ *
+ * These callback functions are not supposed to be used by applications
+ * directly, which must rely on the API defined in rte_flow.h.
+ *
+ * Public-facing wrapper functions perform a few consistency checks so that
+ * unimplemented (i.e. NULL) callbacks simply return -ENOTSUP. These
+ * callbacks otherwise only differ by their first argument (with port ID
+ * already resolved to a pointer to struct rte_eth_dev).
+ */
+struct rte_flow_ops {
+       /** See rte_flow_validate(). */
+       int (*validate)
+               (struct rte_eth_dev *,
+                const struct rte_flow_attr *,
+                const struct rte_flow_item [],
+                const struct rte_flow_action [],
+                struct rte_flow_error *);
+       /** See rte_flow_create(). */
+       struct rte_flow *(*create)
+               (struct rte_eth_dev *,
+                const struct rte_flow_attr *,
+                const struct rte_flow_item [],
+                const struct rte_flow_action [],
+                struct rte_flow_error *);
+       /** See rte_flow_destroy(). */
+       int (*destroy)
+               (struct rte_eth_dev *,
+                struct rte_flow *,
+                struct rte_flow_error *);
+       /** See rte_flow_flush(). */
+       int (*flush)
+               (struct rte_eth_dev *,
+                struct rte_flow_error *);
+       /** See rte_flow_query(). */
+       int (*query)
+               (struct rte_eth_dev *,
+                struct rte_flow *,
+                enum rte_flow_action_type,
+                void *,
+                struct rte_flow_error *);
+};
+
+/**
+ * Initialize generic flow error structure.
+ *
+ * This function also sets rte_errno to a given value.
+ *
+ * @param[out] error
+ *   Pointer to flow error structure (may be NULL).
+ * @param code
+ *   Related error code (rte_errno).
+ * @param type
+ *   Cause field and error types.
+ * @param cause
+ *   Object responsible for the error.
+ * @param message
+ *   Human-readable error message.
+ *
+ * @return
+ *   Error code.
+ */
+static inline int
+rte_flow_error_set(struct rte_flow_error *error,
+                  int code,
+                  enum rte_flow_error_type type,
+                  const void *cause,
+                  const char *message)
+{
+       if (error) {
+               *error = (struct rte_flow_error){
+                       .type = type,
+                       .cause = cause,
+                       .message = message,
+               };
+       }
+       rte_errno = code;
+       return code;
+}
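A sketch of how a PMD-side callback might use rte_flow_error_set(); the egress
restriction is purely hypothetical and only illustrates the negative-errno
convention used by the rte_flow.c wrappers:

#include <errno.h>
#include "rte_flow_driver.h"

static int
pmd_flow_validate(struct rte_eth_dev *dev,
                  const struct rte_flow_attr *attr,
                  const struct rte_flow_item pattern[],
                  const struct rte_flow_action actions[],
                  struct rte_flow_error *error)
{
        (void)dev;
        (void)pattern;
        (void)actions;
        if (attr->egress)
                return -rte_flow_error_set(error, ENOTSUP,
                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                                           attr, "egress rules not supported");
        return 0;
}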
+
+/**
+ * Get generic flow operations structure from a port.
+ *
+ * @param port_id
+ *   Port identifier to query.
+ * @param[out] error
+ *   Pointer to flow error structure.
+ *
+ * @return
+ *   The flow operations structure associated with port_id, NULL in case of
+ *   error, in which case rte_errno is set and the error structure contains
+ *   additional details.
+ */
+const struct rte_flow_ops *
+rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_FLOW_DRIVER_H_ */
index 26e54f6..51db006 100644 (file)
@@ -98,6 +98,7 @@ rte_hash_find_existing(const char *name)
 
 void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
 {
+       h->cmp_jump_table_idx = KEY_CUSTOM;
        h->rte_hash_custom_cmp_eq = func;
 }
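A sketch of a custom comparator, which this change now records via
cmp_jump_table_idx so lookups dispatch to it; the byte-wise comparison is just
a placeholder:

#include <string.h>
#include <rte_hash.h>

/* Signature follows rte_hash_cmp_eq_t: return 0 when the keys are equal. */
static int
my_key_cmp(const void *key1, const void *key2, size_t key_len)
{
        return memcmp(key1, key2, key_len);
}

static void
install_cmp(struct rte_hash *h)
{
        rte_hash_set_cmp_func(h, my_key_cmp);
}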
 
@@ -159,7 +160,8 @@ rte_hash_create(const struct rte_hash_parameters *params)
                num_key_slots = params->entries + 1;
 
        snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
-       r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
+       /* Create ring (Dummy slot index is not enqueued) */
+       r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots - 1),
                        params->socket_id, 0);
        if (r == NULL) {
                RTE_LOG(ERR, HASH, "memory allocation failed\n");
@@ -282,6 +284,15 @@ rte_hash_create(const struct rte_hash_parameters *params)
        h->free_slots = r;
        h->hw_trans_mem_support = hw_trans_mem_support;
 
+#if defined(RTE_ARCH_X86)
+       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+               h->sig_cmp_fn = RTE_HASH_COMPARE_AVX2;
+       else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
+               h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
+       else
+#endif
+               h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
+
        /* Turn on multi-writer only with explicit flag from user and TM
         * support.
         */
@@ -408,6 +419,7 @@ rte_hash_reset(struct rte_hash *h)
 static inline int
 make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt)
 {
+       static unsigned int nr_pushes; /* bounds cuckoo displacements; function-scope static, shared by all tables */
        unsigned i, j;
        int ret;
        uint32_t next_bucket_idx;
@@ -419,10 +431,10 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt)
         */
        for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
                /* Search for space in alternative locations */
-               next_bucket_idx = bkt->signatures[i].alt & h->bucket_bitmask;
+               next_bucket_idx = bkt->sig_alt[i] & h->bucket_bitmask;
                next_bkt[i] = &h->buckets[next_bucket_idx];
                for (j = 0; j < RTE_HASH_BUCKET_ENTRIES; j++) {
-                       if (next_bkt[i]->signatures[j].sig == NULL_SIGNATURE)
+                       if (next_bkt[i]->key_idx[j] == EMPTY_SLOT)
                                break;
                }
 
@@ -432,8 +444,8 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt)
 
        /* Alternative location has spare room (end of recursive function) */
        if (i != RTE_HASH_BUCKET_ENTRIES) {
-               next_bkt[i]->signatures[j].alt = bkt->signatures[i].current;
-               next_bkt[i]->signatures[j].current = bkt->signatures[i].alt;
+               next_bkt[i]->sig_alt[j] = bkt->sig_current[i];
+               next_bkt[i]->sig_current[j] = bkt->sig_alt[i];
                next_bkt[i]->key_idx[j] = bkt->key_idx[i];
                return i;
        }
@@ -444,11 +456,13 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt)
                        break;
 
        /* All entries have been pushed, so entry cannot be added */
-       if (i == RTE_HASH_BUCKET_ENTRIES)
+       if (i == RTE_HASH_BUCKET_ENTRIES || nr_pushes > RTE_HASH_MAX_PUSHES)
                return -ENOSPC;
 
        /* Set flag to indicate that this entry is going to be pushed */
        bkt->flag[i] = 1;
+
+       nr_pushes++;
        /* Need room in alternative bucket to insert the pushed entry */
        ret = make_space_bucket(h, next_bkt[i]);
        /*
@@ -458,9 +472,10 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt)
         * or return error
         */
        bkt->flag[i] = 0;
+       nr_pushes = 0;
        if (ret >= 0) {
-               next_bkt[i]->signatures[ret].alt = bkt->signatures[i].current;
-               next_bkt[i]->signatures[ret].current = bkt->signatures[i].alt;
+               next_bkt[i]->sig_alt[ret] = bkt->sig_current[i];
+               next_bkt[i]->sig_current[ret] = bkt->sig_alt[i];
                next_bkt[i]->key_idx[ret] = bkt->key_idx[i];
                return i;
        } else
@@ -542,8 +557,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 
        /* Check if key is already inserted in primary location */
        for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-               if (prim_bkt->signatures[i].current == sig &&
-                               prim_bkt->signatures[i].alt == alt_hash) {
+               if (prim_bkt->sig_current[i] == sig &&
+                               prim_bkt->sig_alt[i] == alt_hash) {
                        k = (struct rte_hash_key *) ((char *)keys +
                                        prim_bkt->key_idx[i] * h->key_entry_size);
                        if (rte_hash_cmp_eq(key, k->key, h) == 0) {
@@ -562,8 +577,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 
        /* Check if key is already inserted in secondary location */
        for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-               if (sec_bkt->signatures[i].alt == sig &&
-                               sec_bkt->signatures[i].current == alt_hash) {
+               if (sec_bkt->sig_alt[i] == sig &&
+                               sec_bkt->sig_current[i] == alt_hash) {
                        k = (struct rte_hash_key *) ((char *)keys +
                                        sec_bkt->key_idx[i] * h->key_entry_size);
                        if (rte_hash_cmp_eq(key, k->key, h) == 0) {
@@ -608,9 +623,9 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 #endif
                for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
                        /* Check if slot is available */
-                       if (likely(prim_bkt->signatures[i].sig == NULL_SIGNATURE)) {
-                               prim_bkt->signatures[i].current = sig;
-                               prim_bkt->signatures[i].alt = alt_hash;
+                       if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
+                               prim_bkt->sig_current[i] = sig;
+                               prim_bkt->sig_alt[i] = alt_hash;
                                prim_bkt->key_idx[i] = new_idx;
                                break;
                        }
@@ -630,8 +645,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
                 */
                ret = make_space_bucket(h, prim_bkt);
                if (ret >= 0) {
-                       prim_bkt->signatures[ret].current = sig;
-                       prim_bkt->signatures[ret].alt = alt_hash;
+                       prim_bkt->sig_current[ret] = sig;
+                       prim_bkt->sig_alt[ret] = alt_hash;
                        prim_bkt->key_idx[ret] = new_idx;
                        if (h->add_key == ADD_KEY_MULTIWRITER)
                                rte_spinlock_unlock(h->multiwriter_lock);
@@ -705,8 +720,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
 
        /* Check if key is in primary location */
        for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-               if (bkt->signatures[i].current == sig &&
-                               bkt->signatures[i].sig != NULL_SIGNATURE) {
+               if (bkt->sig_current[i] == sig &&
+                               bkt->key_idx[i] != EMPTY_SLOT) {
                        k = (struct rte_hash_key *) ((char *)keys +
                                        bkt->key_idx[i] * h->key_entry_size);
                        if (rte_hash_cmp_eq(key, k->key, h) == 0) {
@@ -728,8 +743,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
 
        /* Check if key is in secondary location */
        for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-               if (bkt->signatures[i].current == alt_hash &&
-                               bkt->signatures[i].alt == sig) {
+               if (bkt->sig_current[i] == alt_hash &&
+                               bkt->sig_alt[i] == sig) {
                        k = (struct rte_hash_key *) ((char *)keys +
                                        bkt->key_idx[i] * h->key_entry_size);
                        if (rte_hash_cmp_eq(key, k->key, h) == 0) {
@@ -783,7 +798,8 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
        unsigned lcore_id, n_slots;
        struct lcore_cache *cached_free_slots;
 
-       bkt->signatures[i].sig = NULL_SIGNATURE;
+       bkt->sig_current[i] = NULL_SIGNATURE;
+       bkt->sig_alt[i] = NULL_SIGNATURE;
        if (h->hw_trans_mem_support) {
                lcore_id = rte_lcore_id();
                cached_free_slots = &h->local_free_slots[lcore_id];
@@ -814,14 +830,15 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
        unsigned i;
        struct rte_hash_bucket *bkt;
        struct rte_hash_key *k, *keys = h->key_store;
+       int32_t ret;
 
        bucket_idx = sig & h->bucket_bitmask;
        bkt = &h->buckets[bucket_idx];
 
        /* Check if key is in primary location */
        for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-               if (bkt->signatures[i].current == sig &&
-                               bkt->signatures[i].sig != NULL_SIGNATURE) {
+               if (bkt->sig_current[i] == sig &&
+                               bkt->key_idx[i] != EMPTY_SLOT) {
                        k = (struct rte_hash_key *) ((char *)keys +
                                        bkt->key_idx[i] * h->key_entry_size);
                        if (rte_hash_cmp_eq(key, k->key, h) == 0) {
@@ -831,7 +848,9 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
                                 * Return index where key is stored,
                                 * subtracting the first dummy index
                                 */
-                               return bkt->key_idx[i] - 1;
+                               ret = bkt->key_idx[i] - 1;
+                               bkt->key_idx[i] = EMPTY_SLOT;
+                               return ret;
                        }
                }
        }
@@ -843,8 +862,8 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
 
        /* Check if key is in secondary location */
        for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-               if (bkt->signatures[i].current == alt_hash &&
-                               bkt->signatures[i].sig != NULL_SIGNATURE) {
+               if (bkt->sig_current[i] == alt_hash &&
+                               bkt->key_idx[i] != EMPTY_SLOT) {
                        k = (struct rte_hash_key *) ((char *)keys +
                                        bkt->key_idx[i] * h->key_entry_size);
                        if (rte_hash_cmp_eq(key, k->key, h) == 0) {
@@ -854,7 +873,9 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
                                 * Return index where key is stored,
                                 * subtracting the first dummy index
                                 */
-                               return bkt->key_idx[i] - 1;
+                               ret = bkt->key_idx[i] - 1;
+                               bkt->key_idx[i] = EMPTY_SLOT;
+                               return ret;
                        }
                }
        }
@@ -897,280 +918,189 @@ rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
        return 0;
 }
 
-/* Lookup bulk stage 0: Prefetch input key */
 static inline void
-lookup_stage0(unsigned *idx, uint64_t *lookup_mask,
-               const void * const *keys)
+compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
+                       const struct rte_hash_bucket *prim_bkt,
+                       const struct rte_hash_bucket *sec_bkt,
+                       hash_sig_t prim_hash, hash_sig_t sec_hash,
+                       enum rte_hash_sig_compare_function sig_cmp_fn)
 {
-       *idx = __builtin_ctzl(*lookup_mask);
-       if (*lookup_mask == 0)
-               *idx = 0;
+       unsigned int i;
+
+       switch (sig_cmp_fn) {
+#ifdef RTE_MACHINE_CPUFLAG_AVX2
+       case RTE_HASH_COMPARE_AVX2:
+               *prim_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
+                               _mm256_load_si256(
+                                       (__m256i const *)prim_bkt->sig_current),
+                               _mm256_set1_epi32(prim_hash)));
+               *sec_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
+                               _mm256_load_si256(
+                                       (__m256i const *)sec_bkt->sig_current),
+                               _mm256_set1_epi32(sec_hash)));
+               break;
+#endif
+#ifdef RTE_MACHINE_CPUFLAG_SSE2
+       case RTE_HASH_COMPARE_SSE:
+               /* Compare all 8 signatures in the primary bucket, 4 at a time */
+               *prim_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16(
+                               _mm_load_si128(
+                                       (__m128i const *)prim_bkt->sig_current),
+                               _mm_set1_epi32(prim_hash)));
+               *prim_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16(
+                               _mm_load_si128(
+                                       (__m128i const *)&prim_bkt->sig_current[4]),
+                               _mm_set1_epi32(prim_hash)))) << 4;
+               /* Compare all 8 signatures in the secondary bucket, 4 at a time */
+               *sec_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16(
+                               _mm_load_si128(
+                                       (__m128i const *)sec_bkt->sig_current),
+                               _mm_set1_epi32(sec_hash)));
+               *sec_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16(
+                               _mm_load_si128(
+                                       (__m128i const *)&sec_bkt->sig_current[4]),
+                               _mm_set1_epi32(sec_hash)))) << 4;
+               break;
+#endif
+       default:
+               for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+                       *prim_hash_matches |=
+                               ((prim_hash == prim_bkt->sig_current[i]) << i);
+                       *sec_hash_matches |=
+                               ((sec_hash == sec_bkt->sig_current[i]) << i);
+               }
+       }
 
-       rte_prefetch0(keys[*idx]);
-       *lookup_mask &= ~(1llu << *idx);
 }
 
-/*
- * Lookup bulk stage 1: Calculate primary/secondary hashes
- * and prefetch primary/secondary buckets
- */
+#define PREFETCH_OFFSET 4
 static inline void
-lookup_stage1(unsigned idx, hash_sig_t *prim_hash, hash_sig_t *sec_hash,
-               const struct rte_hash_bucket **primary_bkt,
-               const struct rte_hash_bucket **secondary_bkt,
-               hash_sig_t *hash_vals, const void * const *keys,
-               const struct rte_hash *h)
+__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
+                       int32_t num_keys, int32_t *positions,
+                       uint64_t *hit_mask, void *data[])
 {
-       *prim_hash = rte_hash_hash(h, keys[idx]);
-       hash_vals[idx] = *prim_hash;
-       *sec_hash = rte_hash_secondary_hash(*prim_hash);
+       uint64_t hits = 0;
+       int32_t i;
+       uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
+       uint32_t sec_hash[RTE_HASH_LOOKUP_BULK_MAX];
+       const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
+       const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
+       uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
+       uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
+
+       /* Prefetch first keys */
+       for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
+               rte_prefetch0(keys[i]);
 
-       *primary_bkt = &h->buckets[*prim_hash & h->bucket_bitmask];
-       *secondary_bkt = &h->buckets[*sec_hash & h->bucket_bitmask];
+       /*
+        * Prefetch rest of the keys, calculate primary and
+        * secondary bucket and prefetch them
+        */
+       for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
+               rte_prefetch0(keys[i + PREFETCH_OFFSET]);
 
-       rte_prefetch0(*primary_bkt);
-       rte_prefetch0(*secondary_bkt);
-}
+               prim_hash[i] = rte_hash_hash(h, keys[i]);
+               sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]);
 
-/*
- * Lookup bulk stage 2:  Search for match hashes in primary/secondary locations
- * and prefetch first key slot
- */
-static inline void
-lookup_stage2(unsigned idx, hash_sig_t prim_hash, hash_sig_t sec_hash,
-               const struct rte_hash_bucket *prim_bkt,
-               const struct rte_hash_bucket *sec_bkt,
-               const struct rte_hash_key **key_slot, int32_t *positions,
-               uint64_t *extra_hits_mask, const void *keys,
-               const struct rte_hash *h)
-{
-       unsigned prim_hash_matches, sec_hash_matches, key_idx, i;
-       unsigned total_hash_matches;
+               primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask];
+               secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask];
 
-       prim_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES;
-       sec_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES;
-       for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-               prim_hash_matches |= ((prim_hash == prim_bkt->signatures[i].current) << i);
-               sec_hash_matches |= ((sec_hash == sec_bkt->signatures[i].current) << i);
+               rte_prefetch0(primary_bkt[i]);
+               rte_prefetch0(secondary_bkt[i]);
        }
 
-       key_idx = prim_bkt->key_idx[__builtin_ctzl(prim_hash_matches)];
-       if (key_idx == 0)
-               key_idx = sec_bkt->key_idx[__builtin_ctzl(sec_hash_matches)];
+       /* Calculate and prefetch rest of the buckets */
+       for (; i < num_keys; i++) {
+               prim_hash[i] = rte_hash_hash(h, keys[i]);
+               sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]);
 
-       total_hash_matches = (prim_hash_matches |
-                               (sec_hash_matches << (RTE_HASH_BUCKET_ENTRIES + 1)));
-       *key_slot = (const struct rte_hash_key *) ((const char *)keys +
-                                       key_idx * h->key_entry_size);
+               primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask];
+               secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask];
 
-       rte_prefetch0(*key_slot);
-       /*
-        * Return index where key is stored,
-        * substracting the first dummy index
-        */
-       positions[idx] = (key_idx - 1);
+               rte_prefetch0(primary_bkt[i]);
+               rte_prefetch0(secondary_bkt[i]);
+       }
 
-       *extra_hits_mask |= (uint64_t)(__builtin_popcount(total_hash_matches) > 3) << idx;
+       /* Compare signatures and prefetch key slot of first hit */
+       for (i = 0; i < num_keys; i++) {
+               compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
+                               primary_bkt[i], secondary_bkt[i],
+                               prim_hash[i], sec_hash[i], h->sig_cmp_fn);
+
+               if (prim_hitmask[i]) {
+                       uint32_t first_hit = __builtin_ctzl(prim_hitmask[i]);
+                       uint32_t key_idx = primary_bkt[i]->key_idx[first_hit];
+                       const struct rte_hash_key *key_slot =
+                               (const struct rte_hash_key *)(
+                               (const char *)h->key_store +
+                               key_idx * h->key_entry_size);
+                       rte_prefetch0(key_slot);
+                       continue;
+               }
 
-}
+               if (sec_hitmask[i]) {
+                       uint32_t first_hit = __builtin_ctzl(sec_hitmask[i]);
+                       uint32_t key_idx = secondary_bkt[i]->key_idx[first_hit];
+                       const struct rte_hash_key *key_slot =
+                               (const struct rte_hash_key *)(
+                               (const char *)h->key_store +
+                               key_idx * h->key_entry_size);
+                       rte_prefetch0(key_slot);
+               }
+       }
 
+       /* Compare keys, first hits in primary first */
+       for (i = 0; i < num_keys; i++) {
+               positions[i] = -ENOENT;
+               while (prim_hitmask[i]) {
+                       uint32_t hit_index = __builtin_ctzl(prim_hitmask[i]);
+
+                       uint32_t key_idx = primary_bkt[i]->key_idx[hit_index];
+                       const struct rte_hash_key *key_slot =
+                               (const struct rte_hash_key *)(
+                               (const char *)h->key_store +
+                               key_idx * h->key_entry_size);
+                       /*
+                        * If key index is 0, do not compare key,
+                        * as it is checking the dummy slot
+                        */
+                       if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) {
+                               if (data != NULL)
+                                       data[i] = key_slot->pdata;
 
-/* Lookup bulk stage 3: Check if key matches, update hit mask and return data */
-static inline void
-lookup_stage3(unsigned idx, const struct rte_hash_key *key_slot, const void * const *keys,
-               const int32_t *positions, void *data[], uint64_t *hits,
-               const struct rte_hash *h)
-{
-       unsigned hit;
-       unsigned key_idx;
+                               hits |= 1ULL << i;
+                               positions[i] = key_idx - 1;
+                               goto next_key;
+                       }
+                       prim_hitmask[i] &= ~(1 << (hit_index));
+               }
 
-       hit = !rte_hash_cmp_eq(key_slot->key, keys[idx], h);
-       if (data != NULL)
-               data[idx] = key_slot->pdata;
+               while (sec_hitmask[i]) {
+                       uint32_t hit_index = __builtin_ctzl(sec_hitmask[i]);
 
-       key_idx = positions[idx] + 1;
-       /*
-        * If key index is 0, force hit to be 0, in case key to be looked up
-        * is all zero (as in the dummy slot), which would result in a wrong hit
-        */
-       *hits |= (uint64_t)(hit && !!key_idx)  << idx;
-}
+                       uint32_t key_idx = secondary_bkt[i]->key_idx[hit_index];
+                       const struct rte_hash_key *key_slot =
+                               (const struct rte_hash_key *)(
+                               (const char *)h->key_store +
+                               key_idx * h->key_entry_size);
+                       /*
+                        * If key index is 0, do not compare key,
+                        * as it is checking the dummy slot
+                        */
 
-static inline void
-__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
-                       uint32_t num_keys, int32_t *positions,
-                       uint64_t *hit_mask, void *data[])
-{
-       uint64_t hits = 0;
-       uint64_t extra_hits_mask = 0;
-       uint64_t lookup_mask, miss_mask;
-       unsigned idx;
-       const void *key_store = h->key_store;
-       int ret;
-       hash_sig_t hash_vals[RTE_HASH_LOOKUP_BULK_MAX];
-
-       unsigned idx00, idx01, idx10, idx11, idx20, idx21, idx30, idx31;
-       const struct rte_hash_bucket *primary_bkt10, *primary_bkt11;
-       const struct rte_hash_bucket *secondary_bkt10, *secondary_bkt11;
-       const struct rte_hash_bucket *primary_bkt20, *primary_bkt21;
-       const struct rte_hash_bucket *secondary_bkt20, *secondary_bkt21;
-       const struct rte_hash_key *k_slot20, *k_slot21, *k_slot30, *k_slot31;
-       hash_sig_t primary_hash10, primary_hash11;
-       hash_sig_t secondary_hash10, secondary_hash11;
-       hash_sig_t primary_hash20, primary_hash21;
-       hash_sig_t secondary_hash20, secondary_hash21;
-
-       lookup_mask = (uint64_t) -1 >> (64 - num_keys);
-       miss_mask = lookup_mask;
-
-       lookup_stage0(&idx00, &lookup_mask, keys);
-       lookup_stage0(&idx01, &lookup_mask, keys);
-
-       idx10 = idx00, idx11 = idx01;
-
-       lookup_stage0(&idx00, &lookup_mask, keys);
-       lookup_stage0(&idx01, &lookup_mask, keys);
-       lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
-                       &primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
-       lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
-                       &primary_bkt11, &secondary_bkt11, hash_vals, keys, h);
-
-       primary_bkt20 = primary_bkt10;
-       primary_bkt21 = primary_bkt11;
-       secondary_bkt20 = secondary_bkt10;
-       secondary_bkt21 = secondary_bkt11;
-       primary_hash20 = primary_hash10;
-       primary_hash21 = primary_hash11;
-       secondary_hash20 = secondary_hash10;
-       secondary_hash21 = secondary_hash11;
-       idx20 = idx10, idx21 = idx11;
-       idx10 = idx00, idx11 = idx01;
-
-       lookup_stage0(&idx00, &lookup_mask, keys);
-       lookup_stage0(&idx01, &lookup_mask, keys);
-       lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
-                       &primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
-       lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
-                       &primary_bkt11, &secondary_bkt11, hash_vals, keys, h);
-       lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20,
-                       secondary_bkt20, &k_slot20, positions, &extra_hits_mask,
-                       key_store, h);
-       lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
-                       secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
-                       key_store, h);
-
-       while (lookup_mask) {
-               k_slot30 = k_slot20, k_slot31 = k_slot21;
-               idx30 = idx20, idx31 = idx21;
-               primary_bkt20 = primary_bkt10;
-               primary_bkt21 = primary_bkt11;
-               secondary_bkt20 = secondary_bkt10;
-               secondary_bkt21 = secondary_bkt11;
-               primary_hash20 = primary_hash10;
-               primary_hash21 = primary_hash11;
-               secondary_hash20 = secondary_hash10;
-               secondary_hash21 = secondary_hash11;
-               idx20 = idx10, idx21 = idx11;
-               idx10 = idx00, idx11 = idx01;
-
-               lookup_stage0(&idx00, &lookup_mask, keys);
-               lookup_stage0(&idx01, &lookup_mask, keys);
-               lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
-                       &primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
-               lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
-                       &primary_bkt11, &secondary_bkt11, hash_vals, keys, h);
-               lookup_stage2(idx20, primary_hash20, secondary_hash20,
-                       primary_bkt20, secondary_bkt20, &k_slot20, positions,
-                       &extra_hits_mask, key_store, h);
-               lookup_stage2(idx21, primary_hash21, secondary_hash21,
-                       primary_bkt21, secondary_bkt21, &k_slot21, positions,
-                       &extra_hits_mask, key_store, h);
-               lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
-               lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
-       }
+                       if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) {
+                               if (data != NULL)
+                                       data[i] = key_slot->pdata;
 
-       k_slot30 = k_slot20, k_slot31 = k_slot21;
-       idx30 = idx20, idx31 = idx21;
-       primary_bkt20 = primary_bkt10;
-       primary_bkt21 = primary_bkt11;
-       secondary_bkt20 = secondary_bkt10;
-       secondary_bkt21 = secondary_bkt11;
-       primary_hash20 = primary_hash10;
-       primary_hash21 = primary_hash11;
-       secondary_hash20 = secondary_hash10;
-       secondary_hash21 = secondary_hash11;
-       idx20 = idx10, idx21 = idx11;
-       idx10 = idx00, idx11 = idx01;
-
-       lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
-               &primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
-       lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
-               &primary_bkt11, &secondary_bkt11, hash_vals, keys, h);
-       lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20,
-               secondary_bkt20, &k_slot20, positions, &extra_hits_mask,
-               key_store, h);
-       lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
-               secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
-               key_store, h);
-       lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
-       lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
-
-       k_slot30 = k_slot20, k_slot31 = k_slot21;
-       idx30 = idx20, idx31 = idx21;
-       primary_bkt20 = primary_bkt10;
-       primary_bkt21 = primary_bkt11;
-       secondary_bkt20 = secondary_bkt10;
-       secondary_bkt21 = secondary_bkt11;
-       primary_hash20 = primary_hash10;
-       primary_hash21 = primary_hash11;
-       secondary_hash20 = secondary_hash10;
-       secondary_hash21 = secondary_hash11;
-       idx20 = idx10, idx21 = idx11;
-
-       lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20,
-               secondary_bkt20, &k_slot20, positions, &extra_hits_mask,
-               key_store, h);
-       lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
-               secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
-               key_store, h);
-       lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
-       lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
-
-       k_slot30 = k_slot20, k_slot31 = k_slot21;
-       idx30 = idx20, idx31 = idx21;
-
-       lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
-       lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
-
-       /* ignore any items we have already found */
-       extra_hits_mask &= ~hits;
-
-       if (unlikely(extra_hits_mask)) {
-               /* run a single search for each remaining item */
-               do {
-                       idx = __builtin_ctzl(extra_hits_mask);
-                       if (data != NULL) {
-                               ret = rte_hash_lookup_with_hash_data(h,
-                                               keys[idx], hash_vals[idx], &data[idx]);
-                               if (ret >= 0)
-                                       hits |= 1ULL << idx;
-                       } else {
-                               positions[idx] = rte_hash_lookup_with_hash(h,
-                                                       keys[idx], hash_vals[idx]);
-                               if (positions[idx] >= 0)
-                                       hits |= 1llu << idx;
+                               hits |= 1ULL << i;
+                               positions[i] = key_idx - 1;
+                               goto next_key;
                        }
-                       extra_hits_mask &= ~(1llu << idx);
-               } while (extra_hits_mask);
-       }
+                       sec_hitmask[i] &= ~(1 << (hit_index));
+               }
 
-       miss_mask &= ~hits;
-       if (unlikely(miss_mask)) {
-               do {
-                       idx = __builtin_ctzl(miss_mask);
-                       positions[idx] = -ENOENT;
-                       miss_mask &= ~(1llu << idx);
-               } while (miss_mask);
+next_key:
+               continue;
        }
 
        if (hit_mask != NULL)
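The rewritten bulk path replaces the fixed software pipeline with hit masks computed by compare_signatures(), but the public API is unchanged. A minimal, hypothetical caller of the data-returning bulk lookup:

        #define BURST 32 /* hypothetical burst size, must be <= RTE_HASH_LOOKUP_BULK_MAX */

        static void
        lookup_burst(const struct rte_hash *h, const void *keys[BURST],
                     void *out[BURST])
        {
                uint64_t hit_mask = 0;
                unsigned int i;

                if (rte_hash_lookup_bulk_data(h, keys, BURST, &hit_mask, out) < 0)
                        return; /* invalid parameters */

                for (i = 0; i < BURST; i++) {
                        if (hit_mask & (1ULL << i)) {
                                /* out[i] holds the data stored for keys[i] */
                        } /* else: keys[i] is not in the table */
                }
        }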
@@ -1223,7 +1153,7 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32
        idx = *next % RTE_HASH_BUCKET_ENTRIES;
 
        /* If current position is empty, go to the next one */
-       while (h->buckets[bucket_idx].signatures[idx].sig == NULL_SIGNATURE) {
+       while (h->buckets[bucket_idx].key_idx[idx] == EMPTY_SLOT) {
                (*next)++;
                /* End of table */
                if (*next == total_entries)
index 6c76700..1b8ffed 100644 (file)
@@ -130,14 +130,18 @@ enum add_key_case {
 };
 
 /** Number of items per bucket. */
-#define RTE_HASH_BUCKET_ENTRIES                4
+#define RTE_HASH_BUCKET_ENTRIES                8
 
 #define NULL_SIGNATURE                 0
 
+#define EMPTY_SLOT                     0
+
 #define KEY_ALIGNMENT                  16
 
 #define LCORE_CACHE_SIZE               64
 
+#define RTE_HASH_MAX_PUSHES             100
+
 #define RTE_HASH_BFS_QUEUE_MAX_LEN       1000
 
 #define RTE_XABORT_CUCKOO_PATH_INVALIDED 0x4
@@ -149,17 +153,6 @@ struct lcore_cache {
        void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */
 } __rte_cache_aligned;
 
-/* Structure storing both primary and secondary hashes */
-struct rte_hash_signatures {
-       union {
-               struct {
-                       hash_sig_t current;
-                       hash_sig_t alt;
-               };
-               uint64_t sig;
-       };
-};
-
 /* Structure that stores key-value pair */
 struct rte_hash_key {
        union {
@@ -170,11 +163,22 @@ struct rte_hash_key {
        char key[0];
 } __attribute__((aligned(KEY_ALIGNMENT)));
 
+/* All different signature compare functions */
+enum rte_hash_sig_compare_function {
+       RTE_HASH_COMPARE_SCALAR = 0,
+       RTE_HASH_COMPARE_SSE,
+       RTE_HASH_COMPARE_AVX2,
+       RTE_HASH_COMPARE_NUM
+};
+
 /** Bucket structure */
 struct rte_hash_bucket {
-       struct rte_hash_signatures signatures[RTE_HASH_BUCKET_ENTRIES];
-       /* Includes dummy key index that always contains index 0 */
-       uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES + 1];
+       hash_sig_t sig_current[RTE_HASH_BUCKET_ENTRIES];
+
+       uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES];
+
+       hash_sig_t sig_alt[RTE_HASH_BUCKET_ENTRIES];
+
        uint8_t flag[RTE_HASH_BUCKET_ENTRIES];
 } __rte_cache_aligned;
 
@@ -183,30 +187,38 @@ struct rte_hash {
        char name[RTE_HASH_NAMESIZE];   /**< Name of the hash. */
        uint32_t entries;               /**< Total table entries. */
        uint32_t num_buckets;           /**< Number of buckets in table. */
-       uint32_t key_len;               /**< Length of hash key. */
+
+       struct rte_ring *free_slots;
+       /**< Ring that stores all indexes of the free slots in the key table */
+       uint8_t hw_trans_mem_support;
+       /**< Hardware transactional memory support */
+       struct lcore_cache *local_free_slots;
+       /**< Local cache per lcore, storing some indexes of the free slots */
+       enum add_key_case add_key; /**< Multi-writer hash add behavior */
+
+       rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */
+
+       /* Fields used in lookup */
+
+       uint32_t key_len __rte_cache_aligned;
+       /**< Length of hash key. */
        rte_hash_function hash_func;    /**< Function used to calculate hash. */
        uint32_t hash_func_init_val;    /**< Init value used by hash_func. */
        rte_hash_cmp_eq_t rte_hash_custom_cmp_eq;
        /**< Custom function used to compare keys. */
        enum cmp_jump_table_case cmp_jump_table_idx;
        /**< Indicates which compare function to use. */
-       uint32_t bucket_bitmask;        /**< Bitmask for getting bucket index
-                                               from hash signature. */
+       enum rte_hash_sig_compare_function sig_cmp_fn;
+       /**< Indicates which signature compare function to use. */
+       uint32_t bucket_bitmask;
+       /**< Bitmask for getting bucket index from hash signature. */
        uint32_t key_entry_size;         /**< Size of each key entry. */
 
-       struct rte_ring *free_slots;    /**< Ring that stores all indexes
-                                               of the free slots in the key table */
        void *key_store;                /**< Table storing all keys and data */
-       struct rte_hash_bucket *buckets;        /**< Table with buckets storing all the
-                                                       hash values and key indexes
-                                                       to the key table*/
-       uint8_t hw_trans_mem_support;   /**< Hardware transactional
-                                                       memory support */
-       struct lcore_cache *local_free_slots;
-       /**< Local cache per lcore, storing some indexes of the free slots */
-       enum add_key_case add_key; /**< Multi-writer hash add behavior */
-
-       rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */
+       struct rte_hash_bucket *buckets;
+       /**< Table with buckets storing all the hash values and key indexes
+        * to the key table.
+        */
 } __rte_cache_aligned;
 
 struct queue_node {
index fa5630b..0c94244 100644 (file)
@@ -53,10 +53,9 @@ rte_hash_cuckoo_insert_mw_tm(struct rte_hash_bucket *prim_bkt,
                        */
                        for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
                                /* Check if slot is available */
-                               if (likely(prim_bkt->signatures[i].sig ==
-                                               NULL_SIGNATURE)) {
-                                       prim_bkt->signatures[i].current = sig;
-                                       prim_bkt->signatures[i].alt = alt_hash;
+                               if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
+                                       prim_bkt->sig_current[i] = sig;
+                                       prim_bkt->sig_alt[i] = alt_hash;
                                        prim_bkt->key_idx[i] = new_idx;
                                        break;
                                }
@@ -102,7 +101,7 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h,
                                prev_slot = curr_node->prev_slot;
 
                                prev_alt_bkt_idx
-                                       = prev_bkt->signatures[prev_slot].alt
+                                       = prev_bkt->sig_alt[prev_slot]
                                            & h->bucket_bitmask;
 
                                if (unlikely(&h->buckets[prev_alt_bkt_idx]
@@ -114,10 +113,10 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h,
                                 * Cuckoo insert to move elements back to its
                                 * primary bucket if available
                                 */
-                               curr_bkt->signatures[curr_slot].alt =
-                                   prev_bkt->signatures[prev_slot].current;
-                               curr_bkt->signatures[curr_slot].current =
-                                   prev_bkt->signatures[prev_slot].alt;
+                               curr_bkt->sig_alt[curr_slot] =
+                                   prev_bkt->sig_current[prev_slot];
+                               curr_bkt->sig_current[curr_slot] =
+                                   prev_bkt->sig_alt[prev_slot];
                                curr_bkt->key_idx[curr_slot]
                                    = prev_bkt->key_idx[prev_slot];
 
@@ -126,8 +125,8 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h,
                                curr_bkt = curr_node->bkt;
                        }
 
-                       curr_bkt->signatures[curr_slot].current = sig;
-                       curr_bkt->signatures[curr_slot].alt = alt_hash;
+                       curr_bkt->sig_current[curr_slot] = sig;
+                       curr_bkt->sig_alt[curr_slot] = alt_hash;
                        curr_bkt->key_idx[curr_slot] = new_idx;
 
                        rte_xend();
@@ -168,10 +167,11 @@ rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h,
 
        /* Cuckoo bfs Search */
        while (likely(tail != head && head <
-                                       queue + RTE_HASH_BFS_QUEUE_MAX_LEN - 4)) {
+                                       queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
+                                       RTE_HASH_BUCKET_ENTRIES)) {
                curr_bkt = tail->bkt;
                for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-                       if (curr_bkt->signatures[i].sig == NULL_SIGNATURE) {
+                       if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
                                if (likely(rte_hash_cuckoo_move_insert_mw_tm(h,
                                                tail, i, sig,
                                                alt_hash, new_idx) == 0))
@@ -179,7 +179,7 @@ rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h,
                        }
 
                        /* Enqueue new node and keep prev node info */
-                       alt_bkt = &(h->buckets[curr_bkt->signatures[i].alt
+                       alt_bkt = &(h->buckets[curr_bkt->sig_alt[i]
                                                    & h->bucket_bitmask]);
                        head->bkt = alt_bkt;
                        head->prev = tail;
index a430961..bd46048 100644 (file)
@@ -115,7 +115,7 @@ struct rte_fbk_hash_table {
        uint32_t init_val;              /**< For initialising hash function. */
 
        /** A flat table of all buckets. */
-       union rte_fbk_hash_entry t[0];
+       union rte_fbk_hash_entry t[];
 };
 
 /**
index d98e98e..a4886a8 100644 (file)
@@ -54,6 +54,7 @@ extern "C" {
 #include <stdint.h>
 #include <rte_byteorder.h>
 #include <rte_ip.h>
+#include <rte_common.h>
 
 #ifdef __SSE3__
 #include <rte_vect.h>
@@ -102,6 +103,7 @@ static const __m128i rte_thash_ipv6_bswap_mask = {
 struct rte_ipv4_tuple {
        uint32_t        src_addr;
        uint32_t        dst_addr;
+       RTE_STD_C11
        union {
                struct {
                        uint16_t dport;
@@ -119,6 +121,7 @@ struct rte_ipv4_tuple {
 struct rte_ipv6_tuple {
        uint8_t         src_addr[16];
        uint8_t         dst_addr[16];
+       RTE_STD_C11
        union {
                struct {
                        uint16_t dport;
index 8d56abd..854ac83 100644 (file)
@@ -92,9 +92,9 @@ rte_kvargs_tokenize(struct rte_kvargs *kvlist, const char *params)
  * into a list of valid keys.
  */
 static int
-is_valid_key(const char *valid[], const char *key_match)
+is_valid_key(const char * const valid[], const char *key_match)
 {
-       const char **valid_ptr;
+       const char * const *valid_ptr;
 
        for (valid_ptr = valid; *valid_ptr != NULL; valid_ptr++) {
                if (strcmp(key_match, *valid_ptr) == 0)
@@ -109,7 +109,7 @@ is_valid_key(const char *valid[], const char *key_match)
  */
 static int
 check_for_valid_keys(struct rte_kvargs *kvlist,
-               const char *valid[])
+               const char * const valid[])
 {
        unsigned i, ret;
        struct rte_kvargs_pair *pair;
@@ -187,7 +187,7 @@ rte_kvargs_free(struct rte_kvargs *kvlist)
  * check if only valid keys were used.
  */
 struct rte_kvargs *
-rte_kvargs_parse(const char *args, const char *valid_keys[])
+rte_kvargs_parse(const char *args, const char * const valid_keys[])
 {
        struct rte_kvargs *kvlist;
 
index ae9ae79..5821c72 100644 (file)
@@ -97,7 +97,8 @@ struct rte_kvargs {
  *   - A pointer to an allocated rte_kvargs structure on success
  *   - NULL on error
  */
-struct rte_kvargs *rte_kvargs_parse(const char *args, const char *valid_keys[]);
+struct rte_kvargs *rte_kvargs_parse(const char *args,
+               const char *const valid_keys[]);
 
 /**
  * Free a rte_kvargs structure
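With the constified prototype, callers can pass a fully const key list. A short, hypothetical parsing sketch (key names are only examples):

        static const char * const valid_args[] = { "iface", "queues", NULL };

        static int
        parse_example_args(const char *args) /* e.g. "iface=eth0,queues=4" */
        {
                struct rte_kvargs *kvlist = rte_kvargs_parse(args, valid_args);

                if (kvlist == NULL)
                        return -1; /* malformed string or unknown key */
                /* ... rte_kvargs_count()/rte_kvargs_process() as needed ... */
                rte_kvargs_free(kvlist);
                return 0;
        }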
index 4846b89..72ad91e 100644 (file)
 #include <rte_lcore.h>
 #include <rte_atomic.h>
 #include <rte_branch_prediction.h>
-#include <rte_ring.h>
 #include <rte_mempool.h>
 #include <rte_mbuf.h>
 #include <rte_string_fns.h>
 #include <rte_hexdump.h>
 #include <rte_errno.h>
+#include <rte_memcpy.h>
 
 /*
  * ctrlmbuf constructor, given as a callback function to
@@ -174,10 +174,12 @@ rte_pktmbuf_pool_create(const char *name, unsigned n,
        if (mp == NULL)
                return NULL;
 
-       rte_errno = rte_mempool_set_ops_byname(mp,
-                       RTE_MBUF_DEFAULT_MEMPOOL_OPS, NULL);
-       if (rte_errno != 0) {
+       ret = rte_mempool_set_ops_byname(mp,
+               RTE_MBUF_DEFAULT_MEMPOOL_OPS, NULL);
+       if (ret != 0) {
                RTE_LOG(ERR, MBUF, "error setting mempool handler\n");
+               rte_mempool_free(mp);
+               rte_errno = -ret;
                return NULL;
        }
        rte_pktmbuf_pool_init(mp, &mbp_priv);
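With this change the pool is freed and rte_errno is set when the default mempool handler cannot be installed, so the usual NULL-plus-rte_errno convention holds for callers. A minimal creation sketch (name and sizes are only examples):

        struct rte_mempool *mp;

        mp = rte_pktmbuf_pool_create("pkt_pool",   /* hypothetical name */
                                     8192,          /* number of mbufs */
                                     256,           /* per-lcore cache size */
                                     0,             /* private data size */
                                     RTE_MBUF_DEFAULT_BUF_SIZE,
                                     rte_socket_id());
        if (mp == NULL)
                rte_exit(EXIT_FAILURE, "mbuf pool creation failed: %s\n",
                         rte_strerror(rte_errno));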
@@ -262,6 +264,40 @@ rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)
        }
 }
 
+/* read len data bytes in a mbuf at specified offset (internal) */
+const void *__rte_pktmbuf_read(const struct rte_mbuf *m, uint32_t off,
+       uint32_t len, void *buf)
+{
+       const struct rte_mbuf *seg = m;
+       uint32_t buf_off = 0, copy_len;
+
+       if (off + len > rte_pktmbuf_pkt_len(m))
+               return NULL;
+
+       while (off >= rte_pktmbuf_data_len(seg)) {
+               off -= rte_pktmbuf_data_len(seg);
+               seg = seg->next;
+       }
+
+       if (off + len <= rte_pktmbuf_data_len(seg))
+               return rte_pktmbuf_mtod_offset(seg, char *, off);
+
+       /* rare case: header is split among several segments */
+       while (len > 0) {
+               copy_len = rte_pktmbuf_data_len(seg) - off;
+               if (copy_len > len)
+                       copy_len = len;
+               rte_memcpy((char *)buf + buf_off,
+                       rte_pktmbuf_mtod_offset(seg, char *, off), copy_len);
+               off = 0;
+               buf_off += copy_len;
+               len -= copy_len;
+               seg = seg->next;
+       }
+
+       return buf;
+}
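This helper backs the rte_pktmbuf_read() inline wrapper introduced alongside it: when the requested range lies in one segment it returns a pointer into the mbuf, otherwise it gathers the bytes into the caller's buffer. A hedged sketch for reading a header that may straddle segments (assumes an mbuf `m` and <rte_ether.h>):

        struct ether_hdr hdr_copy;
        const struct ether_hdr *eth;

        /* Returns NULL if the packet is shorter than the requested range. */
        eth = rte_pktmbuf_read(m, 0, sizeof(*eth), &hdr_copy);
        if (eth == NULL)
                return; /* truncated packet */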
+
 /*
  * Get the name of a RX offload flag. Must be kept synchronized with flag
  * definitions in rte_mbuf.h.
@@ -273,16 +309,78 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask)
        case PKT_RX_RSS_HASH: return "PKT_RX_RSS_HASH";
        case PKT_RX_FDIR: return "PKT_RX_FDIR";
        case PKT_RX_L4_CKSUM_BAD: return "PKT_RX_L4_CKSUM_BAD";
+       case PKT_RX_L4_CKSUM_GOOD: return "PKT_RX_L4_CKSUM_GOOD";
+       case PKT_RX_L4_CKSUM_NONE: return "PKT_RX_L4_CKSUM_NONE";
        case PKT_RX_IP_CKSUM_BAD: return "PKT_RX_IP_CKSUM_BAD";
+       case PKT_RX_IP_CKSUM_GOOD: return "PKT_RX_IP_CKSUM_GOOD";
+       case PKT_RX_IP_CKSUM_NONE: return "PKT_RX_IP_CKSUM_NONE";
        case PKT_RX_EIP_CKSUM_BAD: return "PKT_RX_EIP_CKSUM_BAD";
        case PKT_RX_VLAN_STRIPPED: return "PKT_RX_VLAN_STRIPPED";
        case PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP";
        case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST";
        case PKT_RX_QINQ_STRIPPED: return "PKT_RX_QINQ_STRIPPED";
+       case PKT_RX_LRO: return "PKT_RX_LRO";
        default: return NULL;
        }
 }
 
+struct flag_mask {
+       uint64_t flag;
+       uint64_t mask;
+       const char *default_name;
+};
+
+/* write the list of rx ol flags in buffer buf */
+int
+rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen)
+{
+       const struct flag_mask rx_flags[] = {
+               { PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT, NULL },
+               { PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, NULL },
+               { PKT_RX_FDIR, PKT_RX_FDIR, NULL },
+               { PKT_RX_L4_CKSUM_BAD, PKT_RX_L4_CKSUM_MASK, NULL },
+               { PKT_RX_L4_CKSUM_GOOD, PKT_RX_L4_CKSUM_MASK, NULL },
+               { PKT_RX_L4_CKSUM_NONE, PKT_RX_L4_CKSUM_MASK, NULL },
+               { PKT_RX_L4_CKSUM_UNKNOWN, PKT_RX_L4_CKSUM_MASK,
+                 "PKT_RX_L4_CKSUM_UNKNOWN" },
+               { PKT_RX_IP_CKSUM_BAD, PKT_RX_IP_CKSUM_MASK, NULL },
+               { PKT_RX_IP_CKSUM_GOOD, PKT_RX_IP_CKSUM_MASK, NULL },
+               { PKT_RX_IP_CKSUM_NONE, PKT_RX_IP_CKSUM_MASK, NULL },
+               { PKT_RX_IP_CKSUM_UNKNOWN, PKT_RX_IP_CKSUM_MASK,
+                 "PKT_RX_IP_CKSUM_UNKNOWN" },
+               { PKT_RX_EIP_CKSUM_BAD, PKT_RX_EIP_CKSUM_BAD, NULL },
+               { PKT_RX_VLAN_STRIPPED, PKT_RX_VLAN_STRIPPED, NULL },
+               { PKT_RX_IEEE1588_PTP, PKT_RX_IEEE1588_PTP, NULL },
+               { PKT_RX_IEEE1588_TMST, PKT_RX_IEEE1588_TMST, NULL },
+               { PKT_RX_QINQ_STRIPPED, PKT_RX_QINQ_STRIPPED, NULL },
+               { PKT_RX_LRO, PKT_RX_LRO, NULL },
+       };
+       const char *name;
+       unsigned int i;
+       int ret;
+
+       if (buflen == 0)
+               return -1;
+
+       buf[0] = '\0';
+       for (i = 0; i < RTE_DIM(rx_flags); i++) {
+               if ((mask & rx_flags[i].mask) != rx_flags[i].flag)
+                       continue;
+               name = rte_get_rx_ol_flag_name(rx_flags[i].flag);
+               if (name == NULL)
+                       name = rx_flags[i].default_name;
+               ret = snprintf(buf, buflen, "%s ", name);
+               if (ret < 0)
+                       return -1;
+               if ((size_t)ret >= buflen)
+                       return -1;
+               buf += ret;
+               buflen -= ret;
+       }
+
+       return 0;
+}
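A small debugging sketch using the new helper (buffer size arbitrary, assumes an mbuf `m`):

        char ol_buf[256];

        if (rte_get_rx_ol_flag_list(m->ol_flags, ol_buf, sizeof(ol_buf)) == 0)
                printf("rx ol_flags: %s\n", ol_buf);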
+
 /*
  * Get the name of a TX offload flag. Must be kept synchronized with flag
  * definitions in rte_mbuf.h.
@@ -302,6 +400,65 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask)
        case PKT_TX_OUTER_IP_CKSUM: return "PKT_TX_OUTER_IP_CKSUM";
        case PKT_TX_OUTER_IPV4: return "PKT_TX_OUTER_IPV4";
        case PKT_TX_OUTER_IPV6: return "PKT_TX_OUTER_IPV6";
+       case PKT_TX_TUNNEL_VXLAN: return "PKT_TX_TUNNEL_VXLAN";
+       case PKT_TX_TUNNEL_GRE: return "PKT_TX_TUNNEL_GRE";
+       case PKT_TX_TUNNEL_IPIP: return "PKT_TX_TUNNEL_IPIP";
+       case PKT_TX_TUNNEL_GENEVE: return "PKT_TX_TUNNEL_GENEVE";
+       case PKT_TX_MACSEC: return "PKT_TX_MACSEC";
        default: return NULL;
        }
 }
+
+/* write the list of tx ol flags in buffer buf */
+int
+rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen)
+{
+       const struct flag_mask tx_flags[] = {
+               { PKT_TX_VLAN_PKT, PKT_TX_VLAN_PKT, NULL },
+               { PKT_TX_IP_CKSUM, PKT_TX_IP_CKSUM, NULL },
+               { PKT_TX_TCP_CKSUM, PKT_TX_L4_MASK, NULL },
+               { PKT_TX_SCTP_CKSUM, PKT_TX_L4_MASK, NULL },
+               { PKT_TX_UDP_CKSUM, PKT_TX_L4_MASK, NULL },
+               { PKT_TX_L4_NO_CKSUM, PKT_TX_L4_MASK, "PKT_TX_L4_NO_CKSUM" },
+               { PKT_TX_IEEE1588_TMST, PKT_TX_IEEE1588_TMST, NULL },
+               { PKT_TX_TCP_SEG, PKT_TX_TCP_SEG, NULL },
+               { PKT_TX_IPV4, PKT_TX_IPV4, NULL },
+               { PKT_TX_IPV6, PKT_TX_IPV6, NULL },
+               { PKT_TX_OUTER_IP_CKSUM, PKT_TX_OUTER_IP_CKSUM, NULL },
+               { PKT_TX_OUTER_IPV4, PKT_TX_OUTER_IPV4, NULL },
+               { PKT_TX_OUTER_IPV6, PKT_TX_OUTER_IPV6, NULL },
+               { PKT_TX_TUNNEL_VXLAN, PKT_TX_TUNNEL_MASK,
+                 "PKT_TX_TUNNEL_NONE" },
+               { PKT_TX_TUNNEL_GRE, PKT_TX_TUNNEL_MASK,
+                 "PKT_TX_TUNNEL_NONE" },
+               { PKT_TX_TUNNEL_IPIP, PKT_TX_TUNNEL_MASK,
+                 "PKT_TX_TUNNEL_NONE" },
+               { PKT_TX_TUNNEL_GENEVE, PKT_TX_TUNNEL_MASK,
+                 "PKT_TX_TUNNEL_NONE" },
+               { PKT_TX_MACSEC, PKT_TX_MACSEC, NULL },
+       };
+       const char *name;
+       unsigned int i;
+       int ret;
+
+       if (buflen == 0)
+               return -1;
+
+       buf[0] = '\0';
+       for (i = 0; i < RTE_DIM(tx_flags); i++) {
+               if ((mask & tx_flags[i].mask) != tx_flags[i].flag)
+                       continue;
+               name = rte_get_tx_ol_flag_name(tx_flags[i].flag);
+               if (name == NULL)
+                       name = tx_flags[i].default_name;
+               ret = snprintf(buf, buflen, "%s ", name);
+               if (ret < 0)
+                       return -1;
+               if ((size_t)ret >= buflen)
+                       return -1;
+               buf += ret;
+               buflen -= ret;
+       }
+
+       return 0;
+}
index 9e60799..0d01167 100644 (file)
@@ -44,7 +44,7 @@
  * buffers. The message buffers are stored in a mempool, using the
  * RTE mempool library.
  *
- * This library provide an API to allocate/free packet mbufs, which are
+ * This library provides an API to allocate/free packet mbufs, which are
  * used to carry network packets.
  *
  * To understand the concepts of packet buffers or mbufs, you
@@ -60,6 +60,7 @@
 #include <rte_atomic.h>
 #include <rte_prefetch.h>
 #include <rte_branch_prediction.h>
+#include <rte_mbuf_ptype.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -90,8 +91,25 @@ extern "C" {
 
 #define PKT_RX_RSS_HASH      (1ULL << 1)  /**< RX packet with RSS hash result. */
 #define PKT_RX_FDIR          (1ULL << 2)  /**< RX packet with FDIR match indicate. */
-#define PKT_RX_L4_CKSUM_BAD  (1ULL << 3)  /**< L4 cksum of RX pkt. is not OK. */
-#define PKT_RX_IP_CKSUM_BAD  (1ULL << 4)  /**< IP cksum of RX pkt. is not OK. */
+
+/**
+ * Deprecated.
+ * Checking this flag alone is deprecated: check the 2 bits of
+ * PKT_RX_L4_CKSUM_MASK.
+ * This flag was set when the L4 checksum of a packet was detected as
+ * wrong by the hardware.
+ */
+#define PKT_RX_L4_CKSUM_BAD  (1ULL << 3)
+
+/**
+ * Deprecated.
+ * Checking this flag alone is deprecated: check the 2 bits of
+ * PKT_RX_IP_CKSUM_MASK.
+ * This flag was set when the IP checksum of a packet was detected as
+ * wrong by the hardware.
+ */
+#define PKT_RX_IP_CKSUM_BAD  (1ULL << 4)
+
 #define PKT_RX_EIP_CKSUM_BAD (1ULL << 5)  /**< External IP header checksum error. */
 
 /**
@@ -101,7 +119,35 @@ extern "C" {
  */
 #define PKT_RX_VLAN_STRIPPED (1ULL << 6)
 
-/* hole, some bits can be reused here  */
+/**
+ * Mask of bits used to determine the status of RX IP checksum.
+ * - PKT_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
+ * - PKT_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
+ * - PKT_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
+ * - PKT_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet
+ *   data, but the integrity of the IP header is verified.
+ */
+#define PKT_RX_IP_CKSUM_MASK ((1ULL << 4) | (1ULL << 7))
+
+#define PKT_RX_IP_CKSUM_UNKNOWN 0
+#define PKT_RX_IP_CKSUM_BAD     (1ULL << 4)
+#define PKT_RX_IP_CKSUM_GOOD    (1ULL << 7)
+#define PKT_RX_IP_CKSUM_NONE    ((1ULL << 4) | (1ULL << 7))
+
+/**
+ * Mask of bits used to determine the status of RX L4 checksum.
+ * - PKT_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum
+ * - PKT_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong
+ * - PKT_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid
+ * - PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet
+ *   data, but the integrity of the L4 data is verified.
+ */
+#define PKT_RX_L4_CKSUM_MASK ((1ULL << 3) | (1ULL << 8))
+
+#define PKT_RX_L4_CKSUM_UNKNOWN 0
+#define PKT_RX_L4_CKSUM_BAD     (1ULL << 3)
+#define PKT_RX_L4_CKSUM_GOOD    (1ULL << 8)
+#define PKT_RX_L4_CKSUM_NONE    ((1ULL << 3) | (1ULL << 8))
 
 #define PKT_RX_IEEE1588_PTP  (1ULL << 9)  /**< RX IEEE1588 L2 Ethernet PT Packet. */
 #define PKT_RX_IEEE1588_TMST (1ULL << 10) /**< RX IEEE1588 L2/L4 timestamped packet.*/
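Because the *_BAD flags are now just one value of a 2-bit status field, applications should mask first and then compare rather than test a single bit; for example (helper functions hypothetical):

        uint64_t ol = m->ol_flags;

        if ((ol & PKT_RX_IP_CKSUM_MASK) == PKT_RX_IP_CKSUM_BAD ||
            (ol & PKT_RX_L4_CKSUM_MASK) == PKT_RX_L4_CKSUM_BAD)
                drop_packet(m);            /* hypothetical */
        else if ((ol & PKT_RX_L4_CKSUM_MASK) == PKT_RX_L4_CKSUM_UNKNOWN)
                verify_l4_checksum_sw(m);  /* hypothetical software fallback */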
@@ -124,10 +170,35 @@ extern "C" {
  */
 #define PKT_RX_QINQ_PKT      PKT_RX_QINQ_STRIPPED
 
+/**
+ * When packets are coalesced by a hardware or virtual driver, this flag
+ * can be set in the RX mbuf, meaning that the m->tso_segsz field is
+ * valid and is set to the segment size of original packets.
+ */
+#define PKT_RX_LRO           (1ULL << 16)
+
 /* add new RX flags here */
 
 /* add new TX flags here */
 
+/**
+ * Offload the MACsec. This flag must be set by the application to enable
+ * this offload feature for a packet to be transmitted.
+ */
+#define PKT_TX_MACSEC        (1ULL << 44)
+
+/**
+ * Bits 45:48 used for the tunnel type.
+ * When doing Tx offload like TSO or checksum, the HW needs to configure the
+ * tunnel type into the HW descriptors.
+ */
+#define PKT_TX_TUNNEL_VXLAN   (0x1ULL << 45)
+#define PKT_TX_TUNNEL_GRE     (0x2ULL << 45)
+#define PKT_TX_TUNNEL_IPIP    (0x3ULL << 45)
+#define PKT_TX_TUNNEL_GENEVE  (0x4ULL << 45)
+/* add new TX TUNNEL type here */
+#define PKT_TX_TUNNEL_MASK    (0xFULL << 45)
+
 /**
  * Second VLAN insertion (QinQ) flag.
  */
@@ -218,6 +289,19 @@ extern "C" {
  */
 #define PKT_TX_OUTER_IPV6    (1ULL << 60)
 
+/**
+ * Bitmask of all supported packet Tx offload features flags,
+ * which can be set for packet.
+ */
+#define PKT_TX_OFFLOAD_MASK (    \
+               PKT_TX_IP_CKSUM |        \
+               PKT_TX_L4_MASK |         \
+               PKT_TX_OUTER_IP_CKSUM |  \
+               PKT_TX_TCP_SEG |         \
+               PKT_TX_QINQ_PKT |        \
+               PKT_TX_VLAN_PKT |        \
+               PKT_TX_TUNNEL_MASK)
+
 #define __RESERVED           (1ULL << 61) /**< reserved for future mbuf use */
 
 #define IND_ATTACHED_MBUF    (1ULL << 62) /**< Indirect attached mbuf */
@@ -225,500 +309,6 @@ extern "C" {
 /* Use final bit of flags to indicate a control mbuf */
 #define CTRL_MBUF_FLAG       (1ULL << 63) /**< Mbuf contains control data */
 
-/*
- * 32 bits are divided into several fields to mark packet types. Note that
- * each field is indexical.
- * - Bit 3:0 is for L2 types.
- * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types.
- * - Bit 11:8 is for L4 or outer L4 (for tunneling case) types.
- * - Bit 15:12 is for tunnel types.
- * - Bit 19:16 is for inner L2 types.
- * - Bit 23:20 is for inner L3 types.
- * - Bit 27:24 is for inner L4 types.
- * - Bit 31:28 is reserved.
- *
- * To be compatible with Vector PMD, RTE_PTYPE_L3_IPV4, RTE_PTYPE_L3_IPV4_EXT,
- * RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV6_EXT, RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP
- * and RTE_PTYPE_L4_SCTP should be kept as below in a contiguous 7 bits.
- *
- * Note that L3 types values are selected for checking IPV4/IPV6 header from
- * performance point of view. Reading annotations of RTE_ETH_IS_IPV4_HDR and
- * RTE_ETH_IS_IPV6_HDR is needed for any future changes of L3 type values.
- *
- * Note that the packet types of the same packet recognized by different
- * hardware may be different, as different hardware may have different
- * capability of packet type recognition.
- *
- * examples:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=0x29
- * | 'version'=6, 'next header'=0x3A
- * | 'ICMPv6 header'>
- * will be recognized on i40e hardware as packet type combination of,
- * RTE_PTYPE_L2_ETHER |
- * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
- * RTE_PTYPE_TUNNEL_IP |
- * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- * RTE_PTYPE_INNER_L4_ICMP.
- *
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=0x2F
- * | 'GRE header'
- * | 'version'=6, 'next header'=0x11
- * | 'UDP header'>
- * will be recognized on i40e hardware as packet type combination of,
- * RTE_PTYPE_L2_ETHER |
- * RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
- * RTE_PTYPE_TUNNEL_GRENAT |
- * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- * RTE_PTYPE_INNER_L4_UDP.
- */
-#define RTE_PTYPE_UNKNOWN                   0x00000000
-/**
- * Ethernet packet type.
- * It is used for outer packet for tunneling cases.
- *
- * Packet format:
- * <'ether type'=[0x0800|0x86DD]>
- */
-#define RTE_PTYPE_L2_ETHER                  0x00000001
-/**
- * Ethernet packet type for time sync.
- *
- * Packet format:
- * <'ether type'=0x88F7>
- */
-#define RTE_PTYPE_L2_ETHER_TIMESYNC         0x00000002
-/**
- * ARP (Address Resolution Protocol) packet type.
- *
- * Packet format:
- * <'ether type'=0x0806>
- */
-#define RTE_PTYPE_L2_ETHER_ARP              0x00000003
-/**
- * LLDP (Link Layer Discovery Protocol) packet type.
- *
- * Packet format:
- * <'ether type'=0x88CC>
- */
-#define RTE_PTYPE_L2_ETHER_LLDP             0x00000004
-/**
- * NSH (Network Service Header) packet type.
- *
- * Packet format:
- * <'ether type'=0x894F>
- */
-#define RTE_PTYPE_L2_ETHER_NSH              0x00000005
-/**
- * Mask of layer 2 packet types.
- * It is used for outer packet for tunneling cases.
- */
-#define RTE_PTYPE_L2_MASK                   0x0000000f
-/**
- * IP (Internet Protocol) version 4 packet type.
- * It is used for outer packet for tunneling cases, and does not contain any
- * header option.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'ihl'=5>
- */
-#define RTE_PTYPE_L3_IPV4                   0x00000010
-/**
- * IP (Internet Protocol) version 4 packet type.
- * It is used for outer packet for tunneling cases, and contains header
- * options.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'ihl'=[6-15], 'options'>
- */
-#define RTE_PTYPE_L3_IPV4_EXT               0x00000030
-/**
- * IP (Internet Protocol) version 6 packet type.
- * It is used for outer packet for tunneling cases, and does not contain any
- * extension header.
- *
- * Packet format:
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=0x3B>
- */
-#define RTE_PTYPE_L3_IPV6                   0x00000040
-/**
- * IP (Internet Protocol) version 4 packet type.
- * It is used for outer packet for tunneling cases, and may or maynot contain
- * header options.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'ihl'=[5-15], <'options'>>
- */
-#define RTE_PTYPE_L3_IPV4_EXT_UNKNOWN       0x00000090
-/**
- * IP (Internet Protocol) version 6 packet type.
- * It is used for outer packet for tunneling cases, and contains extension
- * headers.
- *
- * Packet format:
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
- *   'extension headers'>
- */
-#define RTE_PTYPE_L3_IPV6_EXT               0x000000c0
-/**
- * IP (Internet Protocol) version 6 packet type.
- * It is used for outer packet for tunneling cases, and may or maynot contain
- * extension headers.
- *
- * Packet format:
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
- *   <'extension headers'>>
- */
-#define RTE_PTYPE_L3_IPV6_EXT_UNKNOWN       0x000000e0
-/**
- * Mask of layer 3 packet types.
- * It is used for outer packet for tunneling cases.
- */
-#define RTE_PTYPE_L3_MASK                   0x000000f0
-/**
- * TCP (Transmission Control Protocol) packet type.
- * It is used for outer packet for tunneling cases.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=6, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=6>
- */
-#define RTE_PTYPE_L4_TCP                    0x00000100
-/**
- * UDP (User Datagram Protocol) packet type.
- * It is used for outer packet for tunneling cases.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=17, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=17>
- */
-#define RTE_PTYPE_L4_UDP                    0x00000200
-/**
- * Fragmented IP (Internet Protocol) packet type.
- * It is used for outer packet for tunneling cases.
- *
- * It refers to those packets of any IP types, which can be recognized as
- * fragmented. A fragmented packet cannot be recognized as any other L4 types
- * (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP,
- * RTE_PTYPE_L4_NONFRAG).
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'MF'=1>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=44>
- */
-#define RTE_PTYPE_L4_FRAG                   0x00000300
-/**
- * SCTP (Stream Control Transmission Protocol) packet type.
- * It is used for outer packet for tunneling cases.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=132, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=132>
- */
-#define RTE_PTYPE_L4_SCTP                   0x00000400
-/**
- * ICMP (Internet Control Message Protocol) packet type.
- * It is used for outer packet for tunneling cases.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=1, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=1>
- */
-#define RTE_PTYPE_L4_ICMP                   0x00000500
-/**
- * Non-fragmented IP (Internet Protocol) packet type.
- * It is used for outer packet for tunneling cases.
- *
- * It refers to those packets of any IP types, while cannot be recognized as
- * any of above L4 types (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP,
- * RTE_PTYPE_L4_FRAG, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP).
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'!=[6|17|44|132|1]>
- */
-#define RTE_PTYPE_L4_NONFRAG                0x00000600
-/**
- * Mask of layer 4 packet types.
- * It is used for outer packet for tunneling cases.
- */
-#define RTE_PTYPE_L4_MASK                   0x00000f00
-/**
- * IP (Internet Protocol) in IP (Internet Protocol) tunneling packet type.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=[4|41]>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=[4|41]>
- */
-#define RTE_PTYPE_TUNNEL_IP                 0x00001000
-/**
- * GRE (Generic Routing Encapsulation) tunneling packet type.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=47>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=47>
- */
-#define RTE_PTYPE_TUNNEL_GRE                0x00002000
-/**
- * VXLAN (Virtual eXtensible Local Area Network) tunneling packet type.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=17
- * | 'destination port'=4798>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=17
- * | 'destination port'=4798>
- */
-#define RTE_PTYPE_TUNNEL_VXLAN              0x00003000
-/**
- * NVGRE (Network Virtualization using Generic Routing Encapsulation) tunneling
- * packet type.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=47
- * | 'protocol type'=0x6558>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=47
- * | 'protocol type'=0x6558'>
- */
-#define RTE_PTYPE_TUNNEL_NVGRE              0x00004000
-/**
- * GENEVE (Generic Network Virtualization Encapsulation) tunneling packet type.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=17
- * | 'destination port'=6081>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=17
- * | 'destination port'=6081>
- */
-#define RTE_PTYPE_TUNNEL_GENEVE             0x00005000
-/**
- * Tunneling packet type of Teredo, VXLAN (Virtual eXtensible Local Area
- * Network) or GRE (Generic Routing Encapsulation) could be recognized as this
- * packet type, if they can not be recognized independently as of hardware
- * capability.
- */
-#define RTE_PTYPE_TUNNEL_GRENAT             0x00006000
-/**
- * Mask of tunneling packet types.
- */
-#define RTE_PTYPE_TUNNEL_MASK               0x0000f000
-/**
- * Ethernet packet type.
- * It is used for inner packet type only.
- *
- * Packet format (inner only):
- * <'ether type'=[0x800|0x86DD]>
- */
-#define RTE_PTYPE_INNER_L2_ETHER            0x00010000
-/**
- * Ethernet packet type with VLAN (Virtual Local Area Network) tag.
- *
- * Packet format (inner only):
- * <'ether type'=[0x800|0x86DD], vlan=[1-4095]>
- */
-#define RTE_PTYPE_INNER_L2_ETHER_VLAN       0x00020000
-/**
- * Mask of inner layer 2 packet types.
- */
-#define RTE_PTYPE_INNER_L2_MASK             0x000f0000
-/**
- * IP (Internet Protocol) version 4 packet type.
- * It is used for inner packet only, and does not contain any header option.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'ihl'=5>
- */
-#define RTE_PTYPE_INNER_L3_IPV4             0x00100000
-/**
- * IP (Internet Protocol) version 4 packet type.
- * It is used for inner packet only, and contains header options.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'ihl'=[6-15], 'options'>
- */
-#define RTE_PTYPE_INNER_L3_IPV4_EXT         0x00200000
-/**
- * IP (Internet Protocol) version 6 packet type.
- * It is used for inner packet only, and does not contain any extension header.
- *
- * Packet format (inner only):
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=0x3B>
- */
-#define RTE_PTYPE_INNER_L3_IPV6             0x00300000
-/**
- * IP (Internet Protocol) version 4 packet type.
- * It is used for inner packet only, and may or maynot contain header options.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'ihl'=[5-15], <'options'>>
- */
-#define RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x00400000
-/**
- * IP (Internet Protocol) version 6 packet type.
- * It is used for inner packet only, and contains extension headers.
- *
- * Packet format (inner only):
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
- *   'extension headers'>
- */
-#define RTE_PTYPE_INNER_L3_IPV6_EXT         0x00500000
-/**
- * IP (Internet Protocol) version 6 packet type.
- * It is used for inner packet only, and may or maynot contain extension
- * headers.
- *
- * Packet format (inner only):
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
- *   <'extension headers'>>
- */
-#define RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x00600000
-/**
- * Mask of inner layer 3 packet types.
- */
-#define RTE_PTYPE_INNER_L3_MASK             0x00f00000
-/**
- * TCP (Transmission Control Protocol) packet type.
- * It is used for inner packet only.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=6, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=6>
- */
-#define RTE_PTYPE_INNER_L4_TCP              0x01000000
-/**
- * UDP (User Datagram Protocol) packet type.
- * It is used for inner packet only.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=17, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=17>
- */
-#define RTE_PTYPE_INNER_L4_UDP              0x02000000
-/**
- * Fragmented IP (Internet Protocol) packet type.
- * It is used for inner packet only, and may or maynot have layer 4 packet.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'MF'=1>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=44>
- */
-#define RTE_PTYPE_INNER_L4_FRAG             0x03000000
-/**
- * SCTP (Stream Control Transmission Protocol) packet type.
- * It is used for inner packet only.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=132, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=132>
- */
-#define RTE_PTYPE_INNER_L4_SCTP             0x04000000
-/**
- * ICMP (Internet Control Message Protocol) packet type.
- * It is used for inner packet only.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=1, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=1>
- */
-#define RTE_PTYPE_INNER_L4_ICMP             0x05000000
-/**
- * Non-fragmented IP (Internet Protocol) packet type.
- * It is used for inner packet only, and may or maynot have other unknown layer
- * 4 packet types.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'!=[6|17|44|132|1]>
- */
-#define RTE_PTYPE_INNER_L4_NONFRAG          0x06000000
-/**
- * Mask of inner layer 4 packet types.
- */
-#define RTE_PTYPE_INNER_L4_MASK             0x0f000000
-
-/**
- * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by
- * one, bit 4 is selected to be used for IPv4 only. Then checking bit 4 can
- * determine if it is an IPV4 packet.
- */
-#define  RTE_ETH_IS_IPV4_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV4)
-
-/**
- * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by
- * one, bit 6 is selected to be used for IPv4 only. Then checking bit 6 can
- * determine if it is an IPV4 packet.
- */
-#define  RTE_ETH_IS_IPV6_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV6)
-
-/* Check if it is a tunneling packet */
-#define RTE_ETH_IS_TUNNEL_PKT(ptype) ((ptype) & (RTE_PTYPE_TUNNEL_MASK | \
-                                                 RTE_PTYPE_INNER_L2_MASK | \
-                                                 RTE_PTYPE_INNER_L3_MASK | \
-                                                 RTE_PTYPE_INNER_L4_MASK))
-
 /** Alignment constraint of mbuf private area. */
 #define RTE_MBUF_PRIV_ALIGN 8
 
@@ -732,6 +322,20 @@ extern "C" {
  */
 const char *rte_get_rx_ol_flag_name(uint64_t mask);
 
+/**
+ * Dump the list of RX offload flags in a buffer
+ *
+ * @param mask
+ *   The mask describing the RX flags.
+ * @param buf
+ *   The output buffer.
+ * @param buflen
+ *   The length of the buffer.
+ * @return
+ *   0 on success, (-1) on error.
+ */
+int rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen);
+
 /**
  * Get the name of a TX offload flag
  *
@@ -744,6 +348,20 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask);
  */
 const char *rte_get_tx_ol_flag_name(uint64_t mask);
 
+/**
+ * Dump the list of TX offload flags in a buffer
+ *
+ * @param mask
+ *   The mask describing the TX flags.
+ * @param buf
+ *   The output buffer.
+ * @param buflen
+ *   The length of the buffer.
+ * @return
+ *   0 on success, (-1) on error.
+ */
+int rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen);
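A minimal sketch of dumping the offload flags of an mbuf with the two list helpers declared above (buffer size chosen arbitrarily):

#include <stdio.h>
#include <rte_mbuf.h>

static void
print_ol_flags(const struct rte_mbuf *m)
{
	char buf[256];

	if (rte_get_rx_ol_flag_list(m->ol_flags, buf, sizeof(buf)) == 0)
		printf("rx flags: %s\n", buf);
	if (rte_get_tx_ol_flag_list(m->ol_flags, buf, sizeof(buf)) == 0)
		printf("tx flags: %s\n", buf);
}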
+
 /**
  * Some NICs need at least 2KB buffer to RX standard Ethernet frame without
  * splitting it into multiple segments.
@@ -756,8 +374,11 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask);
 
 /* define a set of marker types that can be used to refer to set points in the
  * mbuf */
+__extension__
 typedef void    *MARKER[0];   /**< generic marker for a point in a structure */
+__extension__
 typedef uint8_t  MARKER8[0];  /**< generic marker with 1B alignment */
+__extension__
 typedef uint64_t MARKER64[0]; /**< marker that allows us to overwrite 8 bytes
                                * with a single assignment */
 
@@ -784,6 +405,7 @@ struct rte_mbuf {
         * or non-atomic) is controlled by the CONFIG_RTE_MBUF_REFCNT_ATOMIC
         * config option.
         */
+       RTE_STD_C11
        union {
                rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */
                uint16_t refcnt;              /**< Non-atomically accessed refcnt */
@@ -803,6 +425,7 @@ struct rte_mbuf {
         * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
         * vlan is stripped from the data.
         */
+       RTE_STD_C11
        union {
                uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
                struct {
@@ -824,6 +447,7 @@ struct rte_mbuf {
        union {
                uint32_t rss;     /**< RSS hash result if RSS enabled */
                struct {
+                       RTE_STD_C11
                        union {
                                struct {
                                        uint16_t hash;
@@ -851,6 +475,7 @@ struct rte_mbuf {
        /* second cache line - fields only used in slow path or on TX */
        MARKER cacheline1 __rte_cache_min_aligned;
 
+       RTE_STD_C11
        union {
                void *userdata;   /**< Can be used for external metadata */
                uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */
@@ -860,10 +485,15 @@ struct rte_mbuf {
        struct rte_mbuf *next;    /**< Next segment of scattered packet. */
 
        /* fields to support TX offloads */
+       RTE_STD_C11
        union {
                uint64_t tx_offload;       /**< combined for easy fetch */
+               __extension__
                struct {
-                       uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
+                       uint64_t l2_len:7;
+                       /**< L2 (MAC) Header Length for non-tunneling pkt.
+                        * Outer_L4_len + ... + Inner_L2_len for tunneling pkt.
+                        */
                        uint64_t l3_len:9; /**< L3 (IP) Header Length. */
                        uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
                        uint64_t tso_segsz:16; /**< TCP TSO segment size */
@@ -1059,9 +689,6 @@ rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value)
 static inline uint16_t
 rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value)
 {
-    // TREX_PATCH - The code in #if 0 caused tx queue to hang when running:
-    // sudo ./t-rex-64-o -f avl/sfr_delay_10_1g_no_bundeling.yaml -m 35 -p -d 100
-#if 0
        /*
         * The atomic_add is an expensive operation, so we don't want to
         * call it in the case where we know we are the uniq holder of
@@ -1073,7 +700,7 @@ rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value)
                rte_mbuf_refcnt_set(m, 1 + value);
                return 1 + value;
        }
-#endif
+
        return (uint16_t)(rte_atomic16_add_return(&m->refcnt_atomic, value));
 }
 
@@ -1160,13 +787,6 @@ static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp)
        return m;
 }
 
-/* compat with older versions */
-__rte_deprecated static inline struct rte_mbuf *
-__rte_mbuf_raw_alloc(struct rte_mempool *mp)
-{
-       return rte_mbuf_raw_alloc(mp);
-}
-
 /**
  * @internal Put mbuf back into its original mempool.
  * The use of that function is reserved for RTE internal needs.
@@ -1387,6 +1007,19 @@ rte_pktmbuf_priv_size(struct rte_mempool *mp)
        return mbp_priv->mbuf_priv_size;
 }
 
+/**
+ * Reset the data_off field of a packet mbuf to its default value.
+ *
+ * The given mbuf must have only one segment, which should be empty.
+ *
+ * @param m
+ *   The packet mbuf whose data_off field is to be reset.
+ */
+static inline void rte_pktmbuf_reset_headroom(struct rte_mbuf *m)
+{
+       m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len);
+}
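The hunks below switch rte_pktmbuf_reset() and rte_pktmbuf_detach() to this helper; an application recycling an emptied single-segment mbuf could use it the same way, as in this sketch:

#include <rte_mbuf.h>

/* Reuse an emptied, single-segment mbuf without a full reset. */
static inline void
recycle_empty_mbuf(struct rte_mbuf *m)
{
	m->pkt_len = 0;
	m->data_len = 0;
	rte_pktmbuf_reset_headroom(m); /* restore the default headroom */
}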
+
 /**
  * Reset the fields of a packet mbuf to their default values.
  *
@@ -1407,8 +1040,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
 
        m->ol_flags = 0;
        m->packet_type = 0;
-       m->data_off = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ?
-                       RTE_PKTMBUF_HEADROOM : m->buf_len;
+       rte_pktmbuf_reset_headroom(m);
 
        m->data_len = 0;
        __rte_mbuf_sanity_check(m, 1);
@@ -1526,7 +1158,6 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
        mi->buf_addr = m->buf_addr;
        mi->buf_len = m->buf_len;
 
-       mi->next = m->next;
        mi->data_off = m->data_off;
        mi->data_len = m->data_len;
        mi->port = m->port;
@@ -1572,7 +1203,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
        m->buf_addr = (char *)m + mbuf_size;
        m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size;
        m->buf_len = (uint16_t)buf_len;
-       m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len);
+       rte_pktmbuf_reset_headroom(m);
        m->data_len = 0;
        m->ol_flags = 0;
 
@@ -1960,6 +1591,41 @@ static inline int rte_pktmbuf_is_contiguous(const struct rte_mbuf *m)
        return !!(m->nb_segs == 1);
 }
 
+/**
+ * @internal used by rte_pktmbuf_read().
+ */
+const void *__rte_pktmbuf_read(const struct rte_mbuf *m, uint32_t off,
+       uint32_t len, void *buf);
+
+/**
+ * Read len data bytes in a mbuf at specified offset.
+ *
+ * If the data is contiguous, return the pointer in the mbuf data, else
+ * copy the data in the buffer provided by the user and return its
+ * pointer.
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ * @param off
+ *   The offset of the data in the mbuf.
+ * @param len
+ *   The number of bytes to read.
+ * @param buf
+ *   The buffer where data is copied if it is not contiguous in mbuf
+ *   data. Its length should be at least equal to the len parameter.
+ * @return
+ *   The pointer to the data, either in the mbuf if it is contiguous,
+ *   or in the user buffer. If mbuf is too small, NULL is returned.
+ */
+static inline const void *rte_pktmbuf_read(const struct rte_mbuf *m,
+       uint32_t off, uint32_t len, void *buf)
+{
+       if (likely(off + len <= rte_pktmbuf_data_len(m)))
+               return rte_pktmbuf_mtod_offset(m, char *, off);
+       else
+               return __rte_pktmbuf_read(m, off, len, buf);
+}
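A usage sketch: inspect the Ethernet header through a local copy, so the code works even when the header straddles segment boundaries (field names from rte_ether.h):

#include <stdio.h>
#include <rte_mbuf.h>
#include <rte_ether.h>

/* Print the first two bytes of the destination MAC, chain-safe. */
static void
print_dst_mac_prefix(const struct rte_mbuf *m)
{
	struct ether_hdr copy;
	const struct ether_hdr *eth;

	eth = rte_pktmbuf_read(m, 0, sizeof(copy), &copy);
	if (eth == NULL)        /* packet shorter than an Ethernet header */
		return;
	printf("dst=%02x:%02x:...\n", eth->d_addr.addr_bytes[0],
	       eth->d_addr.addr_bytes[1]);
}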
+
 /**
  * Chain an mbuf to another, thereby creating a segmented packet.
  *
@@ -1999,7 +1665,109 @@ static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail
 }
 
 /**
- * Dump an mbuf structure to the console.
+ * Validate general requirements for Tx offload in mbuf.
+ *
+ * This function checks correctness and completeness of Tx offload settings.
+ *
+ * @param m
+ *   The packet mbuf to be validated.
+ * @return
+ *   0 if packet is valid
+ */
+static inline int
+rte_validate_tx_offload(const struct rte_mbuf *m)
+{
+       uint64_t ol_flags = m->ol_flags;
+       uint64_t inner_l3_offset = m->l2_len;
+
+       /* Does packet set any of available offloads? */
+       if (!(ol_flags & PKT_TX_OFFLOAD_MASK))
+               return 0;
+
+       if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
+               inner_l3_offset += m->outer_l2_len + m->outer_l3_len;
+
+       /* Headers are fragmented */
+       if (rte_pktmbuf_data_len(m) < inner_l3_offset + m->l3_len + m->l4_len)
+               return -ENOTSUP;
+
+       /* IP checksum can be counted only for IPv4 packet */
+       if ((ol_flags & PKT_TX_IP_CKSUM) && (ol_flags & PKT_TX_IPV6))
+               return -EINVAL;
+
+       /* IP type not set when required */
+       if (ol_flags & (PKT_TX_L4_MASK | PKT_TX_TCP_SEG))
+               if (!(ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)))
+                       return -EINVAL;
+
+       /* Check requirements for TSO packet */
+       if (ol_flags & PKT_TX_TCP_SEG)
+               if ((m->tso_segsz == 0) ||
+                               ((ol_flags & PKT_TX_IPV4) &&
+                               !(ol_flags & PKT_TX_IP_CKSUM)))
+                       return -EINVAL;
+
+       /* PKT_TX_OUTER_IP_CKSUM set for non outer IPv4 packet. */
+       if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) &&
+                       !(ol_flags & PKT_TX_OUTER_IPV4))
+               return -EINVAL;
+
+       return 0;
+}
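A sketch of how a burst-preparation path might apply this check, dropping mbufs whose offload requests are inconsistent:

#include <rte_mbuf.h>

/* Keep only mbufs whose TX offload settings validate; free the rest. */
static uint16_t
filter_bad_tx_offloads(struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t i, kept = 0;

	for (i = 0; i < nb_pkts; i++) {
		if (rte_validate_tx_offload(pkts[i]) == 0)
			pkts[kept++] = pkts[i];
		else
			rte_pktmbuf_free(pkts[i]);
	}
	return kept;
}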
+
+/**
+ * Linearize data in mbuf.
+ *
+ * This function moves the mbuf data in the first segment if there is enough
+ * tailroom. The subsequent segments are unchained and freed.
+ *
+ * @param mbuf
+ *   mbuf to linearize
+ * @return
+ *   - 0, on success
+ *   - -1, on error
+ */
+static inline int
+rte_pktmbuf_linearize(struct rte_mbuf *mbuf)
+{
+       int seg_len, copy_len;
+       struct rte_mbuf *m;
+       struct rte_mbuf *m_next;
+       char *buffer;
+
+       if (rte_pktmbuf_is_contiguous(mbuf))
+               return 0;
+
+       /* Extend first segment to the total packet length */
+       copy_len = rte_pktmbuf_pkt_len(mbuf) - rte_pktmbuf_data_len(mbuf);
+
+       if (unlikely(copy_len > rte_pktmbuf_tailroom(mbuf)))
+               return -1;
+
+       buffer = rte_pktmbuf_mtod_offset(mbuf, char *, mbuf->data_len);
+       mbuf->data_len = (uint16_t)(mbuf->pkt_len);
+
+       /* Append data from next segments to the first one */
+       m = mbuf->next;
+       while (m != NULL) {
+               m_next = m->next;
+
+               seg_len = rte_pktmbuf_data_len(m);
+               rte_memcpy(buffer, rte_pktmbuf_mtod(m, char *), seg_len);
+               buffer += seg_len;
+
+               rte_pktmbuf_free_seg(m);
+               m = m_next;
+       }
+
+       mbuf->next = NULL;
+       mbuf->nb_segs = 1;
+
+       return 0;
+}
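A usage sketch for a driver or application bounded by a maximum segment count (`max_segs` is an assumed device limit, not a DPDK constant):

#include <rte_mbuf.h>

/* Flatten a chain that exceeds what the device can describe. */
static int
prepare_for_tx(struct rte_mbuf *m, uint16_t max_segs)
{
	if (m->nb_segs <= max_segs)
		return 0;
	/* Fails (-1) when the first segment lacks tailroom for the rest. */
	return rte_pktmbuf_linearize(m);
}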
+
+/**
+ * Dump an mbuf structure to a file.
  *
  * Dump all fields for the given packet mbuf and all its associated
  * segments (in the case of a chained buffer).
diff --git a/src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.c b/src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.c
new file mode 100644 (file)
index 0000000..e5c4fae
--- /dev/null
@@ -0,0 +1,227 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2016 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+#include <rte_mbuf_ptype.h>
+
+/* get the name of the l2 packet type */
+const char *rte_get_ptype_l2_name(uint32_t ptype)
+{
+       switch (ptype & RTE_PTYPE_L2_MASK) {
+       case RTE_PTYPE_L2_ETHER: return "L2_ETHER";
+       case RTE_PTYPE_L2_ETHER_TIMESYNC: return "L2_ETHER_TIMESYNC";
+       case RTE_PTYPE_L2_ETHER_ARP: return "L2_ETHER_ARP";
+       case RTE_PTYPE_L2_ETHER_LLDP: return "L2_ETHER_LLDP";
+       case RTE_PTYPE_L2_ETHER_NSH: return "L2_ETHER_NSH";
+       case RTE_PTYPE_L2_ETHER_VLAN: return "L2_ETHER_VLAN";
+       case RTE_PTYPE_L2_ETHER_QINQ: return "L2_ETHER_QINQ";
+       default: return "L2_UNKNOWN";
+       }
+}
+
+/* get the name of the l3 packet type */
+const char *rte_get_ptype_l3_name(uint32_t ptype)
+{
+       switch (ptype & RTE_PTYPE_L3_MASK) {
+       case RTE_PTYPE_L3_IPV4: return "L3_IPV4";
+       case RTE_PTYPE_L3_IPV4_EXT: return "L3_IPV4_EXT";
+       case RTE_PTYPE_L3_IPV6: return "L3_IPV6";
+       case RTE_PTYPE_L3_IPV4_EXT_UNKNOWN: return "L3_IPV4_EXT_UNKNOWN";
+       case RTE_PTYPE_L3_IPV6_EXT: return "L3_IPV6_EXT";
+       case RTE_PTYPE_L3_IPV6_EXT_UNKNOWN: return "L3_IPV6_EXT_UNKNOWN";
+       default: return "L3_UNKNOWN";
+       }
+}
+
+/* get the name of the l4 packet type */
+const char *rte_get_ptype_l4_name(uint32_t ptype)
+{
+       switch (ptype & RTE_PTYPE_L4_MASK) {
+       case RTE_PTYPE_L4_TCP: return "L4_TCP";
+       case RTE_PTYPE_L4_UDP: return "L4_UDP";
+       case RTE_PTYPE_L4_FRAG: return "L4_FRAG";
+       case RTE_PTYPE_L4_SCTP: return "L4_SCTP";
+       case RTE_PTYPE_L4_ICMP: return "L4_ICMP";
+       case RTE_PTYPE_L4_NONFRAG: return "L4_NONFRAG";
+       default: return "L4_UNKNOWN";
+       }
+}
+
+/* get the name of the tunnel packet type */
+const char *rte_get_ptype_tunnel_name(uint32_t ptype)
+{
+       switch (ptype & RTE_PTYPE_TUNNEL_MASK) {
+       case RTE_PTYPE_TUNNEL_IP: return "TUNNEL_IP";
+       case RTE_PTYPE_TUNNEL_GRE: return "TUNNEL_GRE";
+       case RTE_PTYPE_TUNNEL_VXLAN: return "TUNNEL_VXLAN";
+       case RTE_PTYPE_TUNNEL_NVGRE: return "TUNNEL_NVGRE";
+       case RTE_PTYPE_TUNNEL_GENEVE: return "TUNNEL_GENEVE";
+       case RTE_PTYPE_TUNNEL_GRENAT: return "TUNNEL_GRENAT";
+       default: return "TUNNEL_UNKNOWN";
+       }
+}
+
+/* get the name of the inner_l2 packet type */
+const char *rte_get_ptype_inner_l2_name(uint32_t ptype)
+{
+       switch (ptype & RTE_PTYPE_INNER_L2_MASK) {
+       case RTE_PTYPE_INNER_L2_ETHER: return "INNER_L2_ETHER";
+       case RTE_PTYPE_INNER_L2_ETHER_VLAN: return "INNER_L2_ETHER_VLAN";
+       case RTE_PTYPE_INNER_L2_ETHER_QINQ: return "INNER_L2_ETHER_QINQ";
+       default: return "INNER_L2_UNKNOWN";
+       }
+}
+
+/* get the name of the inner_l3 packet type */
+const char *rte_get_ptype_inner_l3_name(uint32_t ptype)
+{
+       switch (ptype & RTE_PTYPE_INNER_L3_MASK) {
+       case RTE_PTYPE_INNER_L3_IPV4: return "INNER_L3_IPV4";
+       case RTE_PTYPE_INNER_L3_IPV4_EXT: return "INNER_L3_IPV4_EXT";
+       case RTE_PTYPE_INNER_L3_IPV6: return "INNER_L3_IPV6";
+       case RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN:
+               return "INNER_L3_IPV4_EXT_UNKNOWN";
+       case RTE_PTYPE_INNER_L3_IPV6_EXT: return "INNER_L3_IPV6_EXT";
+       case RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN:
+               return "INNER_L3_IPV6_EXT_UNKNOWN";
+       default: return "INNER_L3_UNKNOWN";
+       }
+}
+
+/* get the name of the inner_l4 packet type */
+const char *rte_get_ptype_inner_l4_name(uint32_t ptype)
+{
+       switch (ptype & RTE_PTYPE_INNER_L4_MASK) {
+       case RTE_PTYPE_INNER_L4_TCP: return "INNER_L4_TCP";
+       case RTE_PTYPE_INNER_L4_UDP: return "INNER_L4_UDP";
+       case RTE_PTYPE_INNER_L4_FRAG: return "INNER_L4_FRAG";
+       case RTE_PTYPE_INNER_L4_SCTP: return "INNER_L4_SCTP";
+       case RTE_PTYPE_INNER_L4_ICMP: return "INNER_L4_ICMP";
+       case RTE_PTYPE_INNER_L4_NONFRAG: return "INNER_L4_NONFRAG";
+       default: return "INNER_L4_UNKNOWN";
+       }
+}
+
+/* write the packet type name into the buffer */
+int rte_get_ptype_name(uint32_t ptype, char *buf, size_t buflen)
+{
+       int ret;
+
+       if (buflen == 0)
+               return -1;
+
+       buf[0] = '\0';
+       if ((ptype & RTE_PTYPE_ALL_MASK) == RTE_PTYPE_UNKNOWN) {
+               ret = snprintf(buf, buflen, "UNKNOWN");
+               if (ret < 0)
+                       return -1;
+               if ((size_t)ret >= buflen)
+                       return -1;
+               return 0;
+       }
+
+       if ((ptype & RTE_PTYPE_L2_MASK) != 0) {
+               ret = snprintf(buf, buflen, "%s ",
+                       rte_get_ptype_l2_name(ptype));
+               if (ret < 0)
+                       return -1;
+               if ((size_t)ret >= buflen)
+                       return -1;
+               buf += ret;
+               buflen -= ret;
+       }
+       if ((ptype & RTE_PTYPE_L3_MASK) != 0) {
+               ret = snprintf(buf, buflen, "%s ",
+                       rte_get_ptype_l3_name(ptype));
+               if (ret < 0)
+                       return -1;
+               if ((size_t)ret >= buflen)
+                       return -1;
+               buf += ret;
+               buflen -= ret;
+       }
+       if ((ptype & RTE_PTYPE_L4_MASK) != 0) {
+               ret = snprintf(buf, buflen, "%s ",
+                       rte_get_ptype_l4_name(ptype));
+               if (ret < 0)
+                       return -1;
+               if ((size_t)ret >= buflen)
+                       return -1;
+               buf += ret;
+               buflen -= ret;
+       }
+       if ((ptype & RTE_PTYPE_TUNNEL_MASK) != 0) {
+               ret = snprintf(buf, buflen, "%s ",
+                       rte_get_ptype_tunnel_name(ptype));
+               if (ret < 0)
+                       return -1;
+               if ((size_t)ret >= buflen)
+                       return -1;
+               buf += ret;
+               buflen -= ret;
+       }
+       if ((ptype & RTE_PTYPE_INNER_L2_MASK) != 0) {
+               ret = snprintf(buf, buflen, "%s ",
+                       rte_get_ptype_inner_l2_name(ptype));
+               if (ret < 0)
+                       return -1;
+               if ((size_t)ret >= buflen)
+                       return -1;
+               buf += ret;
+               buflen -= ret;
+       }
+       if ((ptype & RTE_PTYPE_INNER_L3_MASK) != 0) {
+               ret = snprintf(buf, buflen, "%s ",
+                       rte_get_ptype_inner_l3_name(ptype));
+               if (ret < 0)
+                       return -1;
+               if ((size_t)ret >= buflen)
+                       return -1;
+               buf += ret;
+               buflen -= ret;
+       }
+       if ((ptype & RTE_PTYPE_INNER_L4_MASK) != 0) {
+               ret = snprintf(buf, buflen, "%s ",
+                       rte_get_ptype_inner_l4_name(ptype));
+               if (ret < 0)
+                       return -1;
+               if ((size_t)ret >= buflen)
+                       return -1;
+               buf += ret;
+               buflen -= ret;
+       }
+
+       return 0;
+}
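A short sketch of how an RX path might use the new helper, assuming the PMD filled m->packet_type:

#include <inttypes.h>
#include <stdio.h>
#include <rte_mbuf.h>
#include <rte_mbuf_ptype.h>

static void
log_packet_type(const struct rte_mbuf *m)
{
	char buf[128];

	if (rte_get_ptype_name(m->packet_type, buf, sizeof(buf)) == 0)
		printf("ptype 0x%08" PRIx32 ": %s\n", m->packet_type, buf);
}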
diff --git a/src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.h b/src/dpdk/lib/librte_mbuf/rte_mbuf_ptype.h
new file mode 100644 (file)
index 0000000..ff6de9d
--- /dev/null
@@ -0,0 +1,668 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation.
+ *   Copyright 2014-2016 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MBUF_PTYPE_H_
+#define _RTE_MBUF_PTYPE_H_
+
+/**
+ * @file
+ * RTE Mbuf Packet Types
+ *
+ * This file contains declarations for features related to mbuf packet
+ * types. The packet type gives information about the data carried by the
+ * mbuf, and is stored in the mbuf in a 32-bit field.
+ *
+ * The 32 bits are divided into several fields to mark packet types. Note that
+ * each field holds an index value rather than independent flag bits.
+ * - Bit 3:0 is for L2 types.
+ * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types.
+ * - Bit 11:8 is for L4 or outer L4 (for tunneling case) types.
+ * - Bit 15:12 is for tunnel types.
+ * - Bit 19:16 is for inner L2 types.
+ * - Bit 23:20 is for inner L3 types.
+ * - Bit 27:24 is for inner L4 types.
+ * - Bit 31:28 is reserved.
+ *
+ * To be compatible with Vector PMD, RTE_PTYPE_L3_IPV4, RTE_PTYPE_L3_IPV4_EXT,
+ * RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV6_EXT, RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP
+ * and RTE_PTYPE_L4_SCTP should be kept as below in a contiguous 7 bits.
+ *
+ * Note that L3 types values are selected for checking IPV4/IPV6 header from
+ * performance point of view. Reading annotations of RTE_ETH_IS_IPV4_HDR and
+ * RTE_ETH_IS_IPV6_HDR is needed for any future changes of L3 type values.
+ *
+ * Note that the packet types of the same packet recognized by different
+ * hardware may be different, as different hardware may have different
+ * capability of packet type recognition.
+ *
+ * examples:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=0x29
+ * | 'version'=6, 'next header'=0x3A
+ * | 'ICMPv6 header'>
+ * will be recognized on i40e hardware as packet type combination of,
+ * RTE_PTYPE_L2_ETHER |
+ * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ * RTE_PTYPE_TUNNEL_IP |
+ * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ * RTE_PTYPE_INNER_L4_ICMP.
+ *
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=0x2F
+ * | 'GRE header'
+ * | 'version'=6, 'next header'=0x11
+ * | 'UDP header'>
+ * will be recognized on i40e hardware as packet type combination of,
+ * RTE_PTYPE_L2_ETHER |
+ * RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ * RTE_PTYPE_TUNNEL_GRENAT |
+ * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ * RTE_PTYPE_INNER_L4_UDP.
+ */
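To make the field layout concrete, here is a hedged sketch that composes a tunneled TCP/IPv4 ptype from the constants defined below and extracts one field with its mask:

#include <stdint.h>
#include <rte_mbuf_ptype.h>

/* A VXLAN-encapsulated TCP/IPv4 packet expressed as a ptype value. */
static const uint32_t example_ptype =
	RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
	RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
	RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
	RTE_PTYPE_INNER_L4_TCP;

/* Each 4-bit field is an index, so it is extracted with its mask. */
static inline uint32_t
ptype_tunnel_field(uint32_t ptype)
{
	return ptype & RTE_PTYPE_TUNNEL_MASK;
}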
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * No packet type information.
+ */
+#define RTE_PTYPE_UNKNOWN                   0x00000000
+/**
+ * Ethernet packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=[0x0800|0x86DD]>
+ */
+#define RTE_PTYPE_L2_ETHER                  0x00000001
+/**
+ * Ethernet packet type for time sync.
+ *
+ * Packet format:
+ * <'ether type'=0x88F7>
+ */
+#define RTE_PTYPE_L2_ETHER_TIMESYNC         0x00000002
+/**
+ * ARP (Address Resolution Protocol) packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0806>
+ */
+#define RTE_PTYPE_L2_ETHER_ARP              0x00000003
+/**
+ * LLDP (Link Layer Discovery Protocol) packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x88CC>
+ */
+#define RTE_PTYPE_L2_ETHER_LLDP             0x00000004
+/**
+ * NSH (Network Service Header) packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x894F>
+ */
+#define RTE_PTYPE_L2_ETHER_NSH              0x00000005
+/**
+ * VLAN packet type.
+ *
+ * Packet format:
+ * <'ether type'=[0x8100]>
+ */
+#define RTE_PTYPE_L2_ETHER_VLAN             0x00000006
+/**
+ * QinQ packet type.
+ *
+ * Packet format:
+ * <'ether type'=[0x88A8]>
+ */
+#define RTE_PTYPE_L2_ETHER_QINQ             0x00000007
+/**
+ * Mask of layer 2 packet types.
+ * It is used for outer packet for tunneling cases.
+ */
+#define RTE_PTYPE_L2_MASK                   0x0000000f
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for outer packet for tunneling cases, and does not contain any
+ * header option.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=5>
+ */
+#define RTE_PTYPE_L3_IPV4                   0x00000010
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for outer packet for tunneling cases, and contains header
+ * options.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=[6-15], 'options'>
+ */
+#define RTE_PTYPE_L3_IPV4_EXT               0x00000030
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for outer packet for tunneling cases, and does not contain any
+ * extension header.
+ *
+ * Packet format:
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=0x3B>
+ */
+#define RTE_PTYPE_L3_IPV6                   0x00000040
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for outer packet for tunneling cases, and may or may not contain
+ * header options.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=[5-15], <'options'>>
+ */
+#define RTE_PTYPE_L3_IPV4_EXT_UNKNOWN       0x00000090
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for outer packet for tunneling cases, and contains extension
+ * headers.
+ *
+ * Packet format:
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ *   'extension headers'>
+ */
+#define RTE_PTYPE_L3_IPV6_EXT               0x000000c0
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for outer packet for tunneling cases, and may or may not contain
+ * extension headers.
+ *
+ * Packet format:
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ *   <'extension headers'>>
+ */
+#define RTE_PTYPE_L3_IPV6_EXT_UNKNOWN       0x000000e0
+/**
+ * Mask of layer 3 packet types.
+ * It is used for outer packet for tunneling cases.
+ */
+#define RTE_PTYPE_L3_MASK                   0x000000f0
+/**
+ * TCP (Transmission Control Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=6, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=6>
+ */
+#define RTE_PTYPE_L4_TCP                    0x00000100
+/**
+ * UDP (User Datagram Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17>
+ */
+#define RTE_PTYPE_L4_UDP                    0x00000200
+/**
+ * Fragmented IP (Internet Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * It refers to those packets of any IP types, which can be recognized as
+ * fragmented. A fragmented packet cannot be recognized as any other L4 types
+ * (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP,
+ * RTE_PTYPE_L4_NONFRAG).
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'MF'=1>
+ * or,
+ * <'ether type'=0x0800
+ * | 'version'=4, 'frag_offset'!=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=44>
+ */
+#define RTE_PTYPE_L4_FRAG                   0x00000300
+/**
+ * SCTP (Stream Control Transmission Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=132, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=132>
+ */
+#define RTE_PTYPE_L4_SCTP                   0x00000400
+/**
+ * ICMP (Internet Control Message Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=1, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=1>
+ */
+#define RTE_PTYPE_L4_ICMP                   0x00000500
+/**
+ * Non-fragmented IP (Internet Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * It refers to packets of any IP type that cannot be recognized as
+ * any of the above L4 types (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP,
+ * RTE_PTYPE_L4_FRAG, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP).
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'!=[6|17|44|132|1]>
+ */
+#define RTE_PTYPE_L4_NONFRAG                0x00000600
+/**
+ * Mask of layer 4 packet types.
+ * It is used for outer packet for tunneling cases.
+ */
+#define RTE_PTYPE_L4_MASK                   0x00000f00
+/**
+ * IP (Internet Protocol) in IP (Internet Protocol) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=[4|41]>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[4|41]>
+ */
+#define RTE_PTYPE_TUNNEL_IP                 0x00001000
+/**
+ * GRE (Generic Routing Encapsulation) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=47>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=47>
+ */
+#define RTE_PTYPE_TUNNEL_GRE                0x00002000
+/**
+ * VXLAN (Virtual eXtensible Local Area Network) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17
+ * | 'destination port'=4798>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17
+ * | 'destination port'=4798>
+ */
+#define RTE_PTYPE_TUNNEL_VXLAN              0x00003000
+/**
+ * NVGRE (Network Virtualization using Generic Routing Encapsulation) tunneling
+ * packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=47
+ * | 'protocol type'=0x6558>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=47
+ * | 'protocol type'=0x6558'>
+ */
+#define RTE_PTYPE_TUNNEL_NVGRE              0x00004000
+/**
+ * GENEVE (Generic Network Virtualization Encapsulation) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17
+ * | 'destination port'=6081>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17
+ * | 'destination port'=6081>
+ */
+#define RTE_PTYPE_TUNNEL_GENEVE             0x00005000
+/**
+ * Tunneling packet type of Teredo, VXLAN (Virtual eXtensible Local Area
+ * Network) or GRE (Generic Routing Encapsulation) could be recognized as this
+ * packet type, if the hardware is not capable of recognizing them
+ * independently.
+ */
+#define RTE_PTYPE_TUNNEL_GRENAT             0x00006000
+/**
+ * Mask of tunneling packet types.
+ */
+#define RTE_PTYPE_TUNNEL_MASK               0x0000f000
+/**
+ * Ethernet packet type.
+ * It is used for inner packet type only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=[0x800|0x86DD]>
+ */
+#define RTE_PTYPE_INNER_L2_ETHER            0x00010000
+/**
+ * Ethernet packet type with VLAN (Virtual Local Area Network) tag.
+ *
+ * Packet format (inner only):
+ * <'ether type'=[0x800|0x86DD], vlan=[1-4095]>
+ */
+#define RTE_PTYPE_INNER_L2_ETHER_VLAN       0x00020000
+/**
+ * QinQ packet type.
+ *
+ * Packet format:
+ * <'ether type'=[0x88A8]>
+ */
+#define RTE_PTYPE_INNER_L2_ETHER_QINQ       0x00030000
+/**
+ * Mask of inner layer 2 packet types.
+ */
+#define RTE_PTYPE_INNER_L2_MASK             0x000f0000
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for inner packet only, and does not contain any header option.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=5>
+ */
+#define RTE_PTYPE_INNER_L3_IPV4             0x00100000
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for inner packet only, and contains header options.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=[6-15], 'options'>
+ */
+#define RTE_PTYPE_INNER_L3_IPV4_EXT         0x00200000
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for inner packet only, and does not contain any extension header.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=0x3B>
+ */
+#define RTE_PTYPE_INNER_L3_IPV6             0x00300000
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for inner packet only, and may or may not contain header options.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=[5-15], <'options'>>
+ */
+#define RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x00400000
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for inner packet only, and contains extension headers.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ *   'extension headers'>
+ */
+#define RTE_PTYPE_INNER_L3_IPV6_EXT         0x00500000
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for inner packet only, and may or may not contain extension
+ * headers.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ *   <'extension headers'>>
+ */
+#define RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x00600000
+/**
+ * Mask of inner layer 3 packet types.
+ */
+#define RTE_PTYPE_INNER_L3_MASK             0x00f00000
+/**
+ * TCP (Transmission Control Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=6, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=6>
+ */
+#define RTE_PTYPE_INNER_L4_TCP              0x01000000
+/**
+ * UDP (User Datagram Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17>
+ */
+#define RTE_PTYPE_INNER_L4_UDP              0x02000000
+/**
+ * Fragmented IP (Internet Protocol) packet type.
+ * It is used for inner packet only, and may or may not have a layer 4 packet.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'MF'=1>
+ * or,
+ * <'ether type'=0x0800
+ * | 'version'=4, 'frag_offset'!=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=44>
+ */
+#define RTE_PTYPE_INNER_L4_FRAG             0x03000000
+/**
+ * SCTP (Stream Control Transmission Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=132, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=132>
+ */
+#define RTE_PTYPE_INNER_L4_SCTP             0x04000000
+/**
+ * ICMP (Internet Control Message Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=1, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=1>
+ */
+#define RTE_PTYPE_INNER_L4_ICMP             0x05000000
+/**
+ * Non-fragmented IP (Internet Protocol) packet type.
+ * It is used for inner packet only, and may or may not have other unknown layer
+ * 4 packet types.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'!=[6|17|44|132|1]>
+ */
+#define RTE_PTYPE_INNER_L4_NONFRAG          0x06000000
+/**
+ * Mask of inner layer 4 packet types.
+ */
+#define RTE_PTYPE_INNER_L4_MASK             0x0f000000
+/**
+ * All valid layer masks.
+ */
+#define RTE_PTYPE_ALL_MASK                  0x0fffffff
+
+/**
+ * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by
+ * one, bit 4 is selected to be used for IPv4 only. Then checking bit 4 can
+ * determine if it is an IPV4 packet.
+ */
+#define  RTE_ETH_IS_IPV4_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV4)
+
+/**
+ * Check if the (outer) L3 header is IPv6. To avoid comparing IPv6 types one by
+ * one, bit 6 is selected to be used for IPv6 only. Then checking bit 6 can
+ * determine if it is an IPv6 packet.
+ */
+#define  RTE_ETH_IS_IPV6_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV6)
+
+/* Check if it is a tunneling packet */
+#define RTE_ETH_IS_TUNNEL_PKT(ptype) ((ptype) &                                \
+       (RTE_PTYPE_TUNNEL_MASK |                                        \
+               RTE_PTYPE_INNER_L2_MASK |                               \
+               RTE_PTYPE_INNER_L3_MASK |                               \
+               RTE_PTYPE_INNER_L4_MASK))
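A small sketch of the single-bit fast-path checks these macros enable, instead of comparing against every IPv4/IPv6 ptype variant:

#include <rte_mbuf.h>
#include <rte_mbuf_ptype.h>

/* Return 4, 6 or 0 for the outer IP version of a received mbuf. */
static inline int
outer_ip_version(const struct rte_mbuf *m)
{
	if (RTE_ETH_IS_IPV4_HDR(m->packet_type))
		return 4;       /* bit 4 set: one of the IPv4 ptypes */
	if (RTE_ETH_IS_IPV6_HDR(m->packet_type))
		return 6;       /* bit 6 set: one of the IPv6 ptypes */
	return 0;               /* non-IP or unknown */
}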
+
+/**
+ * Get the name of the l2 packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_l2_name(uint32_t ptype);
+
+/**
+ * Get the name of the l3 packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_l3_name(uint32_t ptype);
+
+/**
+ * Get the name of the l4 packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_l4_name(uint32_t ptype);
+
+/**
+ * Get the name of the tunnel packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_tunnel_name(uint32_t ptype);
+
+/**
+ * Get the name of the inner_l2 packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_inner_l2_name(uint32_t ptype);
+
+/**
+ * Get the name of the inner_l3 packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_inner_l3_name(uint32_t ptype);
+
+/**
+ * Get the name of the inner_l4 packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_inner_l4_name(uint32_t ptype);
+
+/**
+ * Write the packet type name into the buffer
+ *
+ * @param ptype
+ *   The packet type value.
+ * @param buf
+ *   The buffer where the string is written.
+ * @param buflen
+ *   The length of the buffer.
+ * @return
+ *   - 0 on success
+ *   - (-1) if the buffer is too small
+ */
+int rte_get_ptype_name(uint32_t ptype, char *buf, size_t buflen);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MBUF_PTYPE_H_ */
index 2e28e2e..1c2aed8 100644 (file)
@@ -55,7 +55,6 @@
 #include <rte_per_lcore.h>
 #include <rte_lcore.h>
 #include <rte_branch_prediction.h>
-#include <rte_ring.h>
 #include <rte_errno.h>
 #include <rte_string_fns.h>
 #include <rte_spinlock.h>
@@ -429,7 +428,7 @@ rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr,
 
                /* populate with the largest group of contiguous pages */
                for (n = 1; (i + n) < pg_num &&
-                            paddr[i] + pg_sz == paddr[i+n]; n++)
+                            paddr[i + n - 1] + pg_sz == paddr[i + n]; n++)
                        ;
 
                ret = rte_mempool_populate_phys(mp, vaddr + i * pg_sz,
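The one-line change above corrects which pair of pages the loop compares; a self-contained sketch of the fixed contiguity scan (helper name and signature are illustrative, not mempool internals):

#include <stddef.h>
#include <stdint.h>

/* Count how many pages starting at index i are physically contiguous. */
static size_t
contig_pages(const uint64_t *paddr, size_t pg_num, size_t i, uint64_t pg_sz)
{
	size_t n;

	/* Each page is compared with its predecessor. The old test compared
	 * every candidate against paddr[i] + pg_sz, so a group could never
	 * grow beyond two pages. */
	for (n = 1; (i + n) < pg_num &&
		     paddr[i + n - 1] + pg_sz == paddr[i + n]; n++)
		;
	return n;
}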
@@ -579,8 +578,10 @@ rte_mempool_populate_default(struct rte_mempool *mp)
                                mz->len, pg_sz,
                                rte_mempool_memchunk_mz_free,
                                (void *)(uintptr_t)mz);
-               if (ret < 0)
+               if (ret < 0) {
+                       rte_memzone_free(mz);
                        goto fail;
+               }
        }
 
        return mp->size;
@@ -817,7 +818,6 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
                goto exit_unlock;
        }
        mp->mz = mz;
-       mp->socket_id = socket_id;
        mp->size = n;
        mp->flags = flags;
        mp->socket_id = socket_id;
@@ -879,7 +879,7 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
 	 * Since we have 4 combinations of the SP/SC/MP/MC flags, examine them to
         * set the correct index into the table of ops structs.
         */
-       if (flags & (MEMPOOL_F_SP_PUT | MEMPOOL_F_SC_GET))
+       if ((flags & MEMPOOL_F_SP_PUT) && (flags & MEMPOOL_F_SC_GET))
                rte_mempool_set_ops_byname(mp, "ring_sp_sc", NULL);
        else if (flags & MEMPOOL_F_SP_PUT)
                rte_mempool_set_ops_byname(mp, "ring_sp_mc", NULL);
@@ -909,9 +909,8 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
 /*
  * Create the mempool over already allocated chunk of memory.
  * That external memory buffer can consist of physically disjoint pages.
- * Setting vaddr to NULL, makes mempool to fallback to original behaviour
- * and allocate space for mempool and it's elements as one big chunk of
- * physically continuos memory.
+ * Setting vaddr to NULL makes the mempool fall back to rte_mempool_create()
+ * behavior.
  */
 struct rte_mempool *
 rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
index 059ad9e..d0f5b27 100644 (file)
@@ -75,6 +75,7 @@
 #include <rte_branch_prediction.h>
 #include <rte_ring.h>
 #include <rte_memcpy.h>
+#include <rte_common.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -216,6 +217,7 @@ struct rte_mempool {
         * RTE_MEMPOOL_NAMESIZE next time the ABI changes
         */
        char name[RTE_MEMZONE_NAMESIZE]; /**< Name of mempool. */
+       RTE_STD_C11
        union {
                void *pool_data;         /**< Ring or pool to store objects. */
                uint64_t pool_id;        /**< External mempool identifier. */
@@ -355,7 +357,7 @@ void rte_mempool_check_cookies(const struct rte_mempool *mp,
  * Prototype for implementation specific data provisioning function.
  *
  * The function should provide the implementation specific memory for
- * for use by the other mempool ops functions in a given mempool ops struct.
+ * use by the other mempool ops functions in a given mempool ops struct.
  * E.g. the default ops provides an instance of the rte_ring for this purpose.
  * It will most likely point to a different type of data structure, and
  * will be transparent to the application programmer.
@@ -549,7 +551,7 @@ int rte_mempool_register_ops(const struct rte_mempool_ops *ops);
 /**
  * Macro to statically register the ops of a mempool handler.
  * Note that the rte_mempool_register_ops fails silently here when
- * more then RTE_MEMPOOL_MAX_OPS_IDX is registered.
+ * more than RTE_MEMPOOL_MAX_OPS_IDX is registered.
  */
 #define MEMPOOL_REGISTER_OPS(ops)                                      \
        void mp_hdlr_init_##ops(void);                                  \
@@ -587,10 +589,8 @@ typedef void (rte_mempool_ctor_t)(struct rte_mempool *, void *);
 /**
  * Create a new mempool named *name* in memory.
  *
- * This function uses ``memzone_reserve()`` to allocate memory. The
+ * This function uses ``rte_memzone_reserve()`` to allocate memory. The
  * pool contains n elements of elt_size. Its size is set to n.
- * All elements of the mempool are allocated together with the mempool header,
- * in one physically continuous chunk of memory.
  *
  * @param name
  *   The name of the mempool.
@@ -610,9 +610,7 @@ typedef void (rte_mempool_ctor_t)(struct rte_mempool *, void *);
  *   never be used. The access to the per-lcore table is of course
  *   faster than the multi-producer/consumer pool. The cache can be
  *   disabled if the cache_size argument is set to 0; it can be useful to
- *   avoid losing objects in cache. Note that even if not used, the
- *   memory space for cache is always reserved in a mempool structure,
- *   except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0.
+ *   avoid losing objects in cache.
  * @param private_data_size
  *   The size of the private data appended after the mempool
  *   structure. This is useful for storing some private data after the
@@ -746,7 +744,7 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
  *
  * The mempool is allocated and initialized, but it is not populated: no
  * memory is allocated for the mempool elements. The user has to call
- * rte_mempool_populate_*() or to add memory chunks to the pool. Once
+ * rte_mempool_populate_*() to add memory chunks to the pool. Once
  * populated, the user may also want to initialize each object with
  * rte_mempool_obj_iter().
  *
@@ -798,6 +796,10 @@ rte_mempool_free(struct rte_mempool *mp);
  * Add a virtually and physically contiguous memory chunk in the pool
  * where objects can be instantiated.
  *
+ * If the given physical address is unknown (paddr = RTE_BAD_PHYS_ADDR),
+ * the chunk doesn't need to be physically contiguous (only virtually),
+ * and allocated objects may span two pages.
+ *
  * @param mp
  *   A pointer to the mempool structure.
  * @param vaddr
@@ -946,7 +948,7 @@ uint32_t rte_mempool_mem_iter(struct rte_mempool *mp,
        rte_mempool_mem_cb_t *mem_cb, void *mem_cb_arg);
 
 /**
- * Dump the status of the mempool to the console.
+ * Dump the status of the mempool to a file.
  *
  * @param f
  *   A pointer to a file for output
@@ -1036,19 +1038,15 @@ rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)
  */
 static inline void __attribute__((always_inline))
 __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
-                     unsigned n, struct rte_mempool_cache *cache, int flags)
+                     unsigned n, struct rte_mempool_cache *cache)
 {
        void **cache_objs;
 
 	/* increment stat now, adding to the mempool always succeeds */
        __MEMPOOL_STAT_ADD(mp, put, n);
 
-       /* No cache provided or single producer */
-       if (unlikely(cache == NULL || flags & MEMPOOL_F_SP_PUT))
-               goto ring_enqueue;
-
-       /* Go straight to ring if put would overflow mem allocated for cache */
-       if (unlikely(n > RTE_MEMPOOL_CACHE_MAX_SIZE))
+       /* No cache provided or if put would overflow mem allocated for cache */
+       if (unlikely(cache == NULL || n > RTE_MEMPOOL_CACHE_MAX_SIZE))
                goto ring_enqueue;
 
        cache_objs = &cache->objs[cache->len];
@@ -1102,10 +1100,11 @@ ring_enqueue:
  */
 static inline void __attribute__((always_inline))
 rte_mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
-                       unsigned n, struct rte_mempool_cache *cache, int flags)
+                       unsigned n, struct rte_mempool_cache *cache,
+                       __rte_unused int flags)
 {
        __mempool_check_cookies(mp, obj_table, n, 0);
-       __mempool_generic_put(mp, obj_table, n, cache, flags);
+       __mempool_generic_put(mp, obj_table, n, cache);
 }
 
 /**
@@ -1242,15 +1241,14 @@ rte_mempool_put(struct rte_mempool *mp, void *obj)
  */
 static inline int __attribute__((always_inline))
 __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
-                     unsigned n, struct rte_mempool_cache *cache, int flags)
+                     unsigned n, struct rte_mempool_cache *cache)
 {
        int ret;
        uint32_t index, len;
        void **cache_objs;
 
-       /* No cache provided or single consumer */
-       if (unlikely(cache == NULL || flags & MEMPOOL_F_SC_GET ||
-                    n >= cache->size))
+       /* No cache provided or cannot be satisfied from cache */
+       if (unlikely(cache == NULL || n >= cache->size))
                goto ring_dequeue;
 
        cache_objs = cache->objs;
@@ -1324,10 +1322,10 @@ ring_dequeue:
  */
 static inline int __attribute__((always_inline))
 rte_mempool_generic_get(struct rte_mempool *mp, void **obj_table, unsigned n,
-                       struct rte_mempool_cache *cache, int flags)
+                       struct rte_mempool_cache *cache, __rte_unused int flags)
 {
        int ret;
-       ret = __mempool_generic_get(mp, obj_table, n, cache, flags);
+       ret = __mempool_generic_get(mp, obj_table, n, cache);
        if (ret == 0)
                __mempool_check_cookies(mp, obj_table, n, 1);
        return ret;
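With this change the fast-path decision depends only on whether a cache pointer is supplied, so the flags argument of the generic get/put helpers is ignored. A minimal caller sketch (illustrative, not part of the diff), assuming a mempool `mp` created elsewhere; burst_get_put() and the burst size of 32 are hypothetical:

#include <rte_mempool.h>
#include <rte_lcore.h>

static void burst_get_put(struct rte_mempool *mp)
{
	struct rte_mempool_cache *cache;
	void *objs[32];

	/* per-lcore default cache; may be NULL, which the helpers handle */
	cache = rte_mempool_default_cache(mp, rte_lcore_id());

	if (rte_mempool_generic_get(mp, objs, 32, cache, 0) == 0) {
		/* ... use the objects ... */
		rte_mempool_generic_put(mp, objs, 32, cache, 0);
	}
}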
index 5fd8af2..817f77e 100644 (file)
@@ -118,7 +118,7 @@ stack_dequeue(struct rte_mempool *mp, void **obj_table,
 
        s->len -= n;
        rte_spinlock_unlock(&s->sl);
-       return n;
+       return 0;
 }
 
 static unsigned
similarity index 98%
rename from src/dpdk/lib/librte_ether/rte_ether.h
rename to src/dpdk/lib/librte_net/rte_ether.h
index 1d62d8e..ff3d065 100644 (file)
@@ -84,7 +84,7 @@ extern "C" {
  * See http://standards.ieee.org/regauth/groupmac/tutorial.html
  */
 struct ether_addr {
-       uint8_t addr_bytes[ETHER_ADDR_LEN]; /**< Address bytes in transmission order */
+       uint8_t addr_bytes[ETHER_ADDR_LEN]; /**< Addr bytes in tx order */
 } __attribute__((__packed__));
 
 #define ETHER_LOCAL_ADMIN_ADDR 0x02 /**< Locally assigned Eth. address. */
@@ -224,7 +224,7 @@ static inline int is_local_admin_ether_addr(const struct ether_addr *ea)
  */
 static inline int is_valid_assigned_ether_addr(const struct ether_addr *ea)
 {
-       return is_unicast_ether_addr(ea) && (! is_zero_ether_addr(ea));
+       return is_unicast_ether_addr(ea) && (!is_zero_ether_addr(ea));
 }
 
 /**
@@ -236,7 +236,7 @@ static inline int is_valid_assigned_ether_addr(const struct ether_addr *ea)
 static inline void eth_random_addr(uint8_t *addr)
 {
        uint64_t rand = rte_rand();
-       uint8_t *p = (uint8_t*)&rand;
+       uint8_t *p = (uint8_t *)&rand;
 
        rte_memcpy(addr, p, ETHER_ADDR_LEN);
        addr[0] &= ~ETHER_GROUP_ADDR;       /* clear multicast bit */
@@ -329,6 +329,7 @@ struct vxlan_hdr {
 #define ETHER_TYPE_ARP  0x0806 /**< Arp Protocol. */
 #define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */
 #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */
+#define ETHER_TYPE_QINQ 0x88A8 /**< IEEE 802.1ad QinQ tagging. */
 #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */
 #define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */
 #define ETHER_TYPE_TEB  0x6558 /**< Transparent Ethernet Bridging. */
similarity index 60%
rename from src/dpdk/lib/librte_eal/common/include/rte_warnings.h
rename to src/dpdk/lib/librte_net/rte_gre.h
index 54b545c..46568ff 100644 (file)
@@ -1,8 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
+ *   Copyright 2016 6WIND S.A.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-/**
- * @file
- * Definitions of warnings for use of various insecure functions
- */
-
-#ifndef _RTE_WARNINGS_H_
-#define _RTE_WARNINGS_H_
-
-#ifdef RTE_INSECURE_FUNCTION_WARNING
-
-/* we need to include all used standard header files so that they appear
- * _before_ we poison the function names.
- */
+#ifndef _RTE_GRE_H_
+#define _RTE_GRE_H_
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#include <errno.h>
-#ifdef RTE_EXEC_ENV_LINUXAPP
-#include <dirent.h>
-#endif
-
-/* the following function are deemed not fully secure for use e.g. they
- * do not always null-terminate arguments */
-#pragma GCC poison sprintf strtok snprintf vsnprintf
-#pragma GCC poison strlen strcpy strcat
-#pragma GCC poison sscanf
-
-/* other unsafe functions may be implemented as macros so just undef them */
-#ifdef strsep
-#undef strsep
-#else
-#pragma GCC poison strsep
-#endif
+#include <stdint.h>
+#include <rte_byteorder.h>
 
-#ifdef strncpy
-#undef strncpy
-#else
-#pragma GCC poison strncpy
+#ifdef __cplusplus
+extern "C" {
 #endif
 
-#ifdef strncat
-#undef strncat
-#else
-#pragma GCC poison strncat
+/**
+ * GRE Header
+ */
+struct gre_hdr {
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+       uint16_t res2:4; /**< Reserved */
+       uint16_t s:1;    /**< Sequence Number Present bit */
+       uint16_t k:1;    /**< Key Present bit */
+       uint16_t res1:1; /**< Reserved */
+       uint16_t c:1;    /**< Checksum Present bit */
+       uint16_t ver:3;  /**< Version Number */
+       uint16_t res3:5; /**< Reserved */
+#elif RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+       uint16_t c:1;    /**< Checksum Present bit */
+       uint16_t res1:1; /**< Reserved */
+       uint16_t k:1;    /**< Key Present bit */
+       uint16_t s:1;    /**< Sequence Number Present bit */
+       uint16_t res2:4; /**< Reserved */
+       uint16_t res3:5; /**< Reserved */
+       uint16_t ver:3;  /**< Version Number */
 #endif
+       uint16_t proto;  /**< Protocol Type */
+} __attribute__((__packed__));
 
+#ifdef __cplusplus
+}
 #endif
 
-#endif /* RTE_WARNINGS_H */
+#endif /* RTE_GRE_H_ */
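A small sketch (illustrative, not part of the diff) of how the bitfields above translate into the variable GRE header length, mirroring the opt_len[] table used later by rte_net_get_ptype(); gre_hdr_len() is a hypothetical helper:

#include <rte_gre.h>

/* Base header is 4 bytes; the C bit adds checksum + reserved (4 bytes),
 * K a key (4 bytes) and S a sequence number (4 bytes). */
static unsigned int gre_hdr_len(const struct gre_hdr *gh)
{
	unsigned int len = sizeof(*gh);

	if (gh->c)
		len += 4;
	if (gh->k)
		len += 4;
	if (gh->s)
		len += 4;
	return len;
}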
index 5b7554a..4491b86 100644 (file)
@@ -229,6 +229,77 @@ rte_raw_cksum(const void *buf, size_t len)
        return __rte_raw_cksum_reduce(sum);
 }
 
+/**
+ * Compute the raw (non complemented) checksum of a packet.
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ * @param off
+ *   The offset in bytes to start the checksum.
+ * @param len
+ *   The length in bytes of the data to checksum.
+ * @param cksum
+ *   A pointer to the checksum, filled on success.
+ * @return
+ *   0 on success, -1 on error (bad length or offset).
+ */
+static inline int
+rte_raw_cksum_mbuf(const struct rte_mbuf *m, uint32_t off, uint32_t len,
+       uint16_t *cksum)
+{
+       const struct rte_mbuf *seg;
+       const char *buf;
+       uint32_t sum, tmp;
+       uint32_t seglen, done;
+
+       /* easy case: all data in the first segment */
+       if (off + len <= rte_pktmbuf_data_len(m)) {
+               *cksum = rte_raw_cksum(rte_pktmbuf_mtod_offset(m,
+                               const char *, off), len);
+               return 0;
+       }
+
+       if (unlikely(off + len > rte_pktmbuf_pkt_len(m)))
+               return -1; /* invalid params, return a dummy value */
+
+       /* else browse the segment to find offset */
+       seglen = 0;
+       for (seg = m; seg != NULL; seg = seg->next) {
+               seglen = rte_pktmbuf_data_len(seg);
+               if (off < seglen)
+                       break;
+               off -= seglen;
+       }
+       seglen -= off;
+       buf = rte_pktmbuf_mtod_offset(seg, const char *, off);
+       if (seglen >= len) {
+               /* all in one segment */
+               *cksum = rte_raw_cksum(buf, len);
+               return 0;
+       }
+
+       /* hard case: process checksum of several segments */
+       sum = 0;
+       done = 0;
+       for (;;) {
+               tmp = __rte_raw_cksum(buf, seglen, 0);
+               if (done & 1)
+                       tmp = rte_bswap16(tmp);
+               sum += tmp;
+               done += seglen;
+               if (done == len)
+                       break;
+               seg = seg->next;
+               buf = rte_pktmbuf_mtod(seg, const char *);
+               seglen = rte_pktmbuf_data_len(seg);
+               if (seglen > len - done)
+                       seglen = len - done;
+       }
+
+       *cksum = __rte_raw_cksum_reduce(sum);
+       return 0;
+}
+
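A short usage sketch (illustrative, relying only on the declarations above and on rte_mbuf.h): region_cksum() is a hypothetical helper that checksums everything from a caller-supplied byte offset to the end of the packet. The helper returns the raw, non-complemented sum, so a caller producing an Internet checksum would typically complement it:

static int region_cksum(const struct rte_mbuf *m, uint32_t off, uint16_t *out)
{
	uint32_t pkt_len = rte_pktmbuf_pkt_len(m);
	uint16_t raw;

	if (off > pkt_len)
		return -1;
	/* rte_raw_cksum_mbuf() returns -1 when off + len exceeds the packet */
	if (rte_raw_cksum_mbuf(m, off, pkt_len - off, &raw) < 0)
		return -1;

	*out = (uint16_t)~raw; /* complement for an Internet checksum */
	return 0;
}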
 /**
  * Process the IPv4 checksum of an IPv4 header.
  *
diff --git a/src/dpdk/lib/librte_net/rte_net.c b/src/dpdk/lib/librte_net/rte_net.c
new file mode 100644 (file)
index 0000000..a8c7aff
--- /dev/null
@@ -0,0 +1,517 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2016 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+#include <rte_mbuf_ptype.h>
+#include <rte_byteorder.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_sctp.h>
+#include <rte_gre.h>
+#include <rte_net.h>
+
+/* get l3 packet type from ip6 next protocol */
+static uint32_t
+ptype_l3_ip6(uint8_t ip6_proto)
+{
+       static const uint32_t ip6_ext_proto_map[256] = {
+               [IPPROTO_HOPOPTS] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+               [IPPROTO_ROUTING] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+               [IPPROTO_FRAGMENT] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+               [IPPROTO_ESP] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+               [IPPROTO_AH] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+               [IPPROTO_DSTOPTS] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+       };
+
+       return RTE_PTYPE_L3_IPV6 + ip6_ext_proto_map[ip6_proto];
+}
+
+/* get l3 packet type from ip version and header length */
+static uint32_t
+ptype_l3_ip(uint8_t ipv_ihl)
+{
+       static const uint32_t ptype_l3_ip_proto_map[256] = {
+               [0x45] = RTE_PTYPE_L3_IPV4,
+               [0x46] = RTE_PTYPE_L3_IPV4_EXT,
+               [0x47] = RTE_PTYPE_L3_IPV4_EXT,
+               [0x48] = RTE_PTYPE_L3_IPV4_EXT,
+               [0x49] = RTE_PTYPE_L3_IPV4_EXT,
+               [0x4A] = RTE_PTYPE_L3_IPV4_EXT,
+               [0x4B] = RTE_PTYPE_L3_IPV4_EXT,
+               [0x4C] = RTE_PTYPE_L3_IPV4_EXT,
+               [0x4D] = RTE_PTYPE_L3_IPV4_EXT,
+               [0x4E] = RTE_PTYPE_L3_IPV4_EXT,
+               [0x4F] = RTE_PTYPE_L3_IPV4_EXT,
+       };
+
+       return ptype_l3_ip_proto_map[ipv_ihl];
+}
+
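The index of the table above is the whole version_ihl byte: 0x45 is version 4 with IHL 5 (a 20-byte header, no options) and maps to RTE_PTYPE_L3_IPV4, while 0x46..0x4F (IHL 6..15, i.e. 24..60-byte headers carrying options) map to RTE_PTYPE_L3_IPV4_EXT. Any other value yields 0 (unknown).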
+/* get l4 packet type from proto */
+static uint32_t
+ptype_l4(uint8_t proto)
+{
+       static const uint32_t ptype_l4_proto[256] = {
+               [IPPROTO_UDP] = RTE_PTYPE_L4_UDP,
+               [IPPROTO_TCP] = RTE_PTYPE_L4_TCP,
+               [IPPROTO_SCTP] = RTE_PTYPE_L4_SCTP,
+       };
+
+       return ptype_l4_proto[proto];
+}
+
+/* get inner l3 packet type from ip6 next protocol */
+static uint32_t
+ptype_inner_l3_ip6(uint8_t ip6_proto)
+{
+       static const uint32_t ptype_inner_ip6_ext_proto_map[256] = {
+               [IPPROTO_HOPOPTS] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+                       RTE_PTYPE_INNER_L3_IPV6,
+               [IPPROTO_ROUTING] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+                       RTE_PTYPE_INNER_L3_IPV6,
+               [IPPROTO_FRAGMENT] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+                       RTE_PTYPE_INNER_L3_IPV6,
+               [IPPROTO_ESP] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+                       RTE_PTYPE_INNER_L3_IPV6,
+               [IPPROTO_AH] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+                       RTE_PTYPE_INNER_L3_IPV6,
+               [IPPROTO_DSTOPTS] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+                       RTE_PTYPE_INNER_L3_IPV6,
+       };
+
+       return RTE_PTYPE_INNER_L3_IPV6 +
+               ptype_inner_ip6_ext_proto_map[ip6_proto];
+}
+
+/* get inner l3 packet type from ip version and header length */
+static uint32_t
+ptype_inner_l3_ip(uint8_t ipv_ihl)
+{
+       static const uint32_t ptype_inner_l3_ip_proto_map[256] = {
+               [0x45] = RTE_PTYPE_INNER_L3_IPV4,
+               [0x46] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+               [0x47] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+               [0x48] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+               [0x49] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+               [0x4A] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+               [0x4B] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+               [0x4C] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+               [0x4D] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+               [0x4E] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+               [0x4F] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+       };
+
+       return ptype_inner_l3_ip_proto_map[ipv_ihl];
+}
+
+/* get inner l4 packet type from proto */
+static uint32_t
+ptype_inner_l4(uint8_t proto)
+{
+       static const uint32_t ptype_inner_l4_proto[256] = {
+               [IPPROTO_UDP] = RTE_PTYPE_INNER_L4_UDP,
+               [IPPROTO_TCP] = RTE_PTYPE_INNER_L4_TCP,
+               [IPPROTO_SCTP] = RTE_PTYPE_INNER_L4_SCTP,
+       };
+
+       return ptype_inner_l4_proto[proto];
+}
+
+/* get the tunnel packet type if any, update proto and off. */
+static uint32_t
+ptype_tunnel(uint16_t *proto, const struct rte_mbuf *m,
+       uint32_t *off)
+{
+       switch (*proto) {
+       case IPPROTO_GRE: {
+               static const uint8_t opt_len[16] = {
+                       [0x0] = 4,
+                       [0x1] = 8,
+                       [0x2] = 8,
+                       [0x8] = 8,
+                       [0x3] = 12,
+                       [0x9] = 12,
+                       [0xa] = 12,
+                       [0xb] = 16,
+               };
+               const struct gre_hdr *gh;
+               struct gre_hdr gh_copy;
+               uint16_t flags;
+
+               gh = rte_pktmbuf_read(m, *off, sizeof(*gh), &gh_copy);
+               if (unlikely(gh == NULL))
+                       return 0;
+
+               flags = rte_be_to_cpu_16(*(const uint16_t *)gh);
+               flags >>= 12;
+               if (opt_len[flags] == 0)
+                       return 0;
+
+               *off += opt_len[flags];
+               *proto = gh->proto;
+               if (*proto == rte_cpu_to_be_16(ETHER_TYPE_TEB))
+                       return RTE_PTYPE_TUNNEL_NVGRE;
+               else
+                       return RTE_PTYPE_TUNNEL_GRE;
+       }
+       case IPPROTO_IPIP:
+               *proto = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+               return RTE_PTYPE_TUNNEL_IP;
+       case IPPROTO_IPV6:
+               *proto = rte_cpu_to_be_16(ETHER_TYPE_IPv6);
+               return RTE_PTYPE_TUNNEL_IP; /* IP is also valid for IPv6 */
+       default:
+               return 0;
+       }
+}
+
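For GRE above, the opt_len[] index is the top nibble of the flags word, i.e. the C|R|K|S bits: the 4-byte base header grows by 4 bytes for each of C (checksum + reserved), K (key) and S (sequence number). Entries with the reserved R bit set stay 0, so such packets are not reported as tunnels.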
+/* get the ipv4 header length */
+static uint8_t
+ip4_hlen(const struct ipv4_hdr *hdr)
+{
+       return (hdr->version_ihl & 0xf) * 4;
+}
+
+/* parse ipv6 extended headers, update offset and return next proto */
+static uint16_t
+skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off,
+       int *frag)
+{
+       struct ext_hdr {
+               uint8_t next_hdr;
+               uint8_t len;
+       };
+       const struct ext_hdr *xh;
+       struct ext_hdr xh_copy;
+       unsigned int i;
+
+       *frag = 0;
+
+#define MAX_EXT_HDRS 5
+       for (i = 0; i < MAX_EXT_HDRS; i++) {
+               switch (proto) {
+               case IPPROTO_HOPOPTS:
+               case IPPROTO_ROUTING:
+               case IPPROTO_DSTOPTS:
+                       xh = rte_pktmbuf_read(m, *off, sizeof(*xh),
+                               &xh_copy);
+                       if (xh == NULL)
+                               return 0;
+                       *off += (xh->len + 1) * 8;
+                       proto = xh->next_hdr;
+                       break;
+               case IPPROTO_FRAGMENT:
+                       xh = rte_pktmbuf_read(m, *off, sizeof(*xh),
+                               &xh_copy);
+                       if (xh == NULL)
+                               return 0;
+                       *off += 8;
+                       proto = xh->next_hdr;
+                       *frag = 1;
+                       return proto; /* this is always the last ext hdr */
+               case IPPROTO_NONE:
+                       return 0;
+               default:
+                       return proto;
+               }
+       }
+       return 0;
+}
+
+/* parse mbuf data to get packet type */
+uint32_t rte_net_get_ptype(const struct rte_mbuf *m,
+       struct rte_net_hdr_lens *hdr_lens, uint32_t layers)
+{
+       struct rte_net_hdr_lens local_hdr_lens;
+       const struct ether_hdr *eh;
+       struct ether_hdr eh_copy;
+       uint32_t pkt_type = RTE_PTYPE_L2_ETHER;
+       uint32_t off = 0;
+       uint16_t proto;
+
+       if (hdr_lens == NULL)
+               hdr_lens = &local_hdr_lens;
+
+       eh = rte_pktmbuf_read(m, off, sizeof(*eh), &eh_copy);
+       if (unlikely(eh == NULL))
+               return 0;
+       proto = eh->ether_type;
+       off = sizeof(*eh);
+       hdr_lens->l2_len = off;
+
+       if ((layers & RTE_PTYPE_L2_MASK) == 0)
+               return 0;
+
+       if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
+               goto l3; /* fast path if packet is IPv4 */
+
+       if (proto == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) {
+               const struct vlan_hdr *vh;
+               struct vlan_hdr vh_copy;
+
+               pkt_type = RTE_PTYPE_L2_ETHER_VLAN;
+               vh = rte_pktmbuf_read(m, off, sizeof(*vh), &vh_copy);
+               if (unlikely(vh == NULL))
+                       return pkt_type;
+               off += sizeof(*vh);
+               hdr_lens->l2_len += sizeof(*vh);
+               proto = vh->eth_proto;
+       } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_QINQ)) {
+               const struct vlan_hdr *vh;
+               struct vlan_hdr vh_copy;
+
+               pkt_type = RTE_PTYPE_L2_ETHER_QINQ;
+               vh = rte_pktmbuf_read(m, off + sizeof(*vh), sizeof(*vh),
+                       &vh_copy);
+               if (unlikely(vh == NULL))
+                       return pkt_type;
+               off += 2 * sizeof(*vh);
+               hdr_lens->l2_len += 2 * sizeof(*vh);
+               proto = vh->eth_proto;
+       }
+
+ l3:
+       if ((layers & RTE_PTYPE_L3_MASK) == 0)
+               return pkt_type;
+
+       if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
+               const struct ipv4_hdr *ip4h;
+               struct ipv4_hdr ip4h_copy;
+
+               ip4h = rte_pktmbuf_read(m, off, sizeof(*ip4h), &ip4h_copy);
+               if (unlikely(ip4h == NULL))
+                       return pkt_type;
+
+               pkt_type |= ptype_l3_ip(ip4h->version_ihl);
+               hdr_lens->l3_len = ip4_hlen(ip4h);
+               off += hdr_lens->l3_len;
+
+               if ((layers & RTE_PTYPE_L4_MASK) == 0)
+                       return pkt_type;
+
+               if (ip4h->fragment_offset & rte_cpu_to_be_16(
+                               IPV4_HDR_OFFSET_MASK | IPV4_HDR_MF_FLAG)) {
+                       pkt_type |= RTE_PTYPE_L4_FRAG;
+                       hdr_lens->l4_len = 0;
+                       return pkt_type;
+               }
+               proto = ip4h->next_proto_id;
+               pkt_type |= ptype_l4(proto);
+       } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) {
+               const struct ipv6_hdr *ip6h;
+               struct ipv6_hdr ip6h_copy;
+               int frag = 0;
+
+               ip6h = rte_pktmbuf_read(m, off, sizeof(*ip6h), &ip6h_copy);
+               if (unlikely(ip6h == NULL))
+                       return pkt_type;
+
+               proto = ip6h->proto;
+               hdr_lens->l3_len = sizeof(*ip6h);
+               off += hdr_lens->l3_len;
+               pkt_type |= ptype_l3_ip6(proto);
+               if ((pkt_type & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV6_EXT) {
+                       proto = skip_ip6_ext(proto, m, &off, &frag);
+                       hdr_lens->l3_len = off - hdr_lens->l2_len;
+               }
+               if (proto == 0)
+                       return pkt_type;
+
+               if ((layers & RTE_PTYPE_L4_MASK) == 0)
+                       return pkt_type;
+
+               if (frag) {
+                       pkt_type |= RTE_PTYPE_L4_FRAG;
+                       hdr_lens->l4_len = 0;
+                       return pkt_type;
+               }
+               pkt_type |= ptype_l4(proto);
+       }
+
+       if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP) {
+               hdr_lens->l4_len = sizeof(struct udp_hdr);
+               return pkt_type;
+       } else if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP) {
+               const struct tcp_hdr *th;
+               struct tcp_hdr th_copy;
+
+               th = rte_pktmbuf_read(m, off, sizeof(*th), &th_copy);
+               if (unlikely(th == NULL))
+                       return pkt_type & (RTE_PTYPE_L2_MASK |
+                               RTE_PTYPE_L3_MASK);
+               hdr_lens->l4_len = (th->data_off & 0xf0) >> 2;
+               return pkt_type;
+       } else if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) {
+               hdr_lens->l4_len = sizeof(struct sctp_hdr);
+               return pkt_type;
+       } else {
+               uint32_t prev_off = off;
+
+               hdr_lens->l4_len = 0;
+
+               if ((layers & RTE_PTYPE_TUNNEL_MASK) == 0)
+                       return pkt_type;
+
+               pkt_type |= ptype_tunnel(&proto, m, &off);
+               hdr_lens->tunnel_len = off - prev_off;
+       }
+
+       /* same job for inner header: we need to duplicate the code
+        * because the packet types do not have the same value.
+        */
+       if ((layers & RTE_PTYPE_INNER_L2_MASK) == 0)
+               return pkt_type;
+
+       if (proto == rte_cpu_to_be_16(ETHER_TYPE_TEB)) {
+               eh = rte_pktmbuf_read(m, off, sizeof(*eh), &eh_copy);
+               if (unlikely(eh == NULL))
+                       return pkt_type;
+               pkt_type |= RTE_PTYPE_INNER_L2_ETHER;
+               proto = eh->ether_type;
+               off += sizeof(*eh);
+               hdr_lens->inner_l2_len = sizeof(*eh);
+       }
+
+       if (proto == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) {
+               const struct vlan_hdr *vh;
+               struct vlan_hdr vh_copy;
+
+               pkt_type &= ~RTE_PTYPE_INNER_L2_MASK;
+               pkt_type |= RTE_PTYPE_INNER_L2_ETHER_VLAN;
+               vh = rte_pktmbuf_read(m, off, sizeof(*vh), &vh_copy);
+               if (unlikely(vh == NULL))
+                       return pkt_type;
+               off += sizeof(*vh);
+               hdr_lens->inner_l2_len += sizeof(*vh);
+               proto = vh->eth_proto;
+       } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_QINQ)) {
+               const struct vlan_hdr *vh;
+               struct vlan_hdr vh_copy;
+
+               pkt_type &= ~RTE_PTYPE_INNER_L2_MASK;
+               pkt_type |= RTE_PTYPE_INNER_L2_ETHER_QINQ;
+               vh = rte_pktmbuf_read(m, off + sizeof(*vh), sizeof(*vh),
+                       &vh_copy);
+               if (unlikely(vh == NULL))
+                       return pkt_type;
+               off += 2 * sizeof(*vh);
+               hdr_lens->inner_l2_len += 2 * sizeof(*vh);
+               proto = vh->eth_proto;
+       }
+
+       if ((layers & RTE_PTYPE_INNER_L3_MASK) == 0)
+               return pkt_type;
+
+       if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
+               const struct ipv4_hdr *ip4h;
+               struct ipv4_hdr ip4h_copy;
+
+               ip4h = rte_pktmbuf_read(m, off, sizeof(*ip4h), &ip4h_copy);
+               if (unlikely(ip4h == NULL))
+                       return pkt_type;
+
+               pkt_type |= ptype_inner_l3_ip(ip4h->version_ihl);
+               hdr_lens->inner_l3_len = ip4_hlen(ip4h);
+               off += hdr_lens->inner_l3_len;
+
+               if ((layers & RTE_PTYPE_INNER_L4_MASK) == 0)
+                       return pkt_type;
+               if (ip4h->fragment_offset &
+                               rte_cpu_to_be_16(IPV4_HDR_OFFSET_MASK |
+                                       IPV4_HDR_MF_FLAG)) {
+                       pkt_type |= RTE_PTYPE_INNER_L4_FRAG;
+                       hdr_lens->inner_l4_len = 0;
+                       return pkt_type;
+               }
+               proto = ip4h->next_proto_id;
+               pkt_type |= ptype_inner_l4(proto);
+       } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) {
+               const struct ipv6_hdr *ip6h;
+               struct ipv6_hdr ip6h_copy;
+               int frag = 0;
+
+               ip6h = rte_pktmbuf_read(m, off, sizeof(*ip6h), &ip6h_copy);
+               if (unlikely(ip6h == NULL))
+                       return pkt_type;
+
+               proto = ip6h->proto;
+               hdr_lens->inner_l3_len = sizeof(*ip6h);
+               off += hdr_lens->inner_l3_len;
+               pkt_type |= ptype_inner_l3_ip6(proto);
+               if ((pkt_type & RTE_PTYPE_INNER_L3_MASK) ==
+                               RTE_PTYPE_INNER_L3_IPV6_EXT) {
+                       uint32_t prev_off;
+
+                       prev_off = off;
+                       proto = skip_ip6_ext(proto, m, &off, &frag);
+                       hdr_lens->inner_l3_len += off - prev_off;
+               }
+               if (proto == 0)
+                       return pkt_type;
+
+               if ((layers & RTE_PTYPE_INNER_L4_MASK) == 0)
+                       return pkt_type;
+
+               if (frag) {
+                       pkt_type |= RTE_PTYPE_INNER_L4_FRAG;
+                       hdr_lens->inner_l4_len = 0;
+                       return pkt_type;
+               }
+               pkt_type |= ptype_inner_l4(proto);
+       }
+
+       if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) == RTE_PTYPE_INNER_L4_UDP) {
+               hdr_lens->inner_l4_len = sizeof(struct udp_hdr);
+       } else if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) ==
+                       RTE_PTYPE_INNER_L4_TCP) {
+               const struct tcp_hdr *th;
+               struct tcp_hdr th_copy;
+
+               th = rte_pktmbuf_read(m, off, sizeof(*th), &th_copy);
+               if (unlikely(th == NULL))
+                       return pkt_type & (RTE_PTYPE_INNER_L2_MASK |
+                               RTE_PTYPE_INNER_L3_MASK);
+               hdr_lens->inner_l4_len = (th->data_off & 0xf0) >> 2;
+       } else if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) ==
+                       RTE_PTYPE_INNER_L4_SCTP) {
+               hdr_lens->inner_l4_len = sizeof(struct sctp_hdr);
+       } else {
+               hdr_lens->inner_l4_len = 0;
+       }
+
+       return pkt_type;
+}
diff --git a/src/dpdk/lib/librte_net/rte_net.h b/src/dpdk/lib/librte_net/rte_net.h
new file mode 100644 (file)
index 0000000..548eaed
--- /dev/null
@@ -0,0 +1,204 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2016 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_NET_PTYPE_H_
+#define _RTE_NET_PTYPE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_ip.h>
+#include <rte_udp.h>
+#include <rte_tcp.h>
+#include <rte_sctp.h>
+
+/**
+ * Structure containing header lengths associated with a packet, filled
+ * by rte_net_get_ptype().
+ */
+struct rte_net_hdr_lens {
+       uint8_t l2_len;
+       uint8_t l3_len;
+       uint8_t l4_len;
+       uint8_t tunnel_len;
+       uint8_t inner_l2_len;
+       uint8_t inner_l3_len;
+       uint8_t inner_l4_len;
+};
+
+/**
+ * Parse an Ethernet packet to get its packet type.
+ *
+ * This function parses the network headers in mbuf data and returns its
+ * packet type.
+ *
+ * If it is provided by the user, it also fills a rte_net_hdr_lens
+ * structure that contains the lengths of the parsed network
+ * headers. Each length field is valid only if the associated packet
+ * type is set. For instance, hdr_lens->l2_len is valid only if
+ * (retval & RTE_PTYPE_L2_MASK) != RTE_PTYPE_UNKNOWN.
+ *
+ * Supported packet types are:
+ *   L2: Ether, Vlan, QinQ
+ *   L3: IPv4, IPv6
+ *   L4: TCP, UDP, SCTP
+ *   Tunnels: IPv4, IPv6, Gre, Nvgre
+ *
+ * @param m
+ *   The packet mbuf to be parsed.
+ * @param hdr_lens
+ *   A pointer to a structure where the header lengths will be returned,
+ *   or NULL.
+ * @param layers
+ *   List of layers to parse. The function will stop at the first
+ *   empty layer. Examples:
+ *   - To parse all known layers, use RTE_PTYPE_ALL_MASK.
+ *   - To parse only L2 and L3, use RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK
+ * @return
+ *   The packet type of the packet.
+ */
+uint32_t rte_net_get_ptype(const struct rte_mbuf *m,
+       struct rte_net_hdr_lens *hdr_lens, uint32_t layers);
+
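A minimal sketch (illustrative, not part of the diff) of how the returned type and the filled lengths are typically combined; classify_example() is a hypothetical helper operating on a received mbuf `m`:

static inline void classify_example(const struct rte_mbuf *m)
{
	struct rte_net_hdr_lens hdr_lens;
	uint32_t ptype;

	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);

	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP) {
		/* the TCP header starts after l2_len + l3_len bytes */
		uint32_t l4_off = hdr_lens.l2_len + hdr_lens.l3_len;

		/* ... parse TCP at l4_off, header is hdr_lens.l4_len bytes ... */
		(void)l4_off;
	}
}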
+/**
+ * Prepare pseudo header checksum
+ *
+ * This function prepares the pseudo-header checksum for TSO and non-TSO
+ * TCP/UDP packets in the provided mbuf's packet data, based on the requested
+ * offload flags.
+ *
+ * - for non-TSO TCP/UDP packets the full pseudo-header checksum is computed
+ *   and set in the packet data,
+ * - for TSO the IP payload length is not included in the pseudo header.
+ *
+ * This function expects the headers to be in the first data segment of the
+ * mbuf, not fragmented, and safe to modify.
+ *
+ * @param m
+ *   The packet mbuf to be fixed.
+ * @param ol_flags
+ *   TX offloads flags to use with this packet.
+ * @return
+ *   0 if checksum is initialized properly
+ */
+static inline int
+rte_net_intel_cksum_flags_prepare(struct rte_mbuf *m, uint64_t ol_flags)
+{
+       struct ipv4_hdr *ipv4_hdr;
+       struct ipv6_hdr *ipv6_hdr;
+       struct tcp_hdr *tcp_hdr;
+       struct udp_hdr *udp_hdr;
+       uint64_t inner_l3_offset = m->l2_len;
+
+       if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
+               inner_l3_offset += m->outer_l2_len + m->outer_l3_len;
+
+       if ((ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) {
+               if (ol_flags & PKT_TX_IPV4) {
+                       ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
+                                       inner_l3_offset);
+
+                       if (ol_flags & PKT_TX_IP_CKSUM)
+                               ipv4_hdr->hdr_checksum = 0;
+
+                       udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
+                                       m->l3_len);
+                       udp_hdr->dgram_cksum = rte_ipv4_phdr_cksum(ipv4_hdr,
+                                       ol_flags);
+               } else {
+                       ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
+                                       inner_l3_offset);
+                       /* non-TSO udp */
+                       udp_hdr = rte_pktmbuf_mtod_offset(m, struct udp_hdr *,
+                                       inner_l3_offset + m->l3_len);
+                       udp_hdr->dgram_cksum = rte_ipv6_phdr_cksum(ipv6_hdr,
+                                       ol_flags);
+               }
+       } else if ((ol_flags & PKT_TX_TCP_CKSUM) ||
+                       (ol_flags & PKT_TX_TCP_SEG)) {
+               if (ol_flags & PKT_TX_IPV4) {
+                       ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
+                                       inner_l3_offset);
+
+                       if (ol_flags & PKT_TX_IP_CKSUM)
+                               ipv4_hdr->hdr_checksum = 0;
+
+                       /* non-TSO tcp or TSO */
+                       tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
+                                       m->l3_len);
+                       tcp_hdr->cksum = rte_ipv4_phdr_cksum(ipv4_hdr,
+                                       ol_flags);
+               } else {
+                       ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
+                                       inner_l3_offset);
+                       /* non-TSO tcp or TSO */
+                       tcp_hdr = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *,
+                                       inner_l3_offset + m->l3_len);
+                       tcp_hdr->cksum = rte_ipv6_phdr_cksum(ipv6_hdr,
+                                       ol_flags);
+               }
+       }
+
+       return 0;
+}
+
+/**
+ * Prepare pseudo header checksum
+ *
+ * This function prepares the pseudo-header checksum for TSO and non-TSO
+ * TCP/UDP packets in the provided mbuf's packet data.
+ *
+ * - for non-TSO TCP/UDP packets the full pseudo-header checksum is computed
+ *   and set in the packet data,
+ * - for TSO the IP payload length is not included in the pseudo header.
+ *
+ * This function expects the headers to be in the first data segment of the
+ * mbuf, not fragmented, and safe to modify.
+ *
+ * @param m
+ *   The packet mbuf to be fixed.
+ * @return
+ *   0 if checksum is initialized properly
+ */
+static inline int
+rte_net_intel_cksum_prepare(struct rte_mbuf *m)
+{
+       return rte_net_intel_cksum_flags_prepare(m, m->ol_flags);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _RTE_NET_PTYPE_H_ */
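A short TX-path sketch (illustrative, not part of the diff) for a TCP/IPv4 packet whose Ethernet and IP header lengths are known; prepare_tcp_ipv4_offload() is a hypothetical helper showing how ol_flags and the length fields feed the helper above:

#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_mbuf.h>
#include <rte_net.h>

static void prepare_tcp_ipv4_offload(struct rte_mbuf *m)
{
	m->l2_len = sizeof(struct ether_hdr);
	m->l3_len = sizeof(struct ipv4_hdr);
	m->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM;

	/* zeroes the IP checksum and fills the TCP pseudo-header checksum */
	rte_net_intel_cksum_prepare(m);
}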
index 84d1802..f366348 100644 (file)
@@ -87,6 +87,7 @@ extern "C" {
 
 #include <rte_port.h>
 #include <rte_table.h>
+#include <rte_common.h>
 
 struct rte_mbuf;
 
@@ -244,6 +245,7 @@ struct rte_pipeline_table_entry {
        /** Reserved action */
        enum rte_pipeline_action action;
 
+       RTE_STD_C11
        union {
                /** Output port ID (meta-data for "Send packet to output port"
                action) */
@@ -252,7 +254,7 @@ struct rte_pipeline_table_entry {
                uint32_t table_id;
        };
        /** Start of table entry area for user defined actions and meta-data */
-       uint8_t action_data[0];
+       __extension__ uint8_t action_data[0];
 };
 
 /**
diff --git a/src/dpdk/lib/librte_port/rte_port_fd.c b/src/dpdk/lib/librte_port/rte_port_fd.c
new file mode 100644 (file)
index 0000000..0d640f3
--- /dev/null
@@ -0,0 +1,552 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+
+#include "rte_port_fd.h"
+
+/*
+ * Port FD Reader
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(port, val) \
+       do { port->stats.n_pkts_in += val; } while (0)
+#define RTE_PORT_FD_READER_STATS_PKTS_DROP_ADD(port, val) \
+       do { port->stats.n_pkts_drop += val; } while (0)
+
+#else
+
+#define RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_FD_READER_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_fd_reader {
+       struct rte_port_in_stats stats;
+       int fd;
+       uint32_t mtu;
+       struct rte_mempool *mempool;
+};
+
+static void *
+rte_port_fd_reader_create(void *params, int socket_id)
+{
+       struct rte_port_fd_reader_params *conf =
+                       (struct rte_port_fd_reader_params *) params;
+       struct rte_port_fd_reader *port;
+
+       /* Check input parameters */
+       if (conf == NULL) {
+               RTE_LOG(ERR, PORT, "%s: params is NULL\n", __func__);
+               return NULL;
+       }
+       if (conf->fd < 0) {
+               RTE_LOG(ERR, PORT, "%s: Invalid file descriptor\n", __func__);
+               return NULL;
+       }
+       if (conf->mtu == 0) {
+               RTE_LOG(ERR, PORT, "%s: Invalid MTU\n", __func__);
+               return NULL;
+       }
+       if (conf->mempool == NULL) {
+               RTE_LOG(ERR, PORT, "%s: Invalid mempool\n", __func__);
+               return NULL;
+       }
+
+       /* Memory allocation */
+       port = rte_zmalloc_socket("PORT", sizeof(*port),
+                       RTE_CACHE_LINE_SIZE, socket_id);
+       if (port == NULL) {
+               RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+               return NULL;
+       }
+
+       /* Initialization */
+       port->fd = conf->fd;
+       port->mtu = conf->mtu;
+       port->mempool = conf->mempool;
+
+       return port;
+}
+
+static int
+rte_port_fd_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+       struct rte_port_fd_reader *p = (struct rte_port_fd_reader *) port;
+       uint32_t i;
+
+       if (rte_mempool_get_bulk(p->mempool, (void **) pkts, n_pkts) != 0)
+               return 0;
+
+       for (i = 0; i < n_pkts; i++) {
+               rte_mbuf_refcnt_set(pkts[i], 1);
+               rte_pktmbuf_reset(pkts[i]);
+       }
+
+       for (i = 0; i < n_pkts; i++) {
+               struct rte_mbuf *pkt = pkts[i];
+               void *pkt_data = rte_pktmbuf_mtod(pkt, void *);
+               ssize_t n_bytes;
+
+               n_bytes = read(p->fd, pkt_data, (size_t) p->mtu);
+               if (n_bytes <= 0)
+                       break;
+
+               pkt->data_len = n_bytes;
+               pkt->pkt_len = n_bytes;
+       }
+
+       for ( ; i < n_pkts; i++)
+               rte_pktmbuf_free(pkts[i]);
+
+       RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(p, i);
+
+       return n_pkts;
+}
+
+static int
+rte_port_fd_reader_free(void *port)
+{
+       if (port == NULL) {
+               RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       rte_free(port);
+
+       return 0;
+}
+
+static int rte_port_fd_reader_stats_read(void *port,
+               struct rte_port_in_stats *stats, int clear)
+{
+       struct rte_port_fd_reader *p =
+                       (struct rte_port_fd_reader *) port;
+
+       if (stats != NULL)
+               memcpy(stats, &p->stats, sizeof(p->stats));
+
+       if (clear)
+               memset(&p->stats, 0, sizeof(p->stats));
+
+       return 0;
+}
+
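A minimal sketch (illustrative, not part of the diff) of driving the FD reader directly through its ops table declared at the end of this file, e.g. on a TAP/TUN file descriptor. poll_fd_port(), the MTU of 1500 and the burst of 32 are hypothetical; the params fields mirror the checks in rte_port_fd_reader_create() above, and f_create/f_rx/f_free are assumed to be the standard rte_port_in_ops callback names:

#include <rte_port_fd.h>

static void poll_fd_port(int fd, struct rte_mempool *mp)
{
	struct rte_port_fd_reader_params params = {
		.fd = fd,
		.mtu = 1500,
		.mempool = mp,
	};
	struct rte_mbuf *pkts[32];
	void *port;
	int n;

	port = rte_port_fd_reader_ops.f_create(&params, SOCKET_ID_ANY);
	if (port == NULL)
		return;

	n = rte_port_fd_reader_ops.f_rx(port, pkts, 32);
	/* ... pass the received mbufs to the pipeline ... */
	(void)n;

	rte_port_fd_reader_ops.f_free(port);
}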
+/*
+ * Port FD Writer
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(port, val) \
+       do { port->stats.n_pkts_in += val; } while (0)
+#define RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(port, val) \
+       do { port->stats.n_pkts_drop += val; } while (0)
+
+#else
+
+#define RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_fd_writer {
+       struct rte_port_out_stats stats;
+
+       struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX];
+       uint32_t tx_burst_sz;
+       uint16_t tx_buf_count;
+       uint32_t fd;
+};
+
+static void *
+rte_port_fd_writer_create(void *params, int socket_id)
+{
+       struct rte_port_fd_writer_params *conf =
+               (struct rte_port_fd_writer_params *) params;
+       struct rte_port_fd_writer *port;
+
+       /* Check input parameters */
+       if ((conf == NULL) ||
+               (conf->tx_burst_sz == 0) ||
+               (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) ||
+               (!rte_is_power_of_2(conf->tx_burst_sz))) {
+               RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__);
+               return NULL;
+       }
+
+       /* Memory allocation */
+       port = rte_zmalloc_socket("PORT", sizeof(*port),
+               RTE_CACHE_LINE_SIZE, socket_id);
+       if (port == NULL) {
+               RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+               return NULL;
+       }
+
+       /* Initialization */
+       port->fd = conf->fd;
+       port->tx_burst_sz = conf->tx_burst_sz;
+       port->tx_buf_count = 0;
+
+       return port;
+}
+
+static inline void
+send_burst(struct rte_port_fd_writer *p)
+{
+       uint32_t i;
+
+       for (i = 0; i < p->tx_buf_count; i++) {
+               struct rte_mbuf *pkt = p->tx_buf[i];
+               void *pkt_data = rte_pktmbuf_mtod(pkt, void*);
+               size_t n_bytes = rte_pktmbuf_data_len(pkt);
+               ssize_t ret;
+
+               ret = write(p->fd, pkt_data, n_bytes);
+               if (ret < 0)
+                       break;
+       }
+
+       RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - i);
+
+       for (i = 0; i < p->tx_buf_count; i++)
+               rte_pktmbuf_free(p->tx_buf[i]);
+
+       p->tx_buf_count = 0;
+}
+
+static int
+rte_port_fd_writer_tx(void *port, struct rte_mbuf *pkt)
+{
+       struct rte_port_fd_writer *p =
+               (struct rte_port_fd_writer *) port;
+
+       p->tx_buf[p->tx_buf_count++] = pkt;
+       RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, 1);
+       if (p->tx_buf_count >= p->tx_burst_sz)
+               send_burst(p);
+
+       return 0;
+}
+
+static int
+rte_port_fd_writer_tx_bulk(void *port,
+       struct rte_mbuf **pkts,
+       uint64_t pkts_mask)
+{
+       struct rte_port_fd_writer *p =
+               (struct rte_port_fd_writer *) port;
+       uint32_t tx_buf_count = p->tx_buf_count;
+
+       if ((pkts_mask & (pkts_mask + 1)) == 0) {
+               uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+               uint32_t i;
+
+               for (i = 0; i < n_pkts; i++)
+                       p->tx_buf[tx_buf_count++] = pkts[i];
+               RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, n_pkts);
+       } else
+               for ( ; pkts_mask; ) {
+                       uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+                       uint64_t pkt_mask = 1LLU << pkt_index;
+                       struct rte_mbuf *pkt = pkts[pkt_index];
+
+                       p->tx_buf[tx_buf_count++] = pkt;
+                       RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, 1);
+                       pkts_mask &= ~pkt_mask;
+               }
+
+       p->tx_buf_count = tx_buf_count;
+       if (tx_buf_count >= p->tx_burst_sz)
+               send_burst(p);
+
+       return 0;
+}
+
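A mask with all valid packets packed at index 0 (binary 0..01..1) satisfies (pkts_mask & (pkts_mask + 1)) == 0, e.g. 0x0f + 1 = 0x10 and 0x0f & 0x10 = 0, so the contiguous case above uses a simple copy loop; sparse masks fall back to the per-bit __builtin_ctzll() scan.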
+static int
+rte_port_fd_writer_flush(void *port)
+{
+       struct rte_port_fd_writer *p =
+               (struct rte_port_fd_writer *) port;
+
+       if (p->tx_buf_count > 0)
+               send_burst(p);
+
+       return 0;
+}
+
+static int
+rte_port_fd_writer_free(void *port)
+{
+       if (port == NULL) {
+               RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       rte_port_fd_writer_flush(port);
+       rte_free(port);
+
+       return 0;
+}
+
+static int rte_port_fd_writer_stats_read(void *port,
+               struct rte_port_out_stats *stats, int clear)
+{
+       struct rte_port_fd_writer *p =
+               (struct rte_port_fd_writer *) port;
+
+       if (stats != NULL)
+               memcpy(stats, &p->stats, sizeof(p->stats));
+
+       if (clear)
+               memset(&p->stats, 0, sizeof(p->stats));
+
+       return 0;
+}
+
+/*
+ * Port FD Writer Nodrop
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) \
+       do { port->stats.n_pkts_in += val; } while (0)
+#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) \
+       do { port->stats.n_pkts_drop += val; } while (0)
+
+#else
+
+#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_fd_writer_nodrop {
+       struct rte_port_out_stats stats;
+
+       struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX];
+       uint32_t tx_burst_sz;
+       uint16_t tx_buf_count;
+       uint64_t n_retries;
+       uint32_t fd;
+};
+
+static void *
+rte_port_fd_writer_nodrop_create(void *params, int socket_id)
+{
+       struct rte_port_fd_writer_nodrop_params *conf =
+                       (struct rte_port_fd_writer_nodrop_params *) params;
+       struct rte_port_fd_writer_nodrop *port;
+
+       /* Check input parameters */
+       if ((conf == NULL) ||
+               (conf->fd < 0) ||
+               (conf->tx_burst_sz == 0) ||
+               (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) ||
+               (!rte_is_power_of_2(conf->tx_burst_sz))) {
+               RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__);
+               return NULL;
+       }
+
+       /* Memory allocation */
+       port = rte_zmalloc_socket("PORT", sizeof(*port),
+               RTE_CACHE_LINE_SIZE, socket_id);
+       if (port == NULL) {
+               RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+               return NULL;
+       }
+
+       /* Initialization */
+       port->fd = conf->fd;
+       port->tx_burst_sz = conf->tx_burst_sz;
+       port->tx_buf_count = 0;
+
+       /*
+        * When n_retries is 0 it means we should wait for every packet to be
+        * sent, no matter how many retries it takes. To limit the number of
+        * branches in the fast path, we use UINT64_MAX instead of branching.
+        */
+       port->n_retries = (conf->n_retries == 0) ? UINT64_MAX : conf->n_retries;
+
+       return port;
+}
+
+static inline void
+send_burst_nodrop(struct rte_port_fd_writer_nodrop *p)
+{
+       uint64_t n_retries;
+       uint32_t i;
+
+       n_retries = 0;
+       for (i = 0; (i < p->tx_buf_count) && (n_retries < p->n_retries); i++) {
+               struct rte_mbuf *pkt = p->tx_buf[i];
+               void *pkt_data = rte_pktmbuf_mtod(pkt, void*);
+               size_t n_bytes = rte_pktmbuf_data_len(pkt);
+
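+               /* Retry write() until it returns non-zero (bytes written or an
+                * error) or the shared retry budget runs out; packets still
+                * unsent after the loop are counted as drops below. */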
+               for ( ; n_retries < p->n_retries; n_retries++) {
+                       ssize_t ret;
+
+                       ret = write(p->fd, pkt_data, n_bytes);
+                       if (ret)
+                               break;
+               }
+       }
+
+       RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - i);
+
+       for (i = 0; i < p->tx_buf_count; i++)
+               rte_pktmbuf_free(p->tx_buf[i]);
+
+       p->tx_buf_count = 0;
+}
+
+static int
+rte_port_fd_writer_nodrop_tx(void *port, struct rte_mbuf *pkt)
+{
+       struct rte_port_fd_writer_nodrop *p =
+               (struct rte_port_fd_writer_nodrop *) port;
+
+       p->tx_buf[p->tx_buf_count++] = pkt;
+       RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1);
+       if (p->tx_buf_count >= p->tx_burst_sz)
+               send_burst_nodrop(p);
+
+       return 0;
+}
+
+static int
+rte_port_fd_writer_nodrop_tx_bulk(void *port,
+       struct rte_mbuf **pkts,
+       uint64_t pkts_mask)
+{
+       struct rte_port_fd_writer_nodrop *p =
+               (struct rte_port_fd_writer_nodrop *) port;
+       uint32_t tx_buf_count = p->tx_buf_count;
+
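+       /* A pkts_mask of the form 2^n - 1 means packets 0 .. n-1 are all
+        * valid, so they can be buffered sequentially; otherwise walk the set
+        * bits one at a time with ctzll. */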
+       if ((pkts_mask & (pkts_mask + 1)) == 0) {
+               uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+               uint32_t i;
+
+               for (i = 0; i < n_pkts; i++)
+                       p->tx_buf[tx_buf_count++] = pkts[i];
+               RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, n_pkts);
+       } else
+               for ( ; pkts_mask; ) {
+                       uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+                       uint64_t pkt_mask = 1LLU << pkt_index;
+                       struct rte_mbuf *pkt = pkts[pkt_index];
+
+                       p->tx_buf[tx_buf_count++] = pkt;
+                       RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1);
+                       pkts_mask &= ~pkt_mask;
+               }
+
+       p->tx_buf_count = tx_buf_count;
+       if (tx_buf_count >= p->tx_burst_sz)
+               send_burst_nodrop(p);
+
+       return 0;
+}
+
+static int
+rte_port_fd_writer_nodrop_flush(void *port)
+{
+       struct rte_port_fd_writer_nodrop *p =
+               (struct rte_port_fd_writer_nodrop *) port;
+
+       if (p->tx_buf_count > 0)
+               send_burst_nodrop(p);
+
+       return 0;
+}
+
+static int
+rte_port_fd_writer_nodrop_free(void *port)
+{
+       if (port == NULL) {
+               RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       rte_port_fd_writer_nodrop_flush(port);
+       rte_free(port);
+
+       return 0;
+}
+
+static int rte_port_fd_writer_nodrop_stats_read(void *port,
+               struct rte_port_out_stats *stats, int clear)
+{
+       struct rte_port_fd_writer_nodrop *p =
+               (struct rte_port_fd_writer_nodrop *) port;
+
+       if (stats != NULL)
+               memcpy(stats, &p->stats, sizeof(p->stats));
+
+       if (clear)
+               memset(&p->stats, 0, sizeof(p->stats));
+
+       return 0;
+}
+
+/*
+ * Summary of port operations
+ */
+struct rte_port_in_ops rte_port_fd_reader_ops = {
+       .f_create = rte_port_fd_reader_create,
+       .f_free = rte_port_fd_reader_free,
+       .f_rx = rte_port_fd_reader_rx,
+       .f_stats = rte_port_fd_reader_stats_read,
+};
+
+struct rte_port_out_ops rte_port_fd_writer_ops = {
+       .f_create = rte_port_fd_writer_create,
+       .f_free = rte_port_fd_writer_free,
+       .f_tx = rte_port_fd_writer_tx,
+       .f_tx_bulk = rte_port_fd_writer_tx_bulk,
+       .f_flush = rte_port_fd_writer_flush,
+       .f_stats = rte_port_fd_writer_stats_read,
+};
+
+struct rte_port_out_ops rte_port_fd_writer_nodrop_ops = {
+       .f_create = rte_port_fd_writer_nodrop_create,
+       .f_free = rte_port_fd_writer_nodrop_free,
+       .f_tx = rte_port_fd_writer_nodrop_tx,
+       .f_tx_bulk = rte_port_fd_writer_nodrop_tx_bulk,
+       .f_flush = rte_port_fd_writer_nodrop_flush,
+       .f_stats = rte_port_fd_writer_nodrop_stats_read,
+};
diff --git a/src/dpdk/lib/librte_port/rte_port_fd.h b/src/dpdk/lib/librte_port/rte_port_fd.h
new file mode 100644 (file)
index 0000000..77a2d31
--- /dev/null
@@ -0,0 +1,105 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_PORT_FD_H__
+#define __INCLUDE_RTE_PORT_FD_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Port FD Device
+ *
+ * fd_reader: input port built on top of a valid non-blocking file descriptor
+ * fd_writer: output port built on top of a valid non-blocking file descriptor
+ *
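+ * A minimal usage sketch for the no-drop writer; "my_fd" and the numeric
+ * values below are illustrative assumptions, not values mandated by the API:
+ *
+ *     struct rte_port_fd_writer_nodrop_params conf = {
+ *             .fd = my_fd,       // pre-opened, non-blocking descriptor
+ *             .tx_burst_sz = 32, // power of 2, <= RTE_PORT_IN_BURST_SIZE_MAX
+ *             .n_retries = 0,    // 0 means retry without limit
+ *     };
+ *     void *port = rte_port_fd_writer_nodrop_ops.f_create(&conf, 0);
+ *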
+ */
+
+#include <stdint.h>
+
+#include <rte_mempool.h>
+#include "rte_port.h"
+
+/** fd_reader port parameters */
+struct rte_port_fd_reader_params {
+       /** File descriptor */
+       int fd;
+
+       /** Maximum Transfer Unit (MTU) */
+       uint32_t mtu;
+
+       /** Pre-initialized buffer pool */
+       struct rte_mempool *mempool;
+};
+
+/** fd_reader port operations */
+extern struct rte_port_in_ops rte_port_fd_reader_ops;
+
+/** fd_writer port parameters */
+struct rte_port_fd_writer_params {
+       /** File descriptor */
+       int fd;
+
+       /** Recommended write burst size. The actual burst size can be
+        * bigger or smaller than this value.
+        */
+       uint32_t tx_burst_sz;
+};
+
+/** fd_writer port operations */
+extern struct rte_port_out_ops rte_port_fd_writer_ops;
+
+/** fd_writer_nodrop port parameters */
+struct rte_port_fd_writer_nodrop_params {
+       /** File descriptor */
+       int fd;
+
+       /** Recommended write burst size. The actual burst size can be
+        * bigger or smaller than this value.
+        */
+       uint32_t tx_burst_sz;
+
+       /** Maximum number of retries, 0 for no limit */
+       uint32_t n_retries;
+};
+
+/** fd_writer_nodrop port operations */
+extern struct rte_port_out_ops rte_port_fd_writer_nodrop_ops;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
index 4db8a8a..be585a7 100644 (file)
@@ -55,7 +55,7 @@ struct rte_port_source_params {
        struct rte_mempool *mempool;
 
        /** The full path of the pcap file to read packets from */
-       char *file_name;
+       const char *file_name;
        /** The number of bytes to be read from each packet in the
         *  pcap file. If this value is 0, the whole packet is read;
         *  if it is bigger than packet size, the generated packets
@@ -69,7 +69,7 @@ extern struct rte_port_in_ops rte_port_source_ops;
 /** sink port parameters */
 struct rte_port_sink_params {
        /** The full path of the pcap file to write the packets to */
-       char *file_name;
+       const char *file_name;
        /** The maximum number of packets write to the pcap file.
         *  If this value is 0, the "infinite" write will be carried
         *  out.
index 0e22e69..e359aff 100644 (file)
@@ -106,7 +106,7 @@ extern "C" {
 
 enum rte_ring_queue_behavior {
        RTE_RING_QUEUE_FIXED = 0, /* Enq/Deq a fixed number of items from a ring */
-       RTE_RING_QUEUE_VARIABLE   /* Enq/Deq as many items a possible from ring */
+       RTE_RING_QUEUE_VARIABLE   /* Enq/Deq as many items as possible from ring */
 };
 
 #ifdef RTE_LIBRTE_RING_DEBUG
@@ -187,7 +187,7 @@ struct rte_ring {
        struct rte_ring_debug_stats stats[RTE_MAX_LCORE];
 #endif
 
-       void * ring[0] __rte_cache_aligned; /**< Memory space of ring starts here.
+       void *ring[] __rte_cache_aligned;   /**< Memory space of ring starts here.
                                             * not volatile so need to be careful
                                             * about compiler re-ordering */
 };
@@ -341,7 +341,7 @@ void rte_ring_free(struct rte_ring *r);
 int rte_ring_set_water_mark(struct rte_ring *r, unsigned count);
 
 /**
- * Dump the status of the ring to the console.
+ * Dump the status of the ring to a file.
  *
  * @param f
  *   A pointer to a file for output
index 8f1f8ce..94b69a9 100644 (file)
@@ -792,7 +792,7 @@ rte_table_acl_lookup(
 
                pkts_mask &= ~pkt_mask;
 
-               if (action_table_pos != RTE_ACL_INVALID_USERDATA) {
+               if (action_table_pos != 0) {
                        pkts_out_mask |= pkt_mask;
                        entries[pkt_pos] = (void *)
                                &acl->memory[action_table_pos *
index 9d17516..57505a6 100644 (file)
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -363,6 +363,35 @@ struct rte_table_hash_key32_ext_params {
 /** Extendible bucket hash table operations */
 extern struct rte_table_ops rte_table_hash_key32_ext_ops;
 
+/** Cuckoo hash table parameters */
+struct rte_table_hash_cuckoo_params {
+       /** Key size (number of bytes) */
+       uint32_t key_size;
+
+       /** Maximum number of hash table entries */
+       uint32_t n_keys;
+
+       /** Hash function used to calculate hash */
+       rte_table_hash_op_hash f_hash;
+
+       /** Seed value or Init value used by f_hash */
+       uint32_t seed;
+
+       /** Byte offset within packet meta-data where the 4-byte key signature
+        * is located. Valid for pre-computed key signature tables, ignored for
+        * do-sig tables.
+        */
+       uint32_t signature_offset;
+
+       /** Byte offset within packet meta-data where the key is located */
+       uint32_t key_offset;
+
+       /** Hash table name */
+       const char *name;
+};
+
+/** Cuckoo hash table operations */
+extern struct rte_table_ops rte_table_hash_cuckoo_dosig_ops;
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/dpdk/lib/librte_table/rte_table_hash_cuckoo.c b/src/dpdk/lib/librte_table/rte_table_hash_cuckoo.c
new file mode 100644 (file)
index 0000000..ff7baee
--- /dev/null
@@ -0,0 +1,382 @@
+/*-
+ *      BSD LICENSE
+ *
+ *      Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *      All rights reserved.
+ *
+ *      Redistribution and use in source and binary forms, with or without
+ *      modification, are permitted provided that the following conditions
+ *      are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *              notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *              notice, this list of conditions and the following disclaimer in
+ *              the documentation and/or other materials provided with the
+ *              distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *              contributors may be used to endorse or promote products derived
+ *              from this software without specific prior written permission.
+ *
+ *      THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *      "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *      LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *      A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *      OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *      SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *      LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *      DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *      THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *      (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *      OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+
+#include <rte_hash.h>
+#include "rte_table_hash.h"
+
+#ifdef RTE_TABLE_STATS_COLLECT
+
+#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(table, val) \
+       (table->stats.n_pkts_in += val)
+#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(table, val) \
+       (table->stats.n_pkts_lookup_miss += val)
+
+#else
+
+#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(table, val)
+#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(table, val)
+
+#endif
+
+
+struct rte_table_hash {
+       struct rte_table_stats stats;
+
+       /* Input parameters */
+       uint32_t key_size;
+       uint32_t entry_size;
+       uint32_t n_keys;
+       rte_table_hash_op_hash f_hash;
+       uint32_t seed;
+       uint32_t signature_offset;
+       uint32_t key_offset;
+       const char *name;
+
+       /* cuckoo hash table object */
+       struct rte_hash *h_table;
+
+       /* Lookup table */
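+       /* Flat array of n_keys * entry_size bytes; the entry for the key at
+        * cuckoo hash position p starts at memory[p * entry_size]. */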
+       uint8_t memory[0] __rte_cache_aligned;
+};
+
+static int
+check_params_create_hash_cuckoo(
+       const struct rte_table_hash_cuckoo_params *params)
+{
+       /* Check for valid parameters */
+       if (params == NULL) {
+               RTE_LOG(ERR, TABLE, "NULL Input Parameters.\n");
+               return -EINVAL;
+       }
+
+       if (params->key_size == 0) {
+               RTE_LOG(ERR, TABLE, "Invalid key_size.\n");
+               return -EINVAL;
+       }
+
+       if (params->n_keys == 0) {
+               RTE_LOG(ERR, TABLE, "Invalid n_keys.\n");
+               return -EINVAL;
+       }
+
+       if (params->f_hash == NULL) {
+               RTE_LOG(ERR, TABLE, "f_hash is NULL.\n");
+               return -EINVAL;
+       }
+
+       if (params->name == NULL) {
+               RTE_LOG(ERR, TABLE, "Table name is NULL.\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static void *
+rte_table_hash_cuckoo_create(void *params,
+                       int socket_id,
+                       uint32_t entry_size)
+{
+       struct rte_hash *rte_hash_handle;
+       struct rte_table_hash *t;
+       uint32_t total_size, total_cl_size;
+
+       /* Check input parameters */
+       struct rte_table_hash_cuckoo_params *p =
+               (struct rte_table_hash_cuckoo_params *) params;
+
+       if (check_params_create_hash_cuckoo(params))
+               return NULL;
+
+       /* Memory allocation */
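+       /* Size = table header + n_keys * entry_size lookup entries, with each
+        * region rounded up to a whole cache line. */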
+       total_cl_size =
+               (sizeof(struct rte_table_hash) +
+                RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE;
+       total_cl_size += (p->n_keys * entry_size +
+                       RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE;
+       total_size = total_cl_size * RTE_CACHE_LINE_SIZE;
+
+       t = rte_zmalloc_socket("TABLE",
+                       total_size,
+                       RTE_CACHE_LINE_SIZE,
+                       socket_id);
+       if (t == NULL) {
+               RTE_LOG(ERR, TABLE,
+                       "%s: Cannot allocate %u bytes for Cuckoo hash table\n",
+                       __func__,
+                       (uint32_t)sizeof(struct rte_table_hash));
+               return NULL;
+       }
+
+       /* Create cuckoo hash table */
+       struct rte_hash_parameters hash_cuckoo_params = {
+               .entries = p->n_keys,
+               .key_len = p->key_size,
+               .hash_func = (rte_hash_function)(p->f_hash),
+               .hash_func_init_val = p->seed,
+               .socket_id = socket_id,
+               .name = p->name
+       };
+
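+       /* Reuse a cuckoo hash already registered under this name, if any;
+        * otherwise create a new one. */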
+       rte_hash_handle = rte_hash_find_existing(p->name);
+       if (rte_hash_handle == NULL) {
+               rte_hash_handle = rte_hash_create(&hash_cuckoo_params);
+               if (NULL == rte_hash_handle) {
+                       RTE_LOG(ERR, TABLE,
+                               "%s: failed to create cuckoo hash table. keysize: %u",
+                               __func__, hash_cuckoo_params.key_len);
+                       rte_free(t);
+                       return NULL;
+               }
+       }
+
+       /* initialize the cuckoo hash parameters */
+       t->key_size = p->key_size;
+       t->entry_size = entry_size;
+       t->n_keys = p->n_keys;
+       t->f_hash = p->f_hash;
+       t->seed = p->seed;
+       t->signature_offset = p->signature_offset;
+       t->key_offset = p->key_offset;
+       t->name = p->name;
+       t->h_table = rte_hash_handle;
+
+       RTE_LOG(INFO, TABLE,
+               "%s: Cuckoo Hash table memory footprint is %u bytes\n",
+               __func__, total_size);
+       return t;
+}
+
+static int
+rte_table_hash_cuckoo_free(void *table)
+{
+       if (table == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       struct rte_table_hash *t = (struct rte_table_hash *)table;
+
+       rte_hash_free(t->h_table);
+       rte_free(t);
+
+       return 0;
+}
+
+static int
+rte_table_hash_cuckoo_entry_add(void *table, void *key, void *entry,
+               int *key_found, void **entry_ptr)
+{
+       int pos = 0;
+
+       if (table == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       if (key == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       if (entry == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: entry parameter is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       struct rte_table_hash *t = (struct rte_table_hash *)table;
+
+       /*  Find Existing entries */
+       pos = rte_hash_lookup(t->h_table, key);
+       if (pos >= 0) {
+               uint8_t *existing_entry;
+
+               *key_found = 1;
+               existing_entry = &t->memory[pos * t->entry_size];
+               memcpy(existing_entry, entry, t->entry_size);
+               *entry_ptr = existing_entry;
+
+               return 0;
+       } else if (pos == -ENOENT) {
+               /* Entry not found. Adding new entry */
+               uint8_t *new_entry;
+
+               pos = rte_hash_add_key(t->h_table, key);
+               if (pos < 0) {
+                       RTE_LOG(ERR, TABLE,
+                               "%s: Entry not added, status: %d\n",
+                               __func__, pos);
+                       return pos;
+               }
+
+               new_entry = &t->memory[pos * t->entry_size];
+               memcpy(new_entry, entry, t->entry_size);
+
+               *key_found = 0;
+               *entry_ptr = new_entry;
+               return 0;
+       }
+       return pos;
+}
+
+static int
+rte_table_hash_cuckoo_entry_delete(void *table, void *key,
+               int *key_found, __rte_unused void *entry)
+{
+       int pos = 0;
+
+       if (table == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       if (key == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       struct rte_table_hash *t = (struct rte_table_hash *)table;
+
+       pos = rte_hash_del_key(t->h_table, key);
+       if (pos >= 0) {
+               *key_found = 1;
+               uint8_t *entry_ptr = &t->memory[pos * t->entry_size];
+
+               if (entry)
+                       memcpy(entry, entry_ptr, t->entry_size);
+
+               memset(&t->memory[pos * t->entry_size], 0, t->entry_size);
+       }
+
+       return pos;
+}
+
+
+static int
+rte_table_hash_cuckoo_lookup_dosig(void *table,
+       struct rte_mbuf **pkts,
+       uint64_t pkts_mask,
+       uint64_t *lookup_hit_mask,
+       void **entries)
+{
+       struct rte_table_hash *t = (struct rte_table_hash *)table;
+       uint64_t pkts_mask_out = 0;
+       uint32_t i;
+
+       __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+
+       RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(t, n_pkts_in);
+
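+       /* Contiguous pkts_mask (2^n - 1): resolve all keys with one bulk
+        * lookup; otherwise probe the hash once per set bit. */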
+       if ((pkts_mask & (pkts_mask + 1)) == 0) {
+               const uint8_t *keys[64];
+               int32_t positions[64], status;
+
+               /* Keys for bulk lookup */
+               for (i = 0; i < n_pkts_in; i++)
+                       keys[i] = RTE_MBUF_METADATA_UINT8_PTR(pkts[i],
+                                       t->key_offset);
+
+               /* Bulk Lookup */
+               status = rte_hash_lookup_bulk(t->h_table,
+                               (const void **) keys,
+                               n_pkts_in,
+                               positions);
+
+               if (status == 0) {
+                       for (i = 0; i < n_pkts_in; i++) {
+                               if (likely(positions[i] >= 0)) {
+                                       uint64_t pkt_mask = 1LLU << i;
+
+                                       entries[i] = &t->memory[positions[i]
+                                               * t->entry_size];
+                                       pkts_mask_out |= pkt_mask;
+                               }
+                       }
+               }
+       } else {
+               for (i = 0; i < (uint32_t)(RTE_PORT_IN_BURST_SIZE_MAX
+                                       - __builtin_clzll(pkts_mask)); i++) {
+                       uint64_t pkt_mask = 1LLU << i;
+
+                       if (pkt_mask & pkts_mask) {
+                               struct rte_mbuf *pkt = pkts[i];
+                               uint8_t *key = RTE_MBUF_METADATA_UINT8_PTR(pkt,
+                                               t->key_offset);
+                               int pos;
+
+                               pos = rte_hash_lookup(t->h_table, key);
+                               if (likely(pos >= 0)) {
+                                       entries[i] = &t->memory[pos
+                                               * t->entry_size];
+                                       pkts_mask_out |= pkt_mask;
+                               }
+                       }
+               }
+       }
+
+       *lookup_hit_mask = pkts_mask_out;
+       RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(t,
+                       n_pkts_in - __builtin_popcountll(pkts_mask_out));
+
+       return 0;
+}
+
+static int
+rte_table_hash_cuckoo_stats_read(void *table, struct rte_table_stats *stats,
+       int clear)
+{
+       struct rte_table_hash *t = (struct rte_table_hash *) table;
+
+       if (stats != NULL)
+               memcpy(stats, &t->stats, sizeof(t->stats));
+
+       if (clear)
+               memset(&t->stats, 0, sizeof(t->stats));
+
+       return 0;
+}
+
+struct rte_table_ops rte_table_hash_cuckoo_dosig_ops = {
+       .f_create = rte_table_hash_cuckoo_create,
+       .f_free = rte_table_hash_cuckoo_free,
+       .f_add = rte_table_hash_cuckoo_entry_add,
+       .f_delete = rte_table_hash_cuckoo_entry_delete,
+       .f_add_bulk = NULL,
+       .f_delete_bulk = NULL,
+       .f_lookup = rte_table_hash_cuckoo_lookup_dosig,
+       .f_stats = rte_table_hash_cuckoo_stats_read,
+};
index b7e000f..08d4d77 100644 (file)
@@ -130,7 +130,7 @@ rte_table_hash_create_key16_lru(void *params,
        /* Check input parameters */
        if ((check_params_create_lru(p) != 0) ||
                ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
-               ((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0))
+               ((sizeof(struct rte_bucket_4_16) % 64) != 0))
                return NULL;
        n_entries_per_bucket = 4;
        key_size = 16;
@@ -344,7 +344,7 @@ rte_table_hash_create_key16_ext(void *params,
        /* Check input parameters */
        if ((check_params_create_ext(p) != 0) ||
                ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
-               ((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0))
+               ((sizeof(struct rte_bucket_4_16) % 64) != 0))
                return NULL;
 
        n_entries_per_bucket = 4;
index a7aba49..161f6b7 100644 (file)
@@ -129,7 +129,7 @@ rte_table_hash_create_key32_lru(void *params,
        /* Check input parameters */
        if ((check_params_create_lru(p) != 0) ||
                ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
-               ((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0)) {
+               ((sizeof(struct rte_bucket_4_32) % 64) != 0)) {
                return NULL;
        }
        n_entries_per_bucket = 4;
@@ -337,7 +337,7 @@ rte_table_hash_create_key32_ext(void *params,
        /* Check input parameters */
        if ((check_params_create_ext(p) != 0) ||
                ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
-               ((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0))
+               ((sizeof(struct rte_bucket_4_32) % 64) != 0))
                return NULL;
 
        n_entries_per_bucket = 4;
index e2e2bdc..b04f60d 100644 (file)
@@ -125,7 +125,7 @@ rte_table_hash_create_key8_lru(void *params, int socket_id, uint32_t entry_size)
        /* Check input parameters */
        if ((check_params_create_lru(p) != 0) ||
                ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
-               ((sizeof(struct rte_bucket_4_8) % RTE_CACHE_LINE_SIZE) != 0)) {
+               ((sizeof(struct rte_bucket_4_8) % 64) != 0)) {
                return NULL;
        }
        n_entries_per_bucket = 4;
@@ -332,7 +332,7 @@ rte_table_hash_create_key8_ext(void *params, int socket_id, uint32_t entry_size)
        /* Check input parameters */
        if ((check_params_create_ext(p) != 0) ||
                ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
-               ((sizeof(struct rte_bucket_4_8) % RTE_CACHE_LINE_SIZE) != 0))
+               ((sizeof(struct rte_bucket_4_8) % 64) != 0))
                return NULL;
 
        n_entries_per_bucket = 4;