New upstream version 18.02
[deb_dpdk.git] / drivers / net / dpaa2 / dpaa2_rxtx.c
index 8ecd238..183293c 100644 (file)
-/*-
- *   BSD LICENSE
+/* SPDX-License-Identifier: BSD-3-Clause
  *
  *   Copyright (c) 2016 Freescale Semiconductor, Inc. All rights reserved.
- *   Copyright 2016 NXP.
+ *   Copyright 2016 NXP
  *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Freescale Semiconductor, Inc nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <time.h>
 #include <net/if.h>
 
 #include <rte_mbuf.h>
-#include <rte_ethdev.h>
+#include <rte_ethdev_driver.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <rte_string_fns.h>
 #include <rte_dev.h>
 
+#include <rte_fslmc.h>
 #include <fslmc_logs.h>
 #include <fslmc_vfio.h>
 #include <dpaa2_hw_pvt.h>
 #include <dpaa2_hw_dpio.h>
 #include <dpaa2_hw_mempool.h>
+#include <dpaa2_eventdev.h>
 
 #include "dpaa2_ethdev.h"
 #include "base/dpaa2_hw_dpni_annot.h"
 
+#define DPAA2_MBUF_TO_CONTIG_FD(_mbuf, _fd, _bpid)  do { \
+       DPAA2_SET_FD_ADDR(_fd, DPAA2_MBUF_VADDR_TO_IOVA(_mbuf)); \
+       DPAA2_SET_FD_LEN(_fd, _mbuf->data_len); \
+       DPAA2_SET_ONLY_FD_BPID(_fd, _bpid); \
+       DPAA2_SET_FD_OFFSET(_fd, _mbuf->data_off); \
+       DPAA2_SET_FD_ASAL(_fd, DPAA2_ASAL_VAL); \
+} while (0)
+
+static inline void __attribute__((hot))
+dpaa2_dev_rx_parse_frc(struct rte_mbuf *m, uint16_t frc)
+{
+       PMD_RX_LOG(DEBUG, "frc = 0x%x   ", frc);
+
+       m->packet_type = RTE_PTYPE_UNKNOWN;
+       switch (frc) {
+       case DPAA2_PKT_TYPE_ETHER:
+               m->packet_type = RTE_PTYPE_L2_ETHER;
+               break;
+       case DPAA2_PKT_TYPE_IPV4:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                       RTE_PTYPE_L3_IPV4;
+               break;
+       case DPAA2_PKT_TYPE_IPV6:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                       RTE_PTYPE_L3_IPV6;
+               break;
+       case DPAA2_PKT_TYPE_IPV4_EXT:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                       RTE_PTYPE_L3_IPV4_EXT;
+               break;
+       case DPAA2_PKT_TYPE_IPV6_EXT:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                       RTE_PTYPE_L3_IPV6_EXT;
+               break;
+       case DPAA2_PKT_TYPE_IPV4_TCP:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                       RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP;
+               break;
+       case DPAA2_PKT_TYPE_IPV6_TCP:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                       RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP;
+               break;
+       case DPAA2_PKT_TYPE_IPV4_UDP:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                       RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP;
+               break;
+       case DPAA2_PKT_TYPE_IPV6_UDP:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                       RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP;
+               break;
+       case DPAA2_PKT_TYPE_IPV4_SCTP:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                       RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP;
+               break;
+       case DPAA2_PKT_TYPE_IPV6_SCTP:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                       RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP;
+               break;
+       case DPAA2_PKT_TYPE_IPV4_ICMP:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                       RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_ICMP;
+               break;
+       case DPAA2_PKT_TYPE_IPV6_ICMP:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                       RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_ICMP;
+               break;
+       case DPAA2_PKT_TYPE_VLAN_1:
+       case DPAA2_PKT_TYPE_VLAN_2:
+               m->ol_flags |= PKT_RX_VLAN;
+               break;
+       /* More switch cases can be added */
+       /* TODO: Add handling for checksum error check from FRC */
+       default:
+               m->packet_type = RTE_PTYPE_UNKNOWN;
+       }
+}
+
 static inline uint32_t __attribute__((hot))
-dpaa2_dev_rx_parse(uint64_t hw_annot_addr)
+dpaa2_dev_rx_parse_slow(uint64_t hw_annot_addr)
 {
        uint32_t pkt_type = RTE_PTYPE_UNKNOWN;
        struct dpaa2_annot_hdr *annotation =
                        (struct dpaa2_annot_hdr *)hw_annot_addr;
 
        PMD_RX_LOG(DEBUG, "annotation = 0x%lx   ", annotation->word4);
-
        if (BIT_ISSET_AT_POS(annotation->word3, L2_ARP_PRESENT)) {
                pkt_type = RTE_PTYPE_L2_ETHER_ARP;
                goto parse_done;
@@ -114,21 +166,48 @@ parse_done:
        return pkt_type;
 }
 
-static inline void __attribute__((hot))
-dpaa2_dev_rx_offload(uint64_t hw_annot_addr, struct rte_mbuf *mbuf)
+static inline uint32_t __attribute__((hot))
+dpaa2_dev_rx_parse(struct rte_mbuf *mbuf, uint64_t hw_annot_addr)
 {
        struct dpaa2_annot_hdr *annotation =
-               (struct dpaa2_annot_hdr *)hw_annot_addr;
+                       (struct dpaa2_annot_hdr *)hw_annot_addr;
+
+       PMD_RX_LOG(DEBUG, "annotation = 0x%lx   ", annotation->word4);
 
+       /* Check offloads first */
        if (BIT_ISSET_AT_POS(annotation->word3,
                             L2_VLAN_1_PRESENT | L2_VLAN_N_PRESENT))
                mbuf->ol_flags |= PKT_RX_VLAN;
 
        if (BIT_ISSET_AT_POS(annotation->word8, DPAA2_ETH_FAS_L3CE))
                mbuf->ol_flags |= PKT_RX_IP_CKSUM_BAD;
-
-       if (BIT_ISSET_AT_POS(annotation->word8, DPAA2_ETH_FAS_L4CE))
+       else if (BIT_ISSET_AT_POS(annotation->word8, DPAA2_ETH_FAS_L4CE))
                mbuf->ol_flags |= PKT_RX_L4_CKSUM_BAD;
+
+       /* Return some common types from parse processing */
+       switch (annotation->word4) {
+       case DPAA2_L3_IPv4:
+               return RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4;
+       case DPAA2_L3_IPv6:
+               return  RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6;
+       case DPAA2_L3_IPv4_TCP:
+               return  RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 |
+                               RTE_PTYPE_L4_TCP;
+       case DPAA2_L3_IPv4_UDP:
+               return  RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 |
+                               RTE_PTYPE_L4_UDP;
+       case DPAA2_L3_IPv6_TCP:
+               return  RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6 |
+                               RTE_PTYPE_L4_TCP;
+       case DPAA2_L3_IPv6_UDP:
+               return  RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6 |
+                               RTE_PTYPE_L4_UDP;
+       default:
+               PMD_RX_LOG(DEBUG, "Slow parse the parsing results\n");
+               break;
+       }
+
+       return dpaa2_dev_rx_parse_slow(hw_annot_addr);
 }
 
 static inline struct rte_mbuf *__attribute__((hot))
@@ -159,13 +238,14 @@ eth_sg_fd_to_mbuf(const struct qbman_fd *fd)
        first_seg->pkt_len = DPAA2_GET_FD_LEN(fd);
        first_seg->nb_segs = 1;
        first_seg->next = NULL;
-
-       first_seg->packet_type = dpaa2_dev_rx_parse(
+       if (dpaa2_svr_family == SVR_LX2160A)
+               dpaa2_dev_rx_parse_frc(first_seg,
+                               DPAA2_GET_FD_FRC_PARSE_SUM(fd));
+       else
+               first_seg->packet_type = dpaa2_dev_rx_parse(first_seg,
                         (uint64_t)DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd))
                         + DPAA2_FD_PTA_SIZE);
-       dpaa2_dev_rx_offload((uint64_t)DPAA2_IOVA_TO_VADDR(
-                       DPAA2_GET_FD_ADDR(fd)) +
-                       DPAA2_FD_PTA_SIZE, first_seg);
+
        rte_mbuf_refcnt_set(first_seg, 1);
        cur_seg = first_seg;
        while (!DPAA2_SG_IS_FINAL(sge)) {
@@ -206,20 +286,22 @@ eth_fd_to_mbuf(const struct qbman_fd *fd)
        mbuf->data_off = DPAA2_GET_FD_OFFSET(fd);
        mbuf->data_len = DPAA2_GET_FD_LEN(fd);
        mbuf->pkt_len = mbuf->data_len;
+       mbuf->next = NULL;
+       rte_mbuf_refcnt_set(mbuf, 1);
 
        /* Parse the packet */
-       /* parse results are after the private - sw annotation area */
-       mbuf->packet_type = dpaa2_dev_rx_parse(
+       /* parse results for LX2 are there in FRC field of FD.
+        * For other DPAA2 platforms , parse results are after
+        * the private - sw annotation area
+        */
+
+       if (dpaa2_svr_family == SVR_LX2160A)
+               dpaa2_dev_rx_parse_frc(mbuf, DPAA2_GET_FD_FRC_PARSE_SUM(fd));
+       else
+               mbuf->packet_type = dpaa2_dev_rx_parse(mbuf,
                        (uint64_t)DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd))
                         + DPAA2_FD_PTA_SIZE);
 
-       dpaa2_dev_rx_offload((uint64_t)DPAA2_IOVA_TO_VADDR(
-                            DPAA2_GET_FD_ADDR(fd)) +
-                            DPAA2_FD_PTA_SIZE, mbuf);
-
-       mbuf->next = NULL;
-       rte_mbuf_refcnt_set(mbuf, 1);
-
        PMD_RX_LOG(DEBUG, "to mbuf - mbuf =%p, mbuf->buf_addr =%p, off = %d,"
                "fd_off=%d fd =%lx, meta = %d  bpid =%d, len=%d\n",
                mbuf, mbuf->buf_addr, mbuf->data_off,
@@ -238,9 +320,11 @@ eth_mbuf_to_sg_fd(struct rte_mbuf *mbuf,
        struct qbman_sge *sgt, *sge = NULL;
        int i;
 
-       /* First Prepare FD to be transmited*/
-       /* Resetting the buffer pool id and offset field*/
-       fd->simple.bpid_offset = 0;
+       if (unlikely(mbuf->ol_flags & PKT_TX_VLAN_PKT)) {
+               int ret = rte_vlan_insert(&mbuf);
+               if (ret)
+                       return ret;
+       }
 
        temp = rte_pktmbuf_alloc(mbuf->pool);
        if (temp == NULL) {
@@ -250,8 +334,8 @@ eth_mbuf_to_sg_fd(struct rte_mbuf *mbuf,
 
        DPAA2_SET_FD_ADDR(fd, DPAA2_MBUF_VADDR_TO_IOVA(temp));
        DPAA2_SET_FD_LEN(fd, mbuf->pkt_len);
+       DPAA2_SET_ONLY_FD_BPID(fd, bpid);
        DPAA2_SET_FD_OFFSET(fd, temp->data_off);
-       DPAA2_SET_FD_BPID(fd, bpid);
        DPAA2_SET_FD_ASAL(fd, DPAA2_ASAL_VAL);
        DPAA2_FD_SET_FORMAT(fd, qbman_fd_sg);
        /*Set Scatter gather table and Scatter gather entries*/
@@ -308,14 +392,14 @@ static void __attribute__ ((noinline)) __attribute__((hot))
 eth_mbuf_to_fd(struct rte_mbuf *mbuf,
               struct qbman_fd *fd, uint16_t bpid)
 {
-       /*Resetting the buffer pool id and offset field*/
-       fd->simple.bpid_offset = 0;
+       if (unlikely(mbuf->ol_flags & PKT_TX_VLAN_PKT)) {
+               if (rte_vlan_insert(&mbuf)) {
+                       rte_pktmbuf_free(mbuf);
+                       return;
+               }
+       }
 
-       DPAA2_SET_FD_ADDR(fd, DPAA2_MBUF_VADDR_TO_IOVA(mbuf));
-       DPAA2_SET_FD_LEN(fd, mbuf->data_len);
-       DPAA2_SET_FD_BPID(fd, bpid);
-       DPAA2_SET_FD_OFFSET(fd, mbuf->data_off);
-       DPAA2_SET_FD_ASAL(fd, DPAA2_ASAL_VAL);
+       DPAA2_MBUF_TO_CONTIG_FD(mbuf, fd, bpid);
 
        PMD_TX_LOG(DEBUG, "mbuf =%p, mbuf->buf_addr =%p, off = %d,"
                "fd_off=%d fd =%lx, meta = %d  bpid =%d, len=%d\n",
@@ -347,6 +431,12 @@ eth_copy_mbuf_to_fd(struct rte_mbuf *mbuf,
        struct rte_mbuf *m;
        void *mb = NULL;
 
+       if (unlikely(mbuf->ol_flags & PKT_TX_VLAN_PKT)) {
+               int ret = rte_vlan_insert(&mbuf);
+               if (ret)
+                       return ret;
+       }
+
        if (rte_dpaa2_mbuf_alloc_bulk(
                rte_dpaa2_bpid_info[bpid].bp_list->mp, &mb, 1)) {
                PMD_TX_LOG(WARNING, "Unable to allocated DPAA2 buffer");
@@ -363,14 +453,7 @@ eth_copy_mbuf_to_fd(struct rte_mbuf *mbuf,
        m->packet_type = mbuf->packet_type;
        m->tx_offload = mbuf->tx_offload;
 
-       /*Resetting the buffer pool id and offset field*/
-       fd->simple.bpid_offset = 0;
-
-       DPAA2_SET_FD_ADDR(fd, DPAA2_MBUF_VADDR_TO_IOVA(m));
-       DPAA2_SET_FD_LEN(fd, mbuf->data_len);
-       DPAA2_SET_FD_BPID(fd, bpid);
-       DPAA2_SET_FD_OFFSET(fd, mbuf->data_off);
-       DPAA2_SET_FD_ASAL(fd, DPAA2_ASAL_VAL);
+       DPAA2_MBUF_TO_CONTIG_FD(m, fd, bpid);
 
        PMD_TX_LOG(DEBUG, " mbuf %p BMAN buf addr %p",
                   (void *)mbuf, mbuf->buf_addr);
@@ -390,12 +473,12 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
        /* Function receive frames for a given device and VQ*/
        struct dpaa2_queue *dpaa2_q = (struct dpaa2_queue *)queue;
-       struct qbman_result *dq_storage;
+       struct qbman_result *dq_storage, *dq_storage1 = NULL;
        uint32_t fqid = dpaa2_q->fqid;
        int ret, num_rx = 0;
-       uint8_t is_last = 0, status;
+       uint8_t pending, status;
        struct qbman_swp *swp;
-       const struct qbman_fd *fd[DPAA2_DQRR_RING_SIZE];
+       const struct qbman_fd *fd, *next_fd;
        struct qbman_pull_desc pulldesc;
        struct queue_storage_info_t *q_storage = dpaa2_q->q_storage;
        struct rte_eth_dev *dev = dpaa2_q->dev;
@@ -408,13 +491,14 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                }
        }
        swp = DPAA2_PER_LCORE_PORTAL;
-       if (!q_storage->active_dqs) {
+       if (unlikely(!q_storage->active_dqs)) {
                q_storage->toggle = 0;
                dq_storage = q_storage->dq_storage[q_storage->toggle];
+               q_storage->last_num_pkts = (nb_pkts > DPAA2_DQRR_RING_SIZE) ?
+                                              DPAA2_DQRR_RING_SIZE : nb_pkts;
                qbman_pull_desc_clear(&pulldesc);
                qbman_pull_desc_set_numframes(&pulldesc,
-                                             (nb_pkts > DPAA2_DQRR_RING_SIZE) ?
-                                              DPAA2_DQRR_RING_SIZE : nb_pkts);
+                                             q_storage->last_num_pkts);
                qbman_pull_desc_set_fq(&pulldesc, fqid);
                qbman_pull_desc_set_storage(&pulldesc, dq_storage,
                        (dma_addr_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
@@ -437,7 +521,22 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
                set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index, dq_storage);
        }
+
        dq_storage = q_storage->active_dqs;
+       rte_prefetch0((void *)((uint64_t)(dq_storage)));
+       rte_prefetch0((void *)((uint64_t)(dq_storage + 1)));
+
+       /* Prepare next pull descriptor. This will give space for the
+        * prefething done on DQRR entries
+        */
+       q_storage->toggle ^= 1;
+       dq_storage1 = q_storage->dq_storage[q_storage->toggle];
+       qbman_pull_desc_clear(&pulldesc);
+       qbman_pull_desc_set_numframes(&pulldesc, DPAA2_DQRR_RING_SIZE);
+       qbman_pull_desc_set_fq(&pulldesc, fqid);
+       qbman_pull_desc_set_storage(&pulldesc, dq_storage1,
+               (dma_addr_t)(DPAA2_VADDR_TO_IOVA(dq_storage1)), 1);
+
        /* Check if the previous issued command is completed.
         * Also seems like the SWP is shared between the Ethernet Driver
         * and the SEC driver.
@@ -446,33 +545,37 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                ;
        if (dq_storage == get_swp_active_dqs(q_storage->active_dpio_id))
                clear_swp_active_dqs(q_storage->active_dpio_id);
-       while (!is_last) {
+
+       pending = 1;
+
+       do {
                /* Loop until the dq_storage is updated with
                 * new token by QBMAN
                 */
                while (!qbman_check_new_result(dq_storage))
                        ;
-               rte_prefetch0((void *)((uint64_t)(dq_storage + 1)));
+               rte_prefetch0((void *)((uint64_t)(dq_storage + 2)));
                /* Check whether Last Pull command is Expired and
                 * setting Condition for Loop termination
                 */
                if (qbman_result_DQ_is_pull_complete(dq_storage)) {
-                       is_last = 1;
+                       pending = 0;
                        /* Check for valid frame. */
-                       status = (uint8_t)qbman_result_DQ_flags(dq_storage);
+                       status = qbman_result_DQ_flags(dq_storage);
                        if (unlikely((status & QBMAN_DQ_STAT_VALIDFRAME) == 0))
                                continue;
                }
-               fd[num_rx] = qbman_result_DQ_fd(dq_storage);
+               fd = qbman_result_DQ_fd(dq_storage);
 
+               next_fd = qbman_result_DQ_fd(dq_storage + 1);
                /* Prefetch Annotation address for the parse results */
-               rte_prefetch0((void *)((uint64_t)DPAA2_GET_FD_ADDR(fd[num_rx])
+               rte_prefetch0((void *)(DPAA2_GET_FD_ADDR(next_fd)
                                + DPAA2_FD_PTA_SIZE + 16));
 
-               if (unlikely(DPAA2_FD_GET_FORMAT(fd[num_rx]) == qbman_fd_sg))
-                       bufs[num_rx] = eth_sg_fd_to_mbuf(fd[num_rx]);
+               if (unlikely(DPAA2_FD_GET_FORMAT(fd) == qbman_fd_sg))
+                       bufs[num_rx] = eth_sg_fd_to_mbuf(fd);
                else
-                       bufs[num_rx] = eth_fd_to_mbuf(fd[num_rx]);
+                       bufs[num_rx] = eth_fd_to_mbuf(fd);
                bufs[num_rx]->port = dev->data->port_id;
 
                if (dev->data->dev_conf.rxmode.hw_vlan_strip)
@@ -480,7 +583,7 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
                dq_storage++;
                num_rx++;
-       }
+       } while (pending);
 
        if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
                while (!qbman_check_command_complete(
@@ -488,14 +591,7 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                        ;
                clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
        }
-       q_storage->toggle ^= 1;
-       dq_storage = q_storage->dq_storage[q_storage->toggle];
-       qbman_pull_desc_clear(&pulldesc);
-       qbman_pull_desc_set_numframes(&pulldesc, DPAA2_DQRR_RING_SIZE);
-       qbman_pull_desc_set_fq(&pulldesc, fqid);
-       qbman_pull_desc_set_storage(&pulldesc, dq_storage,
-                       (dma_addr_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
-       /* Issue a volatile dequeue command. */
+       /* issue a volatile dequeue command for next pull */
        while (1) {
                if (qbman_swp_pull(swp, &pulldesc)) {
                        PMD_RX_LOG(WARNING, "VDQ command is not issued."
@@ -504,13 +600,12 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                }
                break;
        }
-       q_storage->active_dqs = dq_storage;
+       q_storage->active_dqs = dq_storage1;
        q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
-       set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index, dq_storage);
+       set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index, dq_storage1);
 
        dpaa2_q->rx_pkts += num_rx;
 
-       /* Return the total number of packets received to DPAA2 app */
        return num_rx;
 }
 
@@ -521,7 +616,8 @@ dpaa2_dev_process_parallel_event(struct qbman_swp *swp,
                                 struct dpaa2_queue *rxq,
                                 struct rte_event *ev)
 {
-       ev->mbuf = eth_fd_to_mbuf(fd);
+       rte_prefetch0((void *)(DPAA2_GET_FD_ADDR(fd) +
+               DPAA2_FD_PTA_SIZE + 16));
 
        ev->flow_id = rxq->ev.flow_id;
        ev->sub_event_type = rxq->ev.sub_event_type;
@@ -531,9 +627,40 @@ dpaa2_dev_process_parallel_event(struct qbman_swp *swp,
        ev->queue_id = rxq->ev.queue_id;
        ev->priority = rxq->ev.priority;
 
+       ev->mbuf = eth_fd_to_mbuf(fd);
+
        qbman_swp_dqrr_consume(swp, dq);
 }
 
+void __attribute__((hot))
+dpaa2_dev_process_atomic_event(struct qbman_swp *swp __attribute__((unused)),
+                              const struct qbman_fd *fd,
+                              const struct qbman_result *dq,
+                              struct dpaa2_queue *rxq,
+                              struct rte_event *ev)
+{
+       uint8_t dqrr_index;
+
+       rte_prefetch0((void *)(DPAA2_GET_FD_ADDR(fd) +
+               DPAA2_FD_PTA_SIZE + 16));
+
+       ev->flow_id = rxq->ev.flow_id;
+       ev->sub_event_type = rxq->ev.sub_event_type;
+       ev->event_type = RTE_EVENT_TYPE_ETHDEV;
+       ev->op = RTE_EVENT_OP_NEW;
+       ev->sched_type = rxq->ev.sched_type;
+       ev->queue_id = rxq->ev.queue_id;
+       ev->priority = rxq->ev.priority;
+
+       ev->mbuf = eth_fd_to_mbuf(fd);
+
+       dqrr_index = qbman_get_dqrr_idx(dq);
+       ev->mbuf->seqn = dqrr_index + 1;
+       DPAA2_PER_LCORE_DQRR_SIZE++;
+       DPAA2_PER_LCORE_DQRR_HELD |= 1 << dqrr_index;
+       DPAA2_PER_LCORE_DQRR_MBUF(dqrr_index) = ev->mbuf;
+}
+
 /*
  * Callback to handle sending packets through WRIOP based interface
  */
@@ -554,6 +681,7 @@ dpaa2_dev_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
        uint16_t bpid;
        struct rte_eth_dev *dev = dpaa2_q->dev;
        struct dpaa2_dev_priv *priv = dev->data->dev_private;
+       uint32_t flags[MAX_TX_RING_SLOTS] = {0};
 
        if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
                ret = dpaa2_affine_qbman_swp();
@@ -572,7 +700,6 @@ dpaa2_dev_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
        qbman_eq_desc_set_response(&eqdesc, 0, 0);
        qbman_eq_desc_set_qd(&eqdesc, priv->qdid,
                             dpaa2_q->flow_id, dpaa2_q->tc_index);
-
        /*Clear the unused FD fields before sending*/
        while (nb_pkts) {
                /*Check if the queue is congested*/
@@ -587,11 +714,39 @@ dpaa2_dev_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                frames_to_send = (nb_pkts >> 3) ? MAX_TX_RING_SLOTS : nb_pkts;
 
                for (loop = 0; loop < frames_to_send; loop++) {
+                       if ((*bufs)->seqn) {
+                               uint8_t dqrr_index = (*bufs)->seqn - 1;
+
+                               flags[loop] = QBMAN_ENQUEUE_FLAG_DCA |
+                                               dqrr_index;
+                               DPAA2_PER_LCORE_DQRR_SIZE--;
+                               DPAA2_PER_LCORE_DQRR_HELD &= ~(1 << dqrr_index);
+                               (*bufs)->seqn = DPAA2_INVALID_MBUF_SEQN;
+                       }
+
                        fd_arr[loop].simple.frc = 0;
                        DPAA2_RESET_FD_CTRL((&fd_arr[loop]));
                        DPAA2_SET_FD_FLC((&fd_arr[loop]), NULL);
-                       if (RTE_MBUF_DIRECT(*bufs)) {
+                       if (likely(RTE_MBUF_DIRECT(*bufs))) {
                                mp = (*bufs)->pool;
+                               /* Check the basic scenario and set
+                                * the FD appropriately here itself.
+                                */
+                               if (likely(mp && mp->ops_index ==
+                                   priv->bp_list->dpaa2_ops_index &&
+                                   (*bufs)->nb_segs == 1 &&
+                                   rte_mbuf_refcnt_read((*bufs)) == 1)) {
+                                       if (unlikely((*bufs)->ol_flags
+                                               & PKT_TX_VLAN_PKT)) {
+                                               ret = rte_vlan_insert(bufs);
+                                               if (ret)
+                                                       goto send_n_return;
+                                       }
+                                       DPAA2_MBUF_TO_CONTIG_FD((*bufs),
+                                       &fd_arr[loop], mempool_to_bpid(mp));
+                                       bufs++;
+                                       continue;
+                               }
                        } else {
                                mi = rte_mbuf_from_indirect(*bufs);
                                mp = mi->pool;
@@ -636,13 +791,14 @@ dpaa2_dev_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                loop = 0;
                while (loop < frames_to_send) {
                        loop += qbman_swp_enqueue_multiple(swp, &eqdesc,
-                                       &fd_arr[loop], frames_to_send - loop);
+                                       &fd_arr[loop], &flags[loop],
+                                       frames_to_send - loop);
                }
 
                num_tx += frames_to_send;
-               dpaa2_q->tx_pkts += frames_to_send;
                nb_pkts -= frames_to_send;
        }
+       dpaa2_q->tx_pkts += num_tx;
        return num_tx;
 
 send_n_return:
@@ -652,12 +808,14 @@ send_n_return:
 
                while (i < loop) {
                        i += qbman_swp_enqueue_multiple(swp, &eqdesc,
-                                                       &fd_arr[i], loop - i);
+                                                       &fd_arr[i],
+                                                       &flags[loop],
+                                                       loop - i);
                }
                num_tx += loop;
-               dpaa2_q->tx_pkts += loop;
        }
 skip_tx:
+       dpaa2_q->tx_pkts += num_tx;
        return num_tx;
 }