Imported Upstream version 16.07-rc1
similarity index 70%
rename from drivers/net/ixgbe/ixgbe_rxtx_vec.c
rename to drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
index 5040704..4f95deb 100644
@@ -37,6 +37,7 @@
 
 #include "ixgbe_ethdev.h"
 #include "ixgbe_rxtx.h"
+#include "ixgbe_rxtx_vec_common.h"
 
 #include <tmmintrin.h>
 
@@ -140,10 +141,9 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
  */
 #ifdef RTE_IXGBE_RX_OLFLAGS_ENABLE
 
-#define VTAG_SHIFT     (3)
-
 static inline void
-desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
+desc_to_olflags_v(__m128i descs[4], uint8_t vlan_flags,
+       struct rte_mbuf **rx_pkts)
 {
        __m128i ptype0, ptype1, vtag0, vtag1;
        union {
@@ -151,11 +151,6 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
                uint64_t dword;
        } vol;
 
-       /* pkt type + vlan olflags mask */
-       const __m128i pkttype_msk = _mm_set_epi16(
-                       0x0000, 0x0000, 0x0000, 0x0000,
-                       PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT);
-
        /* mask everything except rss type */
        const __m128i rsstype_msk = _mm_set_epi16(
                        0x0000, 0x0000, 0x0000, 0x0000,
@@ -167,6 +162,19 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
                        PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH, 0,
                        PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, 0);
 
+       /* mask everything except vlan present bit */
+       const __m128i vlan_msk = _mm_set_epi16(
+                       0x0000, 0x0000,
+                       0x0000, 0x0000,
+                       IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP,
+                       IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP);
+       /* map vlan present (0x8) to ol_flags */
+       const __m128i vlan_map = _mm_set_epi8(
+               0, 0, 0, 0,
+               0, 0, 0, vlan_flags,
+               0, 0, 0, 0,
+               0, 0, 0, 0);
+
        ptype0 = _mm_unpacklo_epi16(descs[0], descs[1]);
        ptype1 = _mm_unpacklo_epi16(descs[2], descs[3]);
        vtag0 = _mm_unpackhi_epi16(descs[0], descs[1]);
@@ -177,8 +185,8 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
        ptype0 = _mm_shuffle_epi8(rss_flags, ptype0);
 
        vtag1 = _mm_unpacklo_epi32(vtag0, vtag1);
-       vtag1 = _mm_srli_epi16(vtag1, VTAG_SHIFT);
-       vtag1 = _mm_and_si128(vtag1, pkttype_msk);
+       vtag1 = _mm_and_si128(vtag1, vlan_msk);
+       vtag1 = _mm_shuffle_epi8(vlan_map, vtag1);
 
        vtag1 = _mm_or_si128(ptype0, vtag1);
        vol.dword = _mm_cvtsi128_si64(vtag1);
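
The replacement above turns VLAN-flag extraction into a byte lookup: after the AND with vlan_msk, each 16-bit lane of vtag1 holds either IXGBE_RXD_STAT_VP (0x8) or 0, and _mm_shuffle_epi8 then uses each byte of vtag1 as an index into vlan_map, so index 8 selects vlan_flags and index 0 selects 0. A minimal scalar sketch of the same per-packet computation (hypothetical helper, not part of the patch):

	static inline uint8_t
	vlan_olflags_scalar(uint16_t staterr, uint8_t vlan_flags)
	{
		/* _mm_and_si128 with vlan_msk: keep only the VLAN-present bit */
		uint8_t idx = staterr & IXGBE_RXD_STAT_VP;	/* 0x8 or 0x0 */
		/* _mm_shuffle_epi8 with vlan_map acts as a 16-entry byte LUT:
		 * index 8 yields vlan_flags, every other index yields 0 */
		uint8_t lut[16] = { 0 };

		lut[8] = vlan_flags;
		return lut[idx];
	}

Passing vlan_flags in at run time is the point of the change: rxq->vlan_flags can carry PKT_RX_VLAN_PKT alone or combined with PKT_RX_VLAN_STRIPPED (see the RTE_BUILD_BUG_ON added further down), which the old compile-time pkttype_msk could not express.
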
@@ -220,6 +228,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                                0, 0            /* ignore pkt_type field */
                        );
        __m128i dd_check, eop_check;
+       uint8_t vlan_flags;
 
        /* nb_pkts shall be less than or equal to RTE_IXGBE_MAX_RX_BURST */
        nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_MAX_RX_BURST);
@@ -228,18 +237,21 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
        nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);
 
        /* Just the act of getting into the function from the application is
-        * going to cost about 7 cycles */
+        * going to cost about 7 cycles
+        */
        rxdp = rxq->rx_ring + rxq->rx_tail;
 
        _mm_prefetch((const void *)rxdp, _MM_HINT_T0);
 
        /* See if we need to rearm the RX queue - gives the prefetch a bit
-        * of time to act */
+        * of time to act
+        */
        if (rxq->rxrearm_nb > RTE_IXGBE_RXQ_REARM_THRESH)
                ixgbe_rxq_rearm(rxq);
 
        /* Before we start moving massive data around, check to see if
-        * there is actually a packet available */
+        * there is actually a packet available
+        */
        if (!(rxdp->wb.upper.status_error &
                                rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
                return 0;
@@ -262,9 +274,14 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                );
 
        /* Cache is empty -> need to scan the buffer rings, but first move
-        * the next 'n' mbufs into the cache */
+        * the next 'n' mbufs into the cache
+        */
        sw_ring = &rxq->sw_ring[rxq->rx_tail];
 
+       /* ensure these 2 flags are in the lower 8 bits */
+       RTE_BUILD_BUG_ON((PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED) > UINT8_MAX);
+       vlan_flags = rxq->vlan_flags & UINT8_MAX;
+
        /* A. load 4 packets in one loop
         * [A*. mask out 4 unused dirty fields in desc]
         * B. copy 4 mbuf pointers from sw_ring to rx_pkts
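
The RTE_BUILD_BUG_ON a few lines up is what makes the single-byte vlan_map lookup safe: it breaks the build if the two VLAN flags ever stop fitting in the low byte. In DPDK of this vintage the macro is the classic negative-array-size trick, roughly (reconstruction for context; the real definition lives in rte_common.h):

	#define RTE_BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))

When the condition is true the array size is -1 and compilation fails; when false it is a harmless sizeof of char[1].
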
@@ -302,10 +319,10 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                _mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);
 
                if (split_packet) {
-                       rte_prefetch0(&rx_pkts[pos]->cacheline1);
-                       rte_prefetch0(&rx_pkts[pos + 1]->cacheline1);
-                       rte_prefetch0(&rx_pkts[pos + 2]->cacheline1);
-                       rte_prefetch0(&rx_pkts[pos + 3]->cacheline1);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
                }
 
                /* avoid compiler reorder optimization */
@@ -325,7 +342,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);
 
                /* set ol_flags with vlan packet type */
-               desc_to_olflags_v(descs, &rx_pkts[pos]);
+               desc_to_olflags_v(descs, vlan_flags, &rx_pkts[pos]);
 
                /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
                pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
@@ -359,7 +376,8 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                        /* the staterr values are not in order, as the
                         * count of dd bits doesn't care. However, for end of
                         * packet tracking, we do care, so shuffle. This also
-                        * compresses the 32-bit values to 8-bit */
+                        * compresses the 32-bit values to 8-bit
+                        */
                        eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
                        /* store the resulting 32-bit value */
                        *(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
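
What the mask/shuffle/store triple computes, per descriptor, is a flag saying "this buffer is not the end of its packet". A scalar sketch of the same bookkeeping (hypothetical helper; IXGBE_RXDADV_STAT_EOP is the end-of-packet status bit):

	static inline void
	set_split_flags_scalar(uint8_t split_flags[4], const uint32_t staterr[4])
	{
		int i;

		/* non-zero flag == more segments follow for this packet */
		for (i = 0; i < 4; i++)
			split_flags[i] = !(staterr[i] & IXGBE_RXDADV_STAT_EOP);
	}

The shuffle is only needed because the vector path gathers the four staterr words out of order and must compress them into the single 32-bit value stored through split_packet.
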
@@ -414,69 +432,6 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
        return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
-static inline uint16_t
-reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
-               uint16_t nb_bufs, uint8_t *split_flags)
-{
-       struct rte_mbuf *pkts[nb_bufs]; /*finished pkts*/
-       struct rte_mbuf *start = rxq->pkt_first_seg;
-       struct rte_mbuf *end =  rxq->pkt_last_seg;
-       unsigned pkt_idx, buf_idx;
-
-       for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {
-               if (end != NULL) {
-                       /* processing a split packet */
-                       end->next = rx_bufs[buf_idx];
-                       rx_bufs[buf_idx]->data_len += rxq->crc_len;
-
-                       start->nb_segs++;
-                       start->pkt_len += rx_bufs[buf_idx]->data_len;
-                       end = end->next;
-
-                       if (!split_flags[buf_idx]) {
-                               /* it's the last packet of the set */
-                               start->hash = end->hash;
-                               start->ol_flags = end->ol_flags;
-                               /* we need to strip crc for the whole packet */
-                               start->pkt_len -= rxq->crc_len;
-                               if (end->data_len > rxq->crc_len)
-                                       end->data_len -= rxq->crc_len;
-                               else {
-                                       /* free up last mbuf */
-                                       struct rte_mbuf *secondlast = start;
-
-                                       start->nb_segs--;
-                                       while (secondlast->next != end)
-                                               secondlast = secondlast->next;
-                                       secondlast->data_len -= (rxq->crc_len -
-                                                       end->data_len);
-                                       secondlast->next = NULL;
-                                       rte_pktmbuf_free_seg(end);
-                                       end = secondlast;
-                               }
-                               pkts[pkt_idx++] = start;
-                               start = end = NULL;
-                       }
-               } else {
-                       /* not processing a split packet */
-                       if (!split_flags[buf_idx]) {
-                               /* not a split packet, save and skip */
-                               pkts[pkt_idx++] = rx_bufs[buf_idx];
-                               continue;
-                       }
-                       end = start = rx_bufs[buf_idx];
-                       rx_bufs[buf_idx]->data_len += rxq->crc_len;
-                       rx_bufs[buf_idx]->pkt_len += rxq->crc_len;
-               }
-       }
-
-       /* save the partial packet for next time */
-       rxq->pkt_first_seg = start;
-       rxq->pkt_last_seg = end;
-       memcpy(rx_bufs, pkts, pkt_idx * (sizeof(*pkts)));
-       return pkt_idx;
-}
-
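
reassemble_packets() is deleted here because it moves into the new ixgbe_rxtx_vec_common.h (included at the top of this patch), presumably so other vector implementations of this driver can share it. Its subtlest branch strips a CRC that straddles a segment boundary; a self-contained worked example of that arithmetic, with made-up lengths:

	#include <assert.h>
	#include <stdint.h>

	int
	main(void)
	{
		uint16_t crc_len = 4;		/* Ethernet FCS */
		uint16_t end_len = 2;		/* last segment: CRC bytes only */
		uint16_t secondlast_len = 60;	/* second-to-last segment */

		if (end_len > crc_len) {
			/* common case: trim the CRC in place */
			end_len = (uint16_t)(end_len - crc_len);
		} else {
			/* the last mbuf holds nothing but CRC: it gets freed
			 * and the remaining CRC bytes come out of the
			 * previous segment */
			secondlast_len =
				(uint16_t)(secondlast_len - (crc_len - end_len));
		}
		assert(secondlast_len == 58);
		return 0;
	}
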
 /*
  * vPMD receive routine that reassembles scattered packets
  *
@@ -535,76 +490,11 @@ vtx(volatile union ixgbe_adv_tx_desc *txdp,
                struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)
 {
        int i;
+
        for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt)
                vtx1(txdp, *pkt, flags);
 }
 
-static inline int __attribute__((always_inline))
-ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
-{
-       struct ixgbe_tx_entry_v *txep;
-       uint32_t status;
-       uint32_t n;
-       uint32_t i;
-       int nb_free = 0;
-       struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
-
-       /* check DD bit on threshold descriptor */
-       status = txq->tx_ring[txq->tx_next_dd].wb.status;
-       if (!(status & IXGBE_ADVTXD_STAT_DD))
-               return 0;
-
-       n = txq->tx_rs_thresh;
-
-       /*
-        * first buffer to free from S/W ring is at index
-        * tx_next_dd - (tx_rs_thresh-1)
-        */
-       txep = &txq->sw_ring_v[txq->tx_next_dd - (n - 1)];
-       m = __rte_pktmbuf_prefree_seg(txep[0].mbuf);
-       if (likely(m != NULL)) {
-               free[0] = m;
-               nb_free = 1;
-               for (i = 1; i < n; i++) {
-                       m = __rte_pktmbuf_prefree_seg(txep[i].mbuf);
-                       if (likely(m != NULL)) {
-                               if (likely(m->pool == free[0]->pool))
-                                       free[nb_free++] = m;
-                               else {
-                                       rte_mempool_put_bulk(free[0]->pool,
-                                                       (void *)free, nb_free);
-                                       free[0] = m;
-                                       nb_free = 1;
-                               }
-                       }
-               }
-               rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
-       } else {
-               for (i = 1; i < n; i++) {
-                       m = __rte_pktmbuf_prefree_seg(txep[i].mbuf);
-                       if (m != NULL)
-                               rte_mempool_put(m->pool, m);
-               }
-       }
-
-       /* buffers were freed, update counters */
-       txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
-       txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
-       if (txq->tx_next_dd >= txq->nb_tx_desc)
-               txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
-
-       return txq->tx_rs_thresh;
-}
-
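
ixgbe_tx_free_bufs() moves to the common header as well. Its free loop is worth a second look: mbufs recycled by __rte_pktmbuf_prefree_seg() are batched per mempool so that each rte_mempool_put_bulk() call targets exactly one pool. The same idiom as a standalone sketch (hypothetical helper; assumes n <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ, as at the original call site):

	static inline void
	bulk_free_grouped_by_pool(struct rte_mbuf **mbufs, unsigned int n)
	{
		void *batch[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
		struct rte_mempool *pool = NULL;
		unsigned int nb = 0, i;

		for (i = 0; i < n; i++) {
			struct rte_mbuf *m = __rte_pktmbuf_prefree_seg(mbufs[i]);

			if (m == NULL)	/* still referenced, nothing to free */
				continue;
			if (pool != NULL && m->pool != pool) {
				/* pool changed: flush the batch so far */
				rte_mempool_put_bulk(pool, batch, nb);
				nb = 0;
			}
			pool = m->pool;
			batch[nb++] = m;
		}
		if (nb > 0)
			rte_mempool_put_bulk(pool, batch, nb);
	}
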
-static inline void __attribute__((always_inline))
-tx_backlog_entry(struct ixgbe_tx_entry_v *txep,
-                struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
-{
-       int i;
-       for (i = 0; i < (int)nb_pkts; ++i)
-               txep[i].mbuf = tx_pkts[i];
-}
-
 uint16_t
 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
                       uint16_t nb_pkts)
@@ -675,91 +565,25 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 static void __attribute__((cold))
 ixgbe_tx_queue_release_mbufs_vec(struct ixgbe_tx_queue *txq)
 {
-       unsigned i;
-       struct ixgbe_tx_entry_v *txe;
-       const uint16_t max_desc = (uint16_t)(txq->nb_tx_desc - 1);
-
-       if (txq->sw_ring == NULL || txq->nb_tx_free == max_desc)
-               return;
-
-       /* release the used mbufs in sw_ring */
-       for (i = txq->tx_next_dd - (txq->tx_rs_thresh - 1);
-            i != txq->tx_tail;
-            i = (i + 1) & max_desc) {
-               txe = &txq->sw_ring_v[i];
-               rte_pktmbuf_free_seg(txe->mbuf);
-       }
-       txq->nb_tx_free = max_desc;
-
-       /* reset tx_entry */
-       for (i = 0; i < txq->nb_tx_desc; i++) {
-               txe = &txq->sw_ring_v[i];
-               txe->mbuf = NULL;
-       }
+       _ixgbe_tx_queue_release_mbufs_vec(txq);
 }
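
Each wrapper from here on keeps its public name and forwards to a `_`-prefixed static inline helper in ixgbe_rxtx_vec_common.h; the bodies deleted in this hunk appear to be relocated essentially verbatim. Reconstructed shape of the first one (a sketch from the removed lines above; the authoritative version is in the common header):

	static inline void
	_ixgbe_tx_queue_release_mbufs_vec(struct ixgbe_tx_queue *txq)
	{
		unsigned int i;
		const uint16_t max_desc = (uint16_t)(txq->nb_tx_desc - 1);

		if (txq->sw_ring == NULL || txq->nb_tx_free == max_desc)
			return;

		/* release the used mbufs in sw_ring, then clear every entry */
		for (i = txq->tx_next_dd - (txq->tx_rs_thresh - 1);
		     i != txq->tx_tail;
		     i = (i + 1) & max_desc)
			rte_pktmbuf_free_seg(txq->sw_ring_v[i].mbuf);
		txq->nb_tx_free = max_desc;

		for (i = 0; i < txq->nb_tx_desc; i++)
			txq->sw_ring_v[i].mbuf = NULL;
	}

This split lets an architecture-specific file (this SSE one, or a sibling using another instruction set) supply only the hot RX/TX loops while queue setup and teardown stay shared.
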
 
 void __attribute__((cold))
 ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
 {
-       const unsigned mask = rxq->nb_rx_desc - 1;
-       unsigned i;
-
-       if (rxq->sw_ring == NULL || rxq->rxrearm_nb >= rxq->nb_rx_desc)
-               return;
-
-       /* free all mbufs that are valid in the ring */
-       for (i = rxq->rx_tail; i != rxq->rxrearm_start; i = (i + 1) & mask)
-               rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
-       rxq->rxrearm_nb = rxq->nb_rx_desc;
-
-       /* set all entries to NULL */
-       memset(rxq->sw_ring, 0, sizeof(rxq->sw_ring[0]) * rxq->nb_rx_desc);
+       _ixgbe_rx_queue_release_mbufs_vec(rxq);
 }
 
 static void __attribute__((cold))
 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
 {
-       if (txq == NULL)
-               return;
-
-       if (txq->sw_ring != NULL) {
-               rte_free(txq->sw_ring_v - 1);
-               txq->sw_ring_v = NULL;
-       }
+       _ixgbe_tx_free_swring_vec(txq);
 }
 
 static void __attribute__((cold))
 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
 {
-       static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
-       struct ixgbe_tx_entry_v *txe = txq->sw_ring_v;
-       uint16_t i;
-
-       /* Zero out HW ring memory */
-       for (i = 0; i < txq->nb_tx_desc; i++)
-               txq->tx_ring[i] = zeroed_desc;
-
-       /* Initialize SW ring entries */
-       for (i = 0; i < txq->nb_tx_desc; i++) {
-               volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
-               txd->wb.status = IXGBE_TXD_STAT_DD;
-               txe[i].mbuf = NULL;
-       }
-
-       txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
-       txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
-
-       txq->tx_tail = 0;
-       txq->nb_tx_used = 0;
-       /*
-        * Always allow 1 descriptor to be un-allocated to avoid
-        * a H/W race condition
-        */
-       txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
-       txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
-       txq->ctx_curr = 0;
-       memset((void *)&txq->ctx_cache, 0,
-               IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
+       _ixgbe_reset_tx_queue_vec(txq);
 }
 
 static const struct ixgbe_txq_ops vec_txq_ops = {
@@ -771,63 +595,17 @@ static const struct ixgbe_txq_ops vec_txq_ops = {
 int __attribute__((cold))
 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq)
 {
-       uintptr_t p;
-       struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */
-
-       mb_def.nb_segs = 1;
-       mb_def.data_off = RTE_PKTMBUF_HEADROOM;
-       mb_def.port = rxq->port_id;
-       rte_mbuf_refcnt_set(&mb_def, 1);
-
-       /* prevent compiler reordering: rearm_data covers previous fields */
-       rte_compiler_barrier();
-       p = (uintptr_t)&mb_def.rearm_data;
-       rxq->mbuf_initializer = *(uint64_t *)p;
-       return 0;
+       return ixgbe_rxq_vec_setup_default(rxq);
 }
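
The setup body removed here built a template mbuf and cached the 64-bit word behind its rearm_data marker; nb_segs, port, refcnt and data_off all live in that window, so the rearm path can reinitialize a fresh mbuf with a single 8-byte store. A sketch of the consuming side (assuming the standard rte_mbuf layout; the helper name is illustrative):

	static inline void
	rearm_one_mbuf(struct rte_mbuf *mb, const struct ixgbe_rx_queue *rxq)
	{
		/* replay data_off/refcnt/nb_segs/port from the cached template */
		*(uint64_t *)&mb->rearm_data = rxq->mbuf_initializer;
	}

The rte_compiler_barrier() in the removed code kept the compiler from reordering the template's field writes past the read of rearm_data.
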
 
 int __attribute__((cold))
 ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq)
 {
-       if (txq->sw_ring_v == NULL)
-               return -1;
-
-       /* leave the first one for overflow */
-       txq->sw_ring_v = txq->sw_ring_v + 1;
-       txq->ops = &vec_txq_ops;
-
-       return 0;
+       return ixgbe_txq_vec_setup_default(txq, &vec_txq_ops);
 }
 
 int __attribute__((cold))
 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
 {
-#ifndef RTE_LIBRTE_IEEE1588
-       struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
-       struct rte_fdir_conf *fconf = &dev->data->dev_conf.fdir_conf;
-
-#ifndef RTE_IXGBE_RX_OLFLAGS_ENABLE
-       /* without rx ol_flags, no VP flag report */
-       if (rxmode->hw_vlan_strip != 0 ||
-           rxmode->hw_vlan_extend != 0)
-               return -1;
-#endif
-
-       /* no fdir support */
-       if (fconf->mode != RTE_FDIR_MODE_NONE)
-               return -1;
-
-       /*
-        * - no csum error report support
-        * - no header split support
-        */
-       if (rxmode->hw_ip_checksum == 1 ||
-           rxmode->header_split == 1)
-               return -1;
-
-       return 0;
-#else
-       RTE_SET_USED(dev);
-       return -1;
-#endif
+       return ixgbe_rx_vec_dev_conf_condition_check_default(dev);
 }
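
For reference, the conditions that moved into the default helper gate vector RX on the device configuration: per the removed lines, flow director, checksum reporting and header split all force the scalar path (and RTE_LIBRTE_IEEE1588 disables vector RX at build time). A sketch of the predicate, using the DPDK 16.07 field names visible above:

	static int
	config_allows_vector_rx(const struct rte_eth_conf *conf)
	{
		/* mirrors the removed checks: no fdir, no csum error
		 * reporting, no header split */
		return conf->fdir_conf.mode == RTE_FDIR_MODE_NONE &&
		       conf->rxmode.hw_ip_checksum == 0 &&
		       conf->rxmode.header_split == 0;
	}
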