#include "ixgbe_ethdev.h"
#include "ixgbe_rxtx.h"
+#include "ixgbe_rxtx_vec_common.h"
#include <tmmintrin.h>
*/
#ifdef RTE_IXGBE_RX_OLFLAGS_ENABLE
-#define VTAG_SHIFT (3)
-
static inline void
-desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
+desc_to_olflags_v(__m128i descs[4], uint8_t vlan_flags,
+ struct rte_mbuf **rx_pkts)
{
__m128i ptype0, ptype1, vtag0, vtag1;
union {
uint64_t dword;
} vol;
- /* pkt type + vlan olflags mask */
- const __m128i pkttype_msk = _mm_set_epi16(
- 0x0000, 0x0000, 0x0000, 0x0000,
- PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT);
-
/* mask everything except rss type */
const __m128i rsstype_msk = _mm_set_epi16(
0x0000, 0x0000, 0x0000, 0x0000,
PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH, 0,
PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, 0);
+ /* mask everything except vlan present bit */
+ const __m128i vlan_msk = _mm_set_epi16(
+ 0x0000, 0x0000,
+ 0x0000, 0x0000,
+ IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP,
+ IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP);
+ /* map vlan present (0x8) to ol_flags */
+ const __m128i vlan_map = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, vlan_flags,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0);
+
ptype0 = _mm_unpacklo_epi16(descs[0], descs[1]);
ptype1 = _mm_unpacklo_epi16(descs[2], descs[3]);
vtag0 = _mm_unpackhi_epi16(descs[0], descs[1]);
ptype0 = _mm_shuffle_epi8(rss_flags, ptype0);
vtag1 = _mm_unpacklo_epi32(vtag0, vtag1);
- vtag1 = _mm_srli_epi16(vtag1, VTAG_SHIFT);
- vtag1 = _mm_and_si128(vtag1, pkttype_msk);
+ vtag1 = _mm_and_si128(vtag1, vlan_msk);
+ vtag1 = _mm_shuffle_epi8(vlan_map, vtag1);
vtag1 = _mm_or_si128(ptype0, vtag1);
vol.dword = _mm_cvtsi128_si64(vtag1);
0, 0 /* ignore pkt_type field */
);
__m128i dd_check, eop_check;
+ uint8_t vlan_flags;
/* nb_pkts shall be less equal than RTE_IXGBE_MAX_RX_BURST */
nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_MAX_RX_BURST);
nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);
/* Just the act of getting into the function from the application is
- * going to cost about 7 cycles */
+ * going to cost about 7 cycles
+ */
rxdp = rxq->rx_ring + rxq->rx_tail;
_mm_prefetch((const void *)rxdp, _MM_HINT_T0);
/* See if we need to rearm the RX queue - gives the prefetch a bit
- * of time to act */
+ * of time to act
+ */
if (rxq->rxrearm_nb > RTE_IXGBE_RXQ_REARM_THRESH)
ixgbe_rxq_rearm(rxq);
/* Before we start moving massive data around, check to see if
- * there is actually a packet available */
+ * there is actually a packet available
+ */
if (!(rxdp->wb.upper.status_error &
rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
return 0;
);
/* Cache is empty -> need to scan the buffer rings, but first move
- * the next 'n' mbufs into the cache */
+ * the next 'n' mbufs into the cache
+ */
sw_ring = &rxq->sw_ring[rxq->rx_tail];
+ /* ensure these 2 flags are in the lower 8 bits */
+ RTE_BUILD_BUG_ON((PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED) > UINT8_MAX);
+ vlan_flags = rxq->vlan_flags & UINT8_MAX;
+
/* A. load 4 packet in one loop
* [A*. mask out 4 unused dirty field in desc]
* B. copy 4 mbuf point from swring to rx_pkts
_mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);
if (split_packet) {
- rte_prefetch0(&rx_pkts[pos]->cacheline1);
- rte_prefetch0(&rx_pkts[pos + 1]->cacheline1);
- rte_prefetch0(&rx_pkts[pos + 2]->cacheline1);
- rte_prefetch0(&rx_pkts[pos + 3]->cacheline1);
+ rte_mbuf_prefetch_part2(rx_pkts[pos]);
+ rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+ rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+ rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
}
/* avoid compiler reorder optimization */
sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);
/* set ol_flags with vlan packet type */
- desc_to_olflags_v(descs, &rx_pkts[pos]);
+ desc_to_olflags_v(descs, vlan_flags, &rx_pkts[pos]);
/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
/* the staterr values are not in order, as the count
* count of dd bits doesn't care. However, for end of
* packet tracking, we do care, so shuffle. This also
- * compresses the 32-bit values to 8-bit */
+ * compresses the 32-bit values to 8-bit
+ */
eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
/* store the resulting 32-bit value */
*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
}
-static inline uint16_t
-reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
- uint16_t nb_bufs, uint8_t *split_flags)
-{
- struct rte_mbuf *pkts[nb_bufs]; /*finished pkts*/
- struct rte_mbuf *start = rxq->pkt_first_seg;
- struct rte_mbuf *end = rxq->pkt_last_seg;
- unsigned pkt_idx, buf_idx;
-
- for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {
- if (end != NULL) {
- /* processing a split packet */
- end->next = rx_bufs[buf_idx];
- rx_bufs[buf_idx]->data_len += rxq->crc_len;
-
- start->nb_segs++;
- start->pkt_len += rx_bufs[buf_idx]->data_len;
- end = end->next;
-
- if (!split_flags[buf_idx]) {
- /* it's the last packet of the set */
- start->hash = end->hash;
- start->ol_flags = end->ol_flags;
- /* we need to strip crc for the whole packet */
- start->pkt_len -= rxq->crc_len;
- if (end->data_len > rxq->crc_len)
- end->data_len -= rxq->crc_len;
- else {
- /* free up last mbuf */
- struct rte_mbuf *secondlast = start;
-
- start->nb_segs--;
- while (secondlast->next != end)
- secondlast = secondlast->next;
- secondlast->data_len -= (rxq->crc_len -
- end->data_len);
- secondlast->next = NULL;
- rte_pktmbuf_free_seg(end);
- end = secondlast;
- }
- pkts[pkt_idx++] = start;
- start = end = NULL;
- }
- } else {
- /* not processing a split packet */
- if (!split_flags[buf_idx]) {
- /* not a split packet, save and skip */
- pkts[pkt_idx++] = rx_bufs[buf_idx];
- continue;
- }
- end = start = rx_bufs[buf_idx];
- rx_bufs[buf_idx]->data_len += rxq->crc_len;
- rx_bufs[buf_idx]->pkt_len += rxq->crc_len;
- }
- }
-
- /* save the partial packet for next time */
- rxq->pkt_first_seg = start;
- rxq->pkt_last_seg = end;
- memcpy(rx_bufs, pkts, pkt_idx * (sizeof(*pkts)));
- return pkt_idx;
-}
-
/*
* vPMD receive routine that reassembles scattered packets
*
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
int i;
+
for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt)
vtx1(txdp, *pkt, flags);
}
-static inline int __attribute__((always_inline))
-ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
-{
- struct ixgbe_tx_entry_v *txep;
- uint32_t status;
- uint32_t n;
- uint32_t i;
- int nb_free = 0;
- struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
-
- /* check DD bit on threshold descriptor */
- status = txq->tx_ring[txq->tx_next_dd].wb.status;
- if (!(status & IXGBE_ADVTXD_STAT_DD))
- return 0;
-
- n = txq->tx_rs_thresh;
-
- /*
- * first buffer to free from S/W ring is at index
- * tx_next_dd - (tx_rs_thresh-1)
- */
- txep = &txq->sw_ring_v[txq->tx_next_dd - (n - 1)];
- m = __rte_pktmbuf_prefree_seg(txep[0].mbuf);
- if (likely(m != NULL)) {
- free[0] = m;
- nb_free = 1;
- for (i = 1; i < n; i++) {
- m = __rte_pktmbuf_prefree_seg(txep[i].mbuf);
- if (likely(m != NULL)) {
- if (likely(m->pool == free[0]->pool))
- free[nb_free++] = m;
- else {
- rte_mempool_put_bulk(free[0]->pool,
- (void *)free, nb_free);
- free[0] = m;
- nb_free = 1;
- }
- }
- }
- rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
- } else {
- for (i = 1; i < n; i++) {
- m = __rte_pktmbuf_prefree_seg(txep[i].mbuf);
- if (m != NULL)
- rte_mempool_put(m->pool, m);
- }
- }
-
- /* buffers were freed, update counters */
- txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
- txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
- if (txq->tx_next_dd >= txq->nb_tx_desc)
- txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
-
- return txq->tx_rs_thresh;
-}
-
-static inline void __attribute__((always_inline))
-tx_backlog_entry(struct ixgbe_tx_entry_v *txep,
- struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
-{
- int i;
- for (i = 0; i < (int)nb_pkts; ++i)
- txep[i].mbuf = tx_pkts[i];
-}
-
uint16_t
ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
static void __attribute__((cold))
ixgbe_tx_queue_release_mbufs_vec(struct ixgbe_tx_queue *txq)
{
- unsigned i;
- struct ixgbe_tx_entry_v *txe;
- const uint16_t max_desc = (uint16_t)(txq->nb_tx_desc - 1);
-
- if (txq->sw_ring == NULL || txq->nb_tx_free == max_desc)
- return;
-
- /* release the used mbufs in sw_ring */
- for (i = txq->tx_next_dd - (txq->tx_rs_thresh - 1);
- i != txq->tx_tail;
- i = (i + 1) & max_desc) {
- txe = &txq->sw_ring_v[i];
- rte_pktmbuf_free_seg(txe->mbuf);
- }
- txq->nb_tx_free = max_desc;
-
- /* reset tx_entry */
- for (i = 0; i < txq->nb_tx_desc; i++) {
- txe = &txq->sw_ring_v[i];
- txe->mbuf = NULL;
- }
+ _ixgbe_tx_queue_release_mbufs_vec(txq);
}
void __attribute__((cold))
ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
{
- const unsigned mask = rxq->nb_rx_desc - 1;
- unsigned i;
-
- if (rxq->sw_ring == NULL || rxq->rxrearm_nb >= rxq->nb_rx_desc)
- return;
-
- /* free all mbufs that are valid in the ring */
- for (i = rxq->rx_tail; i != rxq->rxrearm_start; i = (i + 1) & mask)
- rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
- rxq->rxrearm_nb = rxq->nb_rx_desc;
-
- /* set all entries to NULL */
- memset(rxq->sw_ring, 0, sizeof(rxq->sw_ring[0]) * rxq->nb_rx_desc);
+ _ixgbe_rx_queue_release_mbufs_vec(rxq);
}
static void __attribute__((cold))
ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
{
- if (txq == NULL)
- return;
-
- if (txq->sw_ring != NULL) {
- rte_free(txq->sw_ring_v - 1);
- txq->sw_ring_v = NULL;
- }
+ _ixgbe_tx_free_swring_vec(txq);
}
static void __attribute__((cold))
ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
{
- static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
- struct ixgbe_tx_entry_v *txe = txq->sw_ring_v;
- uint16_t i;
-
- /* Zero out HW ring memory */
- for (i = 0; i < txq->nb_tx_desc; i++)
- txq->tx_ring[i] = zeroed_desc;
-
- /* Initialize SW ring entries */
- for (i = 0; i < txq->nb_tx_desc; i++) {
- volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
- txd->wb.status = IXGBE_TXD_STAT_DD;
- txe[i].mbuf = NULL;
- }
-
- txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
- txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
-
- txq->tx_tail = 0;
- txq->nb_tx_used = 0;
- /*
- * Always allow 1 descriptor to be un-allocated to avoid
- * a H/W race condition
- */
- txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
- txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
- txq->ctx_curr = 0;
- memset((void *)&txq->ctx_cache, 0,
- IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
+ _ixgbe_reset_tx_queue_vec(txq);
}
static const struct ixgbe_txq_ops vec_txq_ops = {
int __attribute__((cold))
ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq)
{
- uintptr_t p;
- struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */
-
- mb_def.nb_segs = 1;
- mb_def.data_off = RTE_PKTMBUF_HEADROOM;
- mb_def.port = rxq->port_id;
- rte_mbuf_refcnt_set(&mb_def, 1);
-
- /* prevent compiler reordering: rearm_data covers previous fields */
- rte_compiler_barrier();
- p = (uintptr_t)&mb_def.rearm_data;
- rxq->mbuf_initializer = *(uint64_t *)p;
- return 0;
+ return ixgbe_rxq_vec_setup_default(rxq);
}
int __attribute__((cold))
ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq)
{
- if (txq->sw_ring_v == NULL)
- return -1;
-
- /* leave the first one for overflow */
- txq->sw_ring_v = txq->sw_ring_v + 1;
- txq->ops = &vec_txq_ops;
-
- return 0;
+ return ixgbe_txq_vec_setup_default(txq, &vec_txq_ops);
}
int __attribute__((cold))
ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
{
-#ifndef RTE_LIBRTE_IEEE1588
- struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
- struct rte_fdir_conf *fconf = &dev->data->dev_conf.fdir_conf;
-
-#ifndef RTE_IXGBE_RX_OLFLAGS_ENABLE
- /* whithout rx ol_flags, no VP flag report */
- if (rxmode->hw_vlan_strip != 0 ||
- rxmode->hw_vlan_extend != 0)
- return -1;
-#endif
-
- /* no fdir support */
- if (fconf->mode != RTE_FDIR_MODE_NONE)
- return -1;
-
- /*
- * - no csum error report support
- * - no header split support
- */
- if (rxmode->hw_ip_checksum == 1 ||
- rxmode->header_split == 1)
- return -1;
-
- return 0;
-#else
- RTE_SET_USED(dev);
- return -1;
-#endif
+ return ixgbe_rx_vec_dev_conf_condition_check_default(dev);
}