/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   Copyright 2014 6WIND S.A.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <stdarg.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_ip.h>

#include "ixgbe_logs.h"
#include "base/ixgbe_api.h"
#include "base/ixgbe_vf.h"
#include "ixgbe_ethdev.h"
#include "base/ixgbe_dcb.h"
#include "base/ixgbe_common.h"
#include "ixgbe_rxtx.h"

/* Bit mask to indicate which bits are required for building the TX context */
#define IXGBE_TX_OFFLOAD_MASK (                  \
                PKT_TX_VLAN_PKT |                \
                PKT_TX_IP_CKSUM |                \
                PKT_TX_L4_MASK |                 \
                PKT_TX_TCP_SEG |                 \
                PKT_TX_OUTER_IP_CKSUM)
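
/*
 * Illustrative example (not part of the original driver): an application
 * typically requests these offloads per mbuf before calling tx_burst, e.g.
 *
 *     m->ol_flags |= PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM;
 *     m->l2_len = sizeof(struct ether_hdr);
 *     m->l3_len = sizeof(struct ipv4_hdr);
 *
 * Only the ol_flags bits covered by IXGBE_TX_OFFLOAD_MASK participate in
 * TX context descriptor selection in the full-featured TX path below; all
 * other bits are ignored by it.
 */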

static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
        struct rte_mbuf *m;

        m = __rte_mbuf_raw_alloc(mp);
        __rte_mbuf_sanity_check_raw(m, 0);

        return m;
}

#define RTE_PMD_USE_PREFETCH

#ifdef RTE_PMD_USE_PREFETCH
/*
 * Prefetch a cache line into all cache levels.
 */
#define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
#else
#define rte_ixgbe_prefetch(p)   do {} while (0)
#endif

/*********************************************************************
 *
 *  TX functions
 *
 **********************************************************************/

/*
 * Check for descriptors with their DD bit set and free mbufs.
 * Return the total number of buffers freed.
 */
static inline int __attribute__((always_inline))
ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
{
        struct ixgbe_tx_entry *txep;
        uint32_t status;
        int i, nb_free = 0;
        struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];

        /* check DD bit on threshold descriptor */
        status = txq->tx_ring[txq->tx_next_dd].wb.status;
        if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
                return 0;

        /*
         * first buffer to free from S/W ring is at index
         * tx_next_dd - (tx_rs_thresh - 1)
         */
        txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);

        for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
                /* free buffers one at a time */
                m = __rte_pktmbuf_prefree_seg(txep->mbuf);
                txep->mbuf = NULL;

                if (unlikely(m == NULL))
                        continue;

                if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
                    (nb_free > 0 && m->pool != free[0]->pool)) {
                        rte_mempool_put_bulk(free[0]->pool,
                                             (void **)free, nb_free);
                        nb_free = 0;
                }

                free[nb_free++] = m;
        }

        if (nb_free > 0)
                rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);

        /* buffers were freed, update counters */
        txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
        txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
        if (txq->tx_next_dd >= txq->nb_tx_desc)
                txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);

        return txq->tx_rs_thresh;
}
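
/*
 * Worked example (illustrative, not in the original source): with
 * nb_tx_desc = 512 and tx_rs_thresh = 32, the threshold ("RS") descriptors
 * sit at indexes 31, 63, ..., 511.  Each successful call above frees exactly
 * one 32-descriptor batch and advances tx_next_dd by 32; after the batch
 * ending at index 511 completes, tx_next_dd wraps back to 31.
 */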

/* Populate 4 descriptors with data from 4 mbufs */
static inline void
tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
{
        uint64_t buf_dma_addr;
        uint32_t pkt_len;
        int i;

        for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
                buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
                pkt_len = (*pkts)->data_len;

                /* write data to descriptor */
                txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);

                txdp->read.cmd_type_len =
                        rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);

                txdp->read.olinfo_status =
                        rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);

                rte_prefetch0(&(*pkts)->pool);
        }
}

/* Populate 1 descriptor with data from 1 mbuf */
static inline void
tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
{
        uint64_t buf_dma_addr;
        uint32_t pkt_len;

        buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
        pkt_len = (*pkts)->data_len;

        /* write data to descriptor */
        txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
        txdp->read.cmd_type_len =
                rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
        txdp->read.olinfo_status =
                rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
        rte_prefetch0(&(*pkts)->pool);
}

/*
 * Fill H/W descriptor ring with mbuf data.
 * Copy mbuf pointers to the S/W ring.
 */
static inline void
ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
                      uint16_t nb_pkts)
{
        volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
        struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
        const int N_PER_LOOP = 4;
        const int N_PER_LOOP_MASK = N_PER_LOOP - 1;
        int mainpart, leftover;
        int i, j;

        /*
         * Process most of the packets in chunks of N pkts.  Any
         * leftover packets will get processed one at a time.
         */
        mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
        leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
        for (i = 0; i < mainpart; i += N_PER_LOOP) {
                /* Copy N mbuf pointers to the S/W ring */
                for (j = 0; j < N_PER_LOOP; ++j) {
                        (txep + i + j)->mbuf = *(pkts + i + j);
                }
                tx4(txdp + i, pkts + i);
        }

        if (unlikely(leftover > 0)) {
                for (i = 0; i < leftover; ++i) {
                        (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
                        tx1(txdp + mainpart + i, pkts + mainpart + i);
                }
        }
}
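
/*
 * Example (illustrative): for nb_pkts = 11 and N_PER_LOOP = 4, the bit
 * masking above yields mainpart = 8 and leftover = 3, i.e. two tx4()
 * calls followed by three tx1() calls.
 */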

static inline uint16_t
tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
             uint16_t nb_pkts)
{
        struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
        volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
        uint16_t n = 0;

        /*
         * Begin scanning the H/W ring for done descriptors when the
         * number of available descriptors drops below tx_free_thresh.  For
         * each done descriptor, free the associated buffer.
         */
        if (txq->nb_tx_free < txq->tx_free_thresh)
                ixgbe_tx_free_bufs(txq);

        /* Only use descriptors that are available */
        nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
        if (unlikely(nb_pkts == 0))
                return 0;

        /* Use exactly nb_pkts descriptors */
        txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);

        /*
         * At this point, we know there are enough descriptors in the
         * ring to transmit all the packets.  This assumes that each
         * mbuf contains a single segment, and that no new offloads
         * are expected, which would require a new context descriptor.
         */

        /*
         * See if we're going to wrap-around.  If so, handle the top
         * of the descriptor ring first, then do the bottom.  If not,
         * the processing looks just like the "bottom" part anyway...
         */
        if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
                n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
                ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);

                /*
                 * We know that the last descriptor in the ring will need to
                 * have its RS bit set because tx_rs_thresh has to be
                 * a divisor of the ring size
                 */
                tx_r[txq->tx_next_rs].read.cmd_type_len |=
                        rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
                txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);

                txq->tx_tail = 0;
        }

        /* Fill H/W descriptor ring with mbuf data */
        ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
        txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));

        /*
         * Determine if RS bit should be set
         * This is what we actually want:
         *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
         * but instead of subtracting 1 and doing >=, we can just do
         * greater than without subtracting.
         */
        if (txq->tx_tail > txq->tx_next_rs) {
                tx_r[txq->tx_next_rs].read.cmd_type_len |=
                        rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
                txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
                                                txq->tx_rs_thresh);
                if (txq->tx_next_rs >= txq->nb_tx_desc)
                        txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
        }

        /*
         * Check for wrap-around.  This would only happen if we used
         * up to the last descriptor in the ring, no more, no less.
         */
        if (txq->tx_tail >= txq->nb_tx_desc)
                txq->tx_tail = 0;

        /* update tail pointer */
        rte_wmb();
        IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);

        return nb_pkts;
}
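
/*
 * Worked example (illustrative): with nb_tx_desc = 512, tx_rs_thresh = 32
 * and tx_next_rs = 31, transmitting 32 single-segment packets from
 * tx_tail = 0 moves tx_tail to 32 > 31, so the RS bit is set on
 * descriptor 31 and tx_next_rs advances to 63.
 */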

uint16_t
ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
                       uint16_t nb_pkts)
{
        uint16_t nb_tx;

        /* Try to transmit at least chunks of TX_MAX_BURST pkts */
        if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
                return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);

        /* transmit more than the max burst, in chunks of TX_MAX_BURST */
        nb_tx = 0;
        while (nb_pkts) {
                uint16_t ret, n;

                n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
                ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
                nb_tx = (uint16_t)(nb_tx + ret);
                nb_pkts = (uint16_t)(nb_pkts - ret);
                if (ret < n)
                        break;
        }

        return nb_tx;
}
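
/*
 * Usage sketch (illustrative, assumes a port configured for the "simple"
 * TX path): this handler is installed as the device's tx_pkt_burst
 * callback, so it is what
 *
 *     uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, n);
 *
 * ends up invoking; bursts larger than RTE_PMD_IXGBE_TX_MAX_BURST are
 * simply split into smaller chunks above.
 */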

static inline void
ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
                volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
                uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
{
        uint32_t type_tucmd_mlhl;
        uint32_t mss_l4len_idx = 0;
        uint32_t ctx_idx;
        uint32_t vlan_macip_lens;
        union ixgbe_tx_offload tx_offload_mask;
        uint32_t seqnum_seed = 0;

        ctx_idx = txq->ctx_curr;
        tx_offload_mask.data[0] = 0;
        tx_offload_mask.data[1] = 0;
        type_tucmd_mlhl = 0;

        /* Specify which HW CTX to upload. */
        mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);

        if (ol_flags & PKT_TX_VLAN_PKT) {
                tx_offload_mask.vlan_tci |= ~0;
        }

        /* check if TCP segmentation required for this packet */
        if (ol_flags & PKT_TX_TCP_SEG) {
                /* implies IP cksum in IPv4 */
                if (ol_flags & PKT_TX_IP_CKSUM)
                        type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
                                IXGBE_ADVTXD_TUCMD_L4T_TCP |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
                else
                        type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
                                IXGBE_ADVTXD_TUCMD_L4T_TCP |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;

                tx_offload_mask.l2_len |= ~0;
                tx_offload_mask.l3_len |= ~0;
                tx_offload_mask.l4_len |= ~0;
                tx_offload_mask.tso_segsz |= ~0;
                mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
                mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
        } else { /* no TSO, check if hardware checksum is needed */
                if (ol_flags & PKT_TX_IP_CKSUM) {
                        type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
                        tx_offload_mask.l2_len |= ~0;
                        tx_offload_mask.l3_len |= ~0;
                }

                switch (ol_flags & PKT_TX_L4_MASK) {
                case PKT_TX_UDP_CKSUM:
                        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
                        mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
                        tx_offload_mask.l2_len |= ~0;
                        tx_offload_mask.l3_len |= ~0;
                        break;
                case PKT_TX_TCP_CKSUM:
                        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
                        mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
                        tx_offload_mask.l2_len |= ~0;
                        tx_offload_mask.l3_len |= ~0;
                        break;
                case PKT_TX_SCTP_CKSUM:
                        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
                        mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
                        tx_offload_mask.l2_len |= ~0;
                        tx_offload_mask.l3_len |= ~0;
                        break;
                default:
                        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
                        break;
                }
        }

        if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
                tx_offload_mask.outer_l2_len |= ~0;
                tx_offload_mask.outer_l3_len |= ~0;
                tx_offload_mask.l2_len |= ~0;
                seqnum_seed |= tx_offload.outer_l3_len
                               << IXGBE_ADVTXD_OUTER_IPLEN;
                seqnum_seed |= tx_offload.l2_len
                               << IXGBE_ADVTXD_TUNNEL_LEN;
        }

        txq->ctx_cache[ctx_idx].flags = ol_flags;
        txq->ctx_cache[ctx_idx].tx_offload.data[0] =
                tx_offload_mask.data[0] & tx_offload.data[0];
        txq->ctx_cache[ctx_idx].tx_offload.data[1] =
                tx_offload_mask.data[1] & tx_offload.data[1];
        txq->ctx_cache[ctx_idx].tx_offload_mask = tx_offload_mask;

        ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
        vlan_macip_lens = tx_offload.l3_len;
        if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
                vlan_macip_lens |= (tx_offload.outer_l2_len <<
                                    IXGBE_ADVTXD_MACLEN_SHIFT);
        else
                vlan_macip_lens |= (tx_offload.l2_len <<
                                    IXGBE_ADVTXD_MACLEN_SHIFT);
        vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
        ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
        ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
        ctx_txd->seqnum_seed = seqnum_seed;
}

/*
 * Check which hardware context can be used.  Use the existing match
 * or create a new context descriptor.
 */
static inline uint32_t
what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
                   union ixgbe_tx_offload tx_offload)
{
        /* If match with the currently used context */
        if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
                   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
                    (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
                     & tx_offload.data[0])) &&
                   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
                    (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
                     & tx_offload.data[1])))) {
                return txq->ctx_curr;
        }

        /* Otherwise, try the other (next) context */
        txq->ctx_curr ^= 1;
        if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
                   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
                    (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
                     & tx_offload.data[0])) &&
                   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
                    (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
                     & tx_offload.data[1])))) {
                return txq->ctx_curr;
        }

        /* Mismatch: both cached contexts miss, a new one must be built */
        return IXGBE_CTX_NUM;
}
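
/*
 * Note (illustrative): the hardware provides two on-chip TX contexts per
 * queue, so the cache above holds two entries and ctx_curr flips between
 * them.  For example, alternating TSO and plain-checksum packets keep
 * hitting the two cached contexts and never pay for a rebuild; a third
 * distinct offload combination returns IXGBE_CTX_NUM and forces
 * ixgbe_set_xmit_ctx() to rebuild one slot.
 */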

static inline uint32_t
tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
{
        uint32_t tmp = 0;

        if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
                tmp |= IXGBE_ADVTXD_POPTS_TXSM;
        if (ol_flags & PKT_TX_IP_CKSUM)
                tmp |= IXGBE_ADVTXD_POPTS_IXSM;
        if (ol_flags & PKT_TX_TCP_SEG)
                tmp |= IXGBE_ADVTXD_POPTS_TXSM;
        return tmp;
}

static inline uint32_t
tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
{
        uint32_t cmdtype = 0;

        if (ol_flags & PKT_TX_VLAN_PKT)
                cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
        if (ol_flags & PKT_TX_TCP_SEG)
                cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
        if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
                cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
        return cmdtype;
}
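
/*
 * Example mapping (illustrative): a TSO packet with PKT_TX_TCP_SEG and
 * PKT_TX_IP_CKSUM set gets olinfo |= TXSM | IXSM (both checksums inserted)
 * and cmdtype |= TSE (segmentation enabled); a plain VLAN packet only
 * gets cmdtype |= VLE.
 */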

/* Default RS bit threshold values */
#ifndef DEFAULT_TX_RS_THRESH
#define DEFAULT_TX_RS_THRESH   32
#endif
#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif

/* Reset transmit descriptors after they have been used */
static inline int
ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
{
        struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
        volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
        uint16_t last_desc_cleaned = txq->last_desc_cleaned;
        uint16_t nb_tx_desc = txq->nb_tx_desc;
        uint16_t desc_to_clean_to;
        uint16_t nb_tx_to_clean;
        uint32_t status;

        /* Determine the last descriptor needing to be cleaned */
        desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
        if (desc_to_clean_to >= nb_tx_desc)
                desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);

        /* Check to make sure the last descriptor to clean is done */
        desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
        status = txr[desc_to_clean_to].wb.status;
        if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
                PMD_TX_FREE_LOG(DEBUG,
                                "TX descriptor %4u is not done "
                                "(port=%d queue=%d)",
                                desc_to_clean_to,
                                txq->port_id, txq->queue_id);
                /* Failed to clean any descriptors, better luck next time */
                return -(1);
        }

        /* Figure out how many descriptors will be cleaned */
        if (last_desc_cleaned > desc_to_clean_to)
                nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
                                                        desc_to_clean_to);
        else
                nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
                                                last_desc_cleaned);

        PMD_TX_FREE_LOG(DEBUG,
                        "Cleaning %4u TX descriptors: %4u to %4u "
                        "(port=%d queue=%d)",
                        nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
                        txq->port_id, txq->queue_id);

        /*
         * The last descriptor to clean is done, so that means all the
         * descriptors from the last descriptor that was cleaned
         * up to the last descriptor with the RS bit set
         * are done.  Only reset the threshold descriptor.
         */
        txr[desc_to_clean_to].wb.status = 0;

        /* Update the txq to reflect the last descriptor that was cleaned */
        txq->last_desc_cleaned = desc_to_clean_to;
        txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);

        /* No Error */
        return 0;
}

uint16_t
ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                uint16_t nb_pkts)
{
        struct ixgbe_tx_queue *txq;
        struct ixgbe_tx_entry *sw_ring;
        struct ixgbe_tx_entry *txe, *txn;
        volatile union ixgbe_adv_tx_desc *txr;
        volatile union ixgbe_adv_tx_desc *txd, *txp;
        struct rte_mbuf *tx_pkt;
        struct rte_mbuf *m_seg;
        uint64_t buf_dma_addr;
        uint32_t olinfo_status;
        uint32_t cmd_type_len;
        uint32_t pkt_len;
        uint16_t slen;
        uint64_t ol_flags;
        uint16_t tx_id;
        uint16_t tx_last;
        uint16_t nb_tx;
        uint16_t nb_used;
        uint64_t tx_ol_req;
        uint32_t ctx = 0;
        uint32_t new_ctx;
        union ixgbe_tx_offload tx_offload;

        tx_offload.data[0] = 0;
        tx_offload.data[1] = 0;
        txq = tx_queue;
        sw_ring = txq->sw_ring;
        txr = txq->tx_ring;
        tx_id = txq->tx_tail;
        txe = &sw_ring[tx_id];
        txp = NULL;

        /* Determine if the descriptor ring needs to be cleaned. */
        if (txq->nb_tx_free < txq->tx_free_thresh)
                ixgbe_xmit_cleanup(txq);

        rte_prefetch0(&txe->mbuf->pool);

        /* TX loop */
        for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
                new_ctx = 0;
                tx_pkt = *tx_pkts++;
                pkt_len = tx_pkt->pkt_len;

                /*
                 * Determine how many (if any) context descriptors
                 * are needed for offload functionality.
                 */
                ol_flags = tx_pkt->ol_flags;

                /* If hardware offload required */
                tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
                if (tx_ol_req) {
                        tx_offload.l2_len = tx_pkt->l2_len;
                        tx_offload.l3_len = tx_pkt->l3_len;
                        tx_offload.l4_len = tx_pkt->l4_len;
                        tx_offload.vlan_tci = tx_pkt->vlan_tci;
                        tx_offload.tso_segsz = tx_pkt->tso_segsz;
                        tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
                        tx_offload.outer_l3_len = tx_pkt->outer_l3_len;

                        /* Decide whether a new context descriptor must be
                         * built or an existing one can be reused. */
                        ctx = what_advctx_update(txq, tx_ol_req,
                                tx_offload);
                        /* Only allocate context descriptor if required */
                        new_ctx = (ctx == IXGBE_CTX_NUM);
                        ctx = txq->ctx_curr;
                }

                /*
                 * Keep track of how many descriptors are used this loop
                 * This will always be the number of segments + the number of
                 * Context descriptors required to transmit the packet
                 */
                nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);

                if (txp != NULL &&
                                nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
                        /* set RS on the previous packet in the burst */
                        txp->read.cmd_type_len |=
                                rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);

                /*
                 * The number of descriptors that must be allocated for a
                 * packet is the number of segments of that packet, plus 1
                 * Context Descriptor for the hardware offload, if any.
                 * Determine the last TX descriptor to allocate in the TX ring
                 * for the packet, starting from the current position (tx_id)
                 * in the ring.
                 */
                tx_last = (uint16_t) (tx_id + nb_used - 1);

                /* Circular ring */
                if (tx_last >= txq->nb_tx_desc)
                        tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);

                PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
                           " tx_first=%u tx_last=%u",
                           (unsigned) txq->port_id,
                           (unsigned) txq->queue_id,
                           (unsigned) pkt_len,
                           (unsigned) tx_id,
                           (unsigned) tx_last);

                /*
                 * Make sure there are enough TX descriptors available to
                 * transmit the entire packet.
                 * nb_used better be less than or equal to txq->tx_rs_thresh
                 */
                if (nb_used > txq->nb_tx_free) {
                        PMD_TX_FREE_LOG(DEBUG,
                                        "Not enough free TX descriptors "
                                        "nb_used=%4u nb_free=%4u "
                                        "(port=%d queue=%d)",
                                        nb_used, txq->nb_tx_free,
                                        txq->port_id, txq->queue_id);

                        if (ixgbe_xmit_cleanup(txq) != 0) {
                                /* Could not clean any descriptors */
                                if (nb_tx == 0)
                                        return 0;
                                goto end_of_tx;
                        }

                        /* nb_used better be <= txq->tx_rs_thresh */
                        if (unlikely(nb_used > txq->tx_rs_thresh)) {
                                PMD_TX_FREE_LOG(DEBUG,
                                        "The number of descriptors needed to "
                                        "transmit the packet exceeds the "
                                        "RS bit threshold. This will impact "
                                        "performance. "
                                        "nb_used=%4u nb_free=%4u "
                                        "tx_rs_thresh=%4u. "
                                        "(port=%d queue=%d)",
                                        nb_used, txq->nb_tx_free,
                                        txq->tx_rs_thresh,
                                        txq->port_id, txq->queue_id);
                                /*
                                 * Loop here until there are enough TX
                                 * descriptors or until the ring cannot be
                                 * cleaned.
                                 */
                                while (nb_used > txq->nb_tx_free) {
                                        if (ixgbe_xmit_cleanup(txq) != 0) {
                                                /*
                                                 * Could not clean any
                                                 * descriptors
                                                 */
                                                if (nb_tx == 0)
                                                        return 0;
                                                goto end_of_tx;
                                        }
                                }
                        }
                }

                /*
                 * By now there are enough free TX descriptors to transmit
                 * the packet.
                 */

                /*
                 * Set common flags of all TX Data Descriptors.
                 *
                 * The following bits must be set in all Data Descriptors:
                 *   - IXGBE_ADVTXD_DTYP_DATA
                 *   - IXGBE_ADVTXD_DCMD_DEXT
                 *
                 * The following bits must be set in the first Data Descriptor
                 * and are ignored in the other ones:
                 *   - IXGBE_ADVTXD_DCMD_IFCS
                 *   - IXGBE_ADVTXD_MAC_1588
                 *   - IXGBE_ADVTXD_DCMD_VLE
                 *
                 * The following bits must only be set in the last Data
                 * Descriptor:
                 *   - IXGBE_TXD_CMD_EOP
                 *
                 * The following bits can be set in any Data Descriptor, but
                 * are only set in the last Data Descriptor:
                 *   - IXGBE_TXD_CMD_RS
                 */
                cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
                        IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;

#ifdef RTE_LIBRTE_IEEE1588
                if (ol_flags & PKT_TX_IEEE1588_TMST)
                        cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
#endif

                olinfo_status = 0;
                if (tx_ol_req) {

                        if (ol_flags & PKT_TX_TCP_SEG) {
                                /* when TSO is on, the paylen in the descriptor
                                 * is not the packet length but the TCP payload
                                 * length */
                                pkt_len -= (tx_offload.l2_len +
                                        tx_offload.l3_len + tx_offload.l4_len);
                        }

                        /*
                         * Setup the TX Advanced Context Descriptor if required
                         */
                        if (new_ctx) {
                                volatile struct ixgbe_adv_tx_context_desc *
                                    ctx_txd;

                                ctx_txd = (volatile struct
                                    ixgbe_adv_tx_context_desc *)
                                    &txr[tx_id];

                                txn = &sw_ring[txe->next_id];
                                rte_prefetch0(&txn->mbuf->pool);

                                if (txe->mbuf != NULL) {
                                        rte_pktmbuf_free_seg(txe->mbuf);
                                        txe->mbuf = NULL;
                                }

                                ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
                                        tx_offload);

                                txe->last_id = tx_last;
                                tx_id = txe->next_id;
                                txe = txn;
                        }

                        /*
                         * Setup the TX Advanced Data Descriptor.
                         * This path is taken whether the context descriptor
                         * is newly built or reused.
                         */
                        cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
                        olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
                        olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
                }

                olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);

                m_seg = tx_pkt;
                do {
                        txd = &txr[tx_id];
                        txn = &sw_ring[txe->next_id];
                        rte_prefetch0(&txn->mbuf->pool);

                        if (txe->mbuf != NULL)
                                rte_pktmbuf_free_seg(txe->mbuf);
                        txe->mbuf = m_seg;

                        /*
                         * Set up Transmit Data Descriptor.
                         */
                        slen = m_seg->data_len;
                        buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
                        txd->read.buffer_addr =
                                rte_cpu_to_le_64(buf_dma_addr);
                        txd->read.cmd_type_len =
                                rte_cpu_to_le_32(cmd_type_len | slen);
                        txd->read.olinfo_status =
                                rte_cpu_to_le_32(olinfo_status);
                        txe->last_id = tx_last;
                        tx_id = txe->next_id;
                        txe = txn;
                        m_seg = m_seg->next;
                } while (m_seg != NULL);

                /*
                 * The last packet data descriptor needs End Of Packet (EOP)
                 */
                cmd_type_len |= IXGBE_TXD_CMD_EOP;
                txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
                txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);

                /* Set RS bit only on threshold packets' last descriptor */
                if (txq->nb_tx_used >= txq->tx_rs_thresh) {
                        PMD_TX_FREE_LOG(DEBUG,
                                        "Setting RS bit on TXD id="
                                        "%4u (port=%d queue=%d)",
                                        tx_last, txq->port_id, txq->queue_id);

                        cmd_type_len |= IXGBE_TXD_CMD_RS;

                        /* Update txq RS bit counters */
                        txq->nb_tx_used = 0;
                        txp = NULL;
                } else
                        txp = txd;

                txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
        }

end_of_tx:
        /* set RS on last packet in the burst */
        if (txp != NULL)
                txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);

        rte_wmb();

        /*
         * Set the Transmit Descriptor Tail (TDT)
         */
        PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
                   (unsigned) txq->port_id, (unsigned) txq->queue_id,
                   (unsigned) tx_id, (unsigned) nb_tx);
        IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
        txq->tx_tail = tx_id;

        return nb_tx;
}

/*********************************************************************
 *
 *  RX functions
 *
 **********************************************************************/
#define IXGBE_PACKET_TYPE_IPV4                      0X01
#define IXGBE_PACKET_TYPE_IPV4_TCP                  0X11
#define IXGBE_PACKET_TYPE_IPV4_UDP                  0X21
#define IXGBE_PACKET_TYPE_IPV4_SCTP                 0X41
#define IXGBE_PACKET_TYPE_IPV4_EXT                  0X03
#define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP             0X43
#define IXGBE_PACKET_TYPE_IPV6                      0X04
#define IXGBE_PACKET_TYPE_IPV6_TCP                  0X14
#define IXGBE_PACKET_TYPE_IPV6_UDP                  0X24
#define IXGBE_PACKET_TYPE_IPV6_EXT                  0X0C
#define IXGBE_PACKET_TYPE_IPV6_EXT_TCP              0X1C
#define IXGBE_PACKET_TYPE_IPV6_EXT_UDP              0X2C
#define IXGBE_PACKET_TYPE_IPV4_IPV6                 0X05
#define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP             0X15
#define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP             0X25
#define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT             0X0D
#define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP         0X1D
#define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP         0X2D

#define IXGBE_PACKET_TYPE_NVGRE                     0X00
#define IXGBE_PACKET_TYPE_NVGRE_IPV4                0X01
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP            0X11
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP            0X21
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP           0X41
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT            0X03
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP       0X43
#define IXGBE_PACKET_TYPE_NVGRE_IPV6                0X04
#define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP            0X14
#define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP            0X24
#define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT            0X0C
#define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP        0X1C
#define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP        0X2C
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6           0X05
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP       0X15
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP       0X25
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT       0X0D
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP   0X1D
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP   0X2D

#define IXGBE_PACKET_TYPE_VXLAN                     0X80
#define IXGBE_PACKET_TYPE_VXLAN_IPV4                0X81
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP            0x91
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP            0xA1
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP           0xC1
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT            0x83
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP       0XC3
#define IXGBE_PACKET_TYPE_VXLAN_IPV6                0X84
#define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP            0X94
#define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP            0XA4
#define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT            0X8C
#define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP        0X9C
#define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP        0XAC
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6           0X85
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP       0X95
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP       0XA5
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT       0X8D
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP   0X9D
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP   0XAD

#define IXGBE_PACKET_TYPE_MAX               0X80
#define IXGBE_PACKET_TYPE_TN_MAX            0X100
#define IXGBE_PACKET_TYPE_SHIFT             0X04
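
/*
 * Decode example (illustrative): after the 4-bit right shift in
 * ixgbe_rxd_pkt_info_to_pkt_type() below, a descriptor whose masked
 * pkt_info equals 0x11 indexes IXGBE_PACKET_TYPE_IPV4_TCP and is
 * reported as RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP.
 */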

/* @note: fix ixgbe_dev_supported_ptypes_get() if any change here. */
static inline uint32_t
ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
{
        /*
         * Use two different tables for normal packets and tunnel packets
         * to save space.
         */
        static const uint32_t
                ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
                [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4,
                [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT,
                [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6,
                [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6,
                [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6_EXT,
                [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6_EXT,
                [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
                [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
                [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
                [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
                [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
                [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
                [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
                [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
                [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
                [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
                [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
                [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
        };

        static const uint32_t
                ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
                [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
                [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
                        RTE_PTYPE_INNER_L4_TCP,
                [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
                        RTE_PTYPE_INNER_L4_TCP,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
                        RTE_PTYPE_INNER_L4_TCP,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
                        RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                        RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
                        RTE_PTYPE_INNER_L4_UDP,
                [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
                        RTE_PTYPE_INNER_L4_UDP,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
                        RTE_PTYPE_INNER_L4_UDP,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
                        RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                        RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
                        RTE_PTYPE_INNER_L4_SCTP,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
                        RTE_PTYPE_INNER_L4_SCTP,

                [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
                [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4_EXT,
                [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV6,
                [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV6_EXT,
                [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
                [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
                [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
                [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
                        RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                        RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
                [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
                [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
                [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
                        RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                        RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
                [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
        };

        if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
                return RTE_PTYPE_UNKNOWN;

        pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;

        /* For tunnel packet */
        if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
                /* Remove the tunnel bit to save the space. */
                pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
                return ptype_table_tn[pkt_info];
        }

        /*
         * For x550, if it's not a tunnel packet, the tunnel type bit
         * should be set to 0.  Reuse 82599's mask.
         */
        pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;

        return ptype_table[pkt_info];
}

static inline uint64_t
ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
{
        static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
                0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
                0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
                PKT_RX_RSS_HASH, 0, 0, 0,
                0, 0, 0, PKT_RX_FDIR,
        };
#ifdef RTE_LIBRTE_IEEE1588
        static uint64_t ip_pkt_etqf_map[8] = {
                0, 0, 0, PKT_RX_IEEE1588_PTP,
                0, 0, 0, 0,
        };

        if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
                return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
                                ip_rss_types_map[pkt_info & 0XF];
        else
                return ip_rss_types_map[pkt_info & 0XF];
#else
        return ip_rss_types_map[pkt_info & 0XF];
#endif
}

static inline uint64_t
rx_desc_status_to_pkt_flags(uint32_t rx_status)
{
        uint64_t pkt_flags;

        /*
         * Check only whether a VLAN is present.
         * Do not check whether the L3/L4 RX checksum was done by the NIC;
         * that can be found from the rte_eth_rxmode.hw_ip_checksum flag.
         */
        pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0;

#ifdef RTE_LIBRTE_IEEE1588
        if (rx_status & IXGBE_RXD_STAT_TMST)
                pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
#endif
        return pkt_flags;
}

static inline uint64_t
rx_desc_error_to_pkt_flags(uint32_t rx_status)
{
        uint64_t pkt_flags;

        /*
         * Bit 31: IPE, IPv4 checksum error
         * Bit 30: L4I, L4 integrity error
         */
        static uint64_t error_to_pkt_flags_map[4] = {
                0, PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
                PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
        };
        pkt_flags = error_to_pkt_flags_map[(rx_status >>
                IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];

        if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
            (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
                pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
        }

        return pkt_flags;
}
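
/*
 * Example (illustrative): shifting the status right by
 * IXGBE_RXDADV_ERR_CKSUM_BIT and masking with IXGBE_RXDADV_ERR_CKSUM_MSK
 * yields an index 0..3 built from the L4I and IPE bits; e.g. a packet
 * with only a bad L4 checksum maps to index 1, i.e. PKT_RX_L4_CKSUM_BAD.
 */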

/*
 * LOOK_AHEAD defines how many desc statuses to check beyond the
 * current descriptor.
 * It must be a pound define for optimal performance.
 * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
 * function only works with LOOK_AHEAD=8.
 */
#define LOOK_AHEAD 8
#if (LOOK_AHEAD != 8)
#error "PMD IXGBE: LOOK_AHEAD must be 8\n"
#endif
static inline int
ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
{
        volatile union ixgbe_adv_rx_desc *rxdp;
        struct ixgbe_rx_entry *rxep;
        struct rte_mbuf *mb;
        uint16_t pkt_len;
        uint64_t pkt_flags;
        int nb_dd;
        uint32_t s[LOOK_AHEAD];
        uint32_t pkt_info[LOOK_AHEAD];
        int i, j, nb_rx = 0;
        uint32_t status;

        /* get references to current descriptor and S/W ring entry */
        rxdp = &rxq->rx_ring[rxq->rx_tail];
        rxep = &rxq->sw_ring[rxq->rx_tail];

        status = rxdp->wb.upper.status_error;
        /* check to make sure there is at least 1 packet to receive */
        if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
                return 0;

        /*
         * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
         * reference packets that are ready to be received.
         */
        for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
             i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
                /* Read desc statuses backwards to avoid race condition */
                for (j = LOOK_AHEAD - 1; j >= 0; --j)
                        s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);

                for (j = LOOK_AHEAD - 1; j >= 0; --j)
                        pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
                                                       lo_dword.data);

                /* Compute how many status bits were set */
                nb_dd = 0;
                for (j = 0; j < LOOK_AHEAD; ++j)
                        nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;

                nb_rx += nb_dd;

                /* Translate descriptor info to mbuf format */
                for (j = 0; j < nb_dd; ++j) {
                        mb = rxep[j].mbuf;
                        pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
                                  rxq->crc_len;
                        mb->data_len = pkt_len;
                        mb->pkt_len = pkt_len;
                        mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);

                        /* convert descriptor fields to rte mbuf flags */
                        pkt_flags = rx_desc_status_to_pkt_flags(s[j]);
                        pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
                        pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
                                        ((uint16_t)pkt_info[j]);
                        mb->ol_flags = pkt_flags;
                        mb->packet_type =
                                ixgbe_rxd_pkt_info_to_pkt_type
                                        (pkt_info[j], rxq->pkt_type_mask);

                        if (likely(pkt_flags & PKT_RX_RSS_HASH))
                                mb->hash.rss = rte_le_to_cpu_32(
                                        rxdp[j].wb.lower.hi_dword.rss);
                        else if (pkt_flags & PKT_RX_FDIR) {
                                mb->hash.fdir.hash = rte_le_to_cpu_16(
                                        rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
                                        IXGBE_ATR_HASH_MASK;
                                mb->hash.fdir.id = rte_le_to_cpu_16(
                                        rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
                        }
                }

                /* Move mbuf pointers from the S/W ring to the stage */
                for (j = 0; j < LOOK_AHEAD; ++j) {
                        rxq->rx_stage[i + j] = rxep[j].mbuf;
                }

                /* stop if all requested packets could not be received */
                if (nb_dd != LOOK_AHEAD)
                        break;
        }

        /* clear software ring entries so we can cleanup correctly */
        for (i = 0; i < nb_rx; ++i) {
                rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
        }

        return nb_rx;
}
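
/*
 * Note (illustrative): the nb_dd accumulation above relies on
 * IXGBE_RXDADV_STAT_DD being bit 0 of the status word, so
 * "s[j] & IXGBE_RXDADV_STAT_DD" is either 0 or 1 and summing it over the
 * LOOK_AHEAD window counts how many descriptors are done.
 */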

static inline int
ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
{
        volatile union ixgbe_adv_rx_desc *rxdp;
        struct ixgbe_rx_entry *rxep;
        struct rte_mbuf *mb;
        uint16_t alloc_idx;
        __le64 dma_addr;
        int diag, i;

        /* allocate buffers in bulk directly into the S/W ring */
        alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
        rxep = &rxq->sw_ring[alloc_idx];
        diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
                                    rxq->rx_free_thresh);
        if (unlikely(diag != 0))
                return -ENOMEM;

        rxdp = &rxq->rx_ring[alloc_idx];
        for (i = 0; i < rxq->rx_free_thresh; ++i) {
                /* populate the static rte mbuf fields */
                mb = rxep[i].mbuf;
                if (reset_mbuf) {
                        mb->next = NULL;
                        mb->nb_segs = 1;
                        mb->port = rxq->port_id;
                }

                rte_mbuf_refcnt_set(mb, 1);
                mb->data_off = RTE_PKTMBUF_HEADROOM;

                /* populate the descriptors */
                dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
                rxdp[i].read.hdr_addr = 0;
                rxdp[i].read.pkt_addr = dma_addr;
        }

        /* update state of internal queue structure */
        rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
        if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
                rxq->rx_free_trigger = rxq->rx_free_thresh - 1;

        /* no errors */
        return 0;
}

static inline uint16_t
ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                         uint16_t nb_pkts)
{
        struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
        int i;

        /* how many packets are ready to return? */
        nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);

        /* copy mbuf pointers to the application's packet list */
        for (i = 0; i < nb_pkts; ++i)
                rx_pkts[i] = stage[i];

        /* update internal queue state */
        rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
        rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);

        return nb_pkts;
}

static inline uint16_t
rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
             uint16_t nb_pkts)
{
        struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
        uint16_t nb_rx = 0;

        /* Any previously recv'd pkts will be returned from the Rx stage */
        if (rxq->rx_nb_avail)
                return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);

        /* Scan the H/W ring for packets to receive */
        nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);

        /* update internal queue state */
        rxq->rx_next_avail = 0;
        rxq->rx_nb_avail = nb_rx;
        rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);

        /* if required, allocate new buffers to replenish descriptors */
        if (rxq->rx_tail > rxq->rx_free_trigger) {
                uint16_t cur_free_trigger = rxq->rx_free_trigger;

                if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
                        int i, j;

                        PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
                                   "queue_id=%u", (unsigned) rxq->port_id,
                                   (unsigned) rxq->queue_id);

                        rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
                                rxq->rx_free_thresh;

                        /*
                         * Need to rewind any previous receives if we cannot
                         * allocate new buffers to replenish the old ones.
                         */
                        rxq->rx_nb_avail = 0;
                        rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
                        for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
                                rxq->sw_ring[j].mbuf = rxq->rx_stage[i];

                        return 0;
                }

                /* update tail pointer */
                rte_wmb();
                IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
        }

        if (rxq->rx_tail >= rxq->nb_rx_desc)
                rxq->rx_tail = 0;

        /* received any packets this loop? */
        if (rxq->rx_nb_avail)
                return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);

        return 0;
}

/* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
uint16_t
ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
                           uint16_t nb_pkts)
{
        uint16_t nb_rx;

        if (unlikely(nb_pkts == 0))
                return 0;

        if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
                return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);

        /* request is relatively large, chunk it up */
        nb_rx = 0;
        while (nb_pkts) {
                uint16_t ret, n;

                n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
                ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
                nb_rx = (uint16_t)(nb_rx + ret);
                nb_pkts = (uint16_t)(nb_pkts - ret);
                if (ret < n)
                        break;
        }

        return nb_rx;
}

uint16_t
ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                uint16_t nb_pkts)
{
        struct ixgbe_rx_queue *rxq;
        volatile union ixgbe_adv_rx_desc *rx_ring;
        volatile union ixgbe_adv_rx_desc *rxdp;
        struct ixgbe_rx_entry *sw_ring;
        struct ixgbe_rx_entry *rxe;
        struct rte_mbuf *rxm;
        struct rte_mbuf *nmb;
        union ixgbe_adv_rx_desc rxd;
        uint64_t dma_addr;
        uint32_t staterr;
        uint32_t pkt_info;
        uint16_t pkt_len;
        uint16_t rx_id;
        uint16_t nb_rx;
        uint16_t nb_hold;
        uint64_t pkt_flags;

        nb_rx = 0;
        nb_hold = 0;
        rxq = rx_queue;
        rx_id = rxq->rx_tail;
        rx_ring = rxq->rx_ring;
        sw_ring = rxq->sw_ring;
        while (nb_rx < nb_pkts) {
                /*
                 * The order of operations here is important as the DD status
                 * bit must not be read after any other descriptor fields.
                 * rx_ring and rxdp are pointing to volatile data so the order
                 * of accesses cannot be reordered by the compiler. If they were
                 * not volatile, they could be reordered which could lead to
                 * using invalid descriptor fields when read from rxd.
                 */
                rxdp = &rx_ring[rx_id];
                staterr = rxdp->wb.upper.status_error;
                if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
                        break;
                rxd = *rxdp;

                /*
                 * End of packet.
                 *
                 * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
                 * is likely to be invalid and to be dropped by the various
                 * validation checks performed by the network stack.
                 *
                 * Allocate a new mbuf to replenish the RX ring descriptor.
                 * If the allocation fails:
                 *    - arrange for that RX descriptor to be the first one
                 *      being parsed the next time the receive function is
                 *      invoked [on the same queue].
                 *
                 *    - Stop parsing the RX ring and return immediately.
                 *
                 * This policy does not drop the packet received in the RX
                 * descriptor for which the allocation of a new mbuf failed.
                 * Thus, it allows that packet to be later retrieved if
                 * mbufs have been freed in the meantime.
                 * As a side effect, holding RX descriptors instead of
                 * systematically giving them back to the NIC may lead to
                 * RX ring exhaustion situations.
                 * However, the NIC can gracefully prevent such situations
                 * from happening by sending specific "back-pressure" flow
                 * control frames to its peer(s).
                 */
                PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
                           "ext_err_stat=0x%08x pkt_len=%u",
                           (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
                           (unsigned) rx_id, (unsigned) staterr,
                           (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));

                nmb = rte_rxmbuf_alloc(rxq->mb_pool);
                if (nmb == NULL) {
                        PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
                                   "queue_id=%u", (unsigned) rxq->port_id,
                                   (unsigned) rxq->queue_id);
                        rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
                        break;
                }

                nb_hold++;
                rxe = &sw_ring[rx_id];
                rx_id++;
                if (rx_id == rxq->nb_rx_desc)
                        rx_id = 0;

                /* Prefetch next mbuf while processing current one. */
                rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);

                /*
                 * When next RX descriptor is on a cache-line boundary,
                 * prefetch the next 4 RX descriptors and the next 8 pointers
                 * to mbufs.
                 */
                if ((rx_id & 0x3) == 0) {
                        rte_ixgbe_prefetch(&rx_ring[rx_id]);
                        rte_ixgbe_prefetch(&sw_ring[rx_id]);
                }

                rxm = rxe->mbuf;
                rxe->mbuf = nmb;
                dma_addr =
                        rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
                rxdp->read.hdr_addr = 0;
                rxdp->read.pkt_addr = dma_addr;

                /*
                 * Initialize the returned mbuf.
                 * 1) setup generic mbuf fields:
                 *    - number of segments,
                 *    - next segment,
                 *    - packet length,
                 *    - RX port identifier.
                 * 2) integrate hardware offload data, if any:
                 *    - RSS flag & hash,
                 *    - IP checksum flag,
                 *    - VLAN TCI, if any,
                 *    - error flags.
                 */
                pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
                                      rxq->crc_len);
                rxm->data_off = RTE_PKTMBUF_HEADROOM;
                rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
                rxm->nb_segs = 1;
                rxm->next = NULL;
                rxm->pkt_len = pkt_len;
                rxm->data_len = pkt_len;
                rxm->port = rxq->port_id;

                pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
                /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
                rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);

                pkt_flags = rx_desc_status_to_pkt_flags(staterr);
                pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
                pkt_flags = pkt_flags |
                        ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
                rxm->ol_flags = pkt_flags;
                rxm->packet_type =
                        ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
                                                       rxq->pkt_type_mask);

                if (likely(pkt_flags & PKT_RX_RSS_HASH))
                        rxm->hash.rss = rte_le_to_cpu_32(
                                                rxd.wb.lower.hi_dword.rss);
                else if (pkt_flags & PKT_RX_FDIR) {
                        rxm->hash.fdir.hash = rte_le_to_cpu_16(
                                        rxd.wb.lower.hi_dword.csum_ip.csum) &
                                        IXGBE_ATR_HASH_MASK;
                        rxm->hash.fdir.id = rte_le_to_cpu_16(
                                        rxd.wb.lower.hi_dword.csum_ip.ip_id);
                }
                /*
                 * Store the mbuf address into the next entry of the array
                 * of returned packets.
                 */
                rx_pkts[nb_rx++] = rxm;
        }
        rxq->rx_tail = rx_id;

        /*
         * If the number of free RX descriptors is greater than the RX free
         * threshold of the queue, advance the Receive Descriptor Tail (RDT)
         * register.
         * Update the RDT with the value of the last processed RX descriptor
         * minus 1, to guarantee that the RDT register is never equal to the
         * RDH register, which creates a "full" ring situation from the
         * hardware point of view...
         */
        nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
        if (nb_hold > rxq->rx_free_thresh) {
                PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
                           "nb_hold=%u nb_rx=%u",
                           (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
                           (unsigned) rx_id, (unsigned) nb_hold,
                           (unsigned) nb_rx);
                rx_id = (uint16_t) ((rx_id == 0) ?
                                     (rxq->nb_rx_desc - 1) : (rx_id - 1));
                IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
                nb_hold = 0;
        }
        rxq->nb_rx_hold = nb_hold;
        return nb_rx;
}
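
/*
 * Worked example (illustrative): with rx_free_thresh = 32, the RDT register
 * above is only written once more than 32 descriptors are being held back.
 * If rx_id has advanced to 40, RDT is written with 39 (rx_id - 1), which
 * keeps RDT from ever becoming equal to RDH and thus from signalling a
 * spurious "full" ring to the hardware.
 */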

/*
 * Detect an RSC descriptor.
 */
static inline uint32_t
ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
{
        return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
                IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
}

/**
 * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
 *
 * Fill the following info in the HEAD buffer of the Rx cluster:
 *    - RX port identifier
 *    - hardware offload data, if any:
 *      - RSS flag & hash
 *      - IP checksum flag
 *      - VLAN TCI, if any
 *      - error flags
 * @head HEAD of the packet cluster
 * @desc HW descriptor to get data from
 * @rxq Pointer to the Rx queue
 */
static inline void
ixgbe_fill_cluster_head_buf(
        struct rte_mbuf *head,
        union ixgbe_adv_rx_desc *desc,
        struct ixgbe_rx_queue *rxq,
        uint32_t staterr)
{
        uint32_t pkt_info;
        uint64_t pkt_flags;

        head->port = rxq->port_id;

        /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
         * set in the pkt_flags field.
         */
        head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
        pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
        pkt_flags = rx_desc_status_to_pkt_flags(staterr);
        pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
        pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
        head->ol_flags = pkt_flags;
        head->packet_type =
                ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);

        if (likely(pkt_flags & PKT_RX_RSS_HASH))
                head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
        else if (pkt_flags & PKT_RX_FDIR) {
                head->hash.fdir.hash =
                        rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
                        & IXGBE_ATR_HASH_MASK;
                head->hash.fdir.id =
                        rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
        }
}
1785 * ixgbe_recv_pkts_lro - receive handler for and LRO case.
1787 * @rx_queue Rx queue handle
1788 * @rx_pkts table of received packets
1789 * @nb_pkts size of rx_pkts table
1790 * @bulk_alloc if TRUE bulk allocation is used for a HW ring refilling
1792 * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1793 * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1795 * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1796 * 1) When non-EOP RSC completion arrives:
1797 * a) Update the HEAD of the current RSC aggregation cluster with the new
1798 * segment's data length.
1799 * b) Set the "next" pointer of the current segment to point to the segment
1800 * at the NEXTP index.
1801 * c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1802 * in the sw_rsc_ring.
1803 * 2) When EOP arrives we just update the cluster's total length and offload
1804 * flags and deliver the cluster up to the upper layers. In our case - put it
1805 * in the rx_pkts table.
1807 * Returns the number of received packets/clusters (according to the "bulk
1808 * receive" interface).
1810 static inline uint16_t
1811 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1814 struct ixgbe_rx_queue *rxq = rx_queue;
1815 volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1816 struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1817 struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1818 uint16_t rx_id = rxq->rx_tail;
1820 uint16_t nb_hold = rxq->nb_rx_hold;
1821 uint16_t prev_id = rxq->rx_tail;
1823 while (nb_rx < nb_pkts) {
1825 struct ixgbe_rx_entry *rxe;
1826 struct ixgbe_scattered_rx_entry *sc_entry;
1827 struct ixgbe_scattered_rx_entry *next_sc_entry;
1828 struct ixgbe_rx_entry *next_rxe = NULL;
1829 struct rte_mbuf *first_seg;
1830 struct rte_mbuf *rxm;
1831 struct rte_mbuf *nmb;
1832 union ixgbe_adv_rx_desc rxd;
1835 volatile union ixgbe_adv_rx_desc *rxdp;
1840 * The code in this whole file uses the volatile pointer to
1841 * ensure the read ordering of the status and the rest of the
1842 * descriptor fields (on the compiler level only!!!). This is so
1843 * UGLY - why not to just use the compiler barrier instead? DPDK
1844 * even has the rte_compiler_barrier() for that.
1846 * But most importantly this is just wrong because this doesn't
1847 * ensure memory ordering in a general case at all. For
1848 * instance, DPDK is supposed to work on Power CPUs where
1849 * compiler barrier may just not be enough!
1851 * I tried to write only this function properly to have a
1852 * starting point (as a part of an LRO/RSC series) but the
1853 * compiler cursed at me when I tried to cast away the
1854 * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
1855 * keeping it the way it is for now.
1857 * The code in this file is broken in so many other places and
1858 * will just not work on a big endian CPU anyway therefore the
1859 * lines below will have to be revisited together with the rest
1863 * - Get rid of "volatile" crap and let the compiler do its
1865 * - Use the proper memory barrier (rte_rmb()) to ensure the
1866 * memory ordering below.
1868 rxdp = &rx_ring[rx_id];
1869 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
1871 if (!(staterr & IXGBE_RXDADV_STAT_DD))
1876 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1877 "staterr=0x%x data_len=%u",
1878 rxq->port_id, rxq->queue_id, rx_id, staterr,
1879 rte_le_to_cpu_16(rxd.wb.upper.length));
1882 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1884 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1885 "port_id=%u queue_id=%u",
1886 rxq->port_id, rxq->queue_id);
1888 rte_eth_devices[rxq->port_id].data->
1889 rx_mbuf_alloc_failed++;
1893 else if (nb_hold > rxq->rx_free_thresh) {
1894 uint16_t next_rdt = rxq->rx_free_trigger;
1896 if (!ixgbe_rx_alloc_bufs(rxq, false)) {
1898 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
1900 nb_hold -= rxq->rx_free_thresh;
1902 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1903 "port_id=%u queue_id=%u",
1904 rxq->port_id, rxq->queue_id);
1906 rte_eth_devices[rxq->port_id].data->
1907 rx_mbuf_alloc_failed++;
1913 rxe = &sw_ring[rx_id];
1914 eop = staterr & IXGBE_RXDADV_STAT_EOP;
1916 next_id = rx_id + 1;
1917 if (next_id == rxq->nb_rx_desc)
1920 /* Prefetch next mbuf while processing current one. */
1921 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
 * When the next RX descriptor is on a cache-line boundary,
 * prefetch the next 4 RX descriptors and the next 4 pointers
 * to mbufs.
1928 if ((next_id & 0x3) == 0) {
1929 rte_ixgbe_prefetch(&rx_ring[next_id]);
1930 rte_ixgbe_prefetch(&sw_ring[next_id]);
1937 rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
 * Update the RX descriptor with the physical address of the
 * data buffer of the newly allocated mbuf.
1944 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1945 rxdp->read.hdr_addr = 0;
1946 rxdp->read.pkt_addr = dma;
1951 * Set data length & data buffer address of mbuf.
1953 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1954 rxm->data_len = data_len;
 * Get the next descriptor index:
 * - For RSC it's in the NEXTP field.
 * - For a scattered packet it's just the index of the
 *   following descriptor.
1964 if (ixgbe_rsc_count(&rxd))
1966 (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1967 IXGBE_RXDADV_NEXTP_SHIFT;
1971 next_sc_entry = &sw_sc_ring[nextp_id];
1972 next_rxe = &sw_ring[nextp_id];
1973 rte_ixgbe_prefetch(next_rxe);
1976 sc_entry = &sw_sc_ring[rx_id];
1977 first_seg = sc_entry->fbuf;
1978 sc_entry->fbuf = NULL;
1981 * If this is the first buffer of the received packet,
1982 * set the pointer to the first mbuf of the packet and
1983 * initialize its context.
1984 * Otherwise, update the total length and the number of segments
1985 * of the current scattered packet, and update the pointer to
1986 * the last mbuf of the current packet.
1988 if (first_seg == NULL) {
1990 first_seg->pkt_len = data_len;
1991 first_seg->nb_segs = 1;
1993 first_seg->pkt_len += data_len;
1994 first_seg->nb_segs++;
2001 * If this is not the last buffer of the received packet, update
2002 * the pointer to the first mbuf at the NEXTP entry in the
2003 * sw_sc_ring and continue to parse the RX ring.
2005 if (!eop && next_rxe) {
2006 rxm->next = next_rxe->mbuf;
2007 next_sc_entry->fbuf = first_seg;
2012 * This is the last buffer of the received packet - return
2013 * the current cluster to the user.
2017 /* Initialize the first mbuf of the returned packet */
2018 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
 * Deal with the case when HW CRC strip is disabled.
 * That can't happen when LRO is enabled, but it still could
 * happen for scattered RX mode.
2025 first_seg->pkt_len -= rxq->crc_len;
2026 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2027 struct rte_mbuf *lp;
2029 for (lp = first_seg; lp->next != rxm; lp = lp->next)
2032 first_seg->nb_segs--;
2033 lp->data_len -= rxq->crc_len - rxm->data_len;
2035 rte_pktmbuf_free_seg(rxm);
2037 rxm->data_len -= rxq->crc_len;
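/*
 * Worked example (illustrative): with crc_len = 4, if the last segment
 * carried only 2 bytes (all of them CRC), that segment is freed, nb_segs
 * is decremented, and the previous segment drops the remaining
 * 4 - 2 = 2 CRC bytes; otherwise the last segment simply sheds all
 * 4 CRC bytes itself.
 */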
2039 /* Prefetch data of first segment, if configured to do so. */
2040 rte_packet_prefetch((char *)first_seg->buf_addr +
2041 first_seg->data_off);
2044 * Store the mbuf address into the next entry of the array
2045 * of returned packets.
2047 rx_pkts[nb_rx++] = first_seg;
2051 * Record index of the next RX descriptor to probe.
2053 rxq->rx_tail = rx_id;
2056 * If the number of free RX descriptors is greater than the RX free
2057 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
 * Update the RDT with the value of the last processed RX descriptor
 * minus 1, to guarantee that the RDT register is never equal to the
 * RDH register, which creates a "full" ring situation from the
 * hardware point of view...
2064 if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2065 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2066 "nb_hold=%u nb_rx=%u",
2067 rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2070 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
2074 rxq->nb_rx_hold = nb_hold;
2079 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2082 return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2086 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2089 return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
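}

/*
 * Illustrative sketch (not part of the driver): how an application could
 * consume the RSC clusters returned by the LRO receive handlers above via
 * the regular rte_eth_rx_burst() API. The guard macro and the burst size
 * are hypothetical, for illustration only.
 */
#ifdef IXGBE_RXTX_LRO_USAGE_EXAMPLE
#define LRO_EXAMPLE_BURST 32

static void
lro_example_rx_loop(uint8_t port_id, uint16_t queue_id)
{
	struct rte_mbuf *pkts[LRO_EXAMPLE_BURST];
	uint16_t i, nb_rx;

	/* Each returned mbuf is the HEAD of a possibly multi-segment
	 * cluster; pkt_len and nb_segs were finalized by
	 * ixgbe_fill_cluster_head_buf().
	 */
	nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts, LRO_EXAMPLE_BURST);
	for (i = 0; i < nb_rx; i++) {
		/* ... process pkts[i] ... */
		rte_pktmbuf_free(pkts[i]); /* frees the whole segment chain */
	}
}
#endif /* IXGBE_RXTX_LRO_USAGE_EXAMPLE */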
2092 /*********************************************************************
2094 * Queue management functions
2096 **********************************************************************/
2098 static void __attribute__((cold))
2099 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2103 if (txq->sw_ring != NULL) {
2104 for (i = 0; i < txq->nb_tx_desc; i++) {
2105 if (txq->sw_ring[i].mbuf != NULL) {
2106 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2107 txq->sw_ring[i].mbuf = NULL;
2113 static void __attribute__((cold))
ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
{
	if (txq != NULL &&
	    txq->sw_ring != NULL)
		rte_free(txq->sw_ring);
}
2121 static void __attribute__((cold))
2122 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2124 if (txq != NULL && txq->ops != NULL) {
2125 txq->ops->release_mbufs(txq);
2126 txq->ops->free_swring(txq);
2131 void __attribute__((cold))
2132 ixgbe_dev_tx_queue_release(void *txq)
2134 ixgbe_tx_queue_release(txq);
2137 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2138 static void __attribute__((cold))
2139 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2141 static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2142 struct ixgbe_tx_entry *txe = txq->sw_ring;
2145 /* Zero out HW ring memory */
2146 for (i = 0; i < txq->nb_tx_desc; i++) {
2147 txq->tx_ring[i] = zeroed_desc;
2150 /* Initialize SW ring entries */
2151 prev = (uint16_t) (txq->nb_tx_desc - 1);
2152 for (i = 0; i < txq->nb_tx_desc; i++) {
2153 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2154 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2157 txe[prev].next_id = i;
2161 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2162 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2165 txq->nb_tx_used = 0;
2167 * Always allow 1 descriptor to be un-allocated to avoid
2168 * a H/W race condition
2170 txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2171 txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2173 memset((void*)&txq->ctx_cache, 0,
2174 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2177 static const struct ixgbe_txq_ops def_txq_ops = {
2178 .release_mbufs = ixgbe_tx_queue_release_mbufs,
2179 .free_swring = ixgbe_tx_free_swring,
2180 .reset = ixgbe_reset_tx_queue,
2183 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2184 * the queue parameters. Used in tx_queue_setup by primary process and then
2185 * in dev_init by secondary process when attaching to an existing ethdev.
2187 void __attribute__((cold))
2188 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2190 /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2191 if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2192 && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2193 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2194 #ifdef RTE_IXGBE_INC_VECTOR
2195 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2196 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2197 ixgbe_txq_vec_setup(txq) == 0)) {
2198 PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2199 dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2202 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2204 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2206 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2207 (unsigned long)txq->txq_flags,
2208 (unsigned long)IXGBE_SIMPLE_FLAGS);
2210 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2211 (unsigned long)txq->tx_rs_thresh,
2212 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2213 dev->tx_pkt_burst = ixgbe_xmit_pkts;
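	}
}

/*
 * Illustrative sketch (not part of the driver): a TX queue configuration
 * that would satisfy the simple/vector path conditions checked above when
 * passed to rte_eth_tx_queue_setup(). The guard macro is hypothetical and
 * the threshold values are assumptions for the example only.
 */
#ifdef IXGBE_RXTX_TXCONF_EXAMPLE
static const struct rte_eth_txconf example_simple_txconf = {
	/* >= RTE_PMD_IXGBE_TX_MAX_BURST and <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ */
	.tx_rs_thresh = 32,
	.tx_free_thresh = 32,
	/* matches IXGBE_SIMPLE_FLAGS: no multi-segment packets, no offloads */
	.txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOOFFLOADS,
};
#endif /* IXGBE_RXTX_TXCONF_EXAMPLE */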
2217 int __attribute__((cold))
2218 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2221 unsigned int socket_id,
2222 const struct rte_eth_txconf *tx_conf)
2224 const struct rte_memzone *tz;
2225 struct ixgbe_tx_queue *txq;
2226 struct ixgbe_hw *hw;
2227 uint16_t tx_rs_thresh, tx_free_thresh;
2229 PMD_INIT_FUNC_TRACE();
2230 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 * Validate the number of transmit descriptors.
 * It must not exceed the hardware maximum and must be a multiple
 * of IXGBE_TXD_ALIGN.
2237 if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2238 (nb_desc > IXGBE_MAX_RING_DESC) ||
2239 (nb_desc < IXGBE_MIN_RING_DESC)) {
2244 * The following two parameters control the setting of the RS bit on
2245 * transmit descriptors.
2246 * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2247 * descriptors have been used.
 * The TX descriptor ring will be cleaned after txq->tx_free_thresh
 * descriptors are used, or if the number of descriptors required
 * to transmit a packet is greater than the number of free TX
 * descriptors.
 * The following constraints must be satisfied:
2253 * tx_rs_thresh must be greater than 0.
2254 * tx_rs_thresh must be less than the size of the ring minus 2.
2255 * tx_rs_thresh must be less than or equal to tx_free_thresh.
2256 * tx_rs_thresh must be a divisor of the ring size.
2257 * tx_free_thresh must be greater than 0.
2258 * tx_free_thresh must be less than the size of the ring minus 3.
2259 * One descriptor in the TX ring is used as a sentinel to avoid a
2260 * H/W race condition, hence the maximum threshold constraints.
2261 * When set to zero use default values.
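 *
 * Worked example (illustrative): with nb_desc = 512 and
 * tx_rs_thresh = tx_free_thresh = 32 (the usual defaults), every
 * constraint above holds: 32 > 0, 32 < 512 - 2, 32 <= 32,
 * 512 % 32 == 0, and 32 < 512 - 3.
 */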
2263 tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2264 tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2265 tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2266 tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2267 if (tx_rs_thresh >= (nb_desc - 2)) {
2268 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2269 "of TX descriptors minus 2. (tx_rs_thresh=%u "
2270 "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2271 (int)dev->data->port_id, (int)queue_idx);
2274 if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2276 "(tx_rs_thresh=%u port=%d queue=%d)",
2277 DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2278 (int)dev->data->port_id, (int)queue_idx);
2281 if (tx_free_thresh >= (nb_desc - 3)) {
PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
"number of TX descriptors minus 3. "
"(tx_free_thresh=%u "
2285 "port=%d queue=%d)",
2286 (unsigned int)tx_free_thresh,
2287 (int)dev->data->port_id, (int)queue_idx);
2290 if (tx_rs_thresh > tx_free_thresh) {
2291 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2292 "tx_free_thresh. (tx_free_thresh=%u "
2293 "tx_rs_thresh=%u port=%d queue=%d)",
2294 (unsigned int)tx_free_thresh,
2295 (unsigned int)tx_rs_thresh,
2296 (int)dev->data->port_id,
2300 if ((nb_desc % tx_rs_thresh) != 0) {
2301 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2302 "number of TX descriptors. (tx_rs_thresh=%u "
2303 "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2304 (int)dev->data->port_id, (int)queue_idx);
 * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2310 * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2311 * by the NIC and all descriptors are written back after the NIC
2312 * accumulates WTHRESH descriptors.
2314 if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2315 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2316 "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2317 "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2318 (int)dev->data->port_id, (int)queue_idx);
2322 /* Free memory prior to re-allocation if needed... */
2323 if (dev->data->tx_queues[queue_idx] != NULL) {
2324 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2325 dev->data->tx_queues[queue_idx] = NULL;
2328 /* First allocate the tx queue data structure */
2329 txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2330 RTE_CACHE_LINE_SIZE, socket_id);
2335 * Allocate TX ring hardware descriptors. A memzone large enough to
2336 * handle the maximum ring size is allocated in order to allow for
2337 * resizing in later calls to the queue setup function.
2339 tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2340 sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2341 IXGBE_ALIGN, socket_id);
2343 ixgbe_tx_queue_release(txq);
2347 txq->nb_tx_desc = nb_desc;
2348 txq->tx_rs_thresh = tx_rs_thresh;
2349 txq->tx_free_thresh = tx_free_thresh;
2350 txq->pthresh = tx_conf->tx_thresh.pthresh;
2351 txq->hthresh = tx_conf->tx_thresh.hthresh;
2352 txq->wthresh = tx_conf->tx_thresh.wthresh;
2353 txq->queue_id = queue_idx;
2354 txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2355 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2356 txq->port_id = dev->data->port_id;
2357 txq->txq_flags = tx_conf->txq_flags;
2358 txq->ops = &def_txq_ops;
2359 txq->tx_deferred_start = tx_conf->tx_deferred_start;
 * Use VFTDT instead of TDT as the tail register when a VF device
 * is detected.
2364 if (hw->mac.type == ixgbe_mac_82599_vf ||
2365 hw->mac.type == ixgbe_mac_X540_vf ||
2366 hw->mac.type == ixgbe_mac_X550_vf ||
2367 hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2368 hw->mac.type == ixgbe_mac_X550EM_a_vf)
2369 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2371 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2373 txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2374 txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2376 /* Allocate software ring */
2377 txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2378 sizeof(struct ixgbe_tx_entry) * nb_desc,
2379 RTE_CACHE_LINE_SIZE, socket_id);
2380 if (txq->sw_ring == NULL) {
2381 ixgbe_tx_queue_release(txq);
2384 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2385 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2387 /* set up vector or scalar TX function as appropriate */
2388 ixgbe_set_tx_function(dev, txq);
2390 txq->ops->reset(txq);
2392 dev->data->tx_queues[queue_idx] = txq;
2399 * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2401 * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2402 * in the sw_rsc_ring is not set to NULL but rather points to the next
2403 * mbuf of this RSC aggregation (that has not been completed yet and still
 * resides on the HW ring). So, instead of calling rte_pktmbuf_free() we
 * will just free the first "nb_segs" segments of the cluster explicitly
 * by calling rte_pktmbuf_free_seg().
2408 * @m scattered cluster head
2410 static void __attribute__((cold))
2411 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2413 uint8_t i, nb_segs = m->nb_segs;
2414 struct rte_mbuf *next_seg;
for (i = 0; i < nb_segs; i++) {
	next_seg = m->next;
	rte_pktmbuf_free_seg(m);
	m = next_seg;
}
2423 static void __attribute__((cold))
2424 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2428 #ifdef RTE_IXGBE_INC_VECTOR
2429 /* SSE Vector driver has a different way of releasing mbufs. */
2430 if (rxq->rx_using_sse) {
2431 ixgbe_rx_queue_release_mbufs_vec(rxq);
2436 if (rxq->sw_ring != NULL) {
2437 for (i = 0; i < rxq->nb_rx_desc; i++) {
2438 if (rxq->sw_ring[i].mbuf != NULL) {
2439 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2440 rxq->sw_ring[i].mbuf = NULL;
2443 if (rxq->rx_nb_avail) {
2444 for (i = 0; i < rxq->rx_nb_avail; ++i) {
2445 struct rte_mbuf *mb;
2446 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2447 rte_pktmbuf_free_seg(mb);
2449 rxq->rx_nb_avail = 0;
2453 if (rxq->sw_sc_ring)
2454 for (i = 0; i < rxq->nb_rx_desc; i++)
2455 if (rxq->sw_sc_ring[i].fbuf) {
2456 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2457 rxq->sw_sc_ring[i].fbuf = NULL;
2461 static void __attribute__((cold))
2462 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2465 ixgbe_rx_queue_release_mbufs(rxq);
2466 rte_free(rxq->sw_ring);
2467 rte_free(rxq->sw_sc_ring);
2472 void __attribute__((cold))
2473 ixgbe_dev_rx_queue_release(void *rxq)
2475 ixgbe_rx_queue_release(rxq);
 * Check if the Rx Burst Bulk Alloc function can be used.
 * Return
 *   0: the preconditions are satisfied and the bulk allocation function
 *      can be used.
 *   -EINVAL: the preconditions are NOT satisfied and the default Rx burst
 *      function must be used.
2486 static inline int __attribute__((cold))
2487 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2492 * Make sure the following pre-conditions are satisfied:
2493 * rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2494 * rxq->rx_free_thresh < rxq->nb_rx_desc
2495 * (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2496 * rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST)
2497 * Scattered packets are not supported. This should be checked
2498 * outside of this function.
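 *
 * Worked example (illustrative): nb_rx_desc = 128 with
 * rx_free_thresh = 32 passes every check, assuming
 * RTE_PMD_IXGBE_RX_MAX_BURST = 32 and IXGBE_MAX_RING_DESC = 4096:
 * 32 >= 32, 32 < 128, 128 % 32 == 0, and 128 < 4096 - 32.
 */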
2500 if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2501 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2502 "rxq->rx_free_thresh=%d, "
2503 "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2504 rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2506 } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2507 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2508 "rxq->rx_free_thresh=%d, "
2509 "rxq->nb_rx_desc=%d",
2510 rxq->rx_free_thresh, rxq->nb_rx_desc);
2512 } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2513 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2514 "rxq->nb_rx_desc=%d, "
2515 "rxq->rx_free_thresh=%d",
2516 rxq->nb_rx_desc, rxq->rx_free_thresh);
2518 } else if (!(rxq->nb_rx_desc <
2519 (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
2520 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2521 "rxq->nb_rx_desc=%d, "
2522 "IXGBE_MAX_RING_DESC=%d, "
2523 "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2524 rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
2525 RTE_PMD_IXGBE_RX_MAX_BURST);
2532 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2533 static void __attribute__((cold))
2534 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2536 static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2538 uint16_t len = rxq->nb_rx_desc;
2541 * By default, the Rx queue setup function allocates enough memory for
2542 * IXGBE_MAX_RING_DESC. The Rx Burst bulk allocation function requires
2543 * extra memory at the end of the descriptor ring to be zero'd out. A
2544 * pre-condition for using the Rx burst bulk alloc function is that the
2545 * number of descriptors is less than or equal to
2546 * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2547 * constraints here to see if we need to zero out memory after the end
2548 * of the H/W descriptor ring.
2550 if (adapter->rx_bulk_alloc_allowed)
2551 /* zero out extra memory */
2552 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2555 * Zero out HW ring memory. Zero out extra memory at the end of
2556 * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2557 * reads extra memory as zeros.
2559 for (i = 0; i < len; i++) {
2560 rxq->rx_ring[i] = zeroed_desc;
 * Initialize the extra software ring entries. Space for these extra
 * entries is always allocated.
2567 memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2568 for (i = rxq->nb_rx_desc; i < len; ++i) {
2569 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2572 rxq->rx_nb_avail = 0;
2573 rxq->rx_next_avail = 0;
2574 rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2576 rxq->nb_rx_hold = 0;
2577 rxq->pkt_first_seg = NULL;
2578 rxq->pkt_last_seg = NULL;
2580 #ifdef RTE_IXGBE_INC_VECTOR
2581 rxq->rxrearm_start = 0;
2582 rxq->rxrearm_nb = 0;
2586 int __attribute__((cold))
2587 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2590 unsigned int socket_id,
2591 const struct rte_eth_rxconf *rx_conf,
2592 struct rte_mempool *mp)
2594 const struct rte_memzone *rz;
2595 struct ixgbe_rx_queue *rxq;
2596 struct ixgbe_hw *hw;
2598 struct ixgbe_adapter *adapter =
2599 (struct ixgbe_adapter *)dev->data->dev_private;
2601 PMD_INIT_FUNC_TRACE();
2602 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 * Validate the number of receive descriptors.
 * It must not exceed the hardware maximum and must be a multiple
 * of IXGBE_RXD_ALIGN.
2609 if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2610 (nb_desc > IXGBE_MAX_RING_DESC) ||
2611 (nb_desc < IXGBE_MIN_RING_DESC)) {
2615 /* Free memory prior to re-allocation if needed... */
2616 if (dev->data->rx_queues[queue_idx] != NULL) {
2617 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2618 dev->data->rx_queues[queue_idx] = NULL;
2621 /* First allocate the rx queue data structure */
2622 rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2623 RTE_CACHE_LINE_SIZE, socket_id);
2627 rxq->nb_rx_desc = nb_desc;
2628 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2629 rxq->queue_id = queue_idx;
2630 rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2631 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2632 rxq->port_id = dev->data->port_id;
2633 rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2635 rxq->drop_en = rx_conf->rx_drop_en;
2636 rxq->rx_deferred_start = rx_conf->rx_deferred_start;
 * The packet type in the RX descriptor is different for different NICs.
 * Some bits are used for X550 but reserved for other NICs.
 * So set different masks for different NICs.
2643 if (hw->mac.type == ixgbe_mac_X550 ||
2644 hw->mac.type == ixgbe_mac_X550EM_x ||
2645 hw->mac.type == ixgbe_mac_X550EM_a ||
2646 hw->mac.type == ixgbe_mac_X550_vf ||
2647 hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2648 hw->mac.type == ixgbe_mac_X550EM_a_vf)
2649 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2651 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2654 * Allocate RX ring hardware descriptors. A memzone large enough to
2655 * handle the maximum ring size is allocated in order to allow for
2656 * resizing in later calls to the queue setup function.
2658 rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2659 RX_RING_SZ, IXGBE_ALIGN, socket_id);
2661 ixgbe_rx_queue_release(rxq);
/* Zero-initialize all the descriptors in the ring. */
memset(rz->addr, 0, RX_RING_SZ);
 * Use VFRDT/VFRDH instead of RDT/RDH as the ring registers when a
 * VF device is detected.
2673 if (hw->mac.type == ixgbe_mac_82599_vf ||
2674 hw->mac.type == ixgbe_mac_X540_vf ||
2675 hw->mac.type == ixgbe_mac_X550_vf ||
2676 hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2677 hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2679 IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2681 IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2685 IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2687 IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2690 rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2691 rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
 * Certain constraints must be met in order to use the bulk buffer
 * allocation Rx burst function. If any Rx queue doesn't meet them
 * the feature should be disabled for the whole port.
2698 if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2699 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2700 "preconditions - canceling the feature for "
2701 "the whole port[%d]",
2702 rxq->queue_id, rxq->port_id);
2703 adapter->rx_bulk_alloc_allowed = false;
2707 * Allocate software ring. Allow for space at the end of the
2708 * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2709 * function does not access an invalid memory region.
2712 if (adapter->rx_bulk_alloc_allowed)
2713 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2715 rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2716 sizeof(struct ixgbe_rx_entry) * len,
2717 RTE_CACHE_LINE_SIZE, socket_id);
2718 if (!rxq->sw_ring) {
2719 ixgbe_rx_queue_release(rxq);
 * Always allocate even if it's not going to be needed in order to
 * simplify the code.
 *
 * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
 * be requested in ixgbe_dev_rx_init(), which is called later from
 * dev_start().
2732 rte_zmalloc_socket("rxq->sw_sc_ring",
2733 sizeof(struct ixgbe_scattered_rx_entry) * len,
2734 RTE_CACHE_LINE_SIZE, socket_id);
2735 if (!rxq->sw_sc_ring) {
2736 ixgbe_rx_queue_release(rxq);
2740 PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2741 "dma_addr=0x%"PRIx64,
2742 rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2743 rxq->rx_ring_phys_addr);
2745 if (!rte_is_power_of_2(nb_desc)) {
2746 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2747 "preconditions - canceling the feature for "
2748 "the whole port[%d]",
2749 rxq->queue_id, rxq->port_id);
2750 adapter->rx_vec_allowed = false;
2752 ixgbe_rxq_vec_setup(rxq);
2754 dev->data->rx_queues[queue_idx] = rxq;
2756 ixgbe_reset_rx_queue(adapter, rxq);
2762 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2764 #define IXGBE_RXQ_SCAN_INTERVAL 4
2765 volatile union ixgbe_adv_rx_desc *rxdp;
2766 struct ixgbe_rx_queue *rxq;
2769 if (rx_queue_id >= dev->data->nb_rx_queues) {
2770 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2774 rxq = dev->data->rx_queues[rx_queue_id];
2775 rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2777 while ((desc < rxq->nb_rx_desc) &&
2778 (rxdp->wb.upper.status_error &
2779 rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2780 desc += IXGBE_RXQ_SCAN_INTERVAL;
2781 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2782 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2783 rxdp = &(rxq->rx_ring[rxq->rx_tail +
2784 desc - rxq->nb_rx_desc]);
2791 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2793 volatile union ixgbe_adv_rx_desc *rxdp;
2794 struct ixgbe_rx_queue *rxq = rx_queue;
2797 if (unlikely(offset >= rxq->nb_rx_desc))
2799 desc = rxq->rx_tail + offset;
2800 if (desc >= rxq->nb_rx_desc)
2801 desc -= rxq->nb_rx_desc;
2803 rxdp = &rxq->rx_ring[desc];
2804 return !!(rxdp->wb.upper.status_error &
2805 rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
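}

/*
 * Illustrative sketch (not part of the driver): applications reach the
 * handler above through rte_eth_rx_descriptor_done(). The guard macro is
 * hypothetical, for illustration only.
 */
#ifdef IXGBE_RXTX_DESC_DONE_EXAMPLE
static int
example_next_rx_desc_done(uint8_t port_id, uint16_t queue_id)
{
	/* Returns 1 when the descriptor at offset 0 past the current
	 * tail has been written back by hardware (DD bit set).
	 */
	return rte_eth_rx_descriptor_done(port_id, queue_id, 0);
}
#endif /* IXGBE_RXTX_DESC_DONE_EXAMPLE */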
2808 void __attribute__((cold))
2809 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2812 struct ixgbe_adapter *adapter =
2813 (struct ixgbe_adapter *)dev->data->dev_private;
2815 PMD_INIT_FUNC_TRACE();
2817 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2818 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2820 txq->ops->release_mbufs(txq);
2821 txq->ops->reset(txq);
2825 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2826 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2828 ixgbe_rx_queue_release_mbufs(rxq);
2829 ixgbe_reset_rx_queue(adapter, rxq);
2835 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2839 PMD_INIT_FUNC_TRACE();
2841 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2842 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2843 dev->data->rx_queues[i] = NULL;
2845 dev->data->nb_rx_queues = 0;
2847 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2848 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2849 dev->data->tx_queues[i] = NULL;
2851 dev->data->nb_tx_queues = 0;
2854 /*********************************************************************
2856 * Device RX/TX init functions
2858 **********************************************************************/
2861 * Receive Side Scaling (RSS)
2862 * See section 7.1.2.8 in the following document:
2863 * "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2866 * The source and destination IP addresses of the IP header and the source
2867 * and destination ports of TCP/UDP headers, if any, of received packets are
2868 * hashed against a configurable random key to compute a 32-bit RSS hash result.
 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
 * 128-entry redirection table (RETA). Each entry of the RETA provides a 4-bit
 * RSS output index, which is used as the RX queue index in which to store the
 * received packets.
2873 * The following output is supplied in the RX write-back descriptor:
2874 * - 32-bit result of the Microsoft RSS hash function,
2875 * - 4-bit RSS type field.
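 *
 * Worked example (illustrative): with 4 Rx queues and the RETA filled
 * round-robin as done in ixgbe_rss_configure() below, a packet whose
 * 32-bit RSS hash is 0x1234ABCD selects RETA entry 0x1234ABCD & 0x7F =
 * 0x4D (77), which holds RX queue 77 % 4 = 1.
 */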
2879 * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2880 * Used as the default key.
2882 static uint8_t rss_intel_key[40] = {
2883 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2884 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2885 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2886 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2887 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2891 ixgbe_rss_disable(struct rte_eth_dev *dev)
2893 struct ixgbe_hw *hw;
2897 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2898 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2899 mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2900 mrqc &= ~IXGBE_MRQC_RSSEN;
2901 IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2905 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
2915 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2916 rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
2918 hash_key = rss_conf->rss_key;
2919 if (hash_key != NULL) {
2920 /* Fill in RSS hash key */
2921 for (i = 0; i < 10; i++) {
2922 rss_key = hash_key[(i * 4)];
2923 rss_key |= hash_key[(i * 4) + 1] << 8;
2924 rss_key |= hash_key[(i * 4) + 2] << 16;
2925 rss_key |= hash_key[(i * 4) + 3] << 24;
2926 IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
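/*
 * Worked example (illustrative): when the default Intel key above is
 * used, its first four bytes 6D 5A 56 DA pack LSB-first into RSSRK(0)
 * as 0xDA565A6D.
 */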
2930 /* Set configured hashing protocols in MRQC register */
2931 rss_hf = rss_conf->rss_hf;
2932 mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
2933 if (rss_hf & ETH_RSS_IPV4)
2934 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
2935 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
2936 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
2937 if (rss_hf & ETH_RSS_IPV6)
2938 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
2939 if (rss_hf & ETH_RSS_IPV6_EX)
2940 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
2941 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
2942 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
2943 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
2944 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
2945 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
2946 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
2947 if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
2948 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
2949 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
2950 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
2951 IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2955 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
2956 struct rte_eth_rss_conf *rss_conf)
2958 struct ixgbe_hw *hw;
2963 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2965 if (!ixgbe_rss_update_sp(hw->mac.type)) {
2966 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
2970 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2973 * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
2974 * "RSS enabling cannot be done dynamically while it must be
2975 * preceded by a software reset"
2976 * Before changing anything, first check that the update RSS operation
2977 * does not attempt to disable RSS, if RSS was enabled at
2978 * initialization time, or does not attempt to enable RSS, if RSS was
2979 * disabled at initialization time.
2981 rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
2982 mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2983 if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
2984 if (rss_hf != 0) /* Enable RSS */
2986 return 0; /* Nothing to do */
2989 if (rss_hf == 0) /* Disable RSS */
2991 ixgbe_hw_rss_hash_set(hw, rss_conf);
2996 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2997 struct rte_eth_rss_conf *rss_conf)
2999 struct ixgbe_hw *hw;
3008 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3009 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3010 rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3011 hash_key = rss_conf->rss_key;
3012 if (hash_key != NULL) {
3013 /* Return RSS hash key */
3014 for (i = 0; i < 10; i++) {
3015 rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3016 hash_key[(i * 4)] = rss_key & 0x000000FF;
3017 hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3018 hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3019 hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3023 /* Get RSS functions configured in MRQC register */
3024 mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3025 if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3026 rss_conf->rss_hf = 0;
3030 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3031 rss_hf |= ETH_RSS_IPV4;
3032 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3033 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3034 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3035 rss_hf |= ETH_RSS_IPV6;
3036 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3037 rss_hf |= ETH_RSS_IPV6_EX;
3038 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3039 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3040 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3041 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3042 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3043 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3044 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3045 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3046 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3047 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3048 rss_conf->rss_hf = rss_hf;
3053 ixgbe_rss_configure(struct rte_eth_dev *dev)
3055 struct rte_eth_rss_conf rss_conf;
3056 struct ixgbe_hw *hw;
3060 uint16_t sp_reta_size;
3063 PMD_INIT_FUNC_TRACE();
3064 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3066 sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3069 * Fill in redirection table
3070 * The byte-swap is needed because NIC registers are in
3071 * little-endian order.
3074 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3075 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3077 if (j == dev->data->nb_rx_queues)
3079 reta = (reta << 8) | j;
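/*
 * Worked example (illustrative): with 4 Rx queues the first four
 * entries pack into reta = 0x00010203; the byte-swap noted above then
 * puts queue 0 in the least significant byte of the register value.
 */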
3081 IXGBE_WRITE_REG(hw, reta_reg,
3086 * Configure the RSS key and the RSS protocols used to compute
3087 * the RSS hash of input packets.
3089 rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3090 if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3091 ixgbe_rss_disable(dev);
3094 if (rss_conf.rss_key == NULL)
3095 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3096 ixgbe_hw_rss_hash_set(hw, &rss_conf);
3099 #define NUM_VFTA_REGISTERS 128
3100 #define NIC_RX_BUFFER_SIZE 0x200
3101 #define X550_RX_BUFFER_SIZE 0x180
3104 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3106 struct rte_eth_vmdq_dcb_conf *cfg;
3107 struct ixgbe_hw *hw;
3108 enum rte_eth_nb_pools num_pools;
3109 uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3111 uint8_t nb_tcs; /* number of traffic classes */
3114 PMD_INIT_FUNC_TRACE();
3115 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3116 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3117 num_pools = cfg->nb_queue_pools;
3118 /* Check we have a valid number of pools */
3119 if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3120 ixgbe_rss_disable(dev);
3123 /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3124 nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3128 * split rx buffer up into sections, each for 1 traffic class
3130 switch (hw->mac.type) {
3131 case ixgbe_mac_X550:
3132 case ixgbe_mac_X550EM_x:
3133 case ixgbe_mac_X550EM_a:
3134 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3137 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
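	break;
}
/*
 * Worked example (illustrative): ETH_16_POOLS gives nb_tcs = 8, so on a
 * non-X550 MAC each traffic class gets NIC_RX_BUFFER_SIZE / 8 =
 * 0x200 / 8 = 0x40 (64 KB) of Rx packet buffer.
 */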
3140 for (i = 0; i < nb_tcs; i++) {
3141 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3142 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3143 /* clear 10 bits. */
3144 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3145 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3147 /* zero alloc all unused TCs */
3148 for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3149 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
/* clear 10 bits. */
IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3155 /* MRQC: enable vmdq and dcb */
mrqc = (num_pools == ETH_16_POOLS) ?
	IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3158 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3160 /* PFVTCTL: turn on virtualisation and set the default pool */
3161 vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3162 if (cfg->enable_default_pool) {
3163 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3165 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3168 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3170 /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3172 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3174 * mapping is done with 3 bits per priority,
3175 * so shift by i*3 each time
3177 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3179 IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
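/*
 * Worked example (illustrative): if every user priority maps to TC0
 * except priority 1, which maps to TC1, then
 * queue_mapping = 0x1 << (1 * 3) = 0x8.
 */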
3181 /* RTRPCS: DCB related */
3182 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3184 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3185 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3186 vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3187 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3189 /* VFTA - enable all vlan filters */
3190 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3191 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3194 /* VFRE: pool enabling for receive - 16 or 32 */
IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
		num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3199 * MPSAR - allow pools to read specific mac addresses
3200 * In this case, all pools should be able to read from mac addr 0
3202 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3203 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3205 /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3206 for (i = 0; i < cfg->nb_pool_maps; i++) {
3207 /* set vlan id in VF register and set the valid bit */
IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
		(cfg->pool_map[i].vlan_id & 0xFFF)));
 * Put the allowed pools in the VFB reg. As we only have 16 or 32
 * pools, we only need to use the first half of the register,
 * i.e. bits 0-31.
3215 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
 * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
3221 * @hw: pointer to hardware structure
3222 * @dcb_config: pointer to ixgbe_dcb_config structure
3225 ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw,
3226 struct ixgbe_dcb_config *dcb_config)
3231 PMD_INIT_FUNC_TRACE();
3232 if (hw->mac.type != ixgbe_mac_82598EB) {
3233 /* Disable the Tx desc arbiter so that MTQC can be changed */
3234 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3235 reg |= IXGBE_RTTDCS_ARBDIS;
3236 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3238 /* Enable DCB for Tx with 8 TCs */
3239 if (dcb_config->num_tcs.pg_tcs == 8) {
3240 reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3243 reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3245 if (dcb_config->vt_mode)
3246 reg |= IXGBE_MTQC_VT_ENA;
3247 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3249 /* Disable drop for all queues */
3250 for (q = 0; q < 128; q++)
3251 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3252 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3254 /* Enable the Tx desc arbiter */
3255 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3256 reg &= ~IXGBE_RTTDCS_ARBDIS;
3257 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3259 /* Enable Security TX Buffer IFG for DCB */
3260 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3261 reg |= IXGBE_SECTX_DCB;
3262 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3268 * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3269 * @dev: pointer to rte_eth_dev structure
3270 * @dcb_config: pointer to ixgbe_dcb_config structure
3273 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3274 struct ixgbe_dcb_config *dcb_config)
3276 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3277 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3278 struct ixgbe_hw *hw =
3279 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3281 PMD_INIT_FUNC_TRACE();
3282 if (hw->mac.type != ixgbe_mac_82598EB)
	/* PF VF Transmit Enable */
	IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
		vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ?
		0xFFFF : 0xFFFFFFFF);

/* Configure general DCB TX parameters */
ixgbe_dcb_tx_hw_config(hw, dcb_config);
3293 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3294 struct ixgbe_dcb_config *dcb_config)
3296 struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3297 &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3298 struct ixgbe_dcb_tc_config *tc;
3301 /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3303 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3304 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3307 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3308 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3310 /* User Priority to Traffic Class mapping */
3311 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3312 j = vmdq_rx_conf->dcb_tc[i];
3313 tc = &dcb_config->tc_config[j];
3314 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3320 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3321 struct ixgbe_dcb_config *dcb_config)
3323 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3324 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3325 struct ixgbe_dcb_tc_config *tc;
3328 /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3330 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3331 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3334 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3335 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3338 /* User Priority to Traffic Class mapping */
3339 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3340 j = vmdq_tx_conf->dcb_tc[i];
3341 tc = &dcb_config->tc_config[j];
3342 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3349 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3350 struct ixgbe_dcb_config *dcb_config)
3352 struct rte_eth_dcb_rx_conf *rx_conf =
3353 &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3354 struct ixgbe_dcb_tc_config *tc;
3357 dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3358 dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3360 /* User Priority to Traffic Class mapping */
3361 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3362 j = rx_conf->dcb_tc[i];
3363 tc = &dcb_config->tc_config[j];
3364 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3370 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3371 struct ixgbe_dcb_config *dcb_config)
3373 struct rte_eth_dcb_tx_conf *tx_conf =
3374 &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3375 struct ixgbe_dcb_tc_config *tc;
3378 dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3379 dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3381 /* User Priority to Traffic Class mapping */
3382 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3383 j = tx_conf->dcb_tc[i];
3384 tc = &dcb_config->tc_config[j];
3385 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3391 * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3392 * @hw: pointer to hardware structure
3393 * @dcb_config: pointer to ixgbe_dcb_config structure
3396 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3397 struct ixgbe_dcb_config *dcb_config)
3403 PMD_INIT_FUNC_TRACE();
3405 * Disable the arbiter before changing parameters
3406 * (always enable recycle mode; WSP)
3408 reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3409 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3411 if (hw->mac.type != ixgbe_mac_82598EB) {
3412 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3413 if (dcb_config->num_tcs.pg_tcs == 4) {
3414 if (dcb_config->vt_mode)
3415 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3416 IXGBE_MRQC_VMDQRT4TCEN;
/* No matter whether the mode is DCB or DCB_RSS, just
 * set the MRQE to RSSXTCEN. RSS is controlled
 * by the RSS_FIELD bits.
 */
3422 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3423 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3424 IXGBE_MRQC_RTRSS4TCEN;
3427 if (dcb_config->num_tcs.pg_tcs == 8) {
3428 if (dcb_config->vt_mode)
3429 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3430 IXGBE_MRQC_VMDQRT8TCEN;
3432 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3433 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3434 IXGBE_MRQC_RTRSS8TCEN;
3438 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3441 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3442 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3443 vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3444 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3446 /* VFTA - enable all vlan filters */
3447 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3448 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
 * Configure the Rx packet plane (recycle mode; WSP) and
 * enable the arbiter.
3456 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
		uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3465 switch (hw->mac.type) {
3466 case ixgbe_mac_82598EB:
3467 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3469 case ixgbe_mac_82599EB:
3470 case ixgbe_mac_X540:
3471 case ixgbe_mac_X550:
3472 case ixgbe_mac_X550EM_x:
3473 case ixgbe_mac_X550EM_a:
3474 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3483 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3484 uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3486 switch (hw->mac.type) {
3487 case ixgbe_mac_82598EB:
ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3491 case ixgbe_mac_82599EB:
3492 case ixgbe_mac_X540:
3493 case ixgbe_mac_X550:
3494 case ixgbe_mac_X550EM_x:
3495 case ixgbe_mac_X550EM_a:
ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3504 #define DCB_RX_CONFIG 1
3505 #define DCB_TX_CONFIG 1
3506 #define DCB_TX_PB 1024
3508 * ixgbe_dcb_hw_configure - Enable DCB and configure
3509 * general DCB in VT mode and non-VT mode parameters
3510 * @dev: pointer to rte_eth_dev structure
3511 * @dcb_config: pointer to ixgbe_dcb_config structure
3514 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3515 struct ixgbe_dcb_config *dcb_config)
uint8_t i, pfc_en, nb_tcs;
3519 uint16_t pbsize, rx_buffer_size;
3520 uint8_t config_dcb_rx = 0;
3521 uint8_t config_dcb_tx = 0;
3522 uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3523 uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3524 uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3525 uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3526 uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3527 struct ixgbe_dcb_tc_config *tc;
3528 uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3529 struct ixgbe_hw *hw =
3530 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
switch (dev->data->dev_conf.rxmode.mq_mode) {
3533 case ETH_MQ_RX_VMDQ_DCB:
3534 dcb_config->vt_mode = true;
3535 if (hw->mac.type != ixgbe_mac_82598EB) {
3536 config_dcb_rx = DCB_RX_CONFIG;
/* Get DCB and VT RX configuration parameters
 * from rte_eth_conf.
 */
3541 ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3542 /*Configure general VMDQ and DCB RX parameters*/
3543 ixgbe_vmdq_dcb_configure(dev);
3547 case ETH_MQ_RX_DCB_RSS:
3548 dcb_config->vt_mode = false;
3549 config_dcb_rx = DCB_RX_CONFIG;
/* Get DCB RX configuration parameters from rte_eth_conf */
3551 ixgbe_dcb_rx_config(dev, dcb_config);
/* Configure general DCB RX parameters */
3553 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3556 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3559 switch (dev->data->dev_conf.txmode.mq_mode) {
3560 case ETH_MQ_TX_VMDQ_DCB:
3561 dcb_config->vt_mode = true;
3562 config_dcb_tx = DCB_TX_CONFIG;
3563 /* get DCB and VT TX configuration parameters from rte_eth_conf */
ixgbe_dcb_vt_tx_config(dev, dcb_config);
/* Configure general VMDQ and DCB TX parameters */
ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3570 dcb_config->vt_mode = false;
3571 config_dcb_tx = DCB_TX_CONFIG;
/* Get DCB TX configuration parameters from rte_eth_conf */
ixgbe_dcb_tx_config(dev, dcb_config);
/* Configure general DCB TX parameters */
3575 ixgbe_dcb_tx_hw_config(hw, dcb_config);
3578 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3582 nb_tcs = dcb_config->num_tcs.pfc_tcs;
3584 ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3585 if (nb_tcs == ETH_4_TCS) {
3586 /* Avoid un-configured priority mapping to TC0 */
3588 uint8_t mask = 0xFF;
3589 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
mask = (uint8_t)(mask & (~(1 << map[i])));
3591 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3592 if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3596 /* Re-configure 4 TCs BW */
3597 for (i = 0; i < nb_tcs; i++) {
3598 tc = &dcb_config->tc_config[i];
3599 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3600 (uint8_t)(100 / nb_tcs);
3601 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3602 (uint8_t)(100 / nb_tcs);
3604 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3605 tc = &dcb_config->tc_config[i];
3606 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3607 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3611 switch (hw->mac.type) {
3612 case ixgbe_mac_X550:
3613 case ixgbe_mac_X550EM_x:
3614 case ixgbe_mac_X550EM_a:
3615 rx_buffer_size = X550_RX_BUFFER_SIZE;
3618 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3622 if (config_dcb_rx) {
3623 /* Set RX buffer size */
3624 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3625 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3626 for (i = 0; i < nb_tcs; i++) {
3627 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3629 /* zero alloc all unused TCs */
3630 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3631 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3634 if (config_dcb_tx) {
3635 /* Only support an equally distributed Tx packet buffer strategy. */
3636 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3637 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3638 for (i = 0; i < nb_tcs; i++) {
3639 IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3640 IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
/* Clear unused TCs, if any, to zero buffer size */
3643 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3644 IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3645 IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
/* Calculate traffic class credits */
ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
				   IXGBE_DCB_TX_CONFIG);
ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
				   IXGBE_DCB_RX_CONFIG);
3655 if (config_dcb_rx) {
3656 /* Unpack CEE standard containers */
3657 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3658 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3659 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3660 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3661 /* Configure PG(ETS) RX */
ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3665 if (config_dcb_tx) {
3666 /* Unpack CEE standard containers */
3667 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3668 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3669 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3670 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3671 /* Configure PG(ETS) TX */
ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
/* Configure queue statistics registers */
3676 ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3678 /* Check if the PFC is supported */
3679 if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3680 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3681 for (i = 0; i < nb_tcs; i++) {
 * If the TC count is 8, the default high_water is 48
 * and the default low_water is 16.
 */
hw->fc.high_water[i] = (pbsize * 3) / 4;
3687 hw->fc.low_water[i] = pbsize / 4;
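/*
 * Worked example (illustrative): rx_buffer_size = 0x200 (512 KB)
 * split across 8 TCs gives pbsize = 64, hence high_water = 48 and
 * low_water = 16, matching the defaults quoted above.
 */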
3688 /* Enable pfc for this TC */
3689 tc = &dcb_config->tc_config[i];
3690 tc->pfc = ixgbe_dcb_pfc_enabled;
3692 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3693 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3695 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3702 * ixgbe_configure_dcb - Configure DCB Hardware
3703 * @dev: pointer to rte_eth_dev
3705 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3707 struct ixgbe_dcb_config *dcb_cfg =
3708 IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3709 struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3711 PMD_INIT_FUNC_TRACE();
3713 /* check support mq_mode for DCB */
3714 if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3715 (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3716 (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3719 if (dev->data->nb_rx_queues != ETH_DCB_NUM_QUEUES)
/* Configure DCB hardware */
3723 ixgbe_dcb_hw_configure(dev, dcb_cfg);
 * VMDq is only supported on 10 GbE NICs.
3732 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3734 struct rte_eth_vmdq_rx_conf *cfg;
3735 struct ixgbe_hw *hw;
3736 enum rte_eth_nb_pools num_pools;
3737 uint32_t mrqc, vt_ctl, vlanctrl;
3741 PMD_INIT_FUNC_TRACE();
3742 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3743 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3744 num_pools = cfg->nb_queue_pools;
3746 ixgbe_rss_disable(dev);
3748 /* MRQC: enable vmdq */
3749 mrqc = IXGBE_MRQC_VMDQEN;
3750 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3752 /* PFVTCTL: turn on virtualisation and set the default pool */
3753 vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3754 if (cfg->enable_default_pool)
3755 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3757 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3759 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3761 for (i = 0; i < (int)num_pools; i++) {
3762 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3763 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3766 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3767 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3768 vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3769 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3771 /* VFTA - enable all vlan filters */
3772 for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3773 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3775 /* VFRE: pool enabling for receive - 64 */
3776 IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3777 if (num_pools == ETH_64_POOLS)
3778 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3781 * MPSAR - allow pools to read specific mac addresses
3782 * In this case, all pools should be able to read from mac addr 0
3784 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3785 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3787 /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3788 for (i = 0; i < cfg->nb_pool_maps; i++) {
3789 /* set vlan id in VF register and set the valid bit */
IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
		(cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3793 * Put the allowed pools in VFB reg. As we only have 16 or 64
3794 * pools, we only need to use the first half of the register
if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
	IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
			(cfg->pool_map[i].pools & UINT32_MAX));
else
	IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
			((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
/* PFDMA Tx General Switch Control: enables VMDq loopback */
3808 if (cfg->enable_loop_back) {
3809 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3810 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3811 IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3814 IXGBE_WRITE_FLUSH(hw);
 * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3819 * @hw: pointer to hardware structure
3822 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3827 PMD_INIT_FUNC_TRACE();
/* PF VF Transmit Enable */
3829 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3830 IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3832 /* Disable the Tx desc arbiter so that MTQC can be changed */
3833 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3834 reg |= IXGBE_RTTDCS_ARBDIS;
3835 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3837 reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3838 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3840 /* Disable drop for all queues */
3841 for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3842 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3843 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
        /* Enable the Tx desc arbiter */
        reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
        reg &= ~IXGBE_RTTDCS_ARBDIS;
        IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);

        IXGBE_WRITE_FLUSH(hw);
}
static int __attribute__((cold))
ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
{
        struct ixgbe_rx_entry *rxe = rxq->sw_ring;
        uint64_t dma_addr;
        unsigned int i;

        /* Initialize software ring entries */
        for (i = 0; i < rxq->nb_rx_desc; i++) {
                volatile union ixgbe_adv_rx_desc *rxd;
                struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);

                if (mbuf == NULL) {
                        PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
                                     (unsigned) rxq->queue_id);
                        return -ENOMEM;
                }

                rte_mbuf_refcnt_set(mbuf, 1);
                mbuf->next = NULL;
                mbuf->data_off = RTE_PKTMBUF_HEADROOM;
                mbuf->nb_segs = 1;
                mbuf->port = rxq->port_id;

                dma_addr =
                        rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
                rxd = &rxq->rx_ring[i];
                rxd->read.hdr_addr = 0;
                rxd->read.pkt_addr = dma_addr;
                rxe[i].mbuf = mbuf;
        }

        return 0;
}
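/*
 * Note: zeroing rxd->read.hdr_addr in the loop above is not cosmetic. The
 * hardware writeback descriptor format overlays the read format, and the DD
 * ("descriptor done") status lands in the same dword, so a stale non-zero
 * header address could otherwise be misread as a completed descriptor.
 */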
static int
ixgbe_config_vf_rss(struct rte_eth_dev *dev)
{
        struct ixgbe_hw *hw;
        uint32_t mrqc;

        ixgbe_rss_configure(dev);

        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        /* MRQC: enable VF RSS */
        mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
        mrqc &= ~IXGBE_MRQC_MRQE_MASK;
        switch (RTE_ETH_DEV_SRIOV(dev).active) {
        case ETH_64_POOLS:
                mrqc |= IXGBE_MRQC_VMDQRSS64EN;
                break;

        case ETH_32_POOLS:
                mrqc |= IXGBE_MRQC_VMDQRSS32EN;
                break;

        default:
                PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
                return -EINVAL;
        }

        IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);

        return 0;
}
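/*
 * Usage sketch (illustrative, not part of the driver): an application reaches
 * this path by requesting an SRIOV-compatible RSS mode before
 * rte_eth_dev_start(), e.g.:
 *
 *     struct rte_eth_conf port_conf = { 0 };
 *     port_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_RSS;
 *     port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP;
 *     rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
 */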
static int
ixgbe_config_vf_default(struct rte_eth_dev *dev)
{
        struct ixgbe_hw *hw =
                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        switch (RTE_ETH_DEV_SRIOV(dev).active) {
        case ETH_64_POOLS:
                IXGBE_WRITE_REG(hw, IXGBE_MRQC,
                                IXGBE_MRQC_VMDQEN);
                break;

        case ETH_32_POOLS:
                IXGBE_WRITE_REG(hw, IXGBE_MRQC,
                                IXGBE_MRQC_VMDQRT4TCEN);
                break;

        case ETH_16_POOLS:
                IXGBE_WRITE_REG(hw, IXGBE_MRQC,
                                IXGBE_MRQC_VMDQRT8TCEN);
                break;
        default:
                PMD_INIT_LOG(ERR,
                             "invalid pool number in IOV mode");
                break;
        }
        return 0;
}
static int
ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
{
        struct ixgbe_hw *hw =
                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        if (hw->mac.type == ixgbe_mac_82598EB)
                return 0;

        if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
                /*
                 * SRIOV inactive scheme
                 * any DCB/RSS w/o VMDq multi-queue setting
                 */
                switch (dev->data->dev_conf.rxmode.mq_mode) {
                case ETH_MQ_RX_RSS:
                case ETH_MQ_RX_DCB_RSS:
                case ETH_MQ_RX_VMDQ_RSS:
                        ixgbe_rss_configure(dev);
                        break;

                case ETH_MQ_RX_VMDQ_DCB:
                        ixgbe_vmdq_dcb_configure(dev);
                        break;

                case ETH_MQ_RX_VMDQ_ONLY:
                        ixgbe_vmdq_rx_hw_configure(dev);
                        break;

                case ETH_MQ_RX_NONE:
                default:
                        /* if mq_mode is none, disable rss mode. */
                        ixgbe_rss_disable(dev);
                        break;
                }
        } else {
                /*
                 * SRIOV active scheme
                 * Support RSS together with VMDq & SRIOV
                 */
                switch (dev->data->dev_conf.rxmode.mq_mode) {
                case ETH_MQ_RX_RSS:
                case ETH_MQ_RX_VMDQ_RSS:
                        ixgbe_config_vf_rss(dev);
                        break;

                /* FIXME if support DCB/RSS together with VMDq & SRIOV */
                case ETH_MQ_RX_VMDQ_DCB:
                case ETH_MQ_RX_VMDQ_DCB_RSS:
                        PMD_INIT_LOG(ERR,
                                     "Could not support DCB with VMDq & SRIOV");
                        return -1;

                default:
                        ixgbe_config_vf_default(dev);
                        break;
                }
        }

        return 0;
}
static int
ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
{
        struct ixgbe_hw *hw =
                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t mtqc;
        uint32_t rttdcs;

        if (hw->mac.type == ixgbe_mac_82598EB)
                return 0;

        /* disable arbiter before setting MTQC */
        rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
        rttdcs |= IXGBE_RTTDCS_ARBDIS;
        IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);

        if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
                /*
                 * SRIOV inactive scheme
                 * any DCB w/o VMDq multi-queue setting
                 */
                if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
                        ixgbe_vmdq_tx_hw_configure(hw);
                else {
                        mtqc = IXGBE_MTQC_64Q_1PB;
                        IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
                }
        } else {
                switch (RTE_ETH_DEV_SRIOV(dev).active) {
                /*
                 * SRIOV active scheme
                 * FIXME if support DCB together with VMDq & SRIOV
                 */
                case ETH_64_POOLS:
                        mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
                        break;
                case ETH_32_POOLS:
                        mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
                        break;
                case ETH_16_POOLS:
                        mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
                                IXGBE_MTQC_8TC_8TQ;
                        break;
                default:
                        mtqc = IXGBE_MTQC_64Q_1PB;
                        PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
                }
                IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
        }

        /* re-enable arbiter */
        rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
        IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);

        return 0;
}
/**
 * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
 *
 * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
 * spec rev. 3.0 chapter 8.2.3.8.13.
 *
 * @pool Memory pool of the Rx queue
 */
static inline uint32_t
ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
{
        struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);

        /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
        uint16_t maxdesc =
                IPV4_MAX_PKT_LEN /
                        (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);

        if (maxdesc >= 16)
                return IXGBE_RSCCTL_MAXDESC_16;
        else if (maxdesc >= 8)
                return IXGBE_RSCCTL_MAXDESC_8;
        else if (maxdesc >= 4)
                return IXGBE_RSCCTL_MAXDESC_4;
        else
                return IXGBE_RSCCTL_MAXDESC_1;
}
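/*
 * Worked example (illustrative): with the common 2 KB mbuf data room left
 * after RTE_PKTMBUF_HEADROOM, maxdesc = 65535 / 2048 = 31, which clamps to
 * IXGBE_RSCCTL_MAXDESC_16. A 9 KB per-buffer payload would instead give
 * 65535 / 9216 = 7 -> IXGBE_RSCCTL_MAXDESC_4.
 */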
/**
 * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
 * interrupt vector.
 *
 * (Taken from FreeBSD tree)
 * (yes this is all very magic and confusing :)
 *
 * @dev port handle
 * @entry the register array entry
 * @vector the MSIX vector for this queue
 * @type RX/TX/MISC
 */
static void
ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
{
        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        u32 ivar, index;

        vector |= IXGBE_IVAR_ALLOC_VAL;

        switch (hw->mac.type) {

        case ixgbe_mac_82598EB:
                if (type == -1)
                        entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
                else
                        entry += (type * 64);
                index = (entry >> 2) & 0x1F;
                ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
                ivar &= ~(0xFF << (8 * (entry & 0x3)));
                ivar |= (vector << (8 * (entry & 0x3)));
                IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
                break;

        case ixgbe_mac_82599EB:
        case ixgbe_mac_X540:
                if (type == -1) { /* MISC IVAR */
                        index = (entry & 1) * 8;
                        ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
                        ivar &= ~(0xFF << index);
                        ivar |= (vector << index);
                        IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
                } else { /* RX/TX IVARS */
                        index = (16 * (entry & 1)) + (8 * type);
                        ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
                        ivar &= ~(0xFF << index);
                        ivar |= (vector << index);
                        IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
                }
                break;

        default:
                break;
        }
}
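/*
 * Worked example (illustrative): mapping RX queue 5 (type 0) on 82599 targets
 * IVAR(5 >> 1) = IVAR(2); the byte-lane index is 16 * (5 & 1) + 8 * 0 = 16,
 * so bits 23:16 of that register receive the vector, with
 * IXGBE_IVAR_ALLOC_VAL set to mark the entry valid.
 */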
void __attribute__((cold))
ixgbe_set_rx_function(struct rte_eth_dev *dev)
{
        uint16_t i, rx_using_sse;
        struct ixgbe_adapter *adapter =
                (struct ixgbe_adapter *)dev->data->dev_private;

        /*
         * In order to allow Vector Rx there are a few configuration
         * conditions to be met and Rx Bulk Allocation should be allowed.
         */
        if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
            !adapter->rx_bulk_alloc_allowed) {
                PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
                                    "preconditions or RTE_IXGBE_INC_VECTOR is "
                                    "not enabled",
                             dev->data->port_id);

                adapter->rx_vec_allowed = false;
        }

        /*
         * Initialize the appropriate LRO callback.
         *
         * If all queues satisfy the bulk allocation preconditions
         * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk
         * allocation. Otherwise use a single allocation version.
         */
        if (dev->data->lro) {
                if (adapter->rx_bulk_alloc_allowed) {
                        PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
                                            "allocation version");
                        dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
                } else {
                        PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
                                            "allocation version");
                        dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
                }
        } else if (dev->data->scattered_rx) {
                /*
                 * Set the non-LRO scattered callback: there are Vector and
                 * single allocation versions.
                 */
                if (adapter->rx_vec_allowed) {
                        PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
                                            "callback (port=%d).",
                                     dev->data->port_id);

                        dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
                } else if (adapter->rx_bulk_alloc_allowed) {
                        PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
                                            "allocation callback (port=%d).",
                                     dev->data->port_id);
                        dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
                } else {
                        PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
                                            "single allocation) "
                                            "Scattered Rx callback "
                                            "(port=%d).",
                                     dev->data->port_id);

                        dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
                }
        /*
         * Below we set "simple" callbacks according to port/queues parameters.
         * If parameters allow we are going to choose between the following
         * callbacks:
         *    - Vector
         *    - Bulk Allocation
         *    - Single buffer allocation (the simplest one)
         */
        } else if (adapter->rx_vec_allowed) {
                PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
                                    "burst size is no less than %d (port=%d).",
                             RTE_IXGBE_DESCS_PER_LOOP,
                             dev->data->port_id);

                dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
        } else if (adapter->rx_bulk_alloc_allowed) {
                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
                                    "satisfied. Rx Burst Bulk Alloc function "
                                    "will be used on port=%d.",
                             dev->data->port_id);

                dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
        } else {
                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
                                    "satisfied, or Scattered Rx is requested "
                                    "(port=%d).",
                             dev->data->port_id);

                dev->rx_pkt_burst = ixgbe_recv_pkts;
        }

        /* Propagate information about RX function choice through all queues */
        rx_using_sse =
                (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];

                rxq->rx_using_sse = rx_using_sse;
        }
}
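/*
 * Selection order recap (illustrative): LRO (bulk or single alloc) takes
 * precedence, then scattered Rx (vector > bulk alloc > single alloc), then
 * the "simple" paths (vector > bulk alloc > single alloc). Only the vector
 * callbacks cause rxq->rx_using_sse to be set.
 */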
/**
 * ixgbe_set_rsc - configure RSC related port HW registers
 *
 * Configures the port's RSC related registers according to the 4.6.7.2 chapter
 * of 82599 Spec (x540 configuration is virtually the same).
 *
 * @dev port handle
 *
 * Returns 0 in case of success or a non-zero error code
 */
static int
ixgbe_set_rsc(struct rte_eth_dev *dev)
{
        struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct rte_eth_dev_info dev_info = { 0 };
        bool rsc_capable = false;
        uint16_t i;
        uint32_t rdrxctl;

        /* Sanity check */
        dev->dev_ops->dev_infos_get(dev, &dev_info);
        if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
                rsc_capable = true;

        if (!rsc_capable && rx_conf->enable_lro) {
                PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
                                   "support it");
                return -EINVAL;
        }

        /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */

        if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
                /*
                 * According to chapter 4.6.7.2.1 of the Spec Rev. 3.0, RSC
                 * configuration requires HW CRC stripping to be enabled. If
                 * the user requested both HW CRC stripping off and RSC on,
                 * return an error.
                 */
                PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
                                   "is disabled");
                return -EINVAL;
        }

        /* RFCTL configuration */
        {
                uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);

                if (rx_conf->enable_lro)
                        /*
                         * Since NFS packet coalescing is not supported, clear
                         * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
                         * enabled.
                         */
                        rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
                                   IXGBE_RFCTL_NFSR_DIS);
                else
                        rfctl |= IXGBE_RFCTL_RSC_DIS;

                IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
        }

        /* If LRO hasn't been requested - we are done here. */
        if (!rx_conf->enable_lro)
                return 0;

        /* Set RDRXCTL.RSCACKC bit */
        rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
        rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
        IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);

        /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
                uint32_t srrctl =
                        IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
                uint32_t rscctl =
                        IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
                uint32_t psrtype =
                        IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
                uint32_t eitr =
                        IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));

                /*
                 * ixgbe PMD doesn't support header-split at the moment.
                 *
                 * Following chapter 4.6.7.2.1 of the 82599/x540 Spec, if RSC
                 * is enabled then SRRCTL[n].BSIZEHEADER should be configured
                 * even if header split is not enabled. We configure it to
                 * 128 bytes, following the recommendation in the spec.
                 */
                srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
                srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
                          IXGBE_SRRCTL_BSIZEHDR_MASK;

                /*
                 * TODO: Consider setting the Receive Descriptor Minimum
                 * Threshold Size for an RSC case. This is not an obviously
                 * beneficial option but one worth considering...
                 */

                rscctl |= IXGBE_RSCCTL_RSCEN;
                rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
                psrtype |= IXGBE_PSRTYPE_TCPHDR;

                /*
                 * RSC: Set ITR interval corresponding to 2K ints/s.
                 *
                 * Full-sized RSC aggregations for a 10Gb/s link will
                 * arrive at about 20K aggregations/s rate.
                 *
                 * A 2K ints/s rate will make only 10% of the
                 * aggregations close due to interrupt timer expiration
                 * when streaming at wire speed.
                 *
                 * For a sparse streaming case this setting will yield
                 * at most 500us latency for a single RSC aggregation.
                 */
                eitr &= ~IXGBE_EITR_ITR_INT_MASK;
                eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;

                IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
                IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
                IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
                IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);

                /*
                 * RSC requires the mapping of the queue to the
                 * interrupt vector.
                 */
                ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
        }

        dev->data->lro = 1;

        PMD_INIT_LOG(DEBUG, "enabling LRO mode");

        return 0;
}
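/*
 * Usage sketch (illustrative, not part of the driver): an application enables
 * the RSC/LRO path configured above by requesting LRO together with HW CRC
 * stripping before rte_eth_dev_start():
 *
 *     struct rte_eth_conf port_conf = { 0 };
 *     port_conf.rxmode.enable_lro = 1;     (request RSC/LRO)
 *     port_conf.rxmode.hw_strip_crc = 1;   (required by 82599/x540 RSC)
 *     rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
 */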
/*
 * Initializes Receive Unit.
 */
int __attribute__((cold))
ixgbe_dev_rx_init(struct rte_eth_dev *dev)
{
        struct ixgbe_hw *hw;
        struct ixgbe_rx_queue *rxq;
        uint64_t bus_addr;
        uint32_t rxctrl;
        uint32_t fctrl;
        uint32_t hlreg0;
        uint32_t maxfrs;
        uint32_t srrctl;
        uint32_t rdrxctl;
        uint32_t rxcsum;
        uint16_t buf_size;
        uint16_t i;
        struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
        int rc;

        PMD_INIT_FUNC_TRACE();
        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        /*
         * Make sure receives are disabled while setting
         * up the RX context (registers, descriptor rings, etc.).
         */
        rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
        IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);

        /* Enable receipt of broadcasted frames */
        fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
        fctrl |= IXGBE_FCTRL_BAM;
        fctrl |= IXGBE_FCTRL_DPF;
        fctrl |= IXGBE_FCTRL_PMCF;
        IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);

        /*
         * Configure CRC stripping, if any.
         */
        hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
        if (rx_conf->hw_strip_crc)
                hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
        else
                hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;

        /*
         * Configure jumbo frame support, if any.
         */
        if (rx_conf->jumbo_frame == 1) {
                hlreg0 |= IXGBE_HLREG0_JUMBOEN;
                /* MAXFRS holds the maximum frame size in its upper 16 bits */
                maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
                maxfrs &= 0x0000FFFF;
                maxfrs |= (rx_conf->max_rx_pkt_len << 16);
                IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
        } else
                hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;

        /*
         * If loopback mode is configured for 82599, set LPBK bit.
         */
        if (hw->mac.type == ixgbe_mac_82599EB &&
            dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
                hlreg0 |= IXGBE_HLREG0_LPBK;
        else
                hlreg0 &= ~IXGBE_HLREG0_LPBK;

        IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);

        /* Setup RX queues */
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                rxq = dev->data->rx_queues[i];

                /*
                 * Reset crc_len in case it was changed after queue setup by a
                 * call to configure.
                 */
                rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;

                /* Setup the Base and Length of the Rx Descriptor Rings */
                bus_addr = rxq->rx_ring_phys_addr;
                IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
                                (uint32_t)(bus_addr & 0x00000000ffffffffULL));
                IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
                                (uint32_t)(bus_addr >> 32));
                IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
                                rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
                IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
                IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);

                /* Configure the SRRCTL register */
#ifdef RTE_HEADER_SPLIT_ENABLE
                /*
                 * Configure Header Split
                 */
                if (rx_conf->header_split) {
                        if (hw->mac.type == ixgbe_mac_82599EB) {
                                /* Must setup the PSRTYPE register */
                                uint32_t psrtype;

                                psrtype = IXGBE_PSRTYPE_TCPHDR |
                                        IXGBE_PSRTYPE_UDPHDR |
                                        IXGBE_PSRTYPE_IPV4HDR |
                                        IXGBE_PSRTYPE_IPV6HDR;
                                IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
                        }
                        srrctl = ((rx_conf->split_hdr_size <<
                                   IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
                                  IXGBE_SRRCTL_BSIZEHDR_MASK);
                        srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
                } else
#endif
                        srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;

                /* Set if packets are dropped when no descriptors available */
                if (rxq->drop_en)
                        srrctl |= IXGBE_SRRCTL_DROP_EN;

                /*
                 * Configure the RX buffer size in the BSIZEPACKET field of
                 * the SRRCTL register of the queue.
                 * The value is in 1 KB resolution. Valid values can be from
                 * 1 KB to 16 KB.
                 */
                buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
                        RTE_PKTMBUF_HEADROOM);
                srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
                           IXGBE_SRRCTL_BSIZEPKT_MASK);

                IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);

                buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
                                       IXGBE_SRRCTL_BSIZEPKT_SHIFT);

                /* It adds dual VLAN length for supporting dual VLAN */
                if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
                    2 * IXGBE_VLAN_TAG_SIZE > buf_size)
                        dev->data->scattered_rx = 1;
        }
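        /*
         * Worked example (illustrative): a pool created with
         * RTE_MBUF_DEFAULT_BUF_SIZE leaves buf_size = 2048 after headroom,
         * so BSIZEPKT = 2048 >> 10 = 2 (in 1 KB units) and the read-back
         * above yields 2 << 10 = 2048. Any max_rx_pkt_len above 2040
         * (2048 minus the 2 * 4-byte dual-VLAN allowance) then forces
         * scattered Rx.
         */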
        if (rx_conf->enable_scatter)
                dev->data->scattered_rx = 1;

        /*
         * Device configured with multiple RX queues.
         */
        ixgbe_dev_mq_rx_configure(dev);

        /*
         * Setup the Checksum Register.
         * Disable Full-Packet Checksum which is mutually exclusive with RSS.
         * Enable IP/L4 checksum computation by hardware if requested to do so.
         */
        rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
        rxcsum |= IXGBE_RXCSUM_PCSD;
        if (rx_conf->hw_ip_checksum)
                rxcsum |= IXGBE_RXCSUM_IPPCSE;
        else
                rxcsum &= ~IXGBE_RXCSUM_IPPCSE;

        IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);

        if (hw->mac.type == ixgbe_mac_82599EB ||
            hw->mac.type == ixgbe_mac_X540) {
                rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
                if (rx_conf->hw_strip_crc)
                        rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
                else
                        rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
                rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
                IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
        }

        rc = ixgbe_set_rsc(dev);
        if (rc)
                return rc;

        ixgbe_set_rx_function(dev);

        return 0;
}
/*
 * Initializes Transmit Unit.
 */
void __attribute__((cold))
ixgbe_dev_tx_init(struct rte_eth_dev *dev)
{
        struct ixgbe_hw *hw;
        struct ixgbe_tx_queue *txq;
        uint64_t bus_addr;
        uint32_t hlreg0;
        uint32_t txctrl;
        uint16_t i;

        PMD_INIT_FUNC_TRACE();
        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        /* Enable TX CRC (checksum offload requirement) and hw padding
         * (TSO requirement)
         */
        hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
        hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
        IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);

        /* Setup the Base and Length of the Tx Descriptor Rings */
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                txq = dev->data->tx_queues[i];

                bus_addr = txq->tx_ring_phys_addr;
                IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
                                (uint32_t)(bus_addr & 0x00000000ffffffffULL));
                IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
                                (uint32_t)(bus_addr >> 32));
                IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
                                txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
                /* Setup the HW Tx Head and TX Tail descriptor pointers */
                IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
                IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);

                /*
                 * Disable Tx Head Writeback RO bit, since this hoses
                 * bookkeeping if things aren't delivered in order.
                 */
                switch (hw->mac.type) {
                case ixgbe_mac_82598EB:
                        txctrl = IXGBE_READ_REG(hw,
                                                IXGBE_DCA_TXCTRL(txq->reg_idx));
                        txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
                        IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
                                        txctrl);
                        break;

                case ixgbe_mac_82599EB:
                case ixgbe_mac_X540:
                case ixgbe_mac_X550:
                case ixgbe_mac_X550EM_x:
                case ixgbe_mac_X550EM_a:
                default:
                        txctrl = IXGBE_READ_REG(hw,
                                                IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
                        txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
                        IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
                                        txctrl);
                        break;
                }
        }

        /* Device configured with multiple TX queues. */
        ixgbe_dev_mq_tx_configure(dev);
}
/*
 * Set up link for 82599 loopback mode Tx->Rx.
 */
static inline void __attribute__((cold))
ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
{
        PMD_INIT_FUNC_TRACE();

        if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
                if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
                    IXGBE_SUCCESS) {
                        PMD_INIT_LOG(ERR, "Could not enable loopback mode");
                        /* ignore error */
                        return;
                }
        }

        /* Restart link */
        IXGBE_WRITE_REG(hw,
                        IXGBE_AUTOC,
                        IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
        ixgbe_reset_pipeline_82599(hw);

        hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
        msec_delay(50);
}
/*
 * Start Transmit and Receive Units.
 */
int __attribute__((cold))
ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
{
        struct ixgbe_hw *hw;
        struct ixgbe_tx_queue *txq;
        struct ixgbe_rx_queue *rxq;
        uint32_t txdctl;
        uint32_t dmatxctl;
        uint32_t rxctrl;
        uint16_t i;
        int ret = 0;

        PMD_INIT_FUNC_TRACE();
        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                txq = dev->data->tx_queues[i];
                /* Setup Transmit Threshold Registers */
                txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
                txdctl |= txq->pthresh & 0x7F;
                txdctl |= ((txq->hthresh & 0x7F) << 8);
                txdctl |= ((txq->wthresh & 0x7F) << 16);
                IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
        }
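        /*
         * Note (illustrative): TXDCTL packs the prefetch, host and write-back
         * thresholds into bits 6:0, 14:8 and 22:16 respectively; e.g.
         * pthresh = 32, hthresh = 0, wthresh = 0 yields txdctl |= 0x20.
         */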
        if (hw->mac.type != ixgbe_mac_82598EB) {
                dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
                dmatxctl |= IXGBE_DMATXCTL_TE;
                IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                txq = dev->data->tx_queues[i];
                if (!txq->tx_deferred_start) {
                        ret = ixgbe_dev_tx_queue_start(dev, i);
                        if (ret < 0)
                                return ret;
                }
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                rxq = dev->data->rx_queues[i];
                if (!rxq->rx_deferred_start) {
                        ret = ixgbe_dev_rx_queue_start(dev, i);
                        if (ret < 0)
                                return ret;
                }
        }

        /* Enable Receive engine */
        rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
        if (hw->mac.type == ixgbe_mac_82598EB)
                rxctrl |= IXGBE_RXCTRL_DMBYPS;
        rxctrl |= IXGBE_RXCTRL_RXEN;
        hw->mac.ops.enable_rx_dma(hw, rxctrl);

        /* If loopback mode is enabled for 82599, set up the link accordingly */
        if (hw->mac.type == ixgbe_mac_82599EB &&
            dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
                ixgbe_setup_loopback_link_82599(hw);

        return 0;
}
/*
 * Start Receive Units for specified queue.
 */
int __attribute__((cold))
ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
        struct ixgbe_hw *hw;
        struct ixgbe_rx_queue *rxq;
        uint32_t rxdctl;
        int poll_ms;

        PMD_INIT_FUNC_TRACE();
        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        if (rx_queue_id < dev->data->nb_rx_queues) {
                rxq = dev->data->rx_queues[rx_queue_id];

                /* Allocate buffers for descriptor rings */
                if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
                        PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
                                     rx_queue_id);
                        return -1;
                }
                rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
                rxdctl |= IXGBE_RXDCTL_ENABLE;
                IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);

                /* Wait until the RX Enable bit is set */
                poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
                do {
                        rte_delay_ms(1);
                        rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
                } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
                if (!poll_ms)
                        PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
                                     rx_queue_id);
                rte_wmb();
                IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
                IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
                dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
        } else
                return -1;

        return 0;
}
/*
 * Stop Receive Units for specified queue.
 */
int __attribute__((cold))
ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
        struct ixgbe_hw *hw;
        struct ixgbe_adapter *adapter =
                (struct ixgbe_adapter *)dev->data->dev_private;
        struct ixgbe_rx_queue *rxq;
        uint32_t rxdctl;
        int poll_ms;

        PMD_INIT_FUNC_TRACE();
        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        if (rx_queue_id < dev->data->nb_rx_queues) {
                rxq = dev->data->rx_queues[rx_queue_id];

                rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
                rxdctl &= ~IXGBE_RXDCTL_ENABLE;
                IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);

                /* Wait until the RX Enable bit clears */
                poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
                do {
                        rte_delay_ms(1);
                        rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
                } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
                if (!poll_ms)
                        PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
                                     rx_queue_id);

                rte_delay_us(RTE_IXGBE_WAIT_100_US);

                ixgbe_rx_queue_release_mbufs(rxq);
                ixgbe_reset_rx_queue(adapter, rxq);
                dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
        } else
                return -1;

        return 0;
}
/*
 * Start Transmit Units for specified queue.
 */
int __attribute__((cold))
ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
        struct ixgbe_hw *hw;
        struct ixgbe_tx_queue *txq;
        uint32_t txdctl;
        int poll_ms;

        PMD_INIT_FUNC_TRACE();
        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        if (tx_queue_id < dev->data->nb_tx_queues) {
                txq = dev->data->tx_queues[tx_queue_id];
                txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
                txdctl |= IXGBE_TXDCTL_ENABLE;
                IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);

                /* Wait until the TX Enable bit is set */
                if (hw->mac.type == ixgbe_mac_82599EB) {
                        poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
                        do {
                                rte_delay_ms(1);
                                txdctl = IXGBE_READ_REG(hw,
                                                        IXGBE_TXDCTL(txq->reg_idx));
                        } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
                        if (!poll_ms)
                                PMD_INIT_LOG(ERR, "Could not enable "
                                             "Tx Queue %d", tx_queue_id);
                }
                rte_wmb();
                IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
                IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
                dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
        } else
                return -1;

        return 0;
}
/*
 * Stop Transmit Units for specified queue.
 */
int __attribute__((cold))
ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
        struct ixgbe_hw *hw;
        struct ixgbe_tx_queue *txq;
        uint32_t txdctl;
        uint32_t txtdh, txtdt;
        int poll_ms;

        PMD_INIT_FUNC_TRACE();
        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        if (tx_queue_id < dev->data->nb_tx_queues) {
                txq = dev->data->tx_queues[tx_queue_id];

                /* Wait until TX queue is empty */
                if (hw->mac.type == ixgbe_mac_82599EB) {
                        poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
                        do {
                                rte_delay_us(RTE_IXGBE_WAIT_100_US);
                                txtdh = IXGBE_READ_REG(hw,
                                                       IXGBE_TDH(txq->reg_idx));
                                txtdt = IXGBE_READ_REG(hw,
                                                       IXGBE_TDT(txq->reg_idx));
                        } while (--poll_ms && (txtdh != txtdt));
                        if (!poll_ms)
                                PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
                                             "when stopping.", tx_queue_id);
                }

                txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
                txdctl &= ~IXGBE_TXDCTL_ENABLE;
                IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);

                /* Wait until the TX Enable bit clears */
                if (hw->mac.type == ixgbe_mac_82599EB) {
                        poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
                        do {
                                rte_delay_ms(1);
                                txdctl = IXGBE_READ_REG(hw,
                                                        IXGBE_TXDCTL(txq->reg_idx));
                        } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
                        if (!poll_ms)
                                PMD_INIT_LOG(ERR, "Could not disable "
                                             "Tx Queue %d", tx_queue_id);
                }

                if (txq->ops != NULL) {
                        txq->ops->release_mbufs(txq);
                        txq->ops->reset(txq);
                }
                dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
        } else
                return -1;

        return 0;
}
void
ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
        struct rte_eth_rxq_info *qinfo)
{
        struct ixgbe_rx_queue *rxq;

        rxq = dev->data->rx_queues[queue_id];

        qinfo->mp = rxq->mb_pool;
        qinfo->scattered_rx = dev->data->scattered_rx;
        qinfo->nb_desc = rxq->nb_rx_desc;

        qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
        qinfo->conf.rx_drop_en = rxq->drop_en;
        qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
}

void
ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
        struct rte_eth_txq_info *qinfo)
{
        struct ixgbe_tx_queue *txq;

        txq = dev->data->tx_queues[queue_id];

        qinfo->nb_desc = txq->nb_tx_desc;

        qinfo->conf.tx_thresh.pthresh = txq->pthresh;
        qinfo->conf.tx_thresh.hthresh = txq->hthresh;
        qinfo->conf.tx_thresh.wthresh = txq->wthresh;

        qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
        qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
        qinfo->conf.txq_flags = txq->txq_flags;
        qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
}
/*
 * [VF] Initializes Receive Unit.
 */
int __attribute__((cold))
ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
{
        struct ixgbe_hw *hw;
        struct ixgbe_rx_queue *rxq;
        uint64_t bus_addr;
        uint32_t srrctl, psrtype = 0;
        uint16_t buf_size;
        uint16_t i;
        int ret;

        PMD_INIT_FUNC_TRACE();
        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
                PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
                             "it should be a power of 2");
                return -1;
        }

        if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
                PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
                             "it should be equal to or less than %d",
                             hw->mac.max_rx_queues);
                return -1;
        }

        /*
         * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
         * disables the VF receipt of packets if the PF MTU is > 1500.
         * This is done to deal with a limitation of the 82599 that forces
         * the PF and all VFs to share the same MTU.
         * Then, the PF driver enables the VF receipt of packets again when
         * the VF driver issues a IXGBE_VF_SET_LPE request.
         * In the meantime, the VF device cannot be used, even if the VF driver
         * and the Guest VM network stack are ready to accept packets with a
         * size up to the PF MTU.
         * As a work-around to this PF behaviour, force the call to
         * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
         * VF packet receipt can work in all cases.
         */
        ixgbevf_rlpml_set_vf(hw,
                (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);

        /* Setup RX queues */
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                rxq = dev->data->rx_queues[i];

                /* Allocate buffers for descriptor rings */
                ret = ixgbe_alloc_rx_queue_mbufs(rxq);
                if (ret)
                        return ret;

                /* Setup the Base and Length of the Rx Descriptor Rings */
                bus_addr = rxq->rx_ring_phys_addr;

                IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
                                (uint32_t)(bus_addr & 0x00000000ffffffffULL));
                IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
                                (uint32_t)(bus_addr >> 32));
                IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
                                rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
                IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
                IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);

                /* Configure the SRRCTL register */
#ifdef RTE_HEADER_SPLIT_ENABLE
                /*
                 * Configure Header Split
                 */
                if (dev->data->dev_conf.rxmode.header_split) {
                        srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
                                   IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
                                  IXGBE_SRRCTL_BSIZEHDR_MASK);
                        srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
                } else
#endif
                        srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;

                /* Set if packets are dropped when no descriptors available */
                if (rxq->drop_en)
                        srrctl |= IXGBE_SRRCTL_DROP_EN;

                /*
                 * Configure the RX buffer size in the BSIZEPACKET field of
                 * the SRRCTL register of the queue.
                 * The value is in 1 KB resolution. Valid values can be from
                 * 1 KB to 16 KB.
                 */
                buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
                        RTE_PKTMBUF_HEADROOM);
                srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
                           IXGBE_SRRCTL_BSIZEPKT_MASK);

                /*
                 * VF modification to write virtual function SRRCTL register
                 */
                IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);

                buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
                                       IXGBE_SRRCTL_BSIZEPKT_SHIFT);

                if (dev->data->dev_conf.rxmode.enable_scatter ||
                    /* It adds dual VLAN length for supporting dual VLAN */
                    (dev->data->dev_conf.rxmode.max_rx_pkt_len +
                     2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
                        if (!dev->data->scattered_rx)
                                PMD_INIT_LOG(DEBUG, "forcing scatter mode");
                        dev->data->scattered_rx = 1;
                }
        }

#ifdef RTE_HEADER_SPLIT_ENABLE
        if (dev->data->dev_conf.rxmode.header_split)
                /* Must setup the PSRTYPE register */
                psrtype = IXGBE_PSRTYPE_TCPHDR |
                        IXGBE_PSRTYPE_UDPHDR |
                        IXGBE_PSRTYPE_IPV4HDR |
                        IXGBE_PSRTYPE_IPV6HDR;
#endif
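        /*
         * Worked example (illustrative): RQPL selects the number of RSS
         * queues per pool as a power of two. With 4 Rx queues this writes 2,
         * with 2 queues it writes 1; nb_rx_queues >> 1 happens to match the
         * required encoding for the 1/2/4 queue counts a VF can use here.
         */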
        /* Set RQPL for VF RSS according to max Rx queue */
        psrtype |= (dev->data->nb_rx_queues >> 1) <<
                IXGBE_PSRTYPE_RQPL_SHIFT;
        IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);

        ixgbe_set_rx_function(dev);

        return 0;
}
/*
 * [VF] Initializes Transmit Unit.
 */
void __attribute__((cold))
ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
{
        struct ixgbe_hw *hw;
        struct ixgbe_tx_queue *txq;
        uint64_t bus_addr;
        uint32_t txctrl;
        uint16_t i;

        PMD_INIT_FUNC_TRACE();
        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        /* Setup the Base and Length of the Tx Descriptor Rings */
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                txq = dev->data->tx_queues[i];
                bus_addr = txq->tx_ring_phys_addr;
                IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
                                (uint32_t)(bus_addr & 0x00000000ffffffffULL));
                IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
                                (uint32_t)(bus_addr >> 32));
                IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
                                txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
                /* Setup the HW Tx Head and TX Tail descriptor pointers */
                IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
                IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);

                /*
                 * Disable Tx Head Writeback RO bit, since this hoses
                 * bookkeeping if things aren't delivered in order.
                 */
                txctrl = IXGBE_READ_REG(hw,
                                        IXGBE_VFDCA_TXCTRL(i));
                txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
                IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
                                txctrl);
        }
}
/*
 * [VF] Start Transmit and Receive Units.
 */
void __attribute__((cold))
ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
{
        struct ixgbe_hw *hw;
        struct ixgbe_tx_queue *txq;
        struct ixgbe_rx_queue *rxq;
        uint32_t txdctl;
        uint32_t rxdctl;
        uint16_t i;
        int poll_ms;

        PMD_INIT_FUNC_TRACE();
        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                txq = dev->data->tx_queues[i];
                /* Setup Transmit Threshold Registers */
                txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
                txdctl |= txq->pthresh & 0x7F;
                txdctl |= ((txq->hthresh & 0x7F) << 8);
                txdctl |= ((txq->wthresh & 0x7F) << 16);
                IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
                txdctl |= IXGBE_TXDCTL_ENABLE;
                IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);

                /* Wait until the TX Enable bit is set */
                poll_ms = 10;
                do {
                        rte_delay_ms(1);
                        txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
                } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
                if (!poll_ms)
                        PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                rxq = dev->data->rx_queues[i];

                rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
                rxdctl |= IXGBE_RXDCTL_ENABLE;
                IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);

                /* Wait until the RX Enable bit is set */
                poll_ms = 10;
                do {
                        rte_delay_ms(1);
                        rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
                } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
                if (!poll_ms)
                        PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
                rte_wmb();
                IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
        }
}
/* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
int __attribute__((weak))
ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
{
        return -1;
}

uint16_t __attribute__((weak))
ixgbe_recv_pkts_vec(
        void __rte_unused *rx_queue,
        struct rte_mbuf __rte_unused **rx_pkts,
        uint16_t __rte_unused nb_pkts)
{
        return 0;
}

uint16_t __attribute__((weak))
ixgbe_recv_scattered_pkts_vec(
        void __rte_unused *rx_queue,
        struct rte_mbuf __rte_unused **rx_pkts,
        uint16_t __rte_unused nb_pkts)
{
        return 0;
}

int __attribute__((weak))
ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
{
        return -1;
}