4 * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <sys/queue.h>
44 #include <rte_interrupts.h>
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
48 #include <rte_debug.h>
50 #include <rte_memory.h>
51 #include <rte_memcpy.h>
52 #include <rte_memzone.h>
53 #include <rte_launch.h>
55 #include <rte_per_lcore.h>
56 #include <rte_lcore.h>
57 #include <rte_atomic.h>
58 #include <rte_branch_prediction.h>
59 #include <rte_mempool.h>
60 #include <rte_malloc.h>
62 #include <rte_ether.h>
63 #include <rte_ethdev.h>
64 #include <rte_prefetch.h>
69 #include <rte_string_fns.h>
71 #include "e1000_logs.h"
72 #include "base/e1000_api.h"
73 #include "e1000_ethdev.h"
75 #ifdef RTE_LIBRTE_IEEE1588
76 #define IGB_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
78 #define IGB_TX_IEEE1588_TMST 0
80 /* Bit mask to indicate which bits are required for building the TX context */
81 #define IGB_TX_OFFLOAD_MASK ( \
92 #define IGB_TX_OFFLOAD_NOTSUP_MASK \
93 (PKT_TX_OFFLOAD_MASK ^ IGB_TX_OFFLOAD_MASK)
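/*
 * Note (added for clarity): any mbuf whose ol_flags intersect
 * IGB_TX_OFFLOAD_NOTSUP_MASK carries an offload request this driver cannot
 * honour; eth_igb_prep_pkts() below rejects such packets, e.g.
 * (m->ol_flags & IGB_TX_OFFLOAD_NOTSUP_MASK) != 0 sets rte_errno to ENOTSUP.
 */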
96 * Structure associated with each descriptor of the RX ring of a RX queue.
99 struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
103 * Structure associated with each descriptor of the TX ring of a TX queue.
105 struct igb_tx_entry {
106 struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
107 uint16_t next_id; /**< Index of next descriptor in ring. */
108 uint16_t last_id; /**< Index of last scattered descriptor. */
115 IGB_RXQ_FLAG_LB_BSWAP_VLAN = 0x01,
119 * Structure associated with each RX queue.
121 struct igb_rx_queue {
122 struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
123 volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
124 uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
125 volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
126 volatile uint32_t *rdh_reg_addr; /**< RDH register address. */
127 struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
128 struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
129 struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
130 uint16_t nb_rx_desc; /**< number of RX descriptors. */
131 uint16_t rx_tail; /**< current value of RDT register. */
132 uint16_t nb_rx_hold; /**< number of held free RX desc. */
133 uint16_t rx_free_thresh; /**< max free RX desc to hold. */
134 uint16_t queue_id; /**< RX queue index. */
135 uint16_t reg_idx; /**< RX queue register index. */
136 uint16_t port_id; /**< Device port identifier. */
137 uint8_t pthresh; /**< Prefetch threshold register. */
138 uint8_t hthresh; /**< Host threshold register. */
139 uint8_t wthresh; /**< Write-back threshold register. */
140 uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
141 uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
142 uint32_t flags; /**< RX flags. */
146 * Hardware context number
148 enum igb_advctx_num {
149 IGB_CTX_0 = 0, /**< CTX0 */
150 IGB_CTX_1 = 1, /**< CTX1 */
151 IGB_CTX_NUM = 2, /**< CTX_NUM */
154 /** Offload features */
155 union igb_tx_offload {
158 uint64_t l3_len:9; /**< L3 (IP) Header Length. */
159 uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
160 uint64_t vlan_tci:16; /**< VLAN Tag Control Identifier(CPU order). */
161 uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
162 uint64_t tso_segsz:16; /**< TCP TSO segment size. */
164 /* uint64_t unused:8; */
169 * Compare mask for igb_tx_offload.data,
170 * should be in sync with igb_tx_offload layout.
172 #define TX_MACIP_LEN_CMP_MASK 0x000000000000FFFFULL /**< L2L3 header mask. */
173 #define TX_VLAN_CMP_MASK 0x00000000FFFF0000ULL /**< Vlan mask. */
174 #define TX_TCP_LEN_CMP_MASK 0x000000FF00000000ULL /**< TCP header mask. */
175 #define TX_TSO_MSS_CMP_MASK 0x00FFFF0000000000ULL /**< TSO segsz mask. */
176 /** Mac + IP + TCP + Mss mask. */
177 #define TX_TSO_CMP_MASK \
178 (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
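/*
 * Layout note (added for clarity): these masks mirror the igb_tx_offload
 * bit-field above -- bits 0..15 cover l3_len + l2_len (TX_MACIP_LEN_CMP_MASK),
 * bits 16..31 cover vlan_tci (TX_VLAN_CMP_MASK), bits 32..39 cover l4_len
 * (TX_TCP_LEN_CMP_MASK) and bits 40..55 cover tso_segsz (TX_TSO_MSS_CMP_MASK).
 */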
181 * Structure to check if a new context needs to be built
183 struct igb_advctx_info {
184 uint64_t flags; /**< ol_flags related to context build. */
185 /** tx offload: vlan, tso, l2-l3-l4 lengths. */
186 union igb_tx_offload tx_offload;
187 /** compare mask for tx offload. */
188 union igb_tx_offload tx_offload_mask;
192 * Structure associated with each TX queue.
194 struct igb_tx_queue {
195 volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
196 uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
197 struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
198 volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
199 uint32_t txd_type; /**< Device-specific TXD type */
200 uint16_t nb_tx_desc; /**< number of TX descriptors. */
201 uint16_t tx_tail; /**< Current value of TDT register. */
203 /**< Index of first used TX descriptor. */
204 uint16_t queue_id; /**< TX queue index. */
205 uint16_t reg_idx; /**< TX queue register index. */
206 uint16_t port_id; /**< Device port identifier. */
207 uint8_t pthresh; /**< Prefetch threshold register. */
208 uint8_t hthresh; /**< Host threshold register. */
209 uint8_t wthresh; /**< Write-back threshold register. */
211 /**< Current used hardware descriptor. */
213 /**< Start context position for transmit queue. */
214 struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
215 /**< Hardware context history.*/
219 #define RTE_PMD_USE_PREFETCH
222 #ifdef RTE_PMD_USE_PREFETCH
223 #define rte_igb_prefetch(p) rte_prefetch0(p)
225 #define rte_igb_prefetch(p) do {} while(0)
228 #ifdef RTE_PMD_PACKET_PREFETCH
229 #define rte_packet_prefetch(p) rte_prefetch1(p)
231 #define rte_packet_prefetch(p) do {} while(0)
235 * Macro for VMDq feature for 1 GbE NIC.
237 #define E1000_VMOLR_SIZE (8)
238 #define IGB_TSO_MAX_HDRLEN (512)
239 #define IGB_TSO_MAX_MSS (9216)
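/*
 * Example (added for clarity): a TSO request whose tso_segsz exceeds
 * IGB_TSO_MAX_MSS (9216) or whose l2_len + l3_len + l4_len exceeds
 * IGB_TSO_MAX_HDRLEN (512) is downgraded by check_tso_para() below to a plain
 * TCP checksum offload instead of being segmented in hardware.
 */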
241 /*********************************************************************
245 **********************************************************************/
248 * There are some hardware limitations for TCP segmentation offload. We
249 * should check whether the parameters are valid.
251 static inline uint64_t
252 check_tso_para(uint64_t ol_req, union igb_tx_offload ol_para)
254 if (!(ol_req & PKT_TX_TCP_SEG))
256 if ((ol_para.tso_segsz > IGB_TSO_MAX_MSS) || (ol_para.l2_len +
257 ol_para.l3_len + ol_para.l4_len > IGB_TSO_MAX_HDRLEN)) {
258 ol_req &= ~PKT_TX_TCP_SEG;
259 ol_req |= PKT_TX_TCP_CKSUM;
265 * Advanced context descriptors are almost the same between igb and ixgbe.
266 * This is a separate function; there may be optimization opportunities here.
267 * Rework is required to go with the pre-defined values.
271 igbe_set_xmit_ctx(struct igb_tx_queue* txq,
272 volatile struct e1000_adv_tx_context_desc *ctx_txd,
273 uint64_t ol_flags, union igb_tx_offload tx_offload)
275 uint32_t type_tucmd_mlhl;
276 uint32_t mss_l4len_idx;
277 uint32_t ctx_idx, ctx_curr;
278 uint32_t vlan_macip_lens;
279 union igb_tx_offload tx_offload_mask;
281 ctx_curr = txq->ctx_curr;
282 ctx_idx = ctx_curr + txq->ctx_start;
284 tx_offload_mask.data = 0;
287 /* Specify which HW CTX to upload. */
288 mss_l4len_idx = (ctx_idx << E1000_ADVTXD_IDX_SHIFT);
290 if (ol_flags & PKT_TX_VLAN_PKT)
291 tx_offload_mask.data |= TX_VLAN_CMP_MASK;
293 /* check if TCP segmentation required for this packet */
294 if (ol_flags & PKT_TX_TCP_SEG) {
295 /* implies IP cksum in IPv4 */
296 if (ol_flags & PKT_TX_IP_CKSUM)
297 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4 |
298 E1000_ADVTXD_TUCMD_L4T_TCP |
299 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
301 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV6 |
302 E1000_ADVTXD_TUCMD_L4T_TCP |
303 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
305 tx_offload_mask.data |= TX_TSO_CMP_MASK;
306 mss_l4len_idx |= tx_offload.tso_segsz << E1000_ADVTXD_MSS_SHIFT;
307 mss_l4len_idx |= tx_offload.l4_len << E1000_ADVTXD_L4LEN_SHIFT;
308 } else { /* no TSO, check if hardware checksum is needed */
309 if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK))
310 tx_offload_mask.data |= TX_MACIP_LEN_CMP_MASK;
312 if (ol_flags & PKT_TX_IP_CKSUM)
313 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4;
315 switch (ol_flags & PKT_TX_L4_MASK) {
316 case PKT_TX_UDP_CKSUM:
317 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP |
318 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
319 mss_l4len_idx |= sizeof(struct udp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
321 case PKT_TX_TCP_CKSUM:
322 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP |
323 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
324 mss_l4len_idx |= sizeof(struct tcp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
326 case PKT_TX_SCTP_CKSUM:
327 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP |
328 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
329 mss_l4len_idx |= sizeof(struct sctp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
332 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_RSV |
333 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
338 txq->ctx_cache[ctx_curr].flags = ol_flags;
339 txq->ctx_cache[ctx_curr].tx_offload.data =
340 tx_offload_mask.data & tx_offload.data;
341 txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
343 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
344 vlan_macip_lens = (uint32_t)tx_offload.data;
345 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
346 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
347 ctx_txd->seqnum_seed = 0;
351 * Check which hardware context can be used. Use the existing match
352 * or create a new context descriptor.
354 static inline uint32_t
355 what_advctx_update(struct igb_tx_queue *txq, uint64_t flags,
356 union igb_tx_offload tx_offload)
358 /* If match with the current context */
359 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
360 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
361 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
362 return txq->ctx_curr;
365 /* If match with the second context */
367 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
368 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
369 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
370 return txq->ctx_curr;
373 /* Mismatch, use the previous context */
377 static inline uint32_t
378 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
380 static const uint32_t l4_olinfo[2] = {0, E1000_ADVTXD_POPTS_TXSM};
381 static const uint32_t l3_olinfo[2] = {0, E1000_ADVTXD_POPTS_IXSM};
384 tmp = l4_olinfo[(ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM];
385 tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
386 tmp |= l4_olinfo[(ol_flags & PKT_TX_TCP_SEG) != 0];
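/*
 * Example (added for clarity): any PKT_TX_*_CKSUM L4 flag or PKT_TX_TCP_SEG
 * selects E1000_ADVTXD_POPTS_TXSM (insert L4 checksum), and PKT_TX_IP_CKSUM
 * adds E1000_ADVTXD_POPTS_IXSM (insert IPv4 checksum).
 */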
390 static inline uint32_t
391 tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags)
394 static uint32_t vlan_cmd[2] = {0, E1000_ADVTXD_DCMD_VLE};
395 static uint32_t tso_cmd[2] = {0, E1000_ADVTXD_DCMD_TSE};
396 cmdtype = vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
397 cmdtype |= tso_cmd[(ol_flags & PKT_TX_TCP_SEG) != 0];
402 eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
405 struct igb_tx_queue *txq;
406 struct igb_tx_entry *sw_ring;
407 struct igb_tx_entry *txe, *txn;
408 volatile union e1000_adv_tx_desc *txr;
409 volatile union e1000_adv_tx_desc *txd;
410 struct rte_mbuf *tx_pkt;
411 struct rte_mbuf *m_seg;
412 uint64_t buf_dma_addr;
413 uint32_t olinfo_status;
414 uint32_t cmd_type_len;
423 uint32_t new_ctx = 0;
425 union igb_tx_offload tx_offload = {0};
428 sw_ring = txq->sw_ring;
430 tx_id = txq->tx_tail;
431 txe = &sw_ring[tx_id];
433 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
435 pkt_len = tx_pkt->pkt_len;
437 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
440 * The number of descriptors that must be allocated for a
441 * packet is the number of segments of that packet, plus 1
442 * Context Descriptor for the VLAN Tag Identifier, if any.
443 * Determine the last TX descriptor to allocate in the TX ring
444 * for the packet, starting from the current position (tx_id)
447 tx_last = (uint16_t) (tx_id + tx_pkt->nb_segs - 1);
449 ol_flags = tx_pkt->ol_flags;
450 tx_ol_req = ol_flags & IGB_TX_OFFLOAD_MASK;
452 /* If a Context Descriptor needs to be built. */
454 tx_offload.l2_len = tx_pkt->l2_len;
455 tx_offload.l3_len = tx_pkt->l3_len;
456 tx_offload.l4_len = tx_pkt->l4_len;
457 tx_offload.vlan_tci = tx_pkt->vlan_tci;
458 tx_offload.tso_segsz = tx_pkt->tso_segsz;
459 tx_ol_req = check_tso_para(tx_ol_req, tx_offload);
461 ctx = what_advctx_update(txq, tx_ol_req, tx_offload);
462 /* Only allocate a context descriptor if required. */
463 new_ctx = (ctx == IGB_CTX_NUM);
464 ctx = txq->ctx_curr + txq->ctx_start;
465 tx_last = (uint16_t) (tx_last + new_ctx);
467 if (tx_last >= txq->nb_tx_desc)
468 tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
470 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
471 " tx_first=%u tx_last=%u",
472 (unsigned) txq->port_id,
473 (unsigned) txq->queue_id,
479 * Check if there are enough free descriptors in the TX ring
480 * to transmit the next packet.
481 * This operation is based on the two following rules:
483 * 1- Only check that the last needed TX descriptor can be
484 * allocated (by construction, if that descriptor is free,
485 * all intermediate ones are also free).
487 * For this purpose, the index of the last TX descriptor
488 * used for a packet (the "last descriptor" of a packet)
489 * is recorded in the TX entries (the last one included)
490 * that are associated with all TX descriptors allocated
493 * 2- Avoid to allocate the last free TX descriptor of the
494 * ring, in order to never set the TDT register with the
495 * same value stored in parallel by the NIC in the TDH
496 * register, which makes the TX engine of the NIC enter
497 * in a deadlock situation.
499 * By extension, avoid to allocate a free descriptor that
500 * belongs to the last set of free descriptors allocated
501 * to the same packet previously transmitted.
505 * The "last descriptor" of the previously sent packet, if any, that
506 * used the descriptor slot we now want to allocate as our last one.
508 tx_end = sw_ring[tx_last].last_id;
511 * The next descriptor following that "last descriptor" in the
514 tx_end = sw_ring[tx_end].next_id;
517 * The "last descriptor" associated with that next descriptor.
519 tx_end = sw_ring[tx_end].last_id;
522 * Check that this descriptor is free.
524 if (! (txr[tx_end].wb.status & E1000_TXD_STAT_DD)) {
531 * Set common flags of all TX Data Descriptors.
533 * The following bits must be set in all Data Descriptors:
534 * - E1000_ADVTXD_DTYP_DATA
535 * - E1000_ADVTXD_DCMD_DEXT
537 * The following bits must be set in the first Data Descriptor
538 * and are ignored in the other ones:
539 * - E1000_ADVTXD_DCMD_IFCS
540 * - E1000_ADVTXD_MAC_1588
541 * - E1000_ADVTXD_DCMD_VLE
543 * The following bits must only be set in the last Data
545 * - E1000_TXD_CMD_EOP
547 * The following bits can be set in any Data Descriptor, but
548 * are only set in the last Data Descriptor:
551 cmd_type_len = txq->txd_type |
552 E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
553 if (tx_ol_req & PKT_TX_TCP_SEG)
554 pkt_len -= (tx_pkt->l2_len + tx_pkt->l3_len + tx_pkt->l4_len);
555 olinfo_status = (pkt_len << E1000_ADVTXD_PAYLEN_SHIFT);
556 #if defined(RTE_LIBRTE_IEEE1588)
557 if (ol_flags & PKT_TX_IEEE1588_TMST)
558 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
561 /* Setup TX Advanced context descriptor if required */
563 volatile struct e1000_adv_tx_context_desc *
566 ctx_txd = (volatile struct
567 e1000_adv_tx_context_desc *)
570 txn = &sw_ring[txe->next_id];
571 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
573 if (txe->mbuf != NULL) {
574 rte_pktmbuf_free_seg(txe->mbuf);
578 igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req, tx_offload);
580 txe->last_id = tx_last;
581 tx_id = txe->next_id;
585 /* Setup the TX Advanced Data Descriptor */
586 cmd_type_len |= tx_desc_vlan_flags_to_cmdtype(tx_ol_req);
587 olinfo_status |= tx_desc_cksum_flags_to_olinfo(tx_ol_req);
588 olinfo_status |= (ctx << E1000_ADVTXD_IDX_SHIFT);
593 txn = &sw_ring[txe->next_id];
596 if (txe->mbuf != NULL)
597 rte_pktmbuf_free_seg(txe->mbuf);
601 * Set up transmit descriptor.
603 slen = (uint16_t) m_seg->data_len;
604 buf_dma_addr = rte_mbuf_data_iova(m_seg);
605 txd->read.buffer_addr =
606 rte_cpu_to_le_64(buf_dma_addr);
607 txd->read.cmd_type_len =
608 rte_cpu_to_le_32(cmd_type_len | slen);
609 txd->read.olinfo_status =
610 rte_cpu_to_le_32(olinfo_status);
611 txe->last_id = tx_last;
612 tx_id = txe->next_id;
615 } while (m_seg != NULL);
618 * The last packet data descriptor needs End Of Packet (EOP)
619 * and Report Status (RS).
621 txd->read.cmd_type_len |=
622 rte_cpu_to_le_32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
628 * Set the Transmit Descriptor Tail (TDT).
630 E1000_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
631 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
632 (unsigned) txq->port_id, (unsigned) txq->queue_id,
633 (unsigned) tx_id, (unsigned) nb_tx);
634 txq->tx_tail = tx_id;
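/*
 * Usage note (added for clarity): applications do not call this function
 * directly; it is installed as dev->tx_pkt_burst in eth_igb_tx_queue_setup()
 * and is reached through rte_eth_tx_burst(port_id, queue_id, tx_pkts, nb_pkts).
 */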
639 /*********************************************************************
643 **********************************************************************/
645 eth_igb_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
651 for (i = 0; i < nb_pkts; i++) {
654 /* Check some limitations for TSO in hardware */
655 if (m->ol_flags & PKT_TX_TCP_SEG)
656 if ((m->tso_segsz > IGB_TSO_MAX_MSS) ||
657 (m->l2_len + m->l3_len + m->l4_len >
658 IGB_TSO_MAX_HDRLEN)) {
663 if (m->ol_flags & IGB_TX_OFFLOAD_NOTSUP_MASK) {
664 rte_errno = ENOTSUP;
668 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
669 ret = rte_validate_tx_offload(m);
675 ret = rte_net_intel_cksum_prepare(m);
685 /*********************************************************************
689 **********************************************************************/
690 #define IGB_PACKET_TYPE_IPV4 0X01
691 #define IGB_PACKET_TYPE_IPV4_TCP 0X11
692 #define IGB_PACKET_TYPE_IPV4_UDP 0X21
693 #define IGB_PACKET_TYPE_IPV4_SCTP 0X41
694 #define IGB_PACKET_TYPE_IPV4_EXT 0X03
695 #define IGB_PACKET_TYPE_IPV4_EXT_SCTP 0X43
696 #define IGB_PACKET_TYPE_IPV6 0X04
697 #define IGB_PACKET_TYPE_IPV6_TCP 0X14
698 #define IGB_PACKET_TYPE_IPV6_UDP 0X24
699 #define IGB_PACKET_TYPE_IPV6_EXT 0X0C
700 #define IGB_PACKET_TYPE_IPV6_EXT_TCP 0X1C
701 #define IGB_PACKET_TYPE_IPV6_EXT_UDP 0X2C
702 #define IGB_PACKET_TYPE_IPV4_IPV6 0X05
703 #define IGB_PACKET_TYPE_IPV4_IPV6_TCP 0X15
704 #define IGB_PACKET_TYPE_IPV4_IPV6_UDP 0X25
705 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT 0X0D
706 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
707 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
708 #define IGB_PACKET_TYPE_MAX 0X80
709 #define IGB_PACKET_TYPE_MASK 0X7F
710 #define IGB_PACKET_TYPE_SHIFT 0X04
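/*
 * Note (added for clarity): igb_rxd_pkt_info_to_pkt_type() below indexes
 * ptype_table with (pkt_info >> IGB_PACKET_TYPE_SHIFT) & IGB_PACKET_TYPE_MASK,
 * so e.g. an IPv4/TCP packet resolves to entry IGB_PACKET_TYPE_IPV4_TCP (0x11).
 */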
711 static inline uint32_t
712 igb_rxd_pkt_info_to_pkt_type(uint16_t pkt_info)
714 static const uint32_t
715 ptype_table[IGB_PACKET_TYPE_MAX] __rte_cache_aligned = {
716 [IGB_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
718 [IGB_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
719 RTE_PTYPE_L3_IPV4_EXT,
720 [IGB_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
722 [IGB_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
723 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
724 RTE_PTYPE_INNER_L3_IPV6,
725 [IGB_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
726 RTE_PTYPE_L3_IPV6_EXT,
727 [IGB_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
728 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
729 RTE_PTYPE_INNER_L3_IPV6_EXT,
730 [IGB_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
731 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
732 [IGB_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
733 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
734 [IGB_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
735 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
736 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
737 [IGB_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
738 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
739 [IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
740 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
741 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
742 [IGB_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
743 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
744 [IGB_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
745 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
746 [IGB_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
747 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
748 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
749 [IGB_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
750 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
751 [IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
752 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
753 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
754 [IGB_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
755 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
756 [IGB_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
757 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
759 if (unlikely(pkt_info & E1000_RXDADV_PKTTYPE_ETQF))
760 return RTE_PTYPE_UNKNOWN;
762 pkt_info = (pkt_info >> IGB_PACKET_TYPE_SHIFT) & IGB_PACKET_TYPE_MASK;
764 return ptype_table[pkt_info];
767 static inline uint64_t
768 rx_desc_hlen_type_rss_to_pkt_flags(struct igb_rx_queue *rxq, uint32_t hl_tp_rs)
770 uint64_t pkt_flags = ((hl_tp_rs & 0x0F) == 0) ? 0 : PKT_RX_RSS_HASH;
772 #if defined(RTE_LIBRTE_IEEE1588)
773 static uint32_t ip_pkt_etqf_map[8] = {
774 0, 0, 0, PKT_RX_IEEE1588_PTP,
778 struct rte_eth_dev dev = rte_eth_devices[rxq->port_id];
779 struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev.data->dev_private);
781 /* EtherType is in bits 8:10 in Packet Type, and not in the default 0:2 */
782 if (hw->mac.type == e1000_i210)
783 pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 12) & 0x07];
785 pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07];
793 static inline uint64_t
794 rx_desc_status_to_pkt_flags(uint32_t rx_status)
798 /* Check if VLAN present */
799 pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
800 PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED : 0);
802 #if defined(RTE_LIBRTE_IEEE1588)
803 if (rx_status & E1000_RXD_STAT_TMST)
804 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
809 static inline uint64_t
810 rx_desc_error_to_pkt_flags(uint32_t rx_status)
813 * Bit 30: IPE, IPv4 checksum error
814 * Bit 29: L4I, L4 integrity error
817 static uint64_t error_to_pkt_flags_map[4] = {
818 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
819 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
820 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
821 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
823 return error_to_pkt_flags_map[(rx_status >>
824 E1000_RXD_ERR_CKSUM_BIT) & E1000_RXD_ERR_CKSUM_MSK];
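/*
 * Example (added for clarity): a descriptor reporting an IPv4 checksum error
 * (IPE set) but no L4 error indexes entry 2 of error_to_pkt_flags_map and
 * yields PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD.
 */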
828 eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
831 struct igb_rx_queue *rxq;
832 volatile union e1000_adv_rx_desc *rx_ring;
833 volatile union e1000_adv_rx_desc *rxdp;
834 struct igb_rx_entry *sw_ring;
835 struct igb_rx_entry *rxe;
836 struct rte_mbuf *rxm;
837 struct rte_mbuf *nmb;
838 union e1000_adv_rx_desc rxd;
841 uint32_t hlen_type_rss;
851 rx_id = rxq->rx_tail;
852 rx_ring = rxq->rx_ring;
853 sw_ring = rxq->sw_ring;
854 while (nb_rx < nb_pkts) {
856 * The order of operations here is important as the DD status
857 * bit must not be read after any other descriptor fields.
858 * rx_ring and rxdp are pointing to volatile data so the order
859 * of accesses cannot be reordered by the compiler. If they were
860 * not volatile, they could be reordered which could lead to
861 * using invalid descriptor fields when read from rxd.
863 rxdp = &rx_ring[rx_id];
864 staterr = rxdp->wb.upper.status_error;
865 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
872 * If the E1000_RXD_STAT_EOP flag is not set, the RX packet is
873 * likely to be invalid and to be dropped by the various
874 * validation checks performed by the network stack.
876 * Allocate a new mbuf to replenish the RX ring descriptor.
877 * If the allocation fails:
878 * - arrange for that RX descriptor to be the first one
879 * being parsed the next time the receive function is
880 * invoked [on the same queue].
882 * - Stop parsing the RX ring and return immediately.
884 * This policy does not drop the packet received in the RX
885 * descriptor for which the allocation of a new mbuf failed.
886 * Thus, it allows that packet to be later retrieved if
887 * mbufs have been freed in the meantime.
888 * As a side effect, holding RX descriptors instead of
889 * systematically giving them back to the NIC may lead to
890 * RX ring exhaustion situations.
891 * However, the NIC can gracefully prevent such situations
892 * to happen by sending specific "back-pressure" flow control
893 * frames to its peer(s).
895 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
896 "staterr=0x%x pkt_len=%u",
897 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
898 (unsigned) rx_id, (unsigned) staterr,
899 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
901 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
903 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
904 "queue_id=%u", (unsigned) rxq->port_id,
905 (unsigned) rxq->queue_id);
906 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
911 rxe = &sw_ring[rx_id];
913 if (rx_id == rxq->nb_rx_desc)
916 /* Prefetch next mbuf while processing current one. */
917 rte_igb_prefetch(sw_ring[rx_id].mbuf);
920 * When the next RX descriptor is on a cache-line boundary,
921 * prefetch the next 4 RX descriptors and the next 8 pointers
924 if ((rx_id & 0x3) == 0) {
925 rte_igb_prefetch(&rx_ring[rx_id]);
926 rte_igb_prefetch(&sw_ring[rx_id]);
932 rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
933 rxdp->read.hdr_addr = 0;
934 rxdp->read.pkt_addr = dma_addr;
937 * Initialize the returned mbuf.
938 * 1) setup generic mbuf fields:
939 * - number of segments,
942 * - RX port identifier.
943 * 2) integrate hardware offload data, if any:
945 * - IP checksum flag,
946 * - VLAN TCI, if any,
949 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
951 rxm->data_off = RTE_PKTMBUF_HEADROOM;
952 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
955 rxm->pkt_len = pkt_len;
956 rxm->data_len = pkt_len;
957 rxm->port = rxq->port_id;
959 rxm->hash.rss = rxd.wb.lower.hi_dword.rss;
960 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
963 * The vlan_tci field is only valid when PKT_RX_VLAN is
964 * set in the pkt_flags field and must be in CPU byte order.
966 if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
967 (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
968 rxm->vlan_tci = rte_be_to_cpu_16(rxd.wb.upper.vlan);
970 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
972 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
973 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
974 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
975 rxm->ol_flags = pkt_flags;
976 rxm->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.lower.
977 lo_dword.hs_rss.pkt_info);
980 * Store the mbuf address into the next entry of the array
981 * of returned packets.
983 rx_pkts[nb_rx++] = rxm;
985 rxq->rx_tail = rx_id;
988 * If the number of free RX descriptors is greater than the RX free
989 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
991 * Update the RDT with the value of the last processed RX descriptor
992 * minus 1, to guarantee that the RDT register is never equal to the
993 * RDH register, which creates a "full" ring situation from the
994 * hardware point of view...
996 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
997 if (nb_hold > rxq->rx_free_thresh) {
998 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
999 "nb_hold=%u nb_rx=%u",
1000 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1001 (unsigned) rx_id, (unsigned) nb_hold,
1003 rx_id = (uint16_t) ((rx_id == 0) ?
1004 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1005 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1008 rxq->nb_rx_hold = nb_hold;
1013 eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1016 struct igb_rx_queue *rxq;
1017 volatile union e1000_adv_rx_desc *rx_ring;
1018 volatile union e1000_adv_rx_desc *rxdp;
1019 struct igb_rx_entry *sw_ring;
1020 struct igb_rx_entry *rxe;
1021 struct rte_mbuf *first_seg;
1022 struct rte_mbuf *last_seg;
1023 struct rte_mbuf *rxm;
1024 struct rte_mbuf *nmb;
1025 union e1000_adv_rx_desc rxd;
1026 uint64_t dma; /* Physical address of mbuf data buffer */
1028 uint32_t hlen_type_rss;
1038 rx_id = rxq->rx_tail;
1039 rx_ring = rxq->rx_ring;
1040 sw_ring = rxq->sw_ring;
1043 * Retrieve RX context of current packet, if any.
1045 first_seg = rxq->pkt_first_seg;
1046 last_seg = rxq->pkt_last_seg;
1048 while (nb_rx < nb_pkts) {
1051 * The order of operations here is important as the DD status
1052 * bit must not be read after any other descriptor fields.
1053 * rx_ring and rxdp are pointing to volatile data so the order
1054 * of accesses cannot be reordered by the compiler. If they were
1055 * not volatile, they could be reordered which could lead to
1056 * using invalid descriptor fields when read from rxd.
1058 rxdp = &rx_ring[rx_id];
1059 staterr = rxdp->wb.upper.status_error;
1060 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
1067 * Allocate a new mbuf to replenish the RX ring descriptor.
1068 * If the allocation fails:
1069 * - arrange for that RX descriptor to be the first one
1070 * being parsed the next time the receive function is
1071 * invoked [on the same queue].
1073 * - Stop parsing the RX ring and return immediately.
1075 * This policy does not drop the packet received in the RX
1076 * descriptor for which the allocation of a new mbuf failed.
1077 * Thus, it allows that packet to be later retrieved if
1078 * mbufs have been freed in the meantime.
1079 * As a side effect, holding RX descriptors instead of
1080 * systematically giving them back to the NIC may lead to
1081 * RX ring exhaustion situations.
1082 * However, the NIC can gracefully prevent such situations
1083 * to happen by sending specific "back-pressure" flow control
1084 * frames to its peer(s).
1086 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1087 "staterr=0x%x data_len=%u",
1088 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1089 (unsigned) rx_id, (unsigned) staterr,
1090 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1092 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1094 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1095 "queue_id=%u", (unsigned) rxq->port_id,
1096 (unsigned) rxq->queue_id);
1097 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1102 rxe = &sw_ring[rx_id];
1104 if (rx_id == rxq->nb_rx_desc)
1107 /* Prefetch next mbuf while processing current one. */
1108 rte_igb_prefetch(sw_ring[rx_id].mbuf);
1111 * When the next RX descriptor is on a cache-line boundary,
1112 * prefetch the next 4 RX descriptors and the next 8 pointers
1115 if ((rx_id & 0x3) == 0) {
1116 rte_igb_prefetch(&rx_ring[rx_id]);
1117 rte_igb_prefetch(&sw_ring[rx_id]);
1121 * Update RX descriptor with the physical address of the new
1122 * data buffer of the newly allocated mbuf.
1126 dma = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1127 rxdp->read.pkt_addr = dma;
1128 rxdp->read.hdr_addr = 0;
1131 * Set data length & data buffer address of mbuf.
1133 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1134 rxm->data_len = data_len;
1135 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1138 * If this is the first buffer of the received packet,
1139 * set the pointer to the first mbuf of the packet and
1140 * initialize its context.
1141 * Otherwise, update the total length and the number of segments
1142 * of the current scattered packet, and update the pointer to
1143 * the last mbuf of the current packet.
1145 if (first_seg == NULL) {
1147 first_seg->pkt_len = data_len;
1148 first_seg->nb_segs = 1;
1150 first_seg->pkt_len += data_len;
1151 first_seg->nb_segs++;
1152 last_seg->next = rxm;
1156 * If this is not the last buffer of the received packet,
1157 * update the pointer to the last mbuf of the current scattered
1158 * packet and continue to parse the RX ring.
1160 if (! (staterr & E1000_RXD_STAT_EOP)) {
1166 * This is the last buffer of the received packet.
1167 * If the CRC is not stripped by the hardware:
1168 * - Subtract the CRC length from the total packet length.
1169 * - If the last buffer only contains the whole CRC or a part
1170 * of it, free the mbuf associated to the last buffer.
1171 * If part of the CRC is also contained in the previous
1172 * mbuf, subtract the length of that CRC part from the
1173 * data length of the previous mbuf.
1176 if (unlikely(rxq->crc_len > 0)) {
1177 first_seg->pkt_len -= ETHER_CRC_LEN;
1178 if (data_len <= ETHER_CRC_LEN) {
1179 rte_pktmbuf_free_seg(rxm);
1180 first_seg->nb_segs--;
1181 last_seg->data_len = (uint16_t)
1182 (last_seg->data_len -
1183 (ETHER_CRC_LEN - data_len));
1184 last_seg->next = NULL;
1187 (uint16_t) (data_len - ETHER_CRC_LEN);
1191 * Initialize the first mbuf of the returned packet:
1192 * - RX port identifier,
1193 * - hardware offload data, if any:
1194 * - RSS flag & hash,
1195 * - IP checksum flag,
1196 * - VLAN TCI, if any,
1199 first_seg->port = rxq->port_id;
1200 first_seg->hash.rss = rxd.wb.lower.hi_dword.rss;
1203 * The vlan_tci field is only valid when PKT_RX_VLAN is
1204 * set in the pkt_flags field and must be in CPU byte order.
1206 if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
1207 (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
1208 first_seg->vlan_tci =
1209 rte_be_to_cpu_16(rxd.wb.upper.vlan);
1211 first_seg->vlan_tci =
1212 rte_le_to_cpu_16(rxd.wb.upper.vlan);
1214 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1215 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
1216 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
1217 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1218 first_seg->ol_flags = pkt_flags;
1219 first_seg->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.
1220 lower.lo_dword.hs_rss.pkt_info);
1222 /* Prefetch data of first segment, if configured to do so. */
1223 rte_packet_prefetch((char *)first_seg->buf_addr +
1224 first_seg->data_off);
1227 * Store the mbuf address into the next entry of the array
1228 * of returned packets.
1230 rx_pkts[nb_rx++] = first_seg;
1233 * Set up the receive context for a new packet.
1239 * Record index of the next RX descriptor to probe.
1241 rxq->rx_tail = rx_id;
1244 * Save receive context.
1246 rxq->pkt_first_seg = first_seg;
1247 rxq->pkt_last_seg = last_seg;
1250 * If the number of free RX descriptors is greater than the RX free
1251 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1253 * Update the RDT with the value of the last processed RX descriptor
1254 * minus 1, to guarantee that the RDT register is never equal to the
1255 * RDH register, which creates a "full" ring situation from the
1256 * hardware point of view...
1258 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1259 if (nb_hold > rxq->rx_free_thresh) {
1260 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1261 "nb_hold=%u nb_rx=%u",
1262 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1263 (unsigned) rx_id, (unsigned) nb_hold,
1265 rx_id = (uint16_t) ((rx_id == 0) ?
1266 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1267 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1270 rxq->nb_rx_hold = nb_hold;
1275 * Maximum number of Ring Descriptors.
1277 * Since RDLEN/TDLEN should be a multiple of 128 bytes, the number of ring
1278 * descriptors should meet the following condition:
1279 * (num_ring_desc * sizeof(struct e1000_rx/tx_desc)) % 128 == 0
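 * Note (added for clarity): with 16-byte descriptors this works out to a
 * descriptor count that is a multiple of 8, which is what the IGB_RXD_ALIGN /
 * IGB_TXD_ALIGN checks in the queue setup functions below are meant to enforce.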
1283 igb_tx_queue_release_mbufs(struct igb_tx_queue *txq)
1287 if (txq->sw_ring != NULL) {
1288 for (i = 0; i < txq->nb_tx_desc; i++) {
1289 if (txq->sw_ring[i].mbuf != NULL) {
1290 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1291 txq->sw_ring[i].mbuf = NULL;
1298 igb_tx_queue_release(struct igb_tx_queue *txq)
1301 igb_tx_queue_release_mbufs(txq);
1302 rte_free(txq->sw_ring);
1308 eth_igb_tx_queue_release(void *txq)
1310 igb_tx_queue_release(txq);
1314 igb_tx_done_cleanup(struct igb_tx_queue *txq, uint32_t free_cnt)
1316 struct igb_tx_entry *sw_ring;
1317 volatile union e1000_adv_tx_desc *txr;
1318 uint16_t tx_first; /* First segment analyzed. */
1319 uint16_t tx_id; /* Current segment being processed. */
1320 uint16_t tx_last; /* Last segment in the current packet. */
1321 uint16_t tx_next; /* First segment of the next packet. */
1326 sw_ring = txq->sw_ring;
1330 * tx_tail is the last sent packet on the sw_ring. Go to the end
1331 * of that packet (the last segment in the packet chain) and
1332 * then the next segment will be the start of the oldest segment
1333 * in the sw_ring. This is the first packet we will
1334 * attempt to free.
1337 /* Get last segment in most recently added packet. */
1338 tx_first = sw_ring[txq->tx_tail].last_id;
1340 /* Get the next segment, which is the oldest segment in ring. */
1341 tx_first = sw_ring[tx_first].next_id;
1343 /* Set the current index to the first. */
1347 * Loop through each packet. For each packet, verify that an
1348 * mbuf exists and that the last segment is free. If so, free
1352 tx_last = sw_ring[tx_id].last_id;
1354 if (sw_ring[tx_last].mbuf) {
1355 if (txr[tx_last].wb.status &
1356 E1000_TXD_STAT_DD) {
1358 * Increment the number of packets
1363 /* Get the start of the next packet. */
1364 tx_next = sw_ring[tx_last].next_id;
1367 * Loop through all segments in a
1371 rte_pktmbuf_free_seg(sw_ring[tx_id].mbuf);
1372 sw_ring[tx_id].mbuf = NULL;
1373 sw_ring[tx_id].last_id = tx_id;
1375 /* Move to the next segment. */
1376 tx_id = sw_ring[tx_id].next_id;
1378 } while (tx_id != tx_next);
1380 if (unlikely(count == (int)free_cnt))
1384 * mbuf still in use, nothing left to
1390 * There are multiple reasons to be here:
1391 * 1) All the packets on the ring have been
1392 * freed - tx_id is equal to tx_first
1393 * and some packets have been freed.
1395 * 2) The interface has not sent a ring's worth of
1396 * packets yet, so the segment after the tail is
1397 * still empty. Or a previous call to this
1398 * function freed some of the segments but
1399 * not all so there is a hole in the list.
1400 * Hopefully this is a rare case.
1401 * - Walk the list and find the next mbuf. If
1402 * there isn't one, then done.
1404 if (likely((tx_id == tx_first) && (count != 0)))
1408 * Walk the list and find the next mbuf, if any.
1411 /* Move to the next segment. */
1412 tx_id = sw_ring[tx_id].next_id;
1414 if (sw_ring[tx_id].mbuf)
1417 } while (tx_id != tx_first);
1420 * Determine why the previous loop exited. If there
1421 * is no mbuf, we are done.
1423 if (sw_ring[tx_id].mbuf == NULL)
1434 eth_igb_tx_done_cleanup(void *txq, uint32_t free_cnt)
1436 return igb_tx_done_cleanup(txq, free_cnt);
1440 igb_reset_tx_queue_stat(struct igb_tx_queue *txq)
1445 memset((void*)&txq->ctx_cache, 0,
1446 IGB_CTX_NUM * sizeof(struct igb_advctx_info));
1450 igb_reset_tx_queue(struct igb_tx_queue *txq, struct rte_eth_dev *dev)
1452 static const union e1000_adv_tx_desc zeroed_desc = {{0}};
1453 struct igb_tx_entry *txe = txq->sw_ring;
1455 struct e1000_hw *hw;
1457 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1458 /* Zero out HW ring memory */
1459 for (i = 0; i < txq->nb_tx_desc; i++) {
1460 txq->tx_ring[i] = zeroed_desc;
1463 /* Initialize ring entries */
1464 prev = (uint16_t)(txq->nb_tx_desc - 1);
1465 for (i = 0; i < txq->nb_tx_desc; i++) {
1466 volatile union e1000_adv_tx_desc *txd = &(txq->tx_ring[i]);
1468 txd->wb.status = E1000_TXD_STAT_DD;
1471 txe[prev].next_id = i;
1475 txq->txd_type = E1000_ADVTXD_DTYP_DATA;
1476 /* 82575 specific, each tx queue will use 2 hw contexts */
1477 if (hw->mac.type == e1000_82575)
1478 txq->ctx_start = txq->queue_id * IGB_CTX_NUM;
1480 igb_reset_tx_queue_stat(txq);
1484 eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
1487 unsigned int socket_id,
1488 const struct rte_eth_txconf *tx_conf)
1490 const struct rte_memzone *tz;
1491 struct igb_tx_queue *txq;
1492 struct e1000_hw *hw;
1495 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1498 * Validate the number of transmit descriptors.
1499 * It must not exceed the hardware maximum and must be a multiple
1502 if (nb_desc % IGB_TXD_ALIGN != 0 ||
1503 (nb_desc > E1000_MAX_RING_DESC) ||
1504 (nb_desc < E1000_MIN_RING_DESC)) {
1509 * The tx_free_thresh and tx_rs_thresh values are not used in the 1G
1512 if (tx_conf->tx_free_thresh != 0)
1513 PMD_INIT_LOG(INFO, "The tx_free_thresh parameter is not "
1514 "used for the 1G driver.");
1515 if (tx_conf->tx_rs_thresh != 0)
1516 PMD_INIT_LOG(INFO, "The tx_rs_thresh parameter is not "
1517 "used for the 1G driver.");
1518 if (tx_conf->tx_thresh.wthresh == 0 && hw->mac.type != e1000_82576)
1519 PMD_INIT_LOG(INFO, "To improve 1G driver performance, "
1520 "consider setting the TX WTHRESH value to 4, 8, "
1523 /* Free memory prior to re-allocation if needed */
1524 if (dev->data->tx_queues[queue_idx] != NULL) {
1525 igb_tx_queue_release(dev->data->tx_queues[queue_idx]);
1526 dev->data->tx_queues[queue_idx] = NULL;
1529 /* First allocate the tx queue data structure */
1530 txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
1531 RTE_CACHE_LINE_SIZE);
1536 * Allocate TX ring hardware descriptors. A memzone large enough to
1537 * handle the maximum ring size is allocated in order to allow for
1538 * resizing in later calls to the queue setup function.
1540 size = sizeof(union e1000_adv_tx_desc) * E1000_MAX_RING_DESC;
1541 tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
1542 E1000_ALIGN, socket_id);
1544 igb_tx_queue_release(txq);
1548 txq->nb_tx_desc = nb_desc;
1549 txq->pthresh = tx_conf->tx_thresh.pthresh;
1550 txq->hthresh = tx_conf->tx_thresh.hthresh;
1551 txq->wthresh = tx_conf->tx_thresh.wthresh;
1552 if (txq->wthresh > 0 && hw->mac.type == e1000_82576)
1554 txq->queue_id = queue_idx;
1555 txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1556 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1557 txq->port_id = dev->data->port_id;
1559 txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx));
1560 txq->tx_ring_phys_addr = tz->iova;
1562 txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
1563 /* Allocate software ring */
1564 txq->sw_ring = rte_zmalloc("txq->sw_ring",
1565 sizeof(struct igb_tx_entry) * nb_desc,
1566 RTE_CACHE_LINE_SIZE);
1567 if (txq->sw_ring == NULL) {
1568 igb_tx_queue_release(txq);
1571 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1572 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
1574 igb_reset_tx_queue(txq, dev);
1575 dev->tx_pkt_burst = eth_igb_xmit_pkts;
1576 dev->tx_pkt_prepare = &eth_igb_prep_pkts;
1577 dev->data->tx_queues[queue_idx] = txq;
1583 igb_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
1587 if (rxq->sw_ring != NULL) {
1588 for (i = 0; i < rxq->nb_rx_desc; i++) {
1589 if (rxq->sw_ring[i].mbuf != NULL) {
1590 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
1591 rxq->sw_ring[i].mbuf = NULL;
1598 igb_rx_queue_release(struct igb_rx_queue *rxq)
1601 igb_rx_queue_release_mbufs(rxq);
1602 rte_free(rxq->sw_ring);
1608 eth_igb_rx_queue_release(void *rxq)
1610 igb_rx_queue_release(rxq);
1614 igb_reset_rx_queue(struct igb_rx_queue *rxq)
1616 static const union e1000_adv_rx_desc zeroed_desc = {{0}};
1619 /* Zero out HW ring memory */
1620 for (i = 0; i < rxq->nb_rx_desc; i++) {
1621 rxq->rx_ring[i] = zeroed_desc;
1625 rxq->pkt_first_seg = NULL;
1626 rxq->pkt_last_seg = NULL;
1630 eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
1633 unsigned int socket_id,
1634 const struct rte_eth_rxconf *rx_conf,
1635 struct rte_mempool *mp)
1637 const struct rte_memzone *rz;
1638 struct igb_rx_queue *rxq;
1639 struct e1000_hw *hw;
1642 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1645 * Validate the number of receive descriptors.
1646 * It must not exceed the hardware maximum and must be a multiple
1649 if (nb_desc % IGB_RXD_ALIGN != 0 ||
1650 (nb_desc > E1000_MAX_RING_DESC) ||
1651 (nb_desc < E1000_MIN_RING_DESC)) {
1655 /* Free memory prior to re-allocation if needed */
1656 if (dev->data->rx_queues[queue_idx] != NULL) {
1657 igb_rx_queue_release(dev->data->rx_queues[queue_idx]);
1658 dev->data->rx_queues[queue_idx] = NULL;
1661 /* First allocate the RX queue data structure. */
1662 rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
1663 RTE_CACHE_LINE_SIZE);
1667 rxq->nb_rx_desc = nb_desc;
1668 rxq->pthresh = rx_conf->rx_thresh.pthresh;
1669 rxq->hthresh = rx_conf->rx_thresh.hthresh;
1670 rxq->wthresh = rx_conf->rx_thresh.wthresh;
1671 if (rxq->wthresh > 0 &&
1672 (hw->mac.type == e1000_82576 || hw->mac.type == e1000_vfadapt_i350))
1674 rxq->drop_en = rx_conf->rx_drop_en;
1675 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1676 rxq->queue_id = queue_idx;
1677 rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1678 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1679 rxq->port_id = dev->data->port_id;
1680 rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
1684 * Allocate RX ring hardware descriptors. A memzone large enough to
1685 * handle the maximum ring size is allocated in order to allow for
1686 * resizing in later calls to the queue setup function.
1688 size = sizeof(union e1000_adv_rx_desc) * E1000_MAX_RING_DESC;
1689 rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size,
1690 E1000_ALIGN, socket_id);
1692 igb_rx_queue_release(rxq);
1695 rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx));
1696 rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx));
1697 rxq->rx_ring_phys_addr = rz->iova;
1698 rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;
1700 /* Allocate software ring. */
1701 rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1702 sizeof(struct igb_rx_entry) * nb_desc,
1703 RTE_CACHE_LINE_SIZE);
1704 if (rxq->sw_ring == NULL) {
1705 igb_rx_queue_release(rxq);
1708 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1709 rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1711 dev->data->rx_queues[queue_idx] = rxq;
1712 igb_reset_rx_queue(rxq);
1718 eth_igb_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1720 #define IGB_RXQ_SCAN_INTERVAL 4
1721 volatile union e1000_adv_rx_desc *rxdp;
1722 struct igb_rx_queue *rxq;
1725 rxq = dev->data->rx_queues[rx_queue_id];
1726 rxdp = &(rxq->rx_ring[rxq->rx_tail]);
1728 while ((desc < rxq->nb_rx_desc) &&
1729 (rxdp->wb.upper.status_error & E1000_RXD_STAT_DD)) {
1730 desc += IGB_RXQ_SCAN_INTERVAL;
1731 rxdp += IGB_RXQ_SCAN_INTERVAL;
1732 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
1733 rxdp = &(rxq->rx_ring[rxq->rx_tail +
1734 desc - rxq->nb_rx_desc]);
1741 eth_igb_rx_descriptor_done(void *rx_queue, uint16_t offset)
1743 volatile union e1000_adv_rx_desc *rxdp;
1744 struct igb_rx_queue *rxq = rx_queue;
1747 if (unlikely(offset >= rxq->nb_rx_desc))
1749 desc = rxq->rx_tail + offset;
1750 if (desc >= rxq->nb_rx_desc)
1751 desc -= rxq->nb_rx_desc;
1753 rxdp = &rxq->rx_ring[desc];
1754 return !!(rxdp->wb.upper.status_error & E1000_RXD_STAT_DD);
1758 eth_igb_rx_descriptor_status(void *rx_queue, uint16_t offset)
1760 struct igb_rx_queue *rxq = rx_queue;
1761 volatile uint32_t *status;
1764 if (unlikely(offset >= rxq->nb_rx_desc))
1767 if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
1768 return RTE_ETH_RX_DESC_UNAVAIL;
1770 desc = rxq->rx_tail + offset;
1771 if (desc >= rxq->nb_rx_desc)
1772 desc -= rxq->nb_rx_desc;
1774 status = &rxq->rx_ring[desc].wb.upper.status_error;
1775 if (*status & rte_cpu_to_le_32(E1000_RXD_STAT_DD))
1776 return RTE_ETH_RX_DESC_DONE;
1778 return RTE_ETH_RX_DESC_AVAIL;
1782 eth_igb_tx_descriptor_status(void *tx_queue, uint16_t offset)
1784 struct igb_tx_queue *txq = tx_queue;
1785 volatile uint32_t *status;
1788 if (unlikely(offset >= txq->nb_tx_desc))
1791 desc = txq->tx_tail + offset;
1792 if (desc >= txq->nb_tx_desc)
1793 desc -= txq->nb_tx_desc;
1795 status = &txq->tx_ring[desc].wb.status;
1796 if (*status & rte_cpu_to_le_32(E1000_TXD_STAT_DD))
1797 return RTE_ETH_TX_DESC_DONE;
1799 return RTE_ETH_TX_DESC_FULL;
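/*
 * Usage note (added for clarity): this handler backs the
 * rte_eth_tx_descriptor_status() API. RTE_ETH_TX_DESC_DONE means the
 * descriptor's DD bit is set and it can be reused; RTE_ETH_TX_DESC_FULL means
 * it is still in flight.
 */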
1803 igb_dev_clear_queues(struct rte_eth_dev *dev)
1806 struct igb_tx_queue *txq;
1807 struct igb_rx_queue *rxq;
1809 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1810 txq = dev->data->tx_queues[i];
1812 igb_tx_queue_release_mbufs(txq);
1813 igb_reset_tx_queue(txq, dev);
1817 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1818 rxq = dev->data->rx_queues[i];
1820 igb_rx_queue_release_mbufs(rxq);
1821 igb_reset_rx_queue(rxq);
1827 igb_dev_free_queues(struct rte_eth_dev *dev)
1831 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1832 eth_igb_rx_queue_release(dev->data->rx_queues[i]);
1833 dev->data->rx_queues[i] = NULL;
1835 dev->data->nb_rx_queues = 0;
1837 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1838 eth_igb_tx_queue_release(dev->data->tx_queues[i]);
1839 dev->data->tx_queues[i] = NULL;
1841 dev->data->nb_tx_queues = 0;
1845 * Receive Side Scaling (RSS).
1846 * See section 7.1.1.7 in the following document:
1847 * "Intel 82576 GbE Controller Datasheet" - Revision 2.45 October 2009
1850 * The source and destination IP addresses of the IP header and the source and
1851 * destination ports of TCP/UDP headers, if any, of received packets are hashed
1852 * against a configurable random key to compute a 32-bit RSS hash result.
1853 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
1854 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
1855 * RSS output index which selects the RX queue where the packet is stored.
1857 * The following output is supplied in the RX write-back descriptor:
1858 * - 32-bit result of the Microsoft RSS hash function,
1859 * - 4-bit RSS type field.
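 * Example (added for clarity): with the redirection table filled round-robin
 * over N RX queues, as igb_rss_configure() below does, a packet whose hash has
 * its 7 LSBs equal to h is delivered to queue (h % N), for N up to 8.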
1863 * RSS random key supplied in section 7.1.1.7.3 of the Intel 82576 datasheet.
1864 * Used as the default key.
1866 static uint8_t rss_intel_key[40] = {
1867 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1868 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1869 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1870 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1871 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1875 igb_rss_disable(struct rte_eth_dev *dev)
1877 struct e1000_hw *hw;
1880 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1881 mrqc = E1000_READ_REG(hw, E1000_MRQC);
1882 mrqc &= ~E1000_MRQC_ENABLE_MASK;
1883 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1887 igb_hw_rss_hash_set(struct e1000_hw *hw, struct rte_eth_rss_conf *rss_conf)
1895 hash_key = rss_conf->rss_key;
1896 if (hash_key != NULL) {
1897 /* Fill in RSS hash key */
1898 for (i = 0; i < 10; i++) {
1899 rss_key = hash_key[(i * 4)];
1900 rss_key |= hash_key[(i * 4) + 1] << 8;
1901 rss_key |= hash_key[(i * 4) + 2] << 16;
1902 rss_key |= hash_key[(i * 4) + 3] << 24;
1903 E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key);
1907 /* Set configured hashing protocols in MRQC register */
1908 rss_hf = rss_conf->rss_hf;
1909 mrqc = E1000_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
1910 if (rss_hf & ETH_RSS_IPV4)
1911 mrqc |= E1000_MRQC_RSS_FIELD_IPV4;
1912 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1913 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_TCP;
1914 if (rss_hf & ETH_RSS_IPV6)
1915 mrqc |= E1000_MRQC_RSS_FIELD_IPV6;
1916 if (rss_hf & ETH_RSS_IPV6_EX)
1917 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_EX;
1918 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1919 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP;
1920 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
1921 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
1922 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
1923 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
1924 if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
1925 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
1926 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
1927 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP_EX;
1928 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1932 eth_igb_rss_hash_update(struct rte_eth_dev *dev,
1933 struct rte_eth_rss_conf *rss_conf)
1935 struct e1000_hw *hw;
1939 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1942 * Before changing anything, first check that the update RSS operation
1943 * does not attempt to disable RSS, if RSS was enabled at
1944 * initialization time, or does not attempt to enable RSS, if RSS was
1945 * disabled at initialization time.
1947 rss_hf = rss_conf->rss_hf & IGB_RSS_OFFLOAD_ALL;
1948 mrqc = E1000_READ_REG(hw, E1000_MRQC);
1949 if (!(mrqc & E1000_MRQC_ENABLE_MASK)) { /* RSS disabled */
1950 if (rss_hf != 0) /* Enable RSS */
1952 return 0; /* Nothing to do */
1955 if (rss_hf == 0) /* Disable RSS */
1957 igb_hw_rss_hash_set(hw, rss_conf);
1961 int eth_igb_rss_hash_conf_get(struct rte_eth_dev *dev,
1962 struct rte_eth_rss_conf *rss_conf)
1964 struct e1000_hw *hw;
1971 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1972 hash_key = rss_conf->rss_key;
1973 if (hash_key != NULL) {
1974 /* Return RSS hash key */
1975 for (i = 0; i < 10; i++) {
1976 rss_key = E1000_READ_REG_ARRAY(hw, E1000_RSSRK(0), i);
1977 hash_key[(i * 4)] = rss_key & 0x000000FF;
1978 hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
1979 hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
1980 hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
1984 /* Get RSS functions configured in MRQC register */
1985 mrqc = E1000_READ_REG(hw, E1000_MRQC);
1986 if ((mrqc & E1000_MRQC_ENABLE_RSS_4Q) == 0) { /* RSS is disabled */
1987 rss_conf->rss_hf = 0;
1991 if (mrqc & E1000_MRQC_RSS_FIELD_IPV4)
1992 rss_hf |= ETH_RSS_IPV4;
1993 if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_TCP)
1994 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
1995 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6)
1996 rss_hf |= ETH_RSS_IPV6;
1997 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_EX)
1998 rss_hf |= ETH_RSS_IPV6_EX;
1999 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP)
2000 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
2001 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP_EX)
2002 rss_hf |= ETH_RSS_IPV6_TCP_EX;
2003 if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_UDP)
2004 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2005 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP)
2006 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
2007 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP_EX)
2008 rss_hf |= ETH_RSS_IPV6_UDP_EX;
2009 rss_conf->rss_hf = rss_hf;
2014 igb_rss_configure(struct rte_eth_dev *dev)
2016 struct rte_eth_rss_conf rss_conf;
2017 struct e1000_hw *hw;
2021 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2023 /* Fill in redirection table. */
2024 shift = (hw->mac.type == e1000_82575) ? 6 : 0;
2025 for (i = 0; i < 128; i++) {
2032 q_idx = (uint8_t) ((dev->data->nb_rx_queues > 1) ?
2033 i % dev->data->nb_rx_queues : 0);
2034 reta.bytes[i & 3] = (uint8_t) (q_idx << shift);
2036 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
2040 * Configure the RSS key and the RSS protocols used to compute
2041 * the RSS hash of input packets.
2043 rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2044 if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
2045 igb_rss_disable(dev);
2048 if (rss_conf.rss_key == NULL)
2049 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2050 igb_hw_rss_hash_set(hw, &rss_conf);
2054 * Check whether the MAC type supports VMDq.
2055 * Return 1 if it does; otherwise return 0.
2058 igb_is_vmdq_supported(const struct rte_eth_dev *dev)
2060 const struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2062 switch (hw->mac.type) {
2083 PMD_INIT_LOG(ERR, "Cannot support VMDq feature");
2089 igb_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
2091 struct rte_eth_vmdq_rx_conf *cfg;
2092 struct e1000_hw *hw;
2093 uint32_t mrqc, vt_ctl, vmolr, rctl;
2096 PMD_INIT_FUNC_TRACE();
2098 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2099 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
2101 /* Check if the MAC type supports VMDq; a return value of 0 means no support. */
2102 if (igb_is_vmdq_supported(dev) == 0)
2105 igb_rss_disable(dev);
2107 /* RCTL: enable VLAN filter */
2108 rctl = E1000_READ_REG(hw, E1000_RCTL);
2109 rctl |= E1000_RCTL_VFE;
2110 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2112 /* MRQC: enable VMDq */
2113 mrqc = E1000_READ_REG(hw, E1000_MRQC);
2114 mrqc |= E1000_MRQC_ENABLE_VMDQ;
2115 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2117 /* VTCTL: pool selection according to VLAN tag */
2118 vt_ctl = E1000_READ_REG(hw, E1000_VT_CTL);
2119 if (cfg->enable_default_pool)
2120 vt_ctl |= (cfg->default_pool << E1000_VT_CTL_DEFAULT_POOL_SHIFT);
2121 vt_ctl |= E1000_VT_CTL_IGNORE_MAC;
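/*
 * IGNORE_MAC (IGMAC): pool selection disregards the MAC address, so pools
 * are presumably chosen from the VLAN tag alone, as noted above.
 */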
2122 E1000_WRITE_REG(hw, E1000_VT_CTL, vt_ctl);
2124 for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2125 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2126 vmolr &= ~(E1000_VMOLR_AUPE | E1000_VMOLR_ROMPE |
2127 E1000_VMOLR_ROPE | E1000_VMOLR_BAM |
2130 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_UNTAG)
2131 vmolr |= E1000_VMOLR_AUPE;
2132 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_HASH_MC)
2133 vmolr |= E1000_VMOLR_ROMPE;
2134 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_HASH_UC)
2135 vmolr |= E1000_VMOLR_ROPE;
2136 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_BROADCAST)
2137 vmolr |= E1000_VMOLR_BAM;
2138 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_MULTICAST)
2139 vmolr |= E1000_VMOLR_MPME;
2141 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2145 * VMOLR: set STRVLAN to 1 when IGMAC in VT_CTL is set to 1.
2146 * Both 82576 and 82580 support it.
2148 if (hw->mac.type != e1000_i350) {
2149 for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2150 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2151 vmolr |= E1000_VMOLR_STRVLAN;
2152 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2156 /* VFTA - enable all vlan filters */
2157 for (i = 0; i < IGB_VFTA_SIZE; i++)
2158 E1000_WRITE_REG(hw, (E1000_VFTA+(i*4)), UINT32_MAX);
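/*
 * Setting every VFTA bit opens the whole VLAN filter table, so any VLAN ID
 * passes the filter; the per-VLAN pool mapping is then refined by the VLVF
 * entries programmed below.
 */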
2160 /* VFRE: enable all 8 pools for RX; both 82576 and i350 support it */
2161 if (hw->mac.type != e1000_82580)
2162 E1000_WRITE_REG(hw, E1000_VFRE, E1000_MBVFICR_VFREQ_MASK);
2165 * RAH/RAL - allow pools to receive on specific MAC addresses.
2166 * In this case, all pools should be able to receive on MAC address 0.
2168 E1000_WRITE_REG(hw, E1000_RAH(0), (E1000_RAH_AV | UINT16_MAX));
2169 E1000_WRITE_REG(hw, E1000_RAL(0), UINT32_MAX);
2171 /* VLVF: set up filters for vlan tags as configured */
2172 for (i = 0; i < cfg->nb_pool_maps; i++) {
2173 /* set vlan id in VF register and set the valid bit */
2174 E1000_WRITE_REG(hw, E1000_VLVF(i), (E1000_VLVF_VLANID_ENABLE | \
2175 (cfg->pool_map[i].vlan_id & ETH_VLAN_ID_MAX) | \
2176 ((cfg->pool_map[i].pools << E1000_VLVF_POOLSEL_SHIFT ) & \
2177 E1000_VLVF_POOLSEL_MASK)));
2180 E1000_WRITE_FLUSH(hw);
2186 /*********************************************************************
2188 * Enable receive unit.
2190 **********************************************************************/
2193 igb_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
2195 struct igb_rx_entry *rxe = rxq->sw_ring;
2199 /* Initialize software ring entries. */
2200 for (i = 0; i < rxq->nb_rx_desc; i++) {
2201 volatile union e1000_adv_rx_desc *rxd;
2202 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
2205 PMD_INIT_LOG(ERR, "RX mbuf alloc failed "
2206 "queue_id=%hu", rxq->queue_id);
2210 rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
2211 rxd = &rxq->rx_ring[i];
2212 rxd->read.hdr_addr = 0;
2213 rxd->read.pkt_addr = dma_addr;
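/*
 * Each advanced RX descriptor above is armed with the DMA address of its
 * mbuf's data buffer; hdr_addr stays 0 because header split is not used
 * (the queues are programmed for one-buffer descriptors).
 */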
2220 #define E1000_MRQC_DEF_Q_SHIFT (3)
2222 igb_dev_mq_rx_configure(struct rte_eth_dev *dev)
2224 struct e1000_hw *hw =
2225 E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2228 if (RTE_ETH_DEV_SRIOV(dev).active == ETH_8_POOLS) {
2230 * SR-IOV active scheme.
2231 * FIXME: add support for RSS together with VMDq & SR-IOV.
2233 mrqc = E1000_MRQC_ENABLE_VMDQ;
2234 /* Def_Q = 011b: the default queue is ignored; the default pool comes from VT_CTL.DEF_PL */
2235 mrqc |= 0x3 << E1000_MRQC_DEF_Q_SHIFT;
2236 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2237 } else if(RTE_ETH_DEV_SRIOV(dev).active == 0) {
2239 * SRIOV inactive scheme
2241 switch (dev->data->dev_conf.rxmode.mq_mode) {
2243 igb_rss_configure(dev);
2245 case ETH_MQ_RX_VMDQ_ONLY:
2246 /* Configure general VMDq-only RX parameters */
2247 igb_vmdq_rx_hw_configure(dev);
2249 case ETH_MQ_RX_NONE:
2250 /* If mq_mode is none, disable RSS. */
2252 igb_rss_disable(dev);
2261 eth_igb_rx_init(struct rte_eth_dev *dev)
2263 struct e1000_hw *hw;
2264 struct igb_rx_queue *rxq;
2269 uint16_t rctl_bsize;
2273 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2277 * Make sure receives are disabled while setting
2278 * up the descriptor ring.
2280 rctl = E1000_READ_REG(hw, E1000_RCTL);
2281 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2284 * Configure jumbo frame support, if requested.
2286 if (dev->data->dev_conf.rxmode.jumbo_frame == 1) {
2287 rctl |= E1000_RCTL_LPE;
2290 * Set the maximum packet length by default; it may be updated
2291 * later when dual VLAN is enabled or disabled.
2293 E1000_WRITE_REG(hw, E1000_RLPML,
2294 dev->data->dev_conf.rxmode.max_rx_pkt_len +
2297 rctl &= ~E1000_RCTL_LPE;
2299 /* Configure and enable each RX queue. */
2301 dev->rx_pkt_burst = eth_igb_recv_pkts;
2302 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2306 rxq = dev->data->rx_queues[i];
2310 * i350 and i354 vlan packets have vlan tags byte swapped.
2312 if (hw->mac.type == e1000_i350 || hw->mac.type == e1000_i354) {
2313 rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
2314 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
2316 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
2319 /* Allocate buffers for descriptor rings and set up queue */
2320 ret = igb_alloc_rx_queue_mbufs(rxq);
2325 * Reset crc_len in case it was changed after queue setup by a call to VLAN offload set.
2329 (uint8_t)(dev->data->dev_conf.rxmode.hw_strip_crc ?
2332 bus_addr = rxq->rx_ring_phys_addr;
2333 E1000_WRITE_REG(hw, E1000_RDLEN(rxq->reg_idx),
2335 sizeof(union e1000_adv_rx_desc));
2336 E1000_WRITE_REG(hw, E1000_RDBAH(rxq->reg_idx),
2337 (uint32_t)(bus_addr >> 32));
2338 E1000_WRITE_REG(hw, E1000_RDBAL(rxq->reg_idx), (uint32_t)bus_addr);
2340 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2343 * Configure RX buffer size.
2345 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
2346 RTE_PKTMBUF_HEADROOM);
2347 if (buf_size >= 1024) {
2349 * Configure the BSIZEPACKET field of the SRRCTL
2350 * register of the queue.
2351 * Value is in 1 KB resolution, from 1 KB to 127 KB.
2352 * If this field is equal to 0b, then RCTL.BSIZE
2353 * determines the RX packet buffer size.
2355 srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
2356 E1000_SRRCTL_BSIZEPKT_MASK);
2357 buf_size = (uint16_t) ((srrctl &
2358 E1000_SRRCTL_BSIZEPKT_MASK) <<
2359 E1000_SRRCTL_BSIZEPKT_SHIFT);
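/*
 * Example of the 1 KB resolution described above: a 1600-byte buffer is
 * programmed as BSIZEPACKET = 1, so buf_size is rounded back down to 1024
 * and the remaining bytes of the mbuf data room go unused.
 */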
2361 /* Add the dual VLAN tag length to support dual VLAN (QinQ) frames */
2362 if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
2363 2 * VLAN_TAG_SIZE) > buf_size){
2364 if (!dev->data->scattered_rx)
2366 "forcing scatter mode");
2367 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2368 dev->data->scattered_rx = 1;
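/*
 * A frame of max_rx_pkt_len plus two VLAN tags no longer fits in a single
 * buffer, so the scattered receive function is installed to chain each
 * packet across several mbufs.
 */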
2372 * Use BSIZE field of the device RCTL register.
2374 if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
2375 rctl_bsize = buf_size;
2376 if (!dev->data->scattered_rx)
2377 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2378 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2379 dev->data->scattered_rx = 1;
2382 /* Set whether packets are dropped when no descriptors are available */
2384 srrctl |= E1000_SRRCTL_DROP_EN;
2386 E1000_WRITE_REG(hw, E1000_SRRCTL(rxq->reg_idx), srrctl);
2388 /* Enable this RX queue. */
2389 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(rxq->reg_idx));
2390 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2391 rxdctl &= 0xFFF00000;
2392 rxdctl |= (rxq->pthresh & 0x1F);
2393 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2394 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2395 E1000_WRITE_REG(hw, E1000_RXDCTL(rxq->reg_idx), rxdctl);
2398 if (dev->data->dev_conf.rxmode.enable_scatter) {
2399 if (!dev->data->scattered_rx)
2400 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2401 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2402 dev->data->scattered_rx = 1;
2406 * Setup BSIZE field of RCTL register, if needed.
2407 * Buffer sizes >= 1024 are not [supposed to be] setup in the RCTL
2408 * register, since the code above configures the SRRCTL register of
2409 * the RX queue in such a case.
2410 * All configurable sizes are:
2411 * 16384: rctl |= (E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX);
2412 * 8192: rctl |= (E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX);
2413 * 4096: rctl |= (E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX);
2414 * 2048: rctl |= E1000_RCTL_SZ_2048;
2415 * 1024: rctl |= E1000_RCTL_SZ_1024;
2416 * 512: rctl |= E1000_RCTL_SZ_512;
2417 * 256: rctl |= E1000_RCTL_SZ_256;
2419 if (rctl_bsize > 0) {
2420 if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
2421 rctl |= E1000_RCTL_SZ_512;
2422 else /* 256 <= buf_size < 512 - use 256 */
2423 rctl |= E1000_RCTL_SZ_256;
2427 * Configure RSS if the device is configured with multiple RX queues.
2429 igb_dev_mq_rx_configure(dev);
2431 /* Update the rctl since igb_dev_mq_rx_configure may change its value */
2432 rctl |= E1000_READ_REG(hw, E1000_RCTL);
2435 * Setup the Checksum Register.
2436 * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
2438 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
2439 rxcsum |= E1000_RXCSUM_PCSD;
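/*
 * PCSD (packet checksum disable) is set so the descriptor field can report
 * the RSS hash instead of the full-packet checksum, matching the mutual
 * exclusivity noted above.
 */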
2441 /* Enable both L3/L4 rx checksum offload */
2442 if (dev->data->dev_conf.rxmode.hw_ip_checksum)
2443 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL |
2444 E1000_RXCSUM_CRCOFL);
2446 rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL |
2447 E1000_RXCSUM_CRCOFL);
2448 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
2450 /* Setup the Receive Control Register. */
2451 if (dev->data->dev_conf.rxmode.hw_strip_crc) {
2452 rctl |= E1000_RCTL_SECRC; /* Strip Ethernet CRC. */
2454 /* set STRCRC bit in all queues */
2455 if (hw->mac.type == e1000_i350 ||
2456 hw->mac.type == e1000_i210 ||
2457 hw->mac.type == e1000_i211 ||
2458 hw->mac.type == e1000_i354) {
2459 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2460 rxq = dev->data->rx_queues[i];
2461 uint32_t dvmolr = E1000_READ_REG(hw,
2462 E1000_DVMOLR(rxq->reg_idx));
2463 dvmolr |= E1000_DVMOLR_STRCRC;
2464 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2468 rctl &= ~E1000_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
2470 /* clear STRCRC bit in all queues */
2471 if (hw->mac.type == e1000_i350 ||
2472 hw->mac.type == e1000_i210 ||
2473 hw->mac.type == e1000_i211 ||
2474 hw->mac.type == e1000_i354) {
2475 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2476 rxq = dev->data->rx_queues[i];
2477 uint32_t dvmolr = E1000_READ_REG(hw,
2478 E1000_DVMOLR(rxq->reg_idx));
2479 dvmolr &= ~E1000_DVMOLR_STRCRC;
2480 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2485 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2486 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2487 E1000_RCTL_RDMTS_HALF |
2488 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2490 /* Make sure VLAN Filters are off. */
2491 if (dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_VMDQ_ONLY)
2492 rctl &= ~E1000_RCTL_VFE;
2493 /* Don't store bad packets. */
2494 rctl &= ~E1000_RCTL_SBP;
2496 /* Enable Receives. */
2497 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2500 * Setup the HW Rx Head and Tail Descriptor Pointers.
2501 * This needs to be done after enable.
2503 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2504 rxq = dev->data->rx_queues[i];
2505 E1000_WRITE_REG(hw, E1000_RDH(rxq->reg_idx), 0);
2506 E1000_WRITE_REG(hw, E1000_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
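/*
 * RDH = 0 and RDT = nb_rx_desc - 1 hand all but one descriptor to the
 * hardware; keeping one slot back is the usual way to distinguish a full
 * ring from an empty one.
 */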
2512 /*********************************************************************
2514 * Enable transmit unit.
2516 **********************************************************************/
2518 eth_igb_tx_init(struct rte_eth_dev *dev)
2520 struct e1000_hw *hw;
2521 struct igb_tx_queue *txq;
2526 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2528 /* Setup the Base and Length of the Tx Descriptor Rings. */
2529 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2531 txq = dev->data->tx_queues[i];
2532 bus_addr = txq->tx_ring_phys_addr;
2534 E1000_WRITE_REG(hw, E1000_TDLEN(txq->reg_idx),
2536 sizeof(union e1000_adv_tx_desc));
2537 E1000_WRITE_REG(hw, E1000_TDBAH(txq->reg_idx),
2538 (uint32_t)(bus_addr >> 32));
2539 E1000_WRITE_REG(hw, E1000_TDBAL(txq->reg_idx), (uint32_t)bus_addr);
2541 /* Setup the HW Tx Head and Tail descriptor pointers. */
2542 E1000_WRITE_REG(hw, E1000_TDT(txq->reg_idx), 0);
2543 E1000_WRITE_REG(hw, E1000_TDH(txq->reg_idx), 0);
2545 /* Setup Transmit threshold registers. */
2546 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(txq->reg_idx));
2547 txdctl |= txq->pthresh & 0x1F;
2548 txdctl |= ((txq->hthresh & 0x1F) << 8);
2549 txdctl |= ((txq->wthresh & 0x1F) << 16);
2550 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2551 E1000_WRITE_REG(hw, E1000_TXDCTL(txq->reg_idx), txdctl);
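/*
 * TXDCTL packs the prefetch, host and write-back thresholds into bits 4:0,
 * 12:8 and 20:16 respectively, which is why pthresh, hthresh and wthresh
 * are masked and shifted into place above.
 */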
2554 /* Program the Transmit Control Register. */
2555 tctl = E1000_READ_REG(hw, E1000_TCTL);
2556 tctl &= ~E1000_TCTL_CT;
2557 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2558 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
2560 e1000_config_collision_dist(hw);
2562 /* This write will effectively turn on the transmit unit. */
2563 E1000_WRITE_REG(hw, E1000_TCTL, tctl);
2566 /*********************************************************************
2568 * Enable VF receive unit.
2570 **********************************************************************/
2572 eth_igbvf_rx_init(struct rte_eth_dev *dev)
2574 struct e1000_hw *hw;
2575 struct igb_rx_queue *rxq;
2578 uint16_t rctl_bsize;
2582 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2585 e1000_rlpml_set_vf(hw,
2586 (uint16_t)(dev->data->dev_conf.rxmode.max_rx_pkt_len +
2589 /* Configure and enable each RX queue. */
2591 dev->rx_pkt_burst = eth_igb_recv_pkts;
2592 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2596 rxq = dev->data->rx_queues[i];
2600 * i350 VF loopback (LB) VLAN packets have their VLAN tags byte swapped.
2602 if (hw->mac.type == e1000_vfadapt_i350) {
2603 rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
2604 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
2606 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
2609 /* Allocate buffers for descriptor rings and set up queue */
2610 ret = igb_alloc_rx_queue_mbufs(rxq);
2614 bus_addr = rxq->rx_ring_phys_addr;
2615 E1000_WRITE_REG(hw, E1000_RDLEN(i),
2617 sizeof(union e1000_adv_rx_desc));
2618 E1000_WRITE_REG(hw, E1000_RDBAH(i),
2619 (uint32_t)(bus_addr >> 32));
2620 E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr);
2622 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2625 * Configure RX buffer size.
2627 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
2628 RTE_PKTMBUF_HEADROOM);
2629 if (buf_size >= 1024) {
2631 * Configure the BSIZEPACKET field of the SRRCTL
2632 * register of the queue.
2633 * Value is in 1 KB resolution, from 1 KB to 127 KB.
2634 * If this field is equal to 0b, then RCTL.BSIZE
2635 * determines the RX packet buffer size.
2637 srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
2638 E1000_SRRCTL_BSIZEPKT_MASK);
2639 buf_size = (uint16_t) ((srrctl &
2640 E1000_SRRCTL_BSIZEPKT_MASK) <<
2641 E1000_SRRCTL_BSIZEPKT_SHIFT);
2643 /* Add the dual VLAN tag length to support dual VLAN (QinQ) frames */
2644 if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
2645 2 * VLAN_TAG_SIZE) > buf_size){
2646 if (!dev->data->scattered_rx)
2648 "forcing scatter mode");
2649 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2650 dev->data->scattered_rx = 1;
2654 * Use BSIZE field of the device RCTL register.
2656 if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
2657 rctl_bsize = buf_size;
2658 if (!dev->data->scattered_rx)
2659 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2660 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2661 dev->data->scattered_rx = 1;
2664 /* Set whether packets are dropped when no descriptors are available */
2666 srrctl |= E1000_SRRCTL_DROP_EN;
2668 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
2670 /* Enable this RX queue. */
2671 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
2672 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2673 rxdctl &= 0xFFF00000;
2674 rxdctl |= (rxq->pthresh & 0x1F);
2675 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2676 if (hw->mac.type == e1000_vfadapt) {
2678 * Workaround for the 82576 VF erratum:
2679 * force WTHRESH to 1 to avoid descriptor write-back
2680 * sometimes not being triggered.
2683 PMD_INIT_LOG(DEBUG, "Force set RX WTHRESH to 1 !");
2686 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2687 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
2690 if (dev->data->dev_conf.rxmode.enable_scatter) {
2691 if (!dev->data->scattered_rx)
2692 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2693 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2694 dev->data->scattered_rx = 1;
2698 * Setup the HW Rx Head and Tail Descriptor Pointers.
2699 * This needs to be done after enable.
2701 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2702 rxq = dev->data->rx_queues[i];
2703 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
2704 E1000_WRITE_REG(hw, E1000_RDT(i), rxq->nb_rx_desc - 1);
2710 /*********************************************************************
2712 * Enable VF transmit unit.
2714 **********************************************************************/
2716 eth_igbvf_tx_init(struct rte_eth_dev *dev)
2718 struct e1000_hw *hw;
2719 struct igb_tx_queue *txq;
2723 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2725 /* Setup the Base and Length of the Tx Descriptor Rings. */
2726 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2729 txq = dev->data->tx_queues[i];
2730 bus_addr = txq->tx_ring_phys_addr;
2731 E1000_WRITE_REG(hw, E1000_TDLEN(i),
2733 sizeof(union e1000_adv_tx_desc));
2734 E1000_WRITE_REG(hw, E1000_TDBAH(i),
2735 (uint32_t)(bus_addr >> 32));
2736 E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr);
2738 /* Setup the HW Tx Head and Tail descriptor pointers. */
2739 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
2740 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
2742 /* Setup Transmit threshold registers. */
2743 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
2744 txdctl |= txq->pthresh & 0x1F;
2745 txdctl |= ((txq->hthresh & 0x1F) << 8);
2746 if (hw->mac.type == e1000_82576) {
2748 * Workaround for the 82576 VF erratum:
2749 * force WTHRESH to 1 to avoid descriptor write-back
2750 * sometimes not being triggered.
2753 PMD_INIT_LOG(DEBUG, "Force set TX WTHRESH to 1 !");
2756 txdctl |= ((txq->wthresh & 0x1F) << 16);
2757 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2758 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
2764 igb_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2765 struct rte_eth_rxq_info *qinfo)
2767 struct igb_rx_queue *rxq;
2769 rxq = dev->data->rx_queues[queue_id];
2771 qinfo->mp = rxq->mb_pool;
2772 qinfo->scattered_rx = dev->data->scattered_rx;
2773 qinfo->nb_desc = rxq->nb_rx_desc;
2775 qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
2776 qinfo->conf.rx_drop_en = rxq->drop_en;
2780 igb_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2781 struct rte_eth_txq_info *qinfo)
2783 struct igb_tx_queue *txq;
2785 txq = dev->data->tx_queues[queue_id];
2787 qinfo->nb_desc = txq->nb_tx_desc;
2789 qinfo->conf.tx_thresh.pthresh = txq->pthresh;
2790 qinfo->conf.tx_thresh.hthresh = txq->hthresh;
2791 qinfo->conf.tx_thresh.wthresh = txq->wthresh;