deb_dpdk.git: drivers/net/e1000/igb_rxtx.c (new upstream version 17.11.5)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <inttypes.h>
43
44 #include <rte_interrupts.h>
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_log.h>
48 #include <rte_debug.h>
49 #include <rte_pci.h>
50 #include <rte_memory.h>
51 #include <rte_memcpy.h>
52 #include <rte_memzone.h>
53 #include <rte_launch.h>
54 #include <rte_eal.h>
55 #include <rte_per_lcore.h>
56 #include <rte_lcore.h>
57 #include <rte_atomic.h>
58 #include <rte_branch_prediction.h>
59 #include <rte_mempool.h>
60 #include <rte_malloc.h>
61 #include <rte_mbuf.h>
62 #include <rte_ether.h>
63 #include <rte_ethdev.h>
64 #include <rte_prefetch.h>
65 #include <rte_udp.h>
66 #include <rte_tcp.h>
67 #include <rte_sctp.h>
68 #include <rte_net.h>
69 #include <rte_string_fns.h>
70
71 #include "e1000_logs.h"
72 #include "base/e1000_api.h"
73 #include "e1000_ethdev.h"
74
75 #ifdef RTE_LIBRTE_IEEE1588
76 #define IGB_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
77 #else
78 #define IGB_TX_IEEE1588_TMST 0
79 #endif
80 /* Bit mask indicating which bits are required for building the TX context. */
81 #define IGB_TX_OFFLOAD_MASK (                    \
82                 PKT_TX_OUTER_IPV6 |      \
83                 PKT_TX_OUTER_IPV4 |      \
84                 PKT_TX_IPV6 |            \
85                 PKT_TX_IPV4 |            \
86                 PKT_TX_VLAN_PKT |                \
87                 PKT_TX_IP_CKSUM |                \
88                 PKT_TX_L4_MASK |                 \
89                 PKT_TX_TCP_SEG |                 \
90                 IGB_TX_IEEE1588_TMST)
91
92 #define IGB_TX_OFFLOAD_NOTSUP_MASK \
93                 (PKT_TX_OFFLOAD_MASK ^ IGB_TX_OFFLOAD_MASK)
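/*
 * Note: PKT_TX_OFFLOAD_MASK covers every TX offload flag defined by the mbuf
 * API, so XOR-ing it with IGB_TX_OFFLOAD_MASK yields exactly the flags this
 * driver does not support; eth_igb_prep_pkts() rejects packets carrying any
 * of them.
 */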
94
95 /**
96  * Structure associated with each descriptor of the RX ring of a RX queue.
97  */
98 struct igb_rx_entry {
99         struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
100 };
101
102 /**
103  * Structure associated with each descriptor of the TX ring of a TX queue.
104  */
105 struct igb_tx_entry {
106         struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
107         uint16_t next_id; /**< Index of next descriptor in ring. */
108         uint16_t last_id; /**< Index of last scattered descriptor. */
109 };
110
111 /**
112  * rx queue flags
113  */
114 enum igb_rxq_flags {
115         IGB_RXQ_FLAG_LB_BSWAP_VLAN = 0x01,
116 };
117
118 /**
119  * Structure associated with each RX queue.
120  */
121 struct igb_rx_queue {
122         struct rte_mempool  *mb_pool;   /**< mbuf pool to populate RX ring. */
123         volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
124         uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
125         volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
126         volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
127         struct igb_rx_entry *sw_ring;   /**< address of RX software ring. */
128         struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
129         struct rte_mbuf *pkt_last_seg;  /**< Last segment of current packet. */
130         uint16_t            nb_rx_desc; /**< number of RX descriptors. */
131         uint16_t            rx_tail;    /**< current value of RDT register. */
132         uint16_t            nb_rx_hold; /**< number of held free RX desc. */
133         uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
134         uint16_t            queue_id;   /**< RX queue index. */
135         uint16_t            reg_idx;    /**< RX queue register index. */
136         uint16_t            port_id;    /**< Device port identifier. */
137         uint8_t             pthresh;    /**< Prefetch threshold register. */
138         uint8_t             hthresh;    /**< Host threshold register. */
139         uint8_t             wthresh;    /**< Write-back threshold register. */
140         uint8_t             crc_len;    /**< 0 if CRC stripped, 4 otherwise. */
141         uint8_t             drop_en;  /**< If not 0, set SRRCTL.Drop_En. */
142         uint32_t            flags;      /**< RX flags. */
143 };
144
145 /**
146  * Hardware context number
147  */
148 enum igb_advctx_num {
149         IGB_CTX_0    = 0, /**< CTX0    */
150         IGB_CTX_1    = 1, /**< CTX1    */
151         IGB_CTX_NUM  = 2, /**< CTX_NUM */
152 };
153
154 /** Offload features */
155 union igb_tx_offload {
156         uint64_t data;
157         struct {
158                 uint64_t l3_len:9; /**< L3 (IP) Header Length. */
159                 uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
160                 uint64_t vlan_tci:16;  /**< VLAN Tag Control Identifier(CPU order). */
161                 uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
162                 uint64_t tso_segsz:16; /**< TCP TSO segment size. */
163
164                 /* uint64_t unused:8; */
165         };
166 };
167
168 /*
169  * Compare mask for igb_tx_offload.data,
170  * should be in sync with igb_tx_offload layout.
171  * */
172 #define TX_MACIP_LEN_CMP_MASK   0x000000000000FFFFULL /**< L2L3 header mask. */
173 #define TX_VLAN_CMP_MASK                0x00000000FFFF0000ULL /**< Vlan mask. */
174 #define TX_TCP_LEN_CMP_MASK             0x000000FF00000000ULL /**< TCP header mask. */
175 #define TX_TSO_MSS_CMP_MASK             0x00FFFF0000000000ULL /**< TSO segsz mask. */
176 /** Mac + IP + TCP + Mss mask. */
177 #define TX_TSO_CMP_MASK \
178         (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
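/*
 * These masks mirror the igb_tx_offload bit-field layout above: bits 0-15
 * hold l3_len (9 bits) plus l2_len (7 bits), bits 16-31 the vlan_tci,
 * bits 32-39 the l4_len and bits 40-55 the tso_segsz. A cached context is
 * only rebuilt when one of the masked fields changes.
 */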
179
180 /**
181  * Structure to check whether a new context descriptor needs to be built
182  */
183 struct igb_advctx_info {
184         uint64_t flags;           /**< ol_flags related to context build. */
185         /** tx offload: vlan, tso, l2-l3-l4 lengths. */
186         union igb_tx_offload tx_offload;
187         /** compare mask for tx offload. */
188         union igb_tx_offload tx_offload_mask;
189 };
190
191 /**
192  * Structure associated with each TX queue.
193  */
194 struct igb_tx_queue {
195         volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
196         uint64_t               tx_ring_phys_addr; /**< TX ring DMA address. */
197         struct igb_tx_entry    *sw_ring; /**< virtual address of SW ring. */
198         volatile uint32_t      *tdt_reg_addr; /**< Address of TDT register. */
199         uint32_t               txd_type;      /**< Device-specific TXD type */
200         uint16_t               nb_tx_desc;    /**< number of TX descriptors. */
201         uint16_t               tx_tail; /**< Current value of TDT register. */
202         uint16_t               tx_head;
203         /**< Index of first used TX descriptor. */
204         uint16_t               queue_id; /**< TX queue index. */
205         uint16_t               reg_idx;  /**< TX queue register index. */
206         uint16_t               port_id;  /**< Device port identifier. */
207         uint8_t                pthresh;  /**< Prefetch threshold register. */
208         uint8_t                hthresh;  /**< Host threshold register. */
209         uint8_t                wthresh;  /**< Write-back threshold register. */
210         uint32_t               ctx_curr;
211         /**< Index of the hardware context currently in use. */
212         uint32_t               ctx_start;
213         /**< Start context position for transmit queue. */
214         struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
215         /**< Hardware context history.*/
216 };
217
218 #if 1
219 #define RTE_PMD_USE_PREFETCH
220 #endif
221
222 #ifdef RTE_PMD_USE_PREFETCH
223 #define rte_igb_prefetch(p)     rte_prefetch0(p)
224 #else
225 #define rte_igb_prefetch(p)     do {} while(0)
226 #endif
227
228 #ifdef RTE_PMD_PACKET_PREFETCH
229 #define rte_packet_prefetch(p) rte_prefetch1(p)
230 #else
231 #define rte_packet_prefetch(p)  do {} while(0)
232 #endif
233
234 /*
235  * Macro for VMDq feature for 1 GbE NIC.
236  */
237 #define E1000_VMOLR_SIZE                        (8)
238 #define IGB_TSO_MAX_HDRLEN                      (512)
239 #define IGB_TSO_MAX_MSS                         (9216)
240
241 /*********************************************************************
242  *
243  *  TX function
244  *
245  **********************************************************************/
246
247 /*
248  * There are some hardware limitations for TCP segmentation offload (TSO).
249  * Check whether the requested parameters are valid.
250  */
251 static inline uint64_t
252 check_tso_para(uint64_t ol_req, union igb_tx_offload ol_para)
253 {
254         if (!(ol_req & PKT_TX_TCP_SEG))
255                 return ol_req;
256         if ((ol_para.tso_segsz > IGB_TSO_MAX_MSS) || (ol_para.l2_len +
257                         ol_para.l3_len + ol_para.l4_len > IGB_TSO_MAX_HDRLEN)) {
258                 ol_req &= ~PKT_TX_TCP_SEG;
259                 ol_req |= PKT_TX_TCP_CKSUM;
260         }
261         return ol_req;
262 }
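/*
 * Note that a TSO request exceeding the limits above is silently downgraded
 * to a plain TCP checksum offload rather than rejected, so the packet is
 * still queued for transmission.
 */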
263
264 /*
265  * Advanced context descriptors are almost the same between igb and ixgbe.
266  * This is kept as a separate function to leave room for optimization;
267  * a rework is required to go with the pre-defined values.
268  */
269
270 static inline void
271 igbe_set_xmit_ctx(struct igb_tx_queue* txq,
272                 volatile struct e1000_adv_tx_context_desc *ctx_txd,
273                 uint64_t ol_flags, union igb_tx_offload tx_offload)
274 {
275         uint32_t type_tucmd_mlhl;
276         uint32_t mss_l4len_idx;
277         uint32_t ctx_idx, ctx_curr;
278         uint32_t vlan_macip_lens;
279         union igb_tx_offload tx_offload_mask;
280
281         ctx_curr = txq->ctx_curr;
282         ctx_idx = ctx_curr + txq->ctx_start;
283
284         tx_offload_mask.data = 0;
285         type_tucmd_mlhl = 0;
286
287         /* Specify which HW CTX to upload. */
288         mss_l4len_idx = (ctx_idx << E1000_ADVTXD_IDX_SHIFT);
289
290         if (ol_flags & PKT_TX_VLAN_PKT)
291                 tx_offload_mask.data |= TX_VLAN_CMP_MASK;
292
293         /* check if TCP segmentation required for this packet */
294         if (ol_flags & PKT_TX_TCP_SEG) {
295                 /* implies IP cksum in IPv4 */
296                 if (ol_flags & PKT_TX_IP_CKSUM)
297                         type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4 |
298                                 E1000_ADVTXD_TUCMD_L4T_TCP |
299                                 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
300                 else
301                         type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV6 |
302                                 E1000_ADVTXD_TUCMD_L4T_TCP |
303                                 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
304
305                 tx_offload_mask.data |= TX_TSO_CMP_MASK;
306                 mss_l4len_idx |= tx_offload.tso_segsz << E1000_ADVTXD_MSS_SHIFT;
307                 mss_l4len_idx |= tx_offload.l4_len << E1000_ADVTXD_L4LEN_SHIFT;
308         } else { /* no TSO, check if hardware checksum is needed */
309                 if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK))
310                         tx_offload_mask.data |= TX_MACIP_LEN_CMP_MASK;
311
312                 if (ol_flags & PKT_TX_IP_CKSUM)
313                         type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4;
314
315                 switch (ol_flags & PKT_TX_L4_MASK) {
316                 case PKT_TX_UDP_CKSUM:
317                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP |
318                                 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
319                         mss_l4len_idx |= sizeof(struct udp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
320                         break;
321                 case PKT_TX_TCP_CKSUM:
322                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP |
323                                 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
324                         mss_l4len_idx |= sizeof(struct tcp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
325                         break;
326                 case PKT_TX_SCTP_CKSUM:
327                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP |
328                                 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
329                         mss_l4len_idx |= sizeof(struct sctp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
330                         break;
331                 default:
332                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_RSV |
333                                 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
334                         break;
335                 }
336         }
337
338         txq->ctx_cache[ctx_curr].flags = ol_flags;
339         txq->ctx_cache[ctx_curr].tx_offload.data =
340                 tx_offload_mask.data & tx_offload.data;
341         txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
342
343         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
344         vlan_macip_lens = (uint32_t)tx_offload.data;
345         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
346         ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
347         ctx_txd->seqnum_seed = 0;
348 }
349
350 /*
351  * Check which hardware context can be used. Use the existing match
352  * or create a new context descriptor.
353  */
354 static inline uint32_t
355 what_advctx_update(struct igb_tx_queue *txq, uint64_t flags,
356                 union igb_tx_offload tx_offload)
357 {
358         /* If match with the current context */
359         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
360                 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
361                 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
362                         return txq->ctx_curr;
363         }
364
365         /* If match with the second context */
366         txq->ctx_curr ^= 1;
367         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
368                 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
369                 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
370                         return txq->ctx_curr;
371         }
372
373                 /* Mismatch: a new context descriptor has to be built. */
374         return IGB_CTX_NUM;
375 }
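/*
 * The driver caches up to two hardware TX contexts per queue
 * (IGB_CTX_0/IGB_CTX_1). what_advctx_update() first checks the most recently
 * used slot, then the other one, and returns the IGB_CTX_NUM sentinel when
 * neither matches, telling eth_igb_xmit_pkts() to build a fresh context
 * descriptor in the slot now selected by txq->ctx_curr.
 */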
376
377 static inline uint32_t
378 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
379 {
380         static const uint32_t l4_olinfo[2] = {0, E1000_ADVTXD_POPTS_TXSM};
381         static const uint32_t l3_olinfo[2] = {0, E1000_ADVTXD_POPTS_IXSM};
382         uint32_t tmp;
383
384         tmp  = l4_olinfo[(ol_flags & PKT_TX_L4_MASK)  != PKT_TX_L4_NO_CKSUM];
385         tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
386         tmp |= l4_olinfo[(ol_flags & PKT_TX_TCP_SEG) != 0];
387         return tmp;
388 }
389
390 static inline uint32_t
391 tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags)
392 {
393         uint32_t cmdtype;
394         static uint32_t vlan_cmd[2] = {0, E1000_ADVTXD_DCMD_VLE};
395         static uint32_t tso_cmd[2] = {0, E1000_ADVTXD_DCMD_TSE};
396         cmdtype = vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
397         cmdtype |= tso_cmd[(ol_flags & PKT_TX_TCP_SEG) != 0];
398         return cmdtype;
399 }
400
401 uint16_t
402 eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
403                uint16_t nb_pkts)
404 {
405         struct igb_tx_queue *txq;
406         struct igb_tx_entry *sw_ring;
407         struct igb_tx_entry *txe, *txn;
408         volatile union e1000_adv_tx_desc *txr;
409         volatile union e1000_adv_tx_desc *txd;
410         struct rte_mbuf     *tx_pkt;
411         struct rte_mbuf     *m_seg;
412         uint64_t buf_dma_addr;
413         uint32_t olinfo_status;
414         uint32_t cmd_type_len;
415         uint32_t pkt_len;
416         uint16_t slen;
417         uint64_t ol_flags;
418         uint16_t tx_end;
419         uint16_t tx_id;
420         uint16_t tx_last;
421         uint16_t nb_tx;
422         uint64_t tx_ol_req;
423         uint32_t new_ctx = 0;
424         uint32_t ctx = 0;
425         union igb_tx_offload tx_offload = {0};
426
427         txq = tx_queue;
428         sw_ring = txq->sw_ring;
429         txr     = txq->tx_ring;
430         tx_id   = txq->tx_tail;
431         txe = &sw_ring[tx_id];
432
433         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
434                 tx_pkt = *tx_pkts++;
435                 pkt_len = tx_pkt->pkt_len;
436
437                 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
438
439                 /*
440                  * The number of descriptors that must be allocated for a
441                  * packet is the number of segments of that packet, plus 1
442                  * Context Descriptor when hardware offloads (VLAN tag, checksum, TSO) are used.
443                  * Determine the last TX descriptor to allocate in the TX ring
444                  * for the packet, starting from the current position (tx_id)
445                  * in the ring.
446                  */
447                 tx_last = (uint16_t) (tx_id + tx_pkt->nb_segs - 1);
448
449                 ol_flags = tx_pkt->ol_flags;
450                 tx_ol_req = ol_flags & IGB_TX_OFFLOAD_MASK;
451
452                 /* Check whether a Context Descriptor needs to be built. */
453                 if (tx_ol_req) {
454                         tx_offload.l2_len = tx_pkt->l2_len;
455                         tx_offload.l3_len = tx_pkt->l3_len;
456                         tx_offload.l4_len = tx_pkt->l4_len;
457                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
458                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
459                         tx_ol_req = check_tso_para(tx_ol_req, tx_offload);
460
461                         ctx = what_advctx_update(txq, tx_ol_req, tx_offload);
462                         /* Only allocate a context descriptor if required. */
463                         new_ctx = (ctx == IGB_CTX_NUM);
464                         ctx = txq->ctx_curr + txq->ctx_start;
465                         tx_last = (uint16_t) (tx_last + new_ctx);
466                 }
467                 if (tx_last >= txq->nb_tx_desc)
468                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
469
470                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
471                            " tx_first=%u tx_last=%u",
472                            (unsigned) txq->port_id,
473                            (unsigned) txq->queue_id,
474                            (unsigned) pkt_len,
475                            (unsigned) tx_id,
476                            (unsigned) tx_last);
477
478                 /*
479                  * Check if there are enough free descriptors in the TX ring
480                  * to transmit the next packet.
481                  * This operation is based on the two following rules:
482                  *
483                  *   1- Only check that the last needed TX descriptor can be
484                  *      allocated (by construction, if that descriptor is free,
485                  *      all intermediate ones are also free).
486                  *
487                  *      For this purpose, the index of the last TX descriptor
488                  *      used for a packet (the "last descriptor" of a packet)
489                  *      is recorded in the TX entries (the last one included)
490                  *      that are associated with all TX descriptors allocated
491                  *      for that packet.
492                  *
493                  *   2- Avoid to allocate the last free TX descriptor of the
494                  *      ring, in order to never set the TDT register with the
495                  *      same value stored in parallel by the NIC in the TDH
496                  *      register, which makes the TX engine of the NIC enter
497                  *      in a deadlock situation.
498                  *
499                  *      By extension, avoid to allocate a free descriptor that
500                  *      belongs to the last set of free descriptors allocated
501                  *      to the same packet previously transmitted.
502                  */
503
504                 /*
505                  * The "last descriptor" of the previously sent packet, if any,
506                  * that used the descriptor we now want to allocate as our last one.
507                  */
508                 tx_end = sw_ring[tx_last].last_id;
509
510                 /*
511                  * The next descriptor following that "last descriptor" in the
512                  * ring.
513                  */
514                 tx_end = sw_ring[tx_end].next_id;
515
516                 /*
517                  * The "last descriptor" associated with that next descriptor.
518                  */
519                 tx_end = sw_ring[tx_end].last_id;
520
521                 /*
522                  * Check that this descriptor is free.
523                  */
524                 if (! (txr[tx_end].wb.status & E1000_TXD_STAT_DD)) {
525                         if (nb_tx == 0)
526                                 return 0;
527                         goto end_of_tx;
528                 }
529
530                 /*
531                  * Set common flags of all TX Data Descriptors.
532                  *
533                  * The following bits must be set in all Data Descriptors:
534                  *   - E1000_ADVTXD_DTYP_DATA
535                  *   - E1000_ADVTXD_DCMD_DEXT
536                  *
537                  * The following bits must be set in the first Data Descriptor
538                  * and are ignored in the other ones:
539                  *   - E1000_ADVTXD_DCMD_IFCS
540                  *   - E1000_ADVTXD_MAC_1588
541                  *   - E1000_ADVTXD_DCMD_VLE
542                  *
543                  * The following bits must only be set in the last Data
544                  * Descriptor:
545                  *   - E1000_TXD_CMD_EOP
546                  *
547                  * The following bits can be set in any Data Descriptor, but
548                  * are only set in the last Data Descriptor:
549                  *   - E1000_TXD_CMD_RS
550                  */
551                 cmd_type_len = txq->txd_type |
552                         E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
553                 if (tx_ol_req & PKT_TX_TCP_SEG)
554                         pkt_len -= (tx_pkt->l2_len + tx_pkt->l3_len + tx_pkt->l4_len);
555                 olinfo_status = (pkt_len << E1000_ADVTXD_PAYLEN_SHIFT);
556 #if defined(RTE_LIBRTE_IEEE1588)
557                 if (ol_flags & PKT_TX_IEEE1588_TMST)
558                         cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
559 #endif
560                 if (tx_ol_req) {
561                         /* Setup TX Advanced context descriptor if required */
562                         if (new_ctx) {
563                                 volatile struct e1000_adv_tx_context_desc *
564                                     ctx_txd;
565
566                                 ctx_txd = (volatile struct
567                                     e1000_adv_tx_context_desc *)
568                                     &txr[tx_id];
569
570                                 txn = &sw_ring[txe->next_id];
571                                 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
572
573                                 if (txe->mbuf != NULL) {
574                                         rte_pktmbuf_free_seg(txe->mbuf);
575                                         txe->mbuf = NULL;
576                                 }
577
578                                 igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req, tx_offload);
579
580                                 txe->last_id = tx_last;
581                                 tx_id = txe->next_id;
582                                 txe = txn;
583                         }
584
585                         /* Setup the TX Advanced Data Descriptor */
586                         cmd_type_len  |= tx_desc_vlan_flags_to_cmdtype(tx_ol_req);
587                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(tx_ol_req);
588                         olinfo_status |= (ctx << E1000_ADVTXD_IDX_SHIFT);
589                 }
590
591                 m_seg = tx_pkt;
592                 do {
593                         txn = &sw_ring[txe->next_id];
594                         txd = &txr[tx_id];
595
596                         if (txe->mbuf != NULL)
597                                 rte_pktmbuf_free_seg(txe->mbuf);
598                         txe->mbuf = m_seg;
599
600                         /*
601                          * Set up transmit descriptor.
602                          */
603                         slen = (uint16_t) m_seg->data_len;
604                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
605                         txd->read.buffer_addr =
606                                 rte_cpu_to_le_64(buf_dma_addr);
607                         txd->read.cmd_type_len =
608                                 rte_cpu_to_le_32(cmd_type_len | slen);
609                         txd->read.olinfo_status =
610                                 rte_cpu_to_le_32(olinfo_status);
611                         txe->last_id = tx_last;
612                         tx_id = txe->next_id;
613                         txe = txn;
614                         m_seg = m_seg->next;
615                 } while (m_seg != NULL);
616
617                 /*
618                  * The last packet data descriptor needs End Of Packet (EOP)
619                  * and Report Status (RS).
620                  */
621                 txd->read.cmd_type_len |=
622                         rte_cpu_to_le_32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
623         }
624  end_of_tx:
625         rte_wmb();
626
627         /*
628          * Set the Transmit Descriptor Tail (TDT).
629          */
630         E1000_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
631         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
632                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
633                    (unsigned) tx_id, (unsigned) nb_tx);
634         txq->tx_tail = tx_id;
635
636         return nb_tx;
637 }
638
639 /*********************************************************************
640  *
641  *  TX prep functions
642  *
643  **********************************************************************/
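/*
 * eth_igb_prep_pkts() backs rte_eth_tx_prepare() for igb ports: it validates
 * the requested offloads against the driver limits (unsupported flags, TSO
 * header length and MSS bounds) and lets rte_net_intel_cksum_prepare() fill
 * in the pseudo-header checksums expected by the hardware. On error it stops
 * at the offending packet and reports the cause through rte_errno.
 */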
644 uint16_t
645 eth_igb_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
646                 uint16_t nb_pkts)
647 {
648         int i, ret;
649         struct rte_mbuf *m;
650
651         for (i = 0; i < nb_pkts; i++) {
652                 m = tx_pkts[i];
653
654                 /* Check some limitations for TSO in hardware */
655                 if (m->ol_flags & PKT_TX_TCP_SEG)
656                         if ((m->tso_segsz > IGB_TSO_MAX_MSS) ||
657                                         (m->l2_len + m->l3_len + m->l4_len >
658                                         IGB_TSO_MAX_HDRLEN)) {
659                                 rte_errno = EINVAL;
660                                 return i;
661                         }
662
663                 if (m->ol_flags & IGB_TX_OFFLOAD_NOTSUP_MASK) {
664                         rte_errno = ENOTSUP;
665                         return i;
666                 }
667
668 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
669                 ret = rte_validate_tx_offload(m);
670                 if (ret != 0) {
671                         rte_errno = -ret;
672                         return i;
673                 }
674 #endif
675                 ret = rte_net_intel_cksum_prepare(m);
676                 if (ret != 0) {
677                         rte_errno = -ret;
678                         return i;
679                 }
680         }
681
682         return i;
683 }
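/*
 * Typical call sequence from an application, sketched here with hypothetical
 * port_id/queue_id/pkts/nb variables:
 *
 *     uint16_t nb_ok = rte_eth_tx_prepare(port_id, queue_id, pkts, nb);
 *     uint16_t nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_ok);
 *
 * When nb_ok < nb, rte_errno indicates why pkts[nb_ok] was rejected.
 */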
684
685 /*********************************************************************
686  *
687  *  RX functions
688  *
689  **********************************************************************/
690 #define IGB_PACKET_TYPE_IPV4              0X01
691 #define IGB_PACKET_TYPE_IPV4_TCP          0X11
692 #define IGB_PACKET_TYPE_IPV4_UDP          0X21
693 #define IGB_PACKET_TYPE_IPV4_SCTP         0X41
694 #define IGB_PACKET_TYPE_IPV4_EXT          0X03
695 #define IGB_PACKET_TYPE_IPV4_EXT_SCTP     0X43
696 #define IGB_PACKET_TYPE_IPV6              0X04
697 #define IGB_PACKET_TYPE_IPV6_TCP          0X14
698 #define IGB_PACKET_TYPE_IPV6_UDP          0X24
699 #define IGB_PACKET_TYPE_IPV6_EXT          0X0C
700 #define IGB_PACKET_TYPE_IPV6_EXT_TCP      0X1C
701 #define IGB_PACKET_TYPE_IPV6_EXT_UDP      0X2C
702 #define IGB_PACKET_TYPE_IPV4_IPV6         0X05
703 #define IGB_PACKET_TYPE_IPV4_IPV6_TCP     0X15
704 #define IGB_PACKET_TYPE_IPV4_IPV6_UDP     0X25
705 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT     0X0D
706 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
707 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
708 #define IGB_PACKET_TYPE_MAX               0X80
709 #define IGB_PACKET_TYPE_MASK              0X7F
710 #define IGB_PACKET_TYPE_SHIFT             0X04
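/*
 * The RX descriptor's pkt_info field carries the hardware packet type in its
 * upper bits: the value is shifted right by IGB_PACKET_TYPE_SHIFT and masked
 * with IGB_PACKET_TYPE_MASK before indexing the table below. Packets matched
 * by an EtherType filter (ETQF) are reported as RTE_PTYPE_UNKNOWN.
 */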
711 static inline uint32_t
712 igb_rxd_pkt_info_to_pkt_type(uint16_t pkt_info)
713 {
714         static const uint32_t
715                 ptype_table[IGB_PACKET_TYPE_MAX] __rte_cache_aligned = {
716                 [IGB_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
717                         RTE_PTYPE_L3_IPV4,
718                 [IGB_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
719                         RTE_PTYPE_L3_IPV4_EXT,
720                 [IGB_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
721                         RTE_PTYPE_L3_IPV6,
722                 [IGB_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
723                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
724                         RTE_PTYPE_INNER_L3_IPV6,
725                 [IGB_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
726                         RTE_PTYPE_L3_IPV6_EXT,
727                 [IGB_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
728                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
729                         RTE_PTYPE_INNER_L3_IPV6_EXT,
730                 [IGB_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
731                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
732                 [IGB_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
733                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
734                 [IGB_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
735                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
736                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
737                 [IGB_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
738                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
739                 [IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
740                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
741                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
742                 [IGB_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
743                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
744                 [IGB_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
745                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
746                 [IGB_PACKET_TYPE_IPV4_IPV6_UDP] =  RTE_PTYPE_L2_ETHER |
747                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
748                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
749                 [IGB_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
750                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
751                 [IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
752                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
753                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
754                 [IGB_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
755                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
756                 [IGB_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
757                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
758         };
759         if (unlikely(pkt_info & E1000_RXDADV_PKTTYPE_ETQF))
760                 return RTE_PTYPE_UNKNOWN;
761
762         pkt_info = (pkt_info >> IGB_PACKET_TYPE_SHIFT) & IGB_PACKET_TYPE_MASK;
763
764         return ptype_table[pkt_info];
765 }
766
767 static inline uint64_t
768 rx_desc_hlen_type_rss_to_pkt_flags(struct igb_rx_queue *rxq, uint32_t hl_tp_rs)
769 {
770         uint64_t pkt_flags = ((hl_tp_rs & 0x0F) == 0) ?  0 : PKT_RX_RSS_HASH;
771
772 #if defined(RTE_LIBRTE_IEEE1588)
773         static uint32_t ip_pkt_etqf_map[8] = {
774                 0, 0, 0, PKT_RX_IEEE1588_PTP,
775                 0, 0, 0, 0,
776         };
777
778         struct rte_eth_dev dev = rte_eth_devices[rxq->port_id];
779         struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev.data->dev_private);
780
781         /* EtherType is in bits 8:10 in Packet Type, and not in the default 0:2 */
782         if (hw->mac.type == e1000_i210)
783                 pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 12) & 0x07];
784         else
785                 pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07];
786 #else
787         RTE_SET_USED(rxq);
788 #endif
789
790         return pkt_flags;
791 }
792
793 static inline uint64_t
794 rx_desc_status_to_pkt_flags(uint32_t rx_status)
795 {
796         uint64_t pkt_flags;
797
798         /* Check if VLAN present */
799         pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
800                 PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED : 0);
801
802 #if defined(RTE_LIBRTE_IEEE1588)
803         if (rx_status & E1000_RXD_STAT_TMST)
804                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
805 #endif
806         return pkt_flags;
807 }
808
809 static inline uint64_t
810 rx_desc_error_to_pkt_flags(uint32_t rx_status)
811 {
812         /*
813          * Bit 30: IPE, IPv4 checksum error
814          * Bit 29: L4I, L4I integrity error
815          */
816
817         static uint64_t error_to_pkt_flags_map[4] = {
818                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
819                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
820                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
821                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
822         };
823         return error_to_pkt_flags_map[(rx_status >>
824                 E1000_RXD_ERR_CKSUM_BIT) & E1000_RXD_ERR_CKSUM_MSK];
825 }
826
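/*
 * eth_igb_recv_pkts() is the non-scattered receive path: each completed RX
 * descriptor yields exactly one mbuf, which is swapped against a freshly
 * allocated replacement before the descriptor is handed back to the NIC.
 * The RDT register is only advanced once more than rx_free_thresh
 * descriptors have been reclaimed, batching the doorbell writes.
 */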
827 uint16_t
828 eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
829                uint16_t nb_pkts)
830 {
831         struct igb_rx_queue *rxq;
832         volatile union e1000_adv_rx_desc *rx_ring;
833         volatile union e1000_adv_rx_desc *rxdp;
834         struct igb_rx_entry *sw_ring;
835         struct igb_rx_entry *rxe;
836         struct rte_mbuf *rxm;
837         struct rte_mbuf *nmb;
838         union e1000_adv_rx_desc rxd;
839         uint64_t dma_addr;
840         uint32_t staterr;
841         uint32_t hlen_type_rss;
842         uint16_t pkt_len;
843         uint16_t rx_id;
844         uint16_t nb_rx;
845         uint16_t nb_hold;
846         uint64_t pkt_flags;
847
848         nb_rx = 0;
849         nb_hold = 0;
850         rxq = rx_queue;
851         rx_id = rxq->rx_tail;
852         rx_ring = rxq->rx_ring;
853         sw_ring = rxq->sw_ring;
854         while (nb_rx < nb_pkts) {
855                 /*
856                  * The order of operations here is important as the DD status
857                  * bit must not be read after any other descriptor fields.
858                  * rx_ring and rxdp are pointing to volatile data so the order
859                  * of accesses cannot be reordered by the compiler. If they were
860                  * not volatile, they could be reordered which could lead to
861                  * using invalid descriptor fields when read from rxd.
862                  */
863                 rxdp = &rx_ring[rx_id];
864                 staterr = rxdp->wb.upper.status_error;
865                 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
866                         break;
867                 rxd = *rxdp;
868
869                 /*
870                  * End of packet.
871                  *
872                  * If the E1000_RXD_STAT_EOP flag is not set, the RX packet is
873                  * likely to be invalid and to be dropped by the various
874                  * validation checks performed by the network stack.
875                  *
876                  * Allocate a new mbuf to replenish the RX ring descriptor.
877                  * If the allocation fails:
878                  *    - arrange for that RX descriptor to be the first one
879                  *      being parsed the next time the receive function is
880                  *      invoked [on the same queue].
881                  *
882                  *    - Stop parsing the RX ring and return immediately.
883                  *
884                  * This policy does not drop the packet received in the RX
885                  * descriptor for which the allocation of a new mbuf failed.
886                  * Thus, it allows that packet to be later retrieved if
887                  * mbufs have been freed in the meantime.
888                  * As a side effect, holding RX descriptors instead of
889                  * systematically giving them back to the NIC may lead to
890                  * RX ring exhaustion situations.
891                  * However, the NIC can gracefully prevent such situations
892                  * from happening by sending specific "back-pressure" flow control
893                  * frames to its peer(s).
894                  */
895                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
896                            "staterr=0x%x pkt_len=%u",
897                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
898                            (unsigned) rx_id, (unsigned) staterr,
899                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
900
901                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
902                 if (nmb == NULL) {
903                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
904                                    "queue_id=%u", (unsigned) rxq->port_id,
905                                    (unsigned) rxq->queue_id);
906                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
907                         break;
908                 }
909
910                 nb_hold++;
911                 rxe = &sw_ring[rx_id];
912                 rx_id++;
913                 if (rx_id == rxq->nb_rx_desc)
914                         rx_id = 0;
915
916                 /* Prefetch next mbuf while processing current one. */
917                 rte_igb_prefetch(sw_ring[rx_id].mbuf);
918
919                 /*
920                  * When next RX descriptor is on a cache-line boundary,
921                  * prefetch the next 4 RX descriptors and the next 8 pointers
922                  * to mbufs.
923                  */
924                 if ((rx_id & 0x3) == 0) {
925                         rte_igb_prefetch(&rx_ring[rx_id]);
926                         rte_igb_prefetch(&sw_ring[rx_id]);
927                 }
928
929                 rxm = rxe->mbuf;
930                 rxe->mbuf = nmb;
931                 dma_addr =
932                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
933                 rxdp->read.hdr_addr = 0;
934                 rxdp->read.pkt_addr = dma_addr;
935
936                 /*
937                  * Initialize the returned mbuf.
938                  * 1) setup generic mbuf fields:
939                  *    - number of segments,
940                  *    - next segment,
941                  *    - packet length,
942                  *    - RX port identifier.
943                  * 2) integrate hardware offload data, if any:
944                  *    - RSS flag & hash,
945                  *    - IP checksum flag,
946                  *    - VLAN TCI, if any,
947                  *    - error flags.
948                  */
949                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
950                                       rxq->crc_len);
951                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
952                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
953                 rxm->nb_segs = 1;
954                 rxm->next = NULL;
955                 rxm->pkt_len = pkt_len;
956                 rxm->data_len = pkt_len;
957                 rxm->port = rxq->port_id;
958
959                 rxm->hash.rss = rxd.wb.lower.hi_dword.rss;
960                 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
961
962                 /*
963                  * The vlan_tci field is only valid when PKT_RX_VLAN is
964                  * set in the pkt_flags field and must be in CPU byte order.
965                  */
966                 if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
967                                 (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
968                         rxm->vlan_tci = rte_be_to_cpu_16(rxd.wb.upper.vlan);
969                 } else {
970                         rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
971                 }
972                 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
973                 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
974                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
975                 rxm->ol_flags = pkt_flags;
976                 rxm->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.lower.
977                                                 lo_dword.hs_rss.pkt_info);
978
979                 /*
980                  * Store the mbuf address into the next entry of the array
981                  * of returned packets.
982                  */
983                 rx_pkts[nb_rx++] = rxm;
984         }
985         rxq->rx_tail = rx_id;
986
987         /*
988          * If the number of free RX descriptors is greater than the RX free
989          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
990          * register.
991          * Update the RDT with the value of the last processed RX descriptor
992          * minus 1, to guarantee that the RDT register is never equal to the
993          * RDH register, which creates a "full" ring situation from the
994          * hardware point of view...
995          */
996         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
997         if (nb_hold > rxq->rx_free_thresh) {
998                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
999                            "nb_hold=%u nb_rx=%u",
1000                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1001                            (unsigned) rx_id, (unsigned) nb_hold,
1002                            (unsigned) nb_rx);
1003                 rx_id = (uint16_t) ((rx_id == 0) ?
1004                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1005                 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1006                 nb_hold = 0;
1007         }
1008         rxq->nb_rx_hold = nb_hold;
1009         return nb_rx;
1010 }
1011
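/*
 * eth_igb_recv_scattered_pkts() handles packets spanning several RX
 * descriptors: segments are chained through mbuf->next, the in-progress
 * chain is saved in rxq->pkt_first_seg/pkt_last_seg across calls, and the
 * CRC (when not stripped by hardware) is trimmed even if it straddles the
 * last two segments.
 */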
1012 uint16_t
1013 eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1014                          uint16_t nb_pkts)
1015 {
1016         struct igb_rx_queue *rxq;
1017         volatile union e1000_adv_rx_desc *rx_ring;
1018         volatile union e1000_adv_rx_desc *rxdp;
1019         struct igb_rx_entry *sw_ring;
1020         struct igb_rx_entry *rxe;
1021         struct rte_mbuf *first_seg;
1022         struct rte_mbuf *last_seg;
1023         struct rte_mbuf *rxm;
1024         struct rte_mbuf *nmb;
1025         union e1000_adv_rx_desc rxd;
1026         uint64_t dma; /* Physical address of mbuf data buffer */
1027         uint32_t staterr;
1028         uint32_t hlen_type_rss;
1029         uint16_t rx_id;
1030         uint16_t nb_rx;
1031         uint16_t nb_hold;
1032         uint16_t data_len;
1033         uint64_t pkt_flags;
1034
1035         nb_rx = 0;
1036         nb_hold = 0;
1037         rxq = rx_queue;
1038         rx_id = rxq->rx_tail;
1039         rx_ring = rxq->rx_ring;
1040         sw_ring = rxq->sw_ring;
1041
1042         /*
1043          * Retrieve RX context of current packet, if any.
1044          */
1045         first_seg = rxq->pkt_first_seg;
1046         last_seg = rxq->pkt_last_seg;
1047
1048         while (nb_rx < nb_pkts) {
1049         next_desc:
1050                 /*
1051                  * The order of operations here is important as the DD status
1052                  * bit must not be read after any other descriptor fields.
1053                  * rx_ring and rxdp are pointing to volatile data so the order
1054                  * of accesses cannot be reordered by the compiler. If they were
1055                  * not volatile, they could be reordered which could lead to
1056                  * using invalid descriptor fields when read from rxd.
1057                  */
1058                 rxdp = &rx_ring[rx_id];
1059                 staterr = rxdp->wb.upper.status_error;
1060                 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
1061                         break;
1062                 rxd = *rxdp;
1063
1064                 /*
1065                  * Descriptor done.
1066                  *
1067                  * Allocate a new mbuf to replenish the RX ring descriptor.
1068                  * If the allocation fails:
1069                  *    - arrange for that RX descriptor to be the first one
1070                  *      being parsed the next time the receive function is
1071                  *      invoked [on the same queue].
1072                  *
1073                  *    - Stop parsing the RX ring and return immediately.
1074                  *
1075                  * This policy does not drop the packet received in the RX
1076                  * descriptor for which the allocation of a new mbuf failed.
1077                  * Thus, it allows that packet to be later retrieved if
1078                  * mbufs have been freed in the meantime.
1079                  * As a side effect, holding RX descriptors instead of
1080                  * systematically giving them back to the NIC may lead to
1081                  * RX ring exhaustion situations.
1082                  * However, the NIC can gracefully prevent such situations
1083                  * from happening by sending specific "back-pressure" flow control
1084                  * frames to its peer(s).
1085                  */
1086                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1087                            "staterr=0x%x data_len=%u",
1088                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1089                            (unsigned) rx_id, (unsigned) staterr,
1090                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1091
1092                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1093                 if (nmb == NULL) {
1094                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1095                                    "queue_id=%u", (unsigned) rxq->port_id,
1096                                    (unsigned) rxq->queue_id);
1097                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1098                         break;
1099                 }
1100
1101                 nb_hold++;
1102                 rxe = &sw_ring[rx_id];
1103                 rx_id++;
1104                 if (rx_id == rxq->nb_rx_desc)
1105                         rx_id = 0;
1106
1107                 /* Prefetch next mbuf while processing current one. */
1108                 rte_igb_prefetch(sw_ring[rx_id].mbuf);
1109
1110                 /*
1111                  * When next RX descriptor is on a cache-line boundary,
1112                  * prefetch the next 4 RX descriptors and the next 8 pointers
1113                  * to mbufs.
1114                  */
1115                 if ((rx_id & 0x3) == 0) {
1116                         rte_igb_prefetch(&rx_ring[rx_id]);
1117                         rte_igb_prefetch(&sw_ring[rx_id]);
1118                 }
1119
1120                 /*
1121                  * Update RX descriptor with the physical address of the new
1122                  * data buffer of the new allocated mbuf.
1123                  */
1124                 rxm = rxe->mbuf;
1125                 rxe->mbuf = nmb;
1126                 dma = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1127                 rxdp->read.pkt_addr = dma;
1128                 rxdp->read.hdr_addr = 0;
1129
1130                 /*
1131                  * Set data length & data buffer address of mbuf.
1132                  */
1133                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1134                 rxm->data_len = data_len;
1135                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1136
1137                 /*
1138                  * If this is the first buffer of the received packet,
1139                  * set the pointer to the first mbuf of the packet and
1140                  * initialize its context.
1141                  * Otherwise, update the total length and the number of segments
1142                  * of the current scattered packet, and update the pointer to
1143                  * the last mbuf of the current packet.
1144                  */
1145                 if (first_seg == NULL) {
1146                         first_seg = rxm;
1147                         first_seg->pkt_len = data_len;
1148                         first_seg->nb_segs = 1;
1149                 } else {
1150                         first_seg->pkt_len += data_len;
1151                         first_seg->nb_segs++;
1152                         last_seg->next = rxm;
1153                 }
1154
1155                 /*
1156                  * If this is not the last buffer of the received packet,
1157                  * update the pointer to the last mbuf of the current scattered
1158                  * packet and continue to parse the RX ring.
1159                  */
1160                 if (! (staterr & E1000_RXD_STAT_EOP)) {
1161                         last_seg = rxm;
1162                         goto next_desc;
1163                 }
1164
1165                 /*
1166                  * This is the last buffer of the received packet.
1167                  * If the CRC is not stripped by the hardware:
1168                  *   - Subtract the CRC length from the total packet length.
1169                  *   - If the last buffer only contains the whole CRC or a part
1170                  *     of it, free the mbuf associated to the last buffer.
1171                  *     If part of the CRC is also contained in the previous
1172                  *     mbuf, subtract the length of that CRC part from the
1173                  *     data length of the previous mbuf.
1174                  */
1175                 rxm->next = NULL;
1176                 if (unlikely(rxq->crc_len > 0)) {
1177                         first_seg->pkt_len -= ETHER_CRC_LEN;
1178                         if (data_len <= ETHER_CRC_LEN) {
1179                                 rte_pktmbuf_free_seg(rxm);
1180                                 first_seg->nb_segs--;
1181                                 last_seg->data_len = (uint16_t)
1182                                         (last_seg->data_len -
1183                                          (ETHER_CRC_LEN - data_len));
1184                                 last_seg->next = NULL;
1185                         } else
1186                                 rxm->data_len =
1187                                         (uint16_t) (data_len - ETHER_CRC_LEN);
1188                 }
1189
1190                 /*
1191                  * Initialize the first mbuf of the returned packet:
1192                  *    - RX port identifier,
1193                  *    - hardware offload data, if any:
1194                  *      - RSS flag & hash,
1195                  *      - IP checksum flag,
1196                  *      - VLAN TCI, if any,
1197                  *      - error flags.
1198                  */
1199                 first_seg->port = rxq->port_id;
1200                 first_seg->hash.rss = rxd.wb.lower.hi_dword.rss;
1201
1202                 /*
1203                  * The vlan_tci field is only valid when PKT_RX_VLAN is
1204                  * set in the pkt_flags field and must be in CPU byte order.
1205                  */
1206                 if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
1207                                 (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
1208                         first_seg->vlan_tci =
1209                                 rte_be_to_cpu_16(rxd.wb.upper.vlan);
1210                 } else {
1211                         first_seg->vlan_tci =
1212                                 rte_le_to_cpu_16(rxd.wb.upper.vlan);
1213                 }
1214                 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1215                 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
1216                 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
1217                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1218                 first_seg->ol_flags = pkt_flags;
1219                 first_seg->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.
1220                                         lower.lo_dword.hs_rss.pkt_info);
1221
1222                 /* Prefetch data of first segment, if configured to do so. */
1223                 rte_packet_prefetch((char *)first_seg->buf_addr +
1224                         first_seg->data_off);
1225
1226                 /*
1227                  * Store the mbuf address into the next entry of the array
1228                  * of returned packets.
1229                  */
1230                 rx_pkts[nb_rx++] = first_seg;
1231
1232                 /*
1233                  * Setup receipt context for a new packet.
1234                  */
1235                 first_seg = NULL;
1236         }
1237
1238         /*
1239          * Record index of the next RX descriptor to probe.
1240          */
1241         rxq->rx_tail = rx_id;
1242
1243         /*
1244          * Save receive context.
1245          */
1246         rxq->pkt_first_seg = first_seg;
1247         rxq->pkt_last_seg = last_seg;
1248
1249         /*
1250          * If the number of free RX descriptors is greater than the RX free
1251          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1252          * register.
1253          * Update the RDT with the value of the last processed RX descriptor
1254          * minus 1, to guarantee that the RDT register is never equal to the
1255          * RDH register, which creates a "full" ring situtation from the
1256          * RDH register, which creates a "full" ring situation from the
1257          */
1258         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1259         if (nb_hold > rxq->rx_free_thresh) {
1260                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1261                            "nb_hold=%u nb_rx=%u",
1262                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1263                            (unsigned) rx_id, (unsigned) nb_hold,
1264                            (unsigned) nb_rx);
1265                 rx_id = (uint16_t) ((rx_id == 0) ?
1266                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1267                 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1268                 nb_hold = 0;
1269         }
1270         rxq->nb_rx_hold = nb_hold;
1271         return nb_rx;
1272 }
1273
1274 /*
1275  * Maximum number of Ring Descriptors.
1276  *
1277  * Since RDLEN/TDLEN must be a multiple of 128 bytes, the number of ring
1278  * descriptors must meet the following condition:
1279  *      (num_ring_desc * sizeof(struct e1000_rx/tx_desc)) % 128 == 0
1280  */
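
/*
 * Illustrative sketch (not part of the driver): the advanced RX/TX
 * descriptors used here are 16 bytes each, so the 128-byte RDLEN/TDLEN
 * alignment above reduces to a descriptor count that is a multiple of 8,
 * which is what the IGB_RXD_ALIGN/IGB_TXD_ALIGN checks in the queue setup
 * functions below enforce:
 *
 *     uint16_t nb_desc = 512;
 *     int ok = ((nb_desc * sizeof(union e1000_adv_rx_desc)) % 128) == 0;
 */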
1281
1282 static void
1283 igb_tx_queue_release_mbufs(struct igb_tx_queue *txq)
1284 {
1285         unsigned i;
1286
1287         if (txq->sw_ring != NULL) {
1288                 for (i = 0; i < txq->nb_tx_desc; i++) {
1289                         if (txq->sw_ring[i].mbuf != NULL) {
1290                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1291                                 txq->sw_ring[i].mbuf = NULL;
1292                         }
1293                 }
1294         }
1295 }
1296
1297 static void
1298 igb_tx_queue_release(struct igb_tx_queue *txq)
1299 {
1300         if (txq != NULL) {
1301                 igb_tx_queue_release_mbufs(txq);
1302                 rte_free(txq->sw_ring);
1303                 rte_free(txq);
1304         }
1305 }
1306
1307 void
1308 eth_igb_tx_queue_release(void *txq)
1309 {
1310         igb_tx_queue_release(txq);
1311 }
1312
1313 static int
1314 igb_tx_done_cleanup(struct igb_tx_queue *txq, uint32_t free_cnt)
1315 {
1316         struct igb_tx_entry *sw_ring;
1317         volatile union e1000_adv_tx_desc *txr;
1318         uint16_t tx_first; /* First segment analyzed. */
1319         uint16_t tx_id;    /* Current segment being processed. */
1320         uint16_t tx_last;  /* Last segment in the current packet. */
1321         uint16_t tx_next;  /* First segment of the next packet. */
1322         int count;
1323
1324         if (txq != NULL) {
1325                 count = 0;
1326                 sw_ring = txq->sw_ring;
1327                 txr = txq->tx_ring;
1328
1329                 /*
1330                  * tx_tail is the index of the most recently sent packet on the
1331                  * sw_ring. Go to the end of that packet (the last segment in its
1332                  * chain); the segment that follows is the oldest one in the
1333                  * sw_ring and belongs to the first packet that this function will
1334                  * attempt to free.
1335                  */
1336
1337                 /* Get last segment in most recently added packet. */
1338                 tx_first = sw_ring[txq->tx_tail].last_id;
1339
1340                 /* Get the next segment, which is the oldest segment in ring. */
1341                 tx_first = sw_ring[tx_first].next_id;
1342
1343                 /* Set the current index to the first. */
1344                 tx_id = tx_first;
1345
1346                 /*
1347                  * Loop through each packet. For each packet, verify that an
1348                  * mbuf exists and that the last segment is free. If so, free
1349                  * it and move on.
1350                  */
1351                 while (1) {
1352                         tx_last = sw_ring[tx_id].last_id;
1353
1354                         if (sw_ring[tx_last].mbuf) {
1355                                 if (txr[tx_last].wb.status &
1356                                                 E1000_TXD_STAT_DD) {
1357                                         /*
1358                                          * Increment the number of packets
1359                                          * freed.
1360                                          */
1361                                         count++;
1362
1363                                         /* Get the start of the next packet. */
1364                                         tx_next = sw_ring[tx_last].next_id;
1365
1366                                         /*
1367                                          * Loop through all segments in a
1368                                          * packet.
1369                                          */
1370                                         do {
1371                                                 rte_pktmbuf_free_seg(sw_ring[tx_id].mbuf);
1372                                                 sw_ring[tx_id].mbuf = NULL;
1373                                                 sw_ring[tx_id].last_id = tx_id;
1374
1375                                                 /* Move to next segment. */
1376                                                 tx_id = sw_ring[tx_id].next_id;
1377
1378                                         } while (tx_id != tx_next);
1379
1380                                         if (unlikely(count == (int)free_cnt))
1381                                                 break;
1382                                 } else
1383                                         /*
1384                                          * mbuf still in use, nothing left to
1385                                          * free.
1386                                          */
1387                                         break;
1388                         } else {
1389                                 /*
1390                                  * There are multiple reasons to be here:
1391                                  * 1) All the packets on the ring have been
1392                                  *    freed - tx_id is equal to tx_first
1393                                  *    and some packets have been freed.
1394                                  *    - Done, exit
1395                                  * 2) The interface has not sent a ring's worth of
1396                                  *    packets yet, so the segment after the tail is
1397                                  *    still empty. Or a previous call to this
1398                                  *    function freed some of the segments, but not
1399                                  *    all of them, so there is a hole in the list.
1400                                  *    Hopefully this is a rare case.
1401                                  *    - Walk the list and find the next mbuf. If
1402                                  *      there isn't one, then done.
1403                                  */
1404                                 if (likely((tx_id == tx_first) && (count != 0)))
1405                                         break;
1406
1407                                 /*
1408                                  * Walk the list and find the next mbuf, if any.
1409                                  */
1410                                 do {
1411                                         /* Move to next segment. */
1412                                         tx_id = sw_ring[tx_id].next_id;
1413
1414                                         if (sw_ring[tx_id].mbuf)
1415                                                 break;
1416
1417                                 } while (tx_id != tx_first);
1418
1419                                 /*
1420                                  * Determine why the previous loop bailed out. If
1421                                  * there is no mbuf, we are done.
1422                                  */
1423                                 if (sw_ring[tx_id].mbuf == NULL)
1424                                         break;
1425                         }
1426                 }
1427         } else
1428                 count = -ENODEV;
1429
1430         return count;
1431 }
1432
1433 int
1434 eth_igb_tx_done_cleanup(void *txq, uint32_t free_cnt)
1435 {
1436         return igb_tx_done_cleanup(txq, free_cnt);
1437 }
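
/*
 * Usage sketch (illustrative, not part of the driver): applications
 * normally reach igb_tx_done_cleanup() through the generic ethdev API
 * rather than the PMD entry point. Assuming port_id identifies a
 * started igb port:
 *
 *     // Try to free up to 32 transmitted mbufs on TX queue 0.
 *     int freed = rte_eth_tx_done_cleanup(port_id, 0, 32);
 *     if (freed < 0)
 *             rte_exit(EXIT_FAILURE, "tx cleanup failed: %d\n", freed);
 */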
1438
1439 static void
1440 igb_reset_tx_queue_stat(struct igb_tx_queue *txq)
1441 {
1442         txq->tx_head = 0;
1443         txq->tx_tail = 0;
1444         txq->ctx_curr = 0;
1445         memset((void*)&txq->ctx_cache, 0,
1446                 IGB_CTX_NUM * sizeof(struct igb_advctx_info));
1447 }
1448
1449 static void
1450 igb_reset_tx_queue(struct igb_tx_queue *txq, struct rte_eth_dev *dev)
1451 {
1452         static const union e1000_adv_tx_desc zeroed_desc = {{0}};
1453         struct igb_tx_entry *txe = txq->sw_ring;
1454         uint16_t i, prev;
1455         struct e1000_hw *hw;
1456
1457         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1458         /* Zero out HW ring memory */
1459         for (i = 0; i < txq->nb_tx_desc; i++) {
1460                 txq->tx_ring[i] = zeroed_desc;
1461         }
1462
1463         /* Initialize ring entries */
1464         prev = (uint16_t)(txq->nb_tx_desc - 1);
1465         for (i = 0; i < txq->nb_tx_desc; i++) {
1466                 volatile union e1000_adv_tx_desc *txd = &(txq->tx_ring[i]);
1467
1468                 txd->wb.status = E1000_TXD_STAT_DD;
1469                 txe[i].mbuf = NULL;
1470                 txe[i].last_id = i;
1471                 txe[prev].next_id = i;
1472                 prev = i;
1473         }
1474
1475         txq->txd_type = E1000_ADVTXD_DTYP_DATA;
1476         /* 82575 specific, each tx queue will use 2 hw contexts */
1477         if (hw->mac.type == e1000_82575)
1478                 txq->ctx_start = txq->queue_id * IGB_CTX_NUM;
1479
1480         igb_reset_tx_queue_stat(txq);
1481 }
1482
1483 int
1484 eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
1485                          uint16_t queue_idx,
1486                          uint16_t nb_desc,
1487                          unsigned int socket_id,
1488                          const struct rte_eth_txconf *tx_conf)
1489 {
1490         const struct rte_memzone *tz;
1491         struct igb_tx_queue *txq;
1492         struct e1000_hw     *hw;
1493         uint32_t size;
1494
1495         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1496
1497         /*
1498          * Validate number of transmit descriptors.
1499          * It must not exceed the hardware maximum, and must be a multiple
1500          * of E1000_ALIGN.
1501          */
1502         if (nb_desc % IGB_TXD_ALIGN != 0 ||
1503                         (nb_desc > E1000_MAX_RING_DESC) ||
1504                         (nb_desc < E1000_MIN_RING_DESC)) {
1505                 return -EINVAL;
1506         }
1507
1508         /*
1509          * The tx_free_thresh and tx_rs_thresh values are not used in the 1G
1510          * driver.
1511          */
1512         if (tx_conf->tx_free_thresh != 0)
1513                 PMD_INIT_LOG(INFO, "The tx_free_thresh parameter is not "
1514                              "used for the 1G driver.");
1515         if (tx_conf->tx_rs_thresh != 0)
1516                 PMD_INIT_LOG(INFO, "The tx_rs_thresh parameter is not "
1517                              "used for the 1G driver.");
1518         if (tx_conf->tx_thresh.wthresh == 0 && hw->mac.type != e1000_82576)
1519                 PMD_INIT_LOG(INFO, "To improve 1G driver performance, "
1520                              "consider setting the TX WTHRESH value to 4, 8, "
1521                              "or 16.");
1522
1523         /* Free memory prior to re-allocation if needed */
1524         if (dev->data->tx_queues[queue_idx] != NULL) {
1525                 igb_tx_queue_release(dev->data->tx_queues[queue_idx]);
1526                 dev->data->tx_queues[queue_idx] = NULL;
1527         }
1528
1529         /* First allocate the tx queue data structure */
1530         txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
1531                                                         RTE_CACHE_LINE_SIZE);
1532         if (txq == NULL)
1533                 return -ENOMEM;
1534
1535         /*
1536          * Allocate TX ring hardware descriptors. A memzone large enough to
1537          * handle the maximum ring size is allocated in order to allow for
1538          * resizing in later calls to the queue setup function.
1539          */
1540         size = sizeof(union e1000_adv_tx_desc) * E1000_MAX_RING_DESC;
1541         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
1542                                       E1000_ALIGN, socket_id);
1543         if (tz == NULL) {
1544                 igb_tx_queue_release(txq);
1545                 return -ENOMEM;
1546         }
1547
1548         txq->nb_tx_desc = nb_desc;
1549         txq->pthresh = tx_conf->tx_thresh.pthresh;
1550         txq->hthresh = tx_conf->tx_thresh.hthresh;
1551         txq->wthresh = tx_conf->tx_thresh.wthresh;
1552         if (txq->wthresh > 0 && hw->mac.type == e1000_82576)
1553                 txq->wthresh = 1;
1554         txq->queue_id = queue_idx;
1555         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1556                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1557         txq->port_id = dev->data->port_id;
1558
1559         txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx));
1560         txq->tx_ring_phys_addr = tz->iova;
1561
1562         txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
1563         /* Allocate software ring */
1564         txq->sw_ring = rte_zmalloc("txq->sw_ring",
1565                                    sizeof(struct igb_tx_entry) * nb_desc,
1566                                    RTE_CACHE_LINE_SIZE);
1567         if (txq->sw_ring == NULL) {
1568                 igb_tx_queue_release(txq);
1569                 return -ENOMEM;
1570         }
1571         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1572                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
1573
1574         igb_reset_tx_queue(txq, dev);
1575         dev->tx_pkt_burst = eth_igb_xmit_pkts;
1576         dev->tx_pkt_prepare = &eth_igb_prep_pkts;
1577         dev->data->tx_queues[queue_idx] = txq;
1578
1579         return 0;
1580 }
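
/*
 * Usage sketch (illustrative, not part of the driver): applications set
 * up TX queues through the generic ethdev API, which dispatches here for
 * igb ports. The descriptor count must satisfy the E1000_MIN_RING_DESC/
 * E1000_MAX_RING_DESC/IGB_TXD_ALIGN checks above. port_id is assumed to
 * come from the application:
 *
 *     struct rte_eth_dev_info dev_info;
 *     rte_eth_dev_info_get(port_id, &dev_info);
 *     int ret = rte_eth_tx_queue_setup(port_id, 0, 512,
 *                                      rte_eth_dev_socket_id(port_id),
 *                                      &dev_info.default_txconf);
 */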
1581
1582 static void
1583 igb_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
1584 {
1585         unsigned i;
1586
1587         if (rxq->sw_ring != NULL) {
1588                 for (i = 0; i < rxq->nb_rx_desc; i++) {
1589                         if (rxq->sw_ring[i].mbuf != NULL) {
1590                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
1591                                 rxq->sw_ring[i].mbuf = NULL;
1592                         }
1593                 }
1594         }
1595 }
1596
1597 static void
1598 igb_rx_queue_release(struct igb_rx_queue *rxq)
1599 {
1600         if (rxq != NULL) {
1601                 igb_rx_queue_release_mbufs(rxq);
1602                 rte_free(rxq->sw_ring);
1603                 rte_free(rxq);
1604         }
1605 }
1606
1607 void
1608 eth_igb_rx_queue_release(void *rxq)
1609 {
1610         igb_rx_queue_release(rxq);
1611 }
1612
1613 static void
1614 igb_reset_rx_queue(struct igb_rx_queue *rxq)
1615 {
1616         static const union e1000_adv_rx_desc zeroed_desc = {{0}};
1617         unsigned i;
1618
1619         /* Zero out HW ring memory */
1620         for (i = 0; i < rxq->nb_rx_desc; i++) {
1621                 rxq->rx_ring[i] = zeroed_desc;
1622         }
1623
1624         rxq->rx_tail = 0;
1625         rxq->pkt_first_seg = NULL;
1626         rxq->pkt_last_seg = NULL;
1627 }
1628
1629 int
1630 eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
1631                          uint16_t queue_idx,
1632                          uint16_t nb_desc,
1633                          unsigned int socket_id,
1634                          const struct rte_eth_rxconf *rx_conf,
1635                          struct rte_mempool *mp)
1636 {
1637         const struct rte_memzone *rz;
1638         struct igb_rx_queue *rxq;
1639         struct e1000_hw     *hw;
1640         unsigned int size;
1641
1642         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1643
1644         /*
1645          * Validate number of receive descriptors.
1646          * It must not exceed the hardware maximum, and must be a multiple
1647          * of E1000_ALIGN.
1648          */
1649         if (nb_desc % IGB_RXD_ALIGN != 0 ||
1650                         (nb_desc > E1000_MAX_RING_DESC) ||
1651                         (nb_desc < E1000_MIN_RING_DESC)) {
1652                 return -EINVAL;
1653         }
1654
1655         /* Free memory prior to re-allocation if needed */
1656         if (dev->data->rx_queues[queue_idx] != NULL) {
1657                 igb_rx_queue_release(dev->data->rx_queues[queue_idx]);
1658                 dev->data->rx_queues[queue_idx] = NULL;
1659         }
1660
1661         /* First allocate the RX queue data structure. */
1662         rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
1663                           RTE_CACHE_LINE_SIZE);
1664         if (rxq == NULL)
1665                 return -ENOMEM;
1666         rxq->mb_pool = mp;
1667         rxq->nb_rx_desc = nb_desc;
1668         rxq->pthresh = rx_conf->rx_thresh.pthresh;
1669         rxq->hthresh = rx_conf->rx_thresh.hthresh;
1670         rxq->wthresh = rx_conf->rx_thresh.wthresh;
1671         if (rxq->wthresh > 0 &&
1672             (hw->mac.type == e1000_82576 || hw->mac.type == e1000_vfadapt_i350))
1673                 rxq->wthresh = 1;
1674         rxq->drop_en = rx_conf->rx_drop_en;
1675         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1676         rxq->queue_id = queue_idx;
1677         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1678                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1679         rxq->port_id = dev->data->port_id;
1680         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
1681                                   ETHER_CRC_LEN);
1682
1683         /*
1684          *  Allocate RX ring hardware descriptors. A memzone large enough to
1685          *  handle the maximum ring size is allocated in order to allow for
1686          *  resizing in later calls to the queue setup function.
1687          */
1688         size = sizeof(union e1000_adv_rx_desc) * E1000_MAX_RING_DESC;
1689         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size,
1690                                       E1000_ALIGN, socket_id);
1691         if (rz == NULL) {
1692                 igb_rx_queue_release(rxq);
1693                 return -ENOMEM;
1694         }
1695         rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx));
1696         rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx));
1697         rxq->rx_ring_phys_addr = rz->iova;
1698         rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;
1699
1700         /* Allocate software ring. */
1701         rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1702                                    sizeof(struct igb_rx_entry) * nb_desc,
1703                                    RTE_CACHE_LINE_SIZE);
1704         if (rxq->sw_ring == NULL) {
1705                 igb_rx_queue_release(rxq);
1706                 return -ENOMEM;
1707         }
1708         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1709                      rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1710
1711         dev->data->rx_queues[queue_idx] = rxq;
1712         igb_reset_rx_queue(rxq);
1713
1714         return 0;
1715 }
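
/*
 * Usage sketch (illustrative, not part of the driver): the RX-side
 * counterpart goes through rte_eth_rx_queue_setup(), which also passes
 * in the mempool that igb_alloc_rx_queue_mbufs() later draws from.
 * port_id and mbuf_pool are assumed to come from the application:
 *
 *     int ret = rte_eth_rx_queue_setup(port_id, 0, 512,
 *                                      rte_eth_dev_socket_id(port_id),
 *                                      NULL,      // default rte_eth_rxconf
 *                                      mbuf_pool);
 */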
1716
1717 uint32_t
1718 eth_igb_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1719 {
1720 #define IGB_RXQ_SCAN_INTERVAL 4
1721         volatile union e1000_adv_rx_desc *rxdp;
1722         struct igb_rx_queue *rxq;
1723         uint32_t desc = 0;
1724
1725         rxq = dev->data->rx_queues[rx_queue_id];
1726         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
1727
1728         while ((desc < rxq->nb_rx_desc) &&
1729                 (rxdp->wb.upper.status_error & E1000_RXD_STAT_DD)) {
1730                 desc += IGB_RXQ_SCAN_INTERVAL;
1731                 rxdp += IGB_RXQ_SCAN_INTERVAL;
1732                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
1733                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
1734                                 desc - rxq->nb_rx_desc]);
1735         }
1736
1737         return desc;
1738 }
1739
1740 int
1741 eth_igb_rx_descriptor_done(void *rx_queue, uint16_t offset)
1742 {
1743         volatile union e1000_adv_rx_desc *rxdp;
1744         struct igb_rx_queue *rxq = rx_queue;
1745         uint32_t desc;
1746
1747         if (unlikely(offset >= rxq->nb_rx_desc))
1748                 return 0;
1749         desc = rxq->rx_tail + offset;
1750         if (desc >= rxq->nb_rx_desc)
1751                 desc -= rxq->nb_rx_desc;
1752
1753         rxdp = &rxq->rx_ring[desc];
1754         return !!(rxdp->wb.upper.status_error & E1000_RXD_STAT_DD);
1755 }
1756
1757 int
1758 eth_igb_rx_descriptor_status(void *rx_queue, uint16_t offset)
1759 {
1760         struct igb_rx_queue *rxq = rx_queue;
1761         volatile uint32_t *status;
1762         uint32_t desc;
1763
1764         if (unlikely(offset >= rxq->nb_rx_desc))
1765                 return -EINVAL;
1766
1767         if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
1768                 return RTE_ETH_RX_DESC_UNAVAIL;
1769
1770         desc = rxq->rx_tail + offset;
1771         if (desc >= rxq->nb_rx_desc)
1772                 desc -= rxq->nb_rx_desc;
1773
1774         status = &rxq->rx_ring[desc].wb.upper.status_error;
1775         if (*status & rte_cpu_to_le_32(E1000_RXD_STAT_DD))
1776                 return RTE_ETH_RX_DESC_DONE;
1777
1778         return RTE_ETH_RX_DESC_AVAIL;
1779 }
1780
1781 int
1782 eth_igb_tx_descriptor_status(void *tx_queue, uint16_t offset)
1783 {
1784         struct igb_tx_queue *txq = tx_queue;
1785         volatile uint32_t *status;
1786         uint32_t desc;
1787
1788         if (unlikely(offset >= txq->nb_tx_desc))
1789                 return -EINVAL;
1790
1791         desc = txq->tx_tail + offset;
1792         if (desc >= txq->nb_tx_desc)
1793                 desc -= txq->nb_tx_desc;
1794
1795         status = &txq->tx_ring[desc].wb.status;
1796         if (*status & rte_cpu_to_le_32(E1000_TXD_STAT_DD))
1797                 return RTE_ETH_TX_DESC_DONE;
1798
1799         return RTE_ETH_TX_DESC_FULL;
1800 }
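
/*
 * Usage sketch (illustrative, not part of the driver): the descriptor
 * status callbacks above are reached through the generic ethdev helpers,
 * which an application can poll to gauge ring occupancy:
 *
 *     if (rte_eth_rx_descriptor_status(port_id, 0, 0) ==
 *                     RTE_ETH_RX_DESC_DONE)
 *             ;       // a packet is ready at the head of RX queue 0
 *
 *     if (rte_eth_tx_descriptor_status(port_id, 0, 64) ==
 *                     RTE_ETH_TX_DESC_FULL)
 *             ;       // descriptor 64 past the tail is still in flight
 */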
1801
1802 void
1803 igb_dev_clear_queues(struct rte_eth_dev *dev)
1804 {
1805         uint16_t i;
1806         struct igb_tx_queue *txq;
1807         struct igb_rx_queue *rxq;
1808
1809         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1810                 txq = dev->data->tx_queues[i];
1811                 if (txq != NULL) {
1812                         igb_tx_queue_release_mbufs(txq);
1813                         igb_reset_tx_queue(txq, dev);
1814                 }
1815         }
1816
1817         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1818                 rxq = dev->data->rx_queues[i];
1819                 if (rxq != NULL) {
1820                         igb_rx_queue_release_mbufs(rxq);
1821                         igb_reset_rx_queue(rxq);
1822                 }
1823         }
1824 }
1825
1826 void
1827 igb_dev_free_queues(struct rte_eth_dev *dev)
1828 {
1829         uint16_t i;
1830
1831         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1832                 eth_igb_rx_queue_release(dev->data->rx_queues[i]);
1833                 dev->data->rx_queues[i] = NULL;
1834         }
1835         dev->data->nb_rx_queues = 0;
1836
1837         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1838                 eth_igb_tx_queue_release(dev->data->tx_queues[i]);
1839                 dev->data->tx_queues[i] = NULL;
1840         }
1841         dev->data->nb_tx_queues = 0;
1842 }
1843
1844 /**
1845  * Receive Side Scaling (RSS).
1846  * See section 7.1.1.7 in the following document:
1847  *     "Intel 82576 GbE Controller Datasheet" - Revision 2.45 October 2009
1848  *
1849  * Principles:
1850  * The source and destination IP addresses of the IP header and the source and
1851  * destination ports of TCP/UDP headers, if any, of received packets are hashed
1852  * against a configurable random key to compute a 32-bit RSS hash result.
1853  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
1854  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
1855  * RSS output index, which is used as the index of the RX queue where the
1856  * received packets are stored.
1857  * The following output is supplied in the RX write-back descriptor:
1858  *     - 32-bit result of the Microsoft RSS hash function,
1859  *     - 4-bit RSS type field.
1860  */
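
/*
 * Illustrative sketch of the indexing described above (not part of the
 * driver): given the 32-bit RSS hash reported in mbuf->hash.rss, the
 * destination queue can be predicted from a software mirror reta[] of
 * the 128-entry redirection table programmed in igb_rss_configure():
 *
 *     uint32_t reta_idx = rss_hash & 0x7F;   // 7 LSBs -> 128 entries
 *     uint8_t  rx_queue = reta[reta_idx];    // 3-bit RSS output index
 */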
1861
1862 /*
1863  * RSS random key supplied in section 7.1.1.7.3 of the Intel 82576 datasheet.
1864  * Used as the default key.
1865  */
1866 static uint8_t rss_intel_key[40] = {
1867         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1868         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1869         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1870         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1871         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1872 };
1873
1874 static void
1875 igb_rss_disable(struct rte_eth_dev *dev)
1876 {
1877         struct e1000_hw *hw;
1878         uint32_t mrqc;
1879
1880         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1881         mrqc = E1000_READ_REG(hw, E1000_MRQC);
1882         mrqc &= ~E1000_MRQC_ENABLE_MASK;
1883         E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1884 }
1885
1886 static void
1887 igb_hw_rss_hash_set(struct e1000_hw *hw, struct rte_eth_rss_conf *rss_conf)
1888 {
1889         uint8_t  *hash_key;
1890         uint32_t rss_key;
1891         uint32_t mrqc;
1892         uint64_t rss_hf;
1893         uint16_t i;
1894
1895         hash_key = rss_conf->rss_key;
1896         if (hash_key != NULL) {
1897                 /* Fill in RSS hash key */
1898                 for (i = 0; i < 10; i++) {
1899                         rss_key  = hash_key[(i * 4)];
1900                         rss_key |= hash_key[(i * 4) + 1] << 8;
1901                         rss_key |= hash_key[(i * 4) + 2] << 16;
1902                         rss_key |= hash_key[(i * 4) + 3] << 24;
1903                         E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key);
1904                 }
1905         }
1906
1907         /* Set configured hashing protocols in MRQC register */
1908         rss_hf = rss_conf->rss_hf;
1909         mrqc = E1000_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
1910         if (rss_hf & ETH_RSS_IPV4)
1911                 mrqc |= E1000_MRQC_RSS_FIELD_IPV4;
1912         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1913                 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_TCP;
1914         if (rss_hf & ETH_RSS_IPV6)
1915                 mrqc |= E1000_MRQC_RSS_FIELD_IPV6;
1916         if (rss_hf & ETH_RSS_IPV6_EX)
1917                 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_EX;
1918         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1919                 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP;
1920         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
1921                 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
1922         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
1923                 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
1924         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
1925                 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
1926         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
1927                 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP_EX;
1928         E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1929 }
1930
1931 int
1932 eth_igb_rss_hash_update(struct rte_eth_dev *dev,
1933                         struct rte_eth_rss_conf *rss_conf)
1934 {
1935         struct e1000_hw *hw;
1936         uint32_t mrqc;
1937         uint64_t rss_hf;
1938
1939         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1940
1941         /*
1942          * Before changing anything, first check that the update RSS operation
1943          * does not attempt to disable RSS, if RSS was enabled at
1944          * initialization time, or does not attempt to enable RSS, if RSS was
1945          * disabled at initialization time.
1946          */
1947         rss_hf = rss_conf->rss_hf & IGB_RSS_OFFLOAD_ALL;
1948         mrqc = E1000_READ_REG(hw, E1000_MRQC);
1949         if (!(mrqc & E1000_MRQC_ENABLE_MASK)) { /* RSS disabled */
1950                 if (rss_hf != 0) /* Enable RSS */
1951                         return -(EINVAL);
1952                 return 0; /* Nothing to do */
1953         }
1954         /* RSS enabled */
1955         if (rss_hf == 0) /* Disable RSS */
1956                 return -(EINVAL);
1957         igb_hw_rss_hash_set(hw, rss_conf);
1958         return 0;
1959 }
1960
1961 int eth_igb_rss_hash_conf_get(struct rte_eth_dev *dev,
1962                               struct rte_eth_rss_conf *rss_conf)
1963 {
1964         struct e1000_hw *hw;
1965         uint8_t *hash_key;
1966         uint32_t rss_key;
1967         uint32_t mrqc;
1968         uint64_t rss_hf;
1969         uint16_t i;
1970
1971         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1972         hash_key = rss_conf->rss_key;
1973         if (hash_key != NULL) {
1974                 /* Return RSS hash key */
1975                 for (i = 0; i < 10; i++) {
1976                         rss_key = E1000_READ_REG_ARRAY(hw, E1000_RSSRK(0), i);
1977                         hash_key[(i * 4)] = rss_key & 0x000000FF;
1978                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
1979                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
1980                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
1981                 }
1982         }
1983
1984         /* Get RSS functions configured in MRQC register */
1985         mrqc = E1000_READ_REG(hw, E1000_MRQC);
1986         if ((mrqc & E1000_MRQC_ENABLE_RSS_4Q) == 0) { /* RSS is disabled */
1987                 rss_conf->rss_hf = 0;
1988                 return 0;
1989         }
1990         rss_hf = 0;
1991         if (mrqc & E1000_MRQC_RSS_FIELD_IPV4)
1992                 rss_hf |= ETH_RSS_IPV4;
1993         if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_TCP)
1994                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
1995         if (mrqc & E1000_MRQC_RSS_FIELD_IPV6)
1996                 rss_hf |= ETH_RSS_IPV6;
1997         if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_EX)
1998                 rss_hf |= ETH_RSS_IPV6_EX;
1999         if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP)
2000                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
2001         if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP_EX)
2002                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
2003         if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_UDP)
2004                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2005         if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP)
2006                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
2007         if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP_EX)
2008                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
2009         rss_conf->rss_hf = rss_hf;
2010         return 0;
2011 }
2012
2013 static void
2014 igb_rss_configure(struct rte_eth_dev *dev)
2015 {
2016         struct rte_eth_rss_conf rss_conf;
2017         struct e1000_hw *hw;
2018         uint32_t shift;
2019         uint16_t i;
2020
2021         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2022
2023         /* Fill in redirection table. */
2024         shift = (hw->mac.type == e1000_82575) ? 6 : 0;
2025         for (i = 0; i < 128; i++) {
2026                 union e1000_reta {
2027                         uint32_t dword;
2028                         uint8_t  bytes[4];
2029                 } reta;
2030                 uint8_t q_idx;
2031
2032                 q_idx = (uint8_t) ((dev->data->nb_rx_queues > 1) ?
2033                                    i % dev->data->nb_rx_queues : 0);
2034                 reta.bytes[i & 3] = (uint8_t) (q_idx << shift);
2035                 if ((i & 3) == 3)
2036                         E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
2037         }
2038
2039         /*
2040          * Configure the RSS key and the RSS protocols used to compute
2041          * the RSS hash of input packets.
2042          */
2043         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2044         if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
2045                 igb_rss_disable(dev);
2046                 return;
2047         }
2048         if (rss_conf.rss_key == NULL)
2049                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2050         igb_hw_rss_hash_set(hw, &rss_conf);
2051 }
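
/*
 * Configuration sketch (illustrative, not part of the driver): the
 * rss_conf consumed above comes from the application's port
 * configuration; port_id, nb_rxq and nb_txq are the application's own
 * values, e.g.:
 *
 *     struct rte_eth_conf port_conf = {
 *             .rxmode = { .mq_mode = ETH_MQ_RX_RSS },
 *             .rx_adv_conf.rss_conf = {
 *                     .rss_key = NULL,   // fall back to rss_intel_key
 *                     .rss_hf  = ETH_RSS_IP | ETH_RSS_TCP,
 *             },
 *     };
 *     rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
 */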
2052
2053 /*
2054  * Check whether the MAC type supports VMDq.
2055  * Return 1 if it does, otherwise return 0.
2056  */
2057 static int
2058 igb_is_vmdq_supported(const struct rte_eth_dev *dev)
2059 {
2060         const struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2061
2062         switch (hw->mac.type) {
2063         case e1000_82576:
2064         case e1000_82580:
2065         case e1000_i350:
2066                 return 1;
2067         case e1000_82540:
2068         case e1000_82541:
2069         case e1000_82542:
2070         case e1000_82543:
2071         case e1000_82544:
2072         case e1000_82545:
2073         case e1000_82546:
2074         case e1000_82547:
2075         case e1000_82571:
2076         case e1000_82572:
2077         case e1000_82573:
2078         case e1000_82574:
2079         case e1000_82583:
2080         case e1000_i210:
2081         case e1000_i211:
2082         default:
2083                 PMD_INIT_LOG(ERR, "Cannot support VMDq feature");
2084                 return 0;
2085         }
2086 }
2087
2088 static int
2089 igb_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
2090 {
2091         struct rte_eth_vmdq_rx_conf *cfg;
2092         struct e1000_hw *hw;
2093         uint32_t mrqc, vt_ctl, vmolr, rctl;
2094         int i;
2095
2096         PMD_INIT_FUNC_TRACE();
2097
2098         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2099         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
2100
2101         /* Check whether the MAC type supports VMDq; a return value of 0 means it does not */
2102         if (igb_is_vmdq_supported(dev) == 0)
2103                 return -1;
2104
2105         igb_rss_disable(dev);
2106
2107         /* RCTL: enable VLAN filter */
2108         rctl = E1000_READ_REG(hw, E1000_RCTL);
2109         rctl |= E1000_RCTL_VFE;
2110         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2111
2112         /* MRQC: enable vmdq */
2113         mrqc = E1000_READ_REG(hw, E1000_MRQC);
2114         mrqc |= E1000_MRQC_ENABLE_VMDQ;
2115         E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2116
2117         /* VTCTL:  pool selection according to VLAN tag */
2118         vt_ctl = E1000_READ_REG(hw, E1000_VT_CTL);
2119         if (cfg->enable_default_pool)
2120                 vt_ctl |= (cfg->default_pool << E1000_VT_CTL_DEFAULT_POOL_SHIFT);
2121         vt_ctl |= E1000_VT_CTL_IGNORE_MAC;
2122         E1000_WRITE_REG(hw, E1000_VT_CTL, vt_ctl);
2123
2124         for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2125                 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2126                 vmolr &= ~(E1000_VMOLR_AUPE | E1000_VMOLR_ROMPE |
2127                         E1000_VMOLR_ROPE | E1000_VMOLR_BAM |
2128                         E1000_VMOLR_MPME);
2129
2130                 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_UNTAG)
2131                         vmolr |= E1000_VMOLR_AUPE;
2132                 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_HASH_MC)
2133                         vmolr |= E1000_VMOLR_ROMPE;
2134                 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_HASH_UC)
2135                         vmolr |= E1000_VMOLR_ROPE;
2136                 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_BROADCAST)
2137                         vmolr |= E1000_VMOLR_BAM;
2138                 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_MULTICAST)
2139                         vmolr |= E1000_VMOLR_MPME;
2140
2141                 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2142         }
2143
2144         /*
2145          * VMOLR: set STRVLAN to 1 if IGMAC in VTCTL is set to 1.
2146          * Both 82576 and 82580 support it.
2147          */
2148         if (hw->mac.type != e1000_i350) {
2149                 for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2150                         vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2151                         vmolr |= E1000_VMOLR_STRVLAN;
2152                         E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2153                 }
2154         }
2155
2156         /* VFTA - enable all vlan filters */
2157         for (i = 0; i < IGB_VFTA_SIZE; i++)
2158                 E1000_WRITE_REG(hw, (E1000_VFTA+(i*4)), UINT32_MAX);
2159
2160         /* VFRE: enable RX for all 8 pools; both 82576 and i350 support it */
2161         if (hw->mac.type != e1000_82580)
2162                 E1000_WRITE_REG(hw, E1000_VFRE, E1000_MBVFICR_VFREQ_MASK);
2163
2164         /*
2165          * RAH/RAL - allow pools to read specific MAC addresses.
2166          * In this case, all pools should be able to read from MAC address 0.
2167          */
2168         E1000_WRITE_REG(hw, E1000_RAH(0), (E1000_RAH_AV | UINT16_MAX));
2169         E1000_WRITE_REG(hw, E1000_RAL(0), UINT32_MAX);
2170
2171         /* VLVF: set up filters for vlan tags as configured */
2172         for (i = 0; i < cfg->nb_pool_maps; i++) {
2173                 /* set vlan id in VF register and set the valid bit */
2174                 E1000_WRITE_REG(hw, E1000_VLVF(i), (E1000_VLVF_VLANID_ENABLE | \
2175                         (cfg->pool_map[i].vlan_id & ETH_VLAN_ID_MAX) | \
2176                         ((cfg->pool_map[i].pools << E1000_VLVF_POOLSEL_SHIFT ) & \
2177                         E1000_VLVF_POOLSEL_MASK)));
2178         }
2179
2180         E1000_WRITE_FLUSH(hw);
2181
2182         return 0;
2183 }
2184
2185
2186 /*********************************************************************
2187  *
2188  *  Enable receive unit.
2189  *
2190  **********************************************************************/
2191
2192 static int
2193 igb_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
2194 {
2195         struct igb_rx_entry *rxe = rxq->sw_ring;
2196         uint64_t dma_addr;
2197         unsigned i;
2198
2199         /* Initialize software ring entries. */
2200         for (i = 0; i < rxq->nb_rx_desc; i++) {
2201                 volatile union e1000_adv_rx_desc *rxd;
2202                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
2203
2204                 if (mbuf == NULL) {
2205                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed "
2206                                      "queue_id=%hu", rxq->queue_id);
2207                         return -ENOMEM;
2208                 }
2209                 dma_addr =
2210                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
2211                 rxd = &rxq->rx_ring[i];
2212                 rxd->read.hdr_addr = 0;
2213                 rxd->read.pkt_addr = dma_addr;
2214                 rxe[i].mbuf = mbuf;
2215         }
2216
2217         return 0;
2218 }
2219
2220 #define E1000_MRQC_DEF_Q_SHIFT               (3)
2221 static int
2222 igb_dev_mq_rx_configure(struct rte_eth_dev *dev)
2223 {
2224         struct e1000_hw *hw =
2225                 E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2226         uint32_t mrqc;
2227
2228         if (RTE_ETH_DEV_SRIOV(dev).active == ETH_8_POOLS) {
2229                 /*
2230                  * SRIOV active scheme
2231                  * FIXME: handle RSS together with VMDq & SRIOV
2232                  */
2233                 mrqc = E1000_MRQC_ENABLE_VMDQ;
2234                 /* 011b Def_Q ignore, according to VT_CTL.DEF_PL */
2235                 mrqc |= 0x3 << E1000_MRQC_DEF_Q_SHIFT;
2236                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2237         } else if(RTE_ETH_DEV_SRIOV(dev).active == 0) {
2238                 /*
2239                  * SRIOV inactive scheme
2240                  */
2241                 switch (dev->data->dev_conf.rxmode.mq_mode) {
2242                         case ETH_MQ_RX_RSS:
2243                                 igb_rss_configure(dev);
2244                                 break;
2245                         case ETH_MQ_RX_VMDQ_ONLY:
2246                                 /*Configure general VMDQ only RX parameters*/
2247                                 igb_vmdq_rx_hw_configure(dev);
2248                                 break;
2249                         case ETH_MQ_RX_NONE:
2250                                 /* if mq_mode is none, disable RSS mode. */
2251                         default:
2252                                 igb_rss_disable(dev);
2253                                 break;
2254                 }
2255         }
2256
2257         return 0;
2258 }
2259
2260 int
2261 eth_igb_rx_init(struct rte_eth_dev *dev)
2262 {
2263         struct e1000_hw     *hw;
2264         struct igb_rx_queue *rxq;
2265         uint32_t rctl;
2266         uint32_t rxcsum;
2267         uint32_t srrctl;
2268         uint16_t buf_size;
2269         uint16_t rctl_bsize;
2270         uint16_t i;
2271         int ret;
2272
2273         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2274         srrctl = 0;
2275
2276         /*
2277          * Make sure receives are disabled while setting
2278          * up the descriptor ring.
2279          */
2280         rctl = E1000_READ_REG(hw, E1000_RCTL);
2281         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2282
2283         /*
2284          * Configure support of jumbo frames, if any.
2285          */
2286         if (dev->data->dev_conf.rxmode.jumbo_frame == 1) {
2287                 rctl |= E1000_RCTL_LPE;
2288
2289                 /*
2290                  * Set maximum packet length by default, and might be updated
2291                  * together with enabling/disabling dual VLAN.
2292                  */
2293                 E1000_WRITE_REG(hw, E1000_RLPML,
2294                         dev->data->dev_conf.rxmode.max_rx_pkt_len +
2295                                                 VLAN_TAG_SIZE);
2296         } else
2297                 rctl &= ~E1000_RCTL_LPE;
2298
2299         /* Configure and enable each RX queue. */
2300         rctl_bsize = 0;
2301         dev->rx_pkt_burst = eth_igb_recv_pkts;
2302         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2303                 uint64_t bus_addr;
2304                 uint32_t rxdctl;
2305
2306                 rxq = dev->data->rx_queues[i];
2307
2308                 rxq->flags = 0;
2309                 /*
2310                  * i350 and i354 vlan packets have vlan tags byte swapped.
2311                  */
2312                 if (hw->mac.type == e1000_i350 || hw->mac.type == e1000_i354) {
2313                         rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
2314                         PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
2315                 } else {
2316                         PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
2317                 }
2318
2319                 /* Allocate buffers for descriptor rings and set up queue */
2320                 ret = igb_alloc_rx_queue_mbufs(rxq);
2321                 if (ret)
2322                         return ret;
2323
2324                 /*
2325                  * Reset crc_len in case it was changed after queue setup by a
2326                  *  call to configure
2327                  */
2328                 rxq->crc_len =
2329                         (uint8_t)(dev->data->dev_conf.rxmode.hw_strip_crc ?
2330                                                         0 : ETHER_CRC_LEN);
2331
2332                 bus_addr = rxq->rx_ring_phys_addr;
2333                 E1000_WRITE_REG(hw, E1000_RDLEN(rxq->reg_idx),
2334                                 rxq->nb_rx_desc *
2335                                 sizeof(union e1000_adv_rx_desc));
2336                 E1000_WRITE_REG(hw, E1000_RDBAH(rxq->reg_idx),
2337                                 (uint32_t)(bus_addr >> 32));
2338                 E1000_WRITE_REG(hw, E1000_RDBAL(rxq->reg_idx), (uint32_t)bus_addr);
2339
2340                 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2341
2342                 /*
2343                  * Configure RX buffer size.
2344                  */
2345                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
2346                         RTE_PKTMBUF_HEADROOM);
2347                 if (buf_size >= 1024) {
2348                         /*
2349                          * Configure the BSIZEPACKET field of the SRRCTL
2350                          * register of the queue.
2351                          * Value is in 1 KB resolution, from 1 KB to 127 KB.
2352                          * If this field is equal to 0b, then RCTL.BSIZE
2353                          * determines the RX packet buffer size.
2354                          */
2355                         srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
2356                                    E1000_SRRCTL_BSIZEPKT_MASK);
2357                         buf_size = (uint16_t) ((srrctl &
2358                                                 E1000_SRRCTL_BSIZEPKT_MASK) <<
2359                                                E1000_SRRCTL_BSIZEPKT_SHIFT);
2360
2361                         /* Add the dual VLAN tag length to support dual VLAN */
2362                         if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
2363                                                 2 * VLAN_TAG_SIZE) > buf_size){
2364                                 if (!dev->data->scattered_rx)
2365                                         PMD_INIT_LOG(DEBUG,
2366                                                      "forcing scatter mode");
2367                                 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2368                                 dev->data->scattered_rx = 1;
2369                         }
2370                 } else {
2371                         /*
2372                          * Use BSIZE field of the device RCTL register.
2373                          */
2374                         if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
2375                                 rctl_bsize = buf_size;
2376                         if (!dev->data->scattered_rx)
2377                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2378                         dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2379                         dev->data->scattered_rx = 1;
2380                 }
2381
2382                 /* Set if packets are dropped when no descriptors available */
2383                 if (rxq->drop_en)
2384                         srrctl |= E1000_SRRCTL_DROP_EN;
2385
2386                 E1000_WRITE_REG(hw, E1000_SRRCTL(rxq->reg_idx), srrctl);
2387
2388                 /* Enable this RX queue. */
2389                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(rxq->reg_idx));
2390                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2391                 rxdctl &= 0xFFF00000;
2392                 rxdctl |= (rxq->pthresh & 0x1F);
2393                 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2394                 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2395                 E1000_WRITE_REG(hw, E1000_RXDCTL(rxq->reg_idx), rxdctl);
2396         }
2397
2398         if (dev->data->dev_conf.rxmode.enable_scatter) {
2399                 if (!dev->data->scattered_rx)
2400                         PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2401                 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2402                 dev->data->scattered_rx = 1;
2403         }
2404
2405         /*
2406          * Set up the BSIZE field of the RCTL register, if needed.
2407          * Buffer sizes >= 1024 are not [supposed to be] set up in the RCTL
2408          * register, since the code above configures the SRRCTL register of
2409          * the RX queue in such a case.
2410          * All configurable sizes are:
2411          * 16384: rctl |= (E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX);
2412          *  8192: rctl |= (E1000_RCTL_SZ_8192  | E1000_RCTL_BSEX);
2413          *  4096: rctl |= (E1000_RCTL_SZ_4096  | E1000_RCTL_BSEX);
2414          *  2048: rctl |= E1000_RCTL_SZ_2048;
2415          *  1024: rctl |= E1000_RCTL_SZ_1024;
2416          *   512: rctl |= E1000_RCTL_SZ_512;
2417          *   256: rctl |= E1000_RCTL_SZ_256;
2418          */
2419         if (rctl_bsize > 0) {
2420                 if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
2421                         rctl |= E1000_RCTL_SZ_512;
2422                 else /* 256 <= buf_size < 512 - use 256 */
2423                         rctl |= E1000_RCTL_SZ_256;
2424         }
2425
2426         /*
2427          * Configure RSS if device configured with multiple RX queues.
2428          */
2429         igb_dev_mq_rx_configure(dev);
2430
2431         /* Update the rctl since igb_dev_mq_rx_configure may change its value */
2432         rctl |= E1000_READ_REG(hw, E1000_RCTL);
2433
2434         /*
2435          * Setup the Checksum Register.
2436          * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
2437          */
2438         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
2439         rxcsum |= E1000_RXCSUM_PCSD;
2440
2441         /* Enable both L3/L4 rx checksum offload */
2442         if (dev->data->dev_conf.rxmode.hw_ip_checksum)
2443                 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL |
2444                                 E1000_RXCSUM_CRCOFL);
2445         else
2446                 rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL |
2447                                 E1000_RXCSUM_CRCOFL);
2448         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
2449
2450         /* Setup the Receive Control Register. */
2451         if (dev->data->dev_conf.rxmode.hw_strip_crc) {
2452                 rctl |= E1000_RCTL_SECRC; /* Strip Ethernet CRC. */
2453
2454                 /* set STRCRC bit in all queues */
2455                 if (hw->mac.type == e1000_i350 ||
2456                     hw->mac.type == e1000_i210 ||
2457                     hw->mac.type == e1000_i211 ||
2458                     hw->mac.type == e1000_i354) {
2459                         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2460                                 rxq = dev->data->rx_queues[i];
2461                                 uint32_t dvmolr = E1000_READ_REG(hw,
2462                                         E1000_DVMOLR(rxq->reg_idx));
2463                                 dvmolr |= E1000_DVMOLR_STRCRC;
2464                                 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2465                         }
2466                 }
2467         } else {
2468                 rctl &= ~E1000_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
2469
2470                 /* clear STRCRC bit in all queues */
2471                 if (hw->mac.type == e1000_i350 ||
2472                     hw->mac.type == e1000_i210 ||
2473                     hw->mac.type == e1000_i211 ||
2474                     hw->mac.type == e1000_i354) {
2475                         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2476                                 rxq = dev->data->rx_queues[i];
2477                                 uint32_t dvmolr = E1000_READ_REG(hw,
2478                                         E1000_DVMOLR(rxq->reg_idx));
2479                                 dvmolr &= ~E1000_DVMOLR_STRCRC;
2480                                 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2481                         }
2482                 }
2483         }
2484
2485         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2486         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2487                 E1000_RCTL_RDMTS_HALF |
2488                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2489
2490         /* Make sure VLAN Filters are off. */
2491         if (dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_VMDQ_ONLY)
2492                 rctl &= ~E1000_RCTL_VFE;
2493         /* Don't store bad packets. */
2494         rctl &= ~E1000_RCTL_SBP;
2495
2496         /* Enable Receives. */
2497         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2498
2499         /*
2500          * Setup the HW Rx Head and Tail Descriptor Pointers.
2501          * This needs to be done after enable.
2502          */
2503         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2504                 rxq = dev->data->rx_queues[i];
2505                 E1000_WRITE_REG(hw, E1000_RDH(rxq->reg_idx), 0);
2506                 E1000_WRITE_REG(hw, E1000_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
2507         }
2508
2509         return 0;
2510 }
2511
2512 /*********************************************************************
2513  *
2514  *  Enable transmit unit.
2515  *
2516  **********************************************************************/
2517 void
2518 eth_igb_tx_init(struct rte_eth_dev *dev)
2519 {
2520         struct e1000_hw     *hw;
2521         struct igb_tx_queue *txq;
2522         uint32_t tctl;
2523         uint32_t txdctl;
2524         uint16_t i;
2525
2526         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2527
2528         /* Setup the Base and Length of the Tx Descriptor Rings. */
2529         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2530                 uint64_t bus_addr;
2531                 txq = dev->data->tx_queues[i];
2532                 bus_addr = txq->tx_ring_phys_addr;
2533
2534                 E1000_WRITE_REG(hw, E1000_TDLEN(txq->reg_idx),
2535                                 txq->nb_tx_desc *
2536                                 sizeof(union e1000_adv_tx_desc));
2537                 E1000_WRITE_REG(hw, E1000_TDBAH(txq->reg_idx),
2538                                 (uint32_t)(bus_addr >> 32));
2539                 E1000_WRITE_REG(hw, E1000_TDBAL(txq->reg_idx), (uint32_t)bus_addr);
2540
2541                 /* Setup the HW Tx Head and Tail descriptor pointers. */
2542                 E1000_WRITE_REG(hw, E1000_TDT(txq->reg_idx), 0);
2543                 E1000_WRITE_REG(hw, E1000_TDH(txq->reg_idx), 0);
2544
2545                 /* Setup Transmit threshold registers. */
2546                 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(txq->reg_idx));
2547                 txdctl |= txq->pthresh & 0x1F;
2548                 txdctl |= ((txq->hthresh & 0x1F) << 8);
2549                 txdctl |= ((txq->wthresh & 0x1F) << 16);
2550                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2551                 E1000_WRITE_REG(hw, E1000_TXDCTL(txq->reg_idx), txdctl);
2552         }
2553
2554         /* Program the Transmit Control Register. */
2555         tctl = E1000_READ_REG(hw, E1000_TCTL);
2556         tctl &= ~E1000_TCTL_CT;
2557         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2558                  (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
2559
2560         e1000_config_collision_dist(hw);
2561
2562         /* This write will effectively turn on the transmit unit. */
2563         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
2564 }
2565
2566 /*********************************************************************
2567  *
2568  *  Enable VF receive unit.
2569  *
2570  **********************************************************************/
2571 int
2572 eth_igbvf_rx_init(struct rte_eth_dev *dev)
2573 {
2574         struct e1000_hw     *hw;
2575         struct igb_rx_queue *rxq;
2576         uint32_t srrctl;
2577         uint16_t buf_size;
2578         uint16_t rctl_bsize;
2579         uint16_t i;
2580         int ret;
2581
2582         hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2583
2584         /* setup MTU */
2585         e1000_rlpml_set_vf(hw,
2586                 (uint16_t)(dev->data->dev_conf.rxmode.max_rx_pkt_len +
2587                 VLAN_TAG_SIZE));
2588
2589         /* Configure and enable each RX queue. */
        rctl_bsize = 0;
        dev->rx_pkt_burst = eth_igb_recv_pkts;
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                uint64_t bus_addr;
                uint32_t rxdctl;

                rxq = dev->data->rx_queues[i];

                rxq->flags = 0;
                /*
                 * On the i350 VF, VLAN tags of loopback (LB) packets arrive
                 * byte-swapped; flag the queue so the RX path swaps them
                 * back.
                 */
                if (hw->mac.type == e1000_vfadapt_i350) {
                        rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
                        PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
                } else {
                        PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
                }

                /* Allocate buffers for descriptor rings and set up queue */
                ret = igb_alloc_rx_queue_mbufs(rxq);
                if (ret)
                        return ret;

                bus_addr = rxq->rx_ring_phys_addr;
                E1000_WRITE_REG(hw, E1000_RDLEN(i),
                                rxq->nb_rx_desc *
                                sizeof(union e1000_adv_rx_desc));
                E1000_WRITE_REG(hw, E1000_RDBAH(i),
                                (uint32_t)(bus_addr >> 32));
                E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr);

                srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;

                /*
                 * Configure RX buffer size.
                 */
                buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
                        RTE_PKTMBUF_HEADROOM);
                if (buf_size >= 1024) {
                        /*
                         * Configure the BSIZEPACKET field of the SRRCTL
                         * register of the queue.
                         * Value is in 1 KB resolution, from 1 KB to 127 KB.
                         * If this field is equal to 0b, then RCTL.BSIZE
                         * determines the RX packet buffer size.
                         */
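                        /*
                         * The shift converts bytes into the register's 1 KB
                         * units: a 2048-byte buffer programs BSIZEPACKET = 2,
                         * while a 1920-byte buffer rounds down to 1 (1 KB).
                         * buf_size is recomputed below to the size the
                         * hardware will actually use.
                         */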
                        srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
                                   E1000_SRRCTL_BSIZEPKT_MASK);
                        buf_size = (uint16_t) ((srrctl &
                                                E1000_SRRCTL_BSIZEPKT_MASK) <<
                                               E1000_SRRCTL_BSIZEPKT_SHIFT);

                        /* Allow for two VLAN tags (dual VLAN / QinQ) on top
                         * of the configured maximum packet length.
                         */
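                        /*
                         * E.g. with max_rx_pkt_len = 1518 the frame may grow
                         * to 1518 + 2 * 4 = 1526 bytes, so a 1 KB effective
                         * buffer forces scatter mode while a 2 KB one does
                         * not.
                         */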
                        if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
                                        2 * VLAN_TAG_SIZE) > buf_size) {
                                if (!dev->data->scattered_rx)
                                        PMD_INIT_LOG(DEBUG,
                                                     "forcing scatter mode");
                                dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
                                dev->data->scattered_rx = 1;
                        }
                } else {
                        /*
                         * Use BSIZE field of the device RCTL register.
                         */
                        if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
                                rctl_bsize = buf_size;
                        if (!dev->data->scattered_rx)
                                PMD_INIT_LOG(DEBUG, "forcing scatter mode");
                        dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
                        dev->data->scattered_rx = 1;
                }

                /* Drop packets when no descriptors are available, if enabled. */
                if (rxq->drop_en)
                        srrctl |= E1000_SRRCTL_DROP_EN;
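                /*
                 * With DROP_EN set, the queue discards packets once its
                 * descriptor ring is exhausted rather than holding them in
                 * the shared receive packet buffer, which helps avoid
                 * head-of-line blocking of the other queues.
                 */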

                E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);

                /* Enable this RX queue. */
                rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
                rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
                rxdctl &= 0xFFF00000;
                rxdctl |= (rxq->pthresh & 0x1F);
                rxdctl |= ((rxq->hthresh & 0x1F) << 8);
                if (hw->mac.type == e1000_vfadapt) {
                        /*
                         * Workaround for the 82576 VF erratum: force WTHRESH
                         * to 1 so that descriptor write-back is always
                         * triggered.
                         */
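                        /*
                         * 0x10000 is 1 << 16, i.e. WTHRESH = 1 in the
                         * RXDCTL threshold layout used above.
                         */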
                        rxdctl |= 0x10000;
                        PMD_INIT_LOG(DEBUG, "Force set RX WTHRESH to 1!");
                } else
                        rxdctl |= ((rxq->wthresh & 0x1F) << 16);
                E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
        }

        if (dev->data->dev_conf.rxmode.enable_scatter) {
                if (!dev->data->scattered_rx)
                        PMD_INIT_LOG(DEBUG, "forcing scatter mode");
                dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
                dev->data->scattered_rx = 1;
        }

        /*
         * Set up the hardware RX head and tail descriptor pointers.
         * This must be done after the queues have been enabled.
         */
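        /*
         * RDH = 0 and RDT = nb_rx_desc - 1 hand all but one descriptor to
         * hardware; keeping one descriptor unused prevents the head from
         * catching up with the tail, so a full ring and an empty ring remain
         * distinguishable.
         */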
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                rxq = dev->data->rx_queues[i];
                E1000_WRITE_REG(hw, E1000_RDH(i), 0);
                E1000_WRITE_REG(hw, E1000_RDT(i), rxq->nb_rx_desc - 1);
        }

        return 0;
}

/*********************************************************************
 *
 *  Enable VF transmit unit.
 *
 **********************************************************************/
void
eth_igbvf_tx_init(struct rte_eth_dev *dev)
{
        struct e1000_hw     *hw;
        struct igb_tx_queue *txq;
        uint32_t txdctl;
        uint16_t i;

        hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        /* Setup the Base and Length of the Tx Descriptor Rings. */
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                uint64_t bus_addr;

                txq = dev->data->tx_queues[i];
                bus_addr = txq->tx_ring_phys_addr;
                E1000_WRITE_REG(hw, E1000_TDLEN(i),
                                txq->nb_tx_desc *
                                sizeof(union e1000_adv_tx_desc));
                E1000_WRITE_REG(hw, E1000_TDBAH(i),
                                (uint32_t)(bus_addr >> 32));
                E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr);

                /* Setup the HW Tx Head and Tail descriptor pointers. */
                E1000_WRITE_REG(hw, E1000_TDT(i), 0);
                E1000_WRITE_REG(hw, E1000_TDH(i), 0);

                /* Setup Transmit threshold registers. */
                txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
                txdctl |= txq->pthresh & 0x1F;
                txdctl |= ((txq->hthresh & 0x1F) << 8);
                if (hw->mac.type == e1000_82576) {
                        /*
                         * Workaround for the 82576 VF erratum: force WTHRESH
                         * to 1 so that descriptor write-back is always
                         * triggered.
                         */
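                        /*
                         * As in the RX path above, 0x10000 is 1 << 16,
                         * i.e. WTHRESH = 1 in TXDCTL.
                         */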
                        txdctl |= 0x10000;
                        PMD_INIT_LOG(DEBUG, "Force set TX WTHRESH to 1!");
                } else
                        txdctl |= ((txq->wthresh & 0x1F) << 16);
                txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
                E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
        }
}

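/*
 * Queue information callbacks (wired up as the rxq_info_get / txq_info_get
 * dev_ops).  Applications reach them through the generic ethdev API; a
 * minimal sketch, assuming an already configured port_id/queue_id pair:
 *
 *      struct rte_eth_rxq_info rx_info;
 *
 *      if (rte_eth_rx_queue_info_get(port_id, queue_id, &rx_info) == 0)
 *              printf("RX ring size: %u\n", (unsigned)rx_info.nb_desc);
 *
 * rte_eth_tx_queue_info_get() fills struct rte_eth_txq_info the same way
 * from igb_txq_info_get() below.
 */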
void
igb_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
        struct rte_eth_rxq_info *qinfo)
{
        struct igb_rx_queue *rxq;

        rxq = dev->data->rx_queues[queue_id];

        qinfo->mp = rxq->mb_pool;
        qinfo->scattered_rx = dev->data->scattered_rx;
        qinfo->nb_desc = rxq->nb_rx_desc;

        qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
        qinfo->conf.rx_drop_en = rxq->drop_en;
}

void
igb_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
        struct rte_eth_txq_info *qinfo)
{
        struct igb_tx_queue *txq;

        txq = dev->data->tx_queues[queue_id];

        qinfo->nb_desc = txq->nb_tx_desc;

        qinfo->conf.tx_thresh.pthresh = txq->pthresh;
        qinfo->conf.tx_thresh.hthresh = txq->hthresh;
        qinfo->conf.tx_thresh.wthresh = txq->wthresh;
}