Imported Upstream version 16.11
deb_dpdk.git: drivers/net/ixgbe/ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73
74 #include "ixgbe_logs.h"
75 #include "base/ixgbe_api.h"
76 #include "base/ixgbe_vf.h"
77 #include "ixgbe_ethdev.h"
78 #include "base/ixgbe_dcb.h"
79 #include "base/ixgbe_common.h"
80 #include "ixgbe_rxtx.h"
81
82 /* Bit mask indicating which bits are required for building the TX context */
83 #define IXGBE_TX_OFFLOAD_MASK (                  \
84                 PKT_TX_VLAN_PKT |                \
85                 PKT_TX_IP_CKSUM |                \
86                 PKT_TX_L4_MASK |                 \
87                 PKT_TX_TCP_SEG |                 \
88                 PKT_TX_OUTER_IP_CKSUM)
89
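/*
 * Illustrative note: a packet whose ol_flags contain, for example,
 * PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM matches this mask and therefore may
 * need a context descriptor built (or reused) for it, while flags outside
 * the mask, such as PKT_TX_IEEE1588_TMST, do not by themselves require one.
 */
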
90 #if 1
91 #define RTE_PMD_USE_PREFETCH
92 #endif
93
94 #ifdef RTE_PMD_USE_PREFETCH
95 /*
96  * Prefetch a cache line into all cache levels.
97  */
98 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
99 #else
100 #define rte_ixgbe_prefetch(p)   do {} while (0)
101 #endif
102
103 /*********************************************************************
104  *
105  *  TX functions
106  *
107  **********************************************************************/
108
109 /*
110  * Check for descriptors with their DD bit set and free mbufs.
111  * Return the total number of buffers freed.
112  */
113 static inline int __attribute__((always_inline))
114 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
115 {
116         struct ixgbe_tx_entry *txep;
117         uint32_t status;
118         int i, nb_free = 0;
119         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
120
121         /* check DD bit on threshold descriptor */
122         status = txq->tx_ring[txq->tx_next_dd].wb.status;
123         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
124                 return 0;
125
126         /*
127          * first buffer to free from S/W ring is at index
128          * tx_next_dd - (tx_rs_thresh-1)
129          */
130         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
131
132         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
133                 /* free buffers one at a time */
134                 m = __rte_pktmbuf_prefree_seg(txep->mbuf);
135                 txep->mbuf = NULL;
136
137                 if (unlikely(m == NULL))
138                         continue;
139
140                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
141                     (nb_free > 0 && m->pool != free[0]->pool)) {
142                         rte_mempool_put_bulk(free[0]->pool,
143                                              (void **)free, nb_free);
144                         nb_free = 0;
145                 }
146
147                 free[nb_free++] = m;
148         }
149
150         if (nb_free > 0)
151                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
152
153         /* buffers were freed, update counters */
154         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
155         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
156         if (txq->tx_next_dd >= txq->nb_tx_desc)
157                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
158
159         return txq->tx_rs_thresh;
160 }
161
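/*
 * Worked example (a sketch; actual values depend on the queue configuration):
 * with tx_rs_thresh = 32 and tx_next_dd = 31, the DD bit of descriptor 31 is
 * checked.  If it is set, the mbufs tracked in sw_ring[0..31] are handed back
 * to their mempool(s) in bulk, nb_tx_free grows by 32, and tx_next_dd advances
 * to 63, wrapping back to tx_rs_thresh - 1 after it passes the end of the ring.
 */
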
162 /* Populate 4 descriptors with data from 4 mbufs */
163 static inline void
164 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
165 {
166         uint64_t buf_dma_addr;
167         uint32_t pkt_len;
168         int i;
169
170         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
171                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
172                 pkt_len = (*pkts)->data_len;
173
174                 /* write data to descriptor */
175                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
176
177                 txdp->read.cmd_type_len =
178                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
179
180                 txdp->read.olinfo_status =
181                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
182
183                 rte_prefetch0(&(*pkts)->pool);
184         }
185 }
186
187 /* Populate 1 descriptor with data from 1 mbuf */
188 static inline void
189 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
190 {
191         uint64_t buf_dma_addr;
192         uint32_t pkt_len;
193
194         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
195         pkt_len = (*pkts)->data_len;
196
197         /* write data to descriptor */
198         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
199         txdp->read.cmd_type_len =
200                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
201         txdp->read.olinfo_status =
202                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
203         rte_prefetch0(&(*pkts)->pool);
204 }
205
206 /*
207  * Fill H/W descriptor ring with mbuf data.
208  * Copy mbuf pointers to the S/W ring.
209  */
210 static inline void
211 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
212                       uint16_t nb_pkts)
213 {
214         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
215         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
216         const int N_PER_LOOP = 4;
217         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
218         int mainpart, leftover;
219         int i, j;
220
221         /*
222          * Process most of the packets in chunks of N pkts.  Any
223          * leftover packets will get processed one at a time.
224          */
225         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
226         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
227         for (i = 0; i < mainpart; i += N_PER_LOOP) {
228                 /* Copy N mbuf pointers to the S/W ring */
229                 for (j = 0; j < N_PER_LOOP; ++j) {
230                         (txep + i + j)->mbuf = *(pkts + i + j);
231                 }
232                 tx4(txdp + i, pkts + i);
233         }
234
235         if (unlikely(leftover > 0)) {
236                 for (i = 0; i < leftover; ++i) {
237                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
238                         tx1(txdp + mainpart + i, pkts + mainpart + i);
239                 }
240         }
241 }
242
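/*
 * Illustrative arithmetic for the split above: with nb_pkts = 10 and
 * N_PER_LOOP = 4, mainpart = 10 & ~3 = 8 and leftover = 10 & 3 = 2, so two
 * tx4() calls cover packets 0..7 and two tx1() calls cover packets 8 and 9.
 */
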
243 static inline uint16_t
244 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
245              uint16_t nb_pkts)
246 {
247         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
248         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
249         uint16_t n = 0;
250
251         /*
252          * Begin scanning the H/W ring for done descriptors when the
253          * number of available descriptors drops below tx_free_thresh.  For
254          * each done descriptor, free the associated buffer.
255          */
256         if (txq->nb_tx_free < txq->tx_free_thresh)
257                 ixgbe_tx_free_bufs(txq);
258
259         /* Only use descriptors that are available */
260         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
261         if (unlikely(nb_pkts == 0))
262                 return 0;
263
264         /* Use exactly nb_pkts descriptors */
265         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
266
267         /*
268          * At this point, we know there are enough descriptors in the
269          * ring to transmit all the packets.  This assumes that each
270          * mbuf contains a single segment, and that no new offloads
271          * are expected, which would require a new context descriptor.
272          */
273
274         /*
275          * See if we're going to wrap-around. If so, handle the top
276          * of the descriptor ring first, then do the bottom.  If not,
277          * the processing looks just like the "bottom" part anyway...
278          */
279         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
280                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
281                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
282
283                 /*
284                  * We know that the last descriptor in the ring will need to
285                  * have its RS bit set because tx_rs_thresh has to be
286                  * a divisor of the ring size
287                  */
288                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
289                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
290                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
291
292                 txq->tx_tail = 0;
293         }
294
295         /* Fill H/W descriptor ring with mbuf data */
296         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
297         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
298
299         /*
300          * Determine if RS bit should be set
301          * This is what we actually want:
302          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
303          * but instead of subtracting 1 and doing >=, we can just do
304          * greater than without subtracting.
305          */
306         if (txq->tx_tail > txq->tx_next_rs) {
307                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
308                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
309                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
310                                                 txq->tx_rs_thresh);
311                 if (txq->tx_next_rs >= txq->nb_tx_desc)
312                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
313         }
314
315         /*
316          * Check for wrap-around. This would only happen if we used
317          * up to the last descriptor in the ring, no more, no less.
318          */
319         if (txq->tx_tail >= txq->nb_tx_desc)
320                 txq->tx_tail = 0;
321
322         /* update tail pointer */
323         rte_wmb();
324         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
325
326         return nb_pkts;
327 }
328
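/*
 * Wrap-around sketch (illustrative values; tx_rs_thresh is assumed to divide
 * the ring size): with nb_tx_desc = 512, tx_tail = 500 and nb_pkts = 32, the
 * first ixgbe_tx_fill_hw_ring() call covers descriptors 500..511, RS is set on
 * the descriptor tx_next_rs points at (the last one in the ring), tx_next_rs
 * and tx_tail restart near the start of the ring, and the remaining 20 packets
 * land in descriptors 0..19 before the tail register is written.
 */
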
329 uint16_t
330 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
331                        uint16_t nb_pkts)
332 {
333         uint16_t nb_tx;
334
335         /* Transmit in a single call when the burst fits within TX_MAX_BURST pkts */
336         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
337                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
338
339         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
340         nb_tx = 0;
341         while (nb_pkts) {
342                 uint16_t ret, n;
343
344                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
345                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
346                 nb_tx = (uint16_t)(nb_tx + ret);
347                 nb_pkts = (uint16_t)(nb_pkts - ret);
348                 if (ret < n)
349                         break;
350         }
351
352         return nb_tx;
353 }
354
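/*
 * Minimal usage sketch (not built into the driver): applications never call
 * ixgbe_xmit_pkts_simple() directly, they reach it through rte_eth_tx_burst()
 * once the PMD has selected the simple TX path.  The port and queue ids below
 * are hypothetical.
 */
#if 0
static void
example_send_burst(uint8_t port_id, uint16_t queue_id,
                   struct rte_mbuf **pkts, uint16_t nb_pkts)
{
        uint16_t sent = 0;

        /* Hand the whole burst to the queue; a real application would bound
         * these retries or drop the remainder instead of spinning forever.
         */
        while (sent < nb_pkts)
                sent += rte_eth_tx_burst(port_id, queue_id,
                                         &pkts[sent], nb_pkts - sent);
}
#endif
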
355 static inline void
356 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
357                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
358                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
359 {
360         uint32_t type_tucmd_mlhl;
361         uint32_t mss_l4len_idx = 0;
362         uint32_t ctx_idx;
363         uint32_t vlan_macip_lens;
364         union ixgbe_tx_offload tx_offload_mask;
365         uint32_t seqnum_seed = 0;
366
367         ctx_idx = txq->ctx_curr;
368         tx_offload_mask.data[0] = 0;
369         tx_offload_mask.data[1] = 0;
370         type_tucmd_mlhl = 0;
371
372         /* Specify which HW CTX to upload. */
373         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
374
375         if (ol_flags & PKT_TX_VLAN_PKT) {
376                 tx_offload_mask.vlan_tci |= ~0;
377         }
378
379         /* check if TCP segmentation is required for this packet */
380         if (ol_flags & PKT_TX_TCP_SEG) {
381                 /* implies IP cksum in IPv4 */
382                 if (ol_flags & PKT_TX_IP_CKSUM)
383                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
384                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
385                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
386                 else
387                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
388                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
389                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
390
391                 tx_offload_mask.l2_len |= ~0;
392                 tx_offload_mask.l3_len |= ~0;
393                 tx_offload_mask.l4_len |= ~0;
394                 tx_offload_mask.tso_segsz |= ~0;
395                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
396                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
397         } else { /* no TSO, check if hardware checksum is needed */
398                 if (ol_flags & PKT_TX_IP_CKSUM) {
399                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
400                         tx_offload_mask.l2_len |= ~0;
401                         tx_offload_mask.l3_len |= ~0;
402                 }
403
404                 switch (ol_flags & PKT_TX_L4_MASK) {
405                 case PKT_TX_UDP_CKSUM:
406                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
407                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
408                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
409                         tx_offload_mask.l2_len |= ~0;
410                         tx_offload_mask.l3_len |= ~0;
411                         break;
412                 case PKT_TX_TCP_CKSUM:
413                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
414                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
415                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
416                         tx_offload_mask.l2_len |= ~0;
417                         tx_offload_mask.l3_len |= ~0;
418                         break;
419                 case PKT_TX_SCTP_CKSUM:
420                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
421                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
422                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
423                         tx_offload_mask.l2_len |= ~0;
424                         tx_offload_mask.l3_len |= ~0;
425                         break;
426                 default:
427                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
428                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
429                         break;
430                 }
431         }
432
433         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
434                 tx_offload_mask.outer_l2_len |= ~0;
435                 tx_offload_mask.outer_l3_len |= ~0;
436                 tx_offload_mask.l2_len |= ~0;
437                 seqnum_seed |= tx_offload.outer_l3_len
438                                << IXGBE_ADVTXD_OUTER_IPLEN;
439                 seqnum_seed |= tx_offload.l2_len
440                                << IXGBE_ADVTXD_TUNNEL_LEN;
441         }
442
443         txq->ctx_cache[ctx_idx].flags = ol_flags;
444         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
445                 tx_offload_mask.data[0] & tx_offload.data[0];
446         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
447                 tx_offload_mask.data[1] & tx_offload.data[1];
448         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
449
450         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
451         vlan_macip_lens = tx_offload.l3_len;
452         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
453                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
454                                     IXGBE_ADVTXD_MACLEN_SHIFT);
455         else
456                 vlan_macip_lens |= (tx_offload.l2_len <<
457                                     IXGBE_ADVTXD_MACLEN_SHIFT);
458         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
459         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
460         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
461         ctx_txd->seqnum_seed     = seqnum_seed;
462 }
463
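/*
 * Field-packing sketch (bit positions come from the shift macros used above
 * and are not restated here): for a TSO packet with tso_segsz = 1448 and
 * l4_len = 20, mss_l4len_idx carries the MSS, the L4 header length and the
 * context slot index in separate bit fields, while vlan_macip_lens packs
 * vlan_tci, l2_len and l3_len of the same context descriptor.
 */
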
464 /*
465  * Check which hardware context can be used. Use the existing match
466  * or create a new context descriptor.
467  */
468 static inline uint32_t
469 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
470                    union ixgbe_tx_offload tx_offload)
471 {
472         /* If it matches the currently used context */
473         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
474                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
475                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
476                      & tx_offload.data[0])) &&
477                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
478                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
479                      & tx_offload.data[1]))))
480                 return txq->ctx_curr;
481
482         /* Otherwise, check whether the other cached context matches */
483         txq->ctx_curr ^= 1;
484         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
485                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
486                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
487                      & tx_offload.data[0])) &&
488                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
489                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
490                      & tx_offload.data[1]))))
491                 return txq->ctx_curr;
492
493         /* No match: the caller must build a new context descriptor */
494         return IXGBE_CTX_NUM;
495 }
496
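/*
 * Behavioural note: the queue caches IXGBE_CTX_NUM (two) contexts and toggles
 * ctx_curr between them, so traffic alternating between, for example, a TSO
 * profile and a plain checksum profile can keep reusing both cached context
 * descriptors instead of rebuilding one for every packet.
 */
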
497 static inline uint32_t
498 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
499 {
500         uint32_t tmp = 0;
501
502         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
503                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
504         if (ol_flags & PKT_TX_IP_CKSUM)
505                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
506         if (ol_flags & PKT_TX_TCP_SEG)
507                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
508         return tmp;
509 }
510
511 static inline uint32_t
512 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
513 {
514         uint32_t cmdtype = 0;
515
516         if (ol_flags & PKT_TX_VLAN_PKT)
517                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
518         if (ol_flags & PKT_TX_TCP_SEG)
519                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
520         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
521                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
522         return cmdtype;
523 }
524
525 /* Default RS bit threshold values */
526 #ifndef DEFAULT_TX_RS_THRESH
527 #define DEFAULT_TX_RS_THRESH   32
528 #endif
529 #ifndef DEFAULT_TX_FREE_THRESH
530 #define DEFAULT_TX_FREE_THRESH 32
531 #endif
532
533 /* Reset transmit descriptors after they have been used */
534 static inline int
535 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
536 {
537         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
538         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
539         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
540         uint16_t nb_tx_desc = txq->nb_tx_desc;
541         uint16_t desc_to_clean_to;
542         uint16_t nb_tx_to_clean;
543         uint32_t status;
544
545         /* Determine the last descriptor needing to be cleaned */
546         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
547         if (desc_to_clean_to >= nb_tx_desc)
548                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
549
550         /* Check to make sure the last descriptor to clean is done */
551         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
552         status = txr[desc_to_clean_to].wb.status;
553         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
554                 PMD_TX_FREE_LOG(DEBUG,
555                                 "TX descriptor %4u is not done "
556                                 "(port=%d queue=%d)",
557                                 desc_to_clean_to,
558                                 txq->port_id, txq->queue_id);
559                 /* Failed to clean any descriptors, better luck next time */
560                 return -(1);
561         }
562
563         /* Figure out how many descriptors will be cleaned */
564         if (last_desc_cleaned > desc_to_clean_to)
565                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
566                                                         desc_to_clean_to);
567         else
568                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
569                                                 last_desc_cleaned);
570
571         PMD_TX_FREE_LOG(DEBUG,
572                         "Cleaning %4u TX descriptors: %4u to %4u "
573                         "(port=%d queue=%d)",
574                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
575                         txq->port_id, txq->queue_id);
576
577         /*
578          * The last descriptor to clean is done, so that means all the
579          * descriptors from the last descriptor that was cleaned
580          * up to the last descriptor with the RS bit set
581          * are done. Only reset the threshold descriptor.
582          */
583         txr[desc_to_clean_to].wb.status = 0;
584
585         /* Update the txq to reflect the last descriptor that was cleaned */
586         txq->last_desc_cleaned = desc_to_clean_to;
587         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
588
589         /* No Error */
590         return 0;
591 }
592
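/*
 * Worked example (illustrative values): with nb_tx_desc = 512,
 * last_desc_cleaned = 479 and tx_rs_thresh = 32, desc_to_clean_to is first
 * computed as 511; after following sw_ring[511].last_id it stays in the ring,
 * and if that descriptor reports DD the queue reclaims 511 - 479 = 32
 * descriptors and advances last_desc_cleaned to 511.
 */
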
593 uint16_t
594 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
595                 uint16_t nb_pkts)
596 {
597         struct ixgbe_tx_queue *txq;
598         struct ixgbe_tx_entry *sw_ring;
599         struct ixgbe_tx_entry *txe, *txn;
600         volatile union ixgbe_adv_tx_desc *txr;
601         volatile union ixgbe_adv_tx_desc *txd, *txp;
602         struct rte_mbuf     *tx_pkt;
603         struct rte_mbuf     *m_seg;
604         uint64_t buf_dma_addr;
605         uint32_t olinfo_status;
606         uint32_t cmd_type_len;
607         uint32_t pkt_len;
608         uint16_t slen;
609         uint64_t ol_flags;
610         uint16_t tx_id;
611         uint16_t tx_last;
612         uint16_t nb_tx;
613         uint16_t nb_used;
614         uint64_t tx_ol_req;
615         uint32_t ctx = 0;
616         uint32_t new_ctx;
617         union ixgbe_tx_offload tx_offload;
618
619         tx_offload.data[0] = 0;
620         tx_offload.data[1] = 0;
621         txq = tx_queue;
622         sw_ring = txq->sw_ring;
623         txr     = txq->tx_ring;
624         tx_id   = txq->tx_tail;
625         txe = &sw_ring[tx_id];
626         txp = NULL;
627
628         /* Determine if the descriptor ring needs to be cleaned. */
629         if (txq->nb_tx_free < txq->tx_free_thresh)
630                 ixgbe_xmit_cleanup(txq);
631
632         rte_prefetch0(&txe->mbuf->pool);
633
634         /* TX loop */
635         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
636                 new_ctx = 0;
637                 tx_pkt = *tx_pkts++;
638                 pkt_len = tx_pkt->pkt_len;
639
640                 /*
641                  * Determine how many (if any) context descriptors
642                  * are needed for offload functionality.
643                  */
644                 ol_flags = tx_pkt->ol_flags;
645
646                 /* If hardware offload required */
647                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
648                 if (tx_ol_req) {
649                         tx_offload.l2_len = tx_pkt->l2_len;
650                         tx_offload.l3_len = tx_pkt->l3_len;
651                         tx_offload.l4_len = tx_pkt->l4_len;
652                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
653                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
654                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
655                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
656
657                         /* Check whether a new context must be built or an existing one reused. */
658                         ctx = what_advctx_update(txq, tx_ol_req,
659                                 tx_offload);
660                         /* Only allocate a context descriptor if required */
661                         new_ctx = (ctx == IXGBE_CTX_NUM);
662                         ctx = txq->ctx_curr;
663                 }
664
665                 /*
666                  * Keep track of how many descriptors are used in this loop.
667                  * This will always be the number of segments plus the number of
668                  * context descriptors required to transmit the packet.
669                  */
670                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
671
672                 if (txp != NULL &&
673                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
674                         /* set RS on the previous packet in the burst */
675                         txp->read.cmd_type_len |=
676                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
677
678                 /*
679                  * The number of descriptors that must be allocated for a
680                  * packet is the number of segments of that packet, plus 1
681                  * Context Descriptor for the hardware offload, if any.
682                  * Determine the last TX descriptor to allocate in the TX ring
683                  * for the packet, starting from the current position (tx_id)
684                  * in the ring.
685                  */
686                 tx_last = (uint16_t) (tx_id + nb_used - 1);
687
688                 /* Circular ring */
689                 if (tx_last >= txq->nb_tx_desc)
690                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
691
692                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
693                            " tx_first=%u tx_last=%u",
694                            (unsigned) txq->port_id,
695                            (unsigned) txq->queue_id,
696                            (unsigned) pkt_len,
697                            (unsigned) tx_id,
698                            (unsigned) tx_last);
699
700                 /*
701                  * Make sure there are enough TX descriptors available to
702                  * transmit the entire packet.
703                  * nb_used better be less than or equal to txq->tx_rs_thresh
704                  */
705                 if (nb_used > txq->nb_tx_free) {
706                         PMD_TX_FREE_LOG(DEBUG,
707                                         "Not enough free TX descriptors "
708                                         "nb_used=%4u nb_free=%4u "
709                                         "(port=%d queue=%d)",
710                                         nb_used, txq->nb_tx_free,
711                                         txq->port_id, txq->queue_id);
712
713                         if (ixgbe_xmit_cleanup(txq) != 0) {
714                                 /* Could not clean any descriptors */
715                                 if (nb_tx == 0)
716                                         return 0;
717                                 goto end_of_tx;
718                         }
719
720                         /* nb_used better be <= txq->tx_rs_thresh */
721                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
722                                 PMD_TX_FREE_LOG(DEBUG,
723                                         "The number of descriptors needed to "
724                                         "transmit the packet exceeds the "
725                                         "RS bit threshold. This will impact "
726                                         "performance. "
727                                         "nb_used=%4u nb_free=%4u "
728                                         "tx_rs_thresh=%4u. "
729                                         "(port=%d queue=%d)",
730                                         nb_used, txq->nb_tx_free,
731                                         txq->tx_rs_thresh,
732                                         txq->port_id, txq->queue_id);
733                                 /*
734                                  * Loop here until there are enough TX
735                                  * descriptors or until the ring cannot be
736                                  * cleaned.
737                                  */
738                                 while (nb_used > txq->nb_tx_free) {
739                                         if (ixgbe_xmit_cleanup(txq) != 0) {
740                                                 /*
741                                                  * Could not clean any
742                                                  * descriptors
743                                                  */
744                                                 if (nb_tx == 0)
745                                                         return 0;
746                                                 goto end_of_tx;
747                                         }
748                                 }
749                         }
750                 }
751
752                 /*
753                  * By now there are enough free TX descriptors to transmit
754                  * the packet.
755                  */
756
757                 /*
758                  * Set common flags of all TX Data Descriptors.
759                  *
760                  * The following bits must be set in all Data Descriptors:
761                  *   - IXGBE_ADVTXD_DTYP_DATA
762                  *   - IXGBE_ADVTXD_DCMD_DEXT
763                  *
764                  * The following bits must be set in the first Data Descriptor
765                  * and are ignored in the other ones:
766                  *   - IXGBE_ADVTXD_DCMD_IFCS
767                  *   - IXGBE_ADVTXD_MAC_1588
768                  *   - IXGBE_ADVTXD_DCMD_VLE
769                  *
770                  * The following bits must only be set in the last Data
771                  * Descriptor:
772                  *   - IXGBE_TXD_CMD_EOP
773                  *
774                  * The following bits can be set in any Data Descriptor, but
775                  * are only set in the last Data Descriptor:
776                  *   - IXGBE_TXD_CMD_RS
777                  */
778                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
779                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
780
781 #ifdef RTE_LIBRTE_IEEE1588
782                 if (ol_flags & PKT_TX_IEEE1588_TMST)
783                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
784 #endif
785
786                 olinfo_status = 0;
787                 if (tx_ol_req) {
788
789                         if (ol_flags & PKT_TX_TCP_SEG) {
790                                 /* when TSO is on, the paylen in the descriptor
791                                  * is not the packet len but the TCP payload len */
792                                 pkt_len -= (tx_offload.l2_len +
793                                         tx_offload.l3_len + tx_offload.l4_len);
794                         }
795
796                         /*
797                          * Set up the TX Advanced Context Descriptor if required
798                          */
799                         if (new_ctx) {
800                                 volatile struct ixgbe_adv_tx_context_desc *
801                                     ctx_txd;
802
803                                 ctx_txd = (volatile struct
804                                     ixgbe_adv_tx_context_desc *)
805                                     &txr[tx_id];
806
807                                 txn = &sw_ring[txe->next_id];
808                                 rte_prefetch0(&txn->mbuf->pool);
809
810                                 if (txe->mbuf != NULL) {
811                                         rte_pktmbuf_free_seg(txe->mbuf);
812                                         txe->mbuf = NULL;
813                                 }
814
815                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
816                                         tx_offload);
817
818                                 txe->last_id = tx_last;
819                                 tx_id = txe->next_id;
820                                 txe = txn;
821                         }
822
823                         /*
824                          * Set up the TX Advanced Data Descriptor.
825                          * This path is taken whether the context
826                          * descriptor is newly built or reused.
827                          */
828                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
829                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
830                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
831                 }
832
833                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
834
835                 m_seg = tx_pkt;
836                 do {
837                         txd = &txr[tx_id];
838                         txn = &sw_ring[txe->next_id];
839                         rte_prefetch0(&txn->mbuf->pool);
840
841                         if (txe->mbuf != NULL)
842                                 rte_pktmbuf_free_seg(txe->mbuf);
843                         txe->mbuf = m_seg;
844
845                         /*
846                          * Set up Transmit Data Descriptor.
847                          */
848                         slen = m_seg->data_len;
849                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
850                         txd->read.buffer_addr =
851                                 rte_cpu_to_le_64(buf_dma_addr);
852                         txd->read.cmd_type_len =
853                                 rte_cpu_to_le_32(cmd_type_len | slen);
854                         txd->read.olinfo_status =
855                                 rte_cpu_to_le_32(olinfo_status);
856                         txe->last_id = tx_last;
857                         tx_id = txe->next_id;
858                         txe = txn;
859                         m_seg = m_seg->next;
860                 } while (m_seg != NULL);
861
862                 /*
863                  * The last packet data descriptor needs End Of Packet (EOP)
864                  */
865                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
866                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
867                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
868
869                 /* Set RS bit only on threshold packets' last descriptor */
870                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
871                         PMD_TX_FREE_LOG(DEBUG,
872                                         "Setting RS bit on TXD id="
873                                         "%4u (port=%d queue=%d)",
874                                         tx_last, txq->port_id, txq->queue_id);
875
876                         cmd_type_len |= IXGBE_TXD_CMD_RS;
877
878                         /* Update txq RS bit counters */
879                         txq->nb_tx_used = 0;
880                         txp = NULL;
881                 } else
882                         txp = txd;
883
884                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
885         }
886
887 end_of_tx:
888         /* set RS on last packet in the burst */
889         if (txp != NULL)
890                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
891
892         rte_wmb();
893
894         /*
895          * Set the Transmit Descriptor Tail (TDT)
896          */
897         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
898                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
899                    (unsigned) tx_id, (unsigned) nb_tx);
900         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
901         txq->tx_tail = tx_id;
902
903         return nb_tx;
904 }
905
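/*
 * Descriptor-sequence sketch for one packet on this path (assuming a
 * two-segment mbuf chain requesting IP and TCP checksum offload with no
 * cached context match): one context descriptor is written first, followed by
 * two data descriptors, one per segment; EOP is OR'ed only into the second
 * data descriptor, and RS is added there as well once nb_tx_used reaches
 * tx_rs_thresh.
 */
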
906 /*********************************************************************
907  *
908  *  RX functions
909  *
910  **********************************************************************/
911
912 #define IXGBE_PACKET_TYPE_ETHER                         0X00
913 #define IXGBE_PACKET_TYPE_IPV4                          0X01
914 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
915 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
916 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
917 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
918 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
919 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
920 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
921 #define IXGBE_PACKET_TYPE_IPV6                          0X04
922 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
923 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
924 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
925 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
926 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
927 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
928 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
929 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
930 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
931 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
932 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
933 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
934 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
935 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
936 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
937 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
938 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
939 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
940 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
941 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
942 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
943 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
944 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
945
946 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
947 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
948 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
949 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
950 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
951 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
952 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
953 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
954 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
955 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
956 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
957 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
958 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
959 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
960 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
961 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
962 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
963 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
964 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
965 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
966 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
967 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
968 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
969
970 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
971 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
972 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
973 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
974 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
975 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
976 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
977 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
978 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
979 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
980 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
981 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
982 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
983 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
984 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
985 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
986 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
987 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
988 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
989 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
990 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
991 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
992 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
993
994 #define IXGBE_PACKET_TYPE_MAX               0X80
995 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
996 #define IXGBE_PACKET_TYPE_SHIFT             0X04
997
998 /* @note: fix ixgbe_dev_supported_ptypes_get() if anything changes here. */
999 static inline uint32_t
1000 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1001 {
1002         /**
1003          * Use two different tables for normal packets and tunnel packets
1004          * to save space.
1005          */
1006         static const uint32_t
1007                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1008                 [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1009                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1010                         RTE_PTYPE_L3_IPV4,
1011                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1012                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1013                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1014                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1015                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1016                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1017                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1018                         RTE_PTYPE_L3_IPV4_EXT,
1019                 [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1020                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1021                 [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1022                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1023                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1024                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1025                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1026                         RTE_PTYPE_L3_IPV6,
1027                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1028                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1029                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1030                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1031                 [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1032                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1033                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1034                         RTE_PTYPE_L3_IPV6_EXT,
1035                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1036                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1037                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1038                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1039                 [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1040                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1041                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1042                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1043                         RTE_PTYPE_INNER_L3_IPV6,
1044                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1045                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1046                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1047                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1048                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1049                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1050                 [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1051                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1052                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1053                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1054                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1055                         RTE_PTYPE_INNER_L3_IPV6,
1056                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1057                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1058                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1059                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1060                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1061                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1062                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1063                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1064                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1065                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1066                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1067                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1068                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1069                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1070                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1071                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1072                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1073                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1074                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1075                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1076                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1077                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1078                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1079                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1080                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1081                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1082                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1083                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1084                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1085                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1086                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1087                         RTE_PTYPE_L2_ETHER |
1088                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1089                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1090         };
1091
1092         static const uint32_t
1093                 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1094                 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1095                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1096                         RTE_PTYPE_INNER_L2_ETHER,
1097                 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1098                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1099                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1100                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1101                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1102                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1103                 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1104                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1105                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1106                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1107                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1108                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1109                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1110                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1111                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1112                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1113                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1114                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1115                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1116                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1117                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1118                         RTE_PTYPE_INNER_L4_TCP,
1119                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1120                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1121                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1122                         RTE_PTYPE_INNER_L4_TCP,
1123                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1124                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1125                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1126                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1127                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1128                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1129                         RTE_PTYPE_INNER_L4_TCP,
1130                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1131                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1132                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1133                         RTE_PTYPE_INNER_L3_IPV4,
1134                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1135                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1136                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1137                         RTE_PTYPE_INNER_L4_UDP,
1138                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1139                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1140                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1141                         RTE_PTYPE_INNER_L4_UDP,
1142                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1143                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1144                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1145                         RTE_PTYPE_INNER_L4_SCTP,
1146                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1147                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1148                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1149                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1150                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1151                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1152                         RTE_PTYPE_INNER_L4_UDP,
1153                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1154                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1155                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1156                         RTE_PTYPE_INNER_L4_SCTP,
1157                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1158                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1159                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1160                         RTE_PTYPE_INNER_L3_IPV4,
1161                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1162                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1163                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1164                         RTE_PTYPE_INNER_L4_SCTP,
1165                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1166                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1167                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1168                         RTE_PTYPE_INNER_L4_SCTP,
1169                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1170                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1171                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1172                         RTE_PTYPE_INNER_L4_TCP,
1173                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1174                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1175                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1176                         RTE_PTYPE_INNER_L4_UDP,
1177
1178                 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1179                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1180                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1181                 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1182                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1183                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1184                         RTE_PTYPE_INNER_L3_IPV4,
1185                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1186                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1187                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1188                         RTE_PTYPE_INNER_L3_IPV4_EXT,
1189                 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1190                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1191                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1192                         RTE_PTYPE_INNER_L3_IPV6,
1193                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1194                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1195                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1196                         RTE_PTYPE_INNER_L3_IPV4,
1197                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1198                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1199                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1200                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1201                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1202                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1203                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1204                         RTE_PTYPE_INNER_L3_IPV4,
1205                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1206                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1207                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1208                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1209                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1210                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1211                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1212                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1213                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1214                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1215                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1216                         RTE_PTYPE_INNER_L3_IPV4,
1217                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1218                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1219                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1220                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1221                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1222                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1223                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1224                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1225                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1226                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1227                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1228                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1229                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1230                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1231                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1232                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1233                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1234                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1235                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1236                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1237                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1238                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1239                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1240                         RTE_PTYPE_INNER_L3_IPV4,
1241                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1242                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1243                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1244                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1245                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1246                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1247                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1248                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1249                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1250                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1251                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1252                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1253                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1254                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1255                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1256                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1257                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1258                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1259                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1260                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1261                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1262                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1263                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1264                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1265                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1266                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1267                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1268                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1269         };
1270
1271         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1272                 return RTE_PTYPE_UNKNOWN;
1273
1274         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1275
1276         /* For tunnel packet */
1277         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1278                 /* Remove the tunnel bit to save the space. */
1279                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1280                 return ptype_table_tn[pkt_info];
1281         }
1282
1283         /**
1284          * For x550, if it's not a tunnel packet,
1285          * the tunnel type bit should be 0.
1286          * Reuse 82599's mask.
1287          */
1288         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1289
1290         return ptype_table[pkt_info];
1291 }
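/*
 * Illustrative sketch (not part of the driver): how an application could
 * inspect the packet type produced by the lookup above.  The mbuf "m" and
 * the checks shown are hypothetical examples.
 *
 *      struct rte_mbuf *m;                     // one packet from an rx burst
 *      uint32_t ptype = m->packet_type;
 *
 *      if ((ptype & RTE_PTYPE_TUNNEL_MASK) == RTE_PTYPE_TUNNEL_GRE)
 *              ;       // an NVGRE entry from ptype_table_tn[] was used
 *      if ((ptype & RTE_PTYPE_INNER_L4_MASK) == RTE_PTYPE_INNER_L4_TCP)
 *              ;       // inner TCP header present
 */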
1292
1293 static inline uint64_t
1294 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1295 {
1296         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1297                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1298                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1299                 PKT_RX_RSS_HASH, 0, 0, 0,
1300                 0, 0, 0,  PKT_RX_FDIR,
1301         };
1302 #ifdef RTE_LIBRTE_IEEE1588
1303         static uint64_t ip_pkt_etqf_map[8] = {
1304                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1305                 0, 0, 0, 0,
1306         };
1307
1308         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1309                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1310                                 ip_rss_types_map[pkt_info & 0XF];
1311         else
1312                 return ip_rss_types_map[pkt_info & 0XF];
1313 #else
1314         return ip_rss_types_map[pkt_info & 0XF];
1315 #endif
1316 }
1317
1318 static inline uint64_t
1319 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1320 {
1321         uint64_t pkt_flags;
1322
1323         /*
1324          * Check only whether a VLAN tag is present.
1325          * Do not check whether the L3/L4 Rx checksum was computed by the NIC;
1326          * that can be determined from the rte_eth_rxmode.hw_ip_checksum flag.
1327          */
1328         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1329
1330 #ifdef RTE_LIBRTE_IEEE1588
1331         if (rx_status & IXGBE_RXD_STAT_TMST)
1332                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1333 #endif
1334         return pkt_flags;
1335 }
1336
1337 static inline uint64_t
1338 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1339 {
1340         uint64_t pkt_flags;
1341
1342         /*
1343          * Bit 31: IPE, IPv4 checksum error
1344          * Bit 30: L4I, L4 integrity error
1345          */
1346         static uint64_t error_to_pkt_flags_map[4] = {
1347                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1348                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1349                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1350                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1351         };
1352         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1353                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1354
1355         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1356             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1357                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1358         }
1359
1360         return pkt_flags;
1361 }
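/*
 * Worked example (illustrative only): the two checksum error bits form a
 * 2-bit index into error_to_pkt_flags_map[].  For a hypothetical status
 * word with only the L4 error bit (IXGBE_RXDADV_ERR_TCPE) set:
 *
 *      uint32_t rx_status = IXGBE_RXDADV_ERR_TCPE;
 *      uint32_t idx = (rx_status >> IXGBE_RXDADV_ERR_CKSUM_BIT) &
 *                      IXGBE_RXDADV_ERR_CKSUM_MSK;     // idx == 1
 *      // error_to_pkt_flags_map[1] ==
 *      //      PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD
 */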
1362
1363 /*
1364  * LOOK_AHEAD defines how many desc statuses to check beyond the
1365  * current descriptor.
1366  * It must be a #define (compile-time constant) for optimal performance.
1367  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1368  * function only works with LOOK_AHEAD=8.
1369  */
1370 #define LOOK_AHEAD 8
1371 #if (LOOK_AHEAD != 8)
1372 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1373 #endif
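/*
 * Illustrative note (not compiled): assuming RTE_PMD_IXGBE_RX_MAX_BURST is
 * 32 (its value in this PMD at the time of writing), the scan loop below
 * examines at most 32 / 8 = 4 groups of descriptors per call.  Summing the
 * DD bits gives the number of completed descriptors only because
 * IXGBE_RXDADV_STAT_DD is bit 0, so each term of the sum is 0 or 1:
 *
 *      nb_dd = 0;
 *      for (j = 0; j < LOOK_AHEAD; ++j)
 *              nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;   // adds 0 or 1
 */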
1374 static inline int
1375 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1376 {
1377         volatile union ixgbe_adv_rx_desc *rxdp;
1378         struct ixgbe_rx_entry *rxep;
1379         struct rte_mbuf *mb;
1380         uint16_t pkt_len;
1381         uint64_t pkt_flags;
1382         int nb_dd;
1383         uint32_t s[LOOK_AHEAD];
1384         uint32_t pkt_info[LOOK_AHEAD];
1385         int i, j, nb_rx = 0;
1386         uint32_t status;
1387         uint64_t vlan_flags = rxq->vlan_flags;
1388
1389         /* get references to current descriptor and S/W ring entry */
1390         rxdp = &rxq->rx_ring[rxq->rx_tail];
1391         rxep = &rxq->sw_ring[rxq->rx_tail];
1392
1393         status = rxdp->wb.upper.status_error;
1394         /* check to make sure there is at least 1 packet to receive */
1395         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1396                 return 0;
1397
1398         /*
1399          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1400          * reference packets that are ready to be received.
1401          */
1402         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1403              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1404                 /* Read desc statuses backwards to avoid race condition */
1405                 for (j = LOOK_AHEAD-1; j >= 0; --j)
1406                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1407
1408                 for (j = LOOK_AHEAD - 1; j >= 0; --j)
1409                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1410                                                        lo_dword.data);
1411
1412                 /* Compute how many status bits were set */
1413                 nb_dd = 0;
1414                 for (j = 0; j < LOOK_AHEAD; ++j)
1415                         nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;
1416
1417                 nb_rx += nb_dd;
1418
1419                 /* Translate descriptor info to mbuf format */
1420                 for (j = 0; j < nb_dd; ++j) {
1421                         mb = rxep[j].mbuf;
1422                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1423                                   rxq->crc_len;
1424                         mb->data_len = pkt_len;
1425                         mb->pkt_len = pkt_len;
1426                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1427
1428                         /* convert descriptor fields to rte mbuf flags */
1429                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1430                                 vlan_flags);
1431                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1432                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1433                                         ((uint16_t)pkt_info[j]);
1434                         mb->ol_flags = pkt_flags;
1435                         mb->packet_type =
1436                                 ixgbe_rxd_pkt_info_to_pkt_type
1437                                         (pkt_info[j], rxq->pkt_type_mask);
1438
1439                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1440                                 mb->hash.rss = rte_le_to_cpu_32(
1441                                     rxdp[j].wb.lower.hi_dword.rss);
1442                         else if (pkt_flags & PKT_RX_FDIR) {
1443                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1444                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1445                                     IXGBE_ATR_HASH_MASK;
1446                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1447                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1448                         }
1449                 }
1450
1451                 /* Move mbuf pointers from the S/W ring to the stage */
1452                 for (j = 0; j < LOOK_AHEAD; ++j) {
1453                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1454                 }
1455
1456                 /* stop scanning if this group of descriptors is not fully done */
1457                 if (nb_dd != LOOK_AHEAD)
1458                         break;
1459         }
1460
1461         /* clear software ring entries so we can cleanup correctly */
1462         for (i = 0; i < nb_rx; ++i) {
1463                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1464         }
1465
1466
1467         return nb_rx;
1468 }
1469
1470 static inline int
1471 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1472 {
1473         volatile union ixgbe_adv_rx_desc *rxdp;
1474         struct ixgbe_rx_entry *rxep;
1475         struct rte_mbuf *mb;
1476         uint16_t alloc_idx;
1477         __le64 dma_addr;
1478         int diag, i;
1479
1480         /* allocate buffers in bulk directly into the S/W ring */
1481         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1482         rxep = &rxq->sw_ring[alloc_idx];
1483         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1484                                     rxq->rx_free_thresh);
1485         if (unlikely(diag != 0))
1486                 return -ENOMEM;
1487
1488         rxdp = &rxq->rx_ring[alloc_idx];
1489         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1490                 /* populate the static rte mbuf fields */
1491                 mb = rxep[i].mbuf;
1492                 if (reset_mbuf) {
1493                         mb->next = NULL;
1494                         mb->nb_segs = 1;
1495                         mb->port = rxq->port_id;
1496                 }
1497
1498                 rte_mbuf_refcnt_set(mb, 1);
1499                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1500
1501                 /* populate the descriptors */
1502                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1503                 rxdp[i].read.hdr_addr = 0;
1504                 rxdp[i].read.pkt_addr = dma_addr;
1505         }
1506
1507         /* update state of internal queue structure */
1508         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1509         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1510                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1511
1512         /* no errors */
1513         return 0;
1514 }
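/*
 * Worked example (illustrative, hypothetical sizes): with nb_rx_desc = 128
 * and rx_free_thresh = 32, rx_free_trigger advances 31 -> 63 -> 95 -> 127.
 * After the refill at 127, 127 + 32 = 159 >= 128, so the trigger wraps back
 * to rx_free_thresh - 1 = 31.  The refill window always starts at alloc_idx:
 *
 *      alloc_idx = rx_free_trigger - (rx_free_thresh - 1);  // 96 at trigger 127
 */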
1515
1516 static inline uint16_t
1517 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1518                          uint16_t nb_pkts)
1519 {
1520         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1521         int i;
1522
1523         /* how many packets are ready to return? */
1524         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1525
1526         /* copy mbuf pointers to the application's packet list */
1527         for (i = 0; i < nb_pkts; ++i)
1528                 rx_pkts[i] = stage[i];
1529
1530         /* update internal queue state */
1531         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1532         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1533
1534         return nb_pkts;
1535 }
1536
1537 static inline uint16_t
1538 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1539              uint16_t nb_pkts)
1540 {
1541         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1542         uint16_t nb_rx = 0;
1543
1544         /* Any previously recv'd pkts will be returned from the Rx stage */
1545         if (rxq->rx_nb_avail)
1546                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1547
1548         /* Scan the H/W ring for packets to receive */
1549         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1550
1551         /* update internal queue state */
1552         rxq->rx_next_avail = 0;
1553         rxq->rx_nb_avail = nb_rx;
1554         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1555
1556         /* if required, allocate new buffers to replenish descriptors */
1557         if (rxq->rx_tail > rxq->rx_free_trigger) {
1558                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1559
1560                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1561                         int i, j;
1562
1563                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1564                                    "queue_id=%u", (unsigned) rxq->port_id,
1565                                    (unsigned) rxq->queue_id);
1566
1567                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1568                                 rxq->rx_free_thresh;
1569
1570                         /*
1571                          * Need to rewind any previous receives if we cannot
1572                          * allocate new buffers to replenish the old ones.
1573                          */
1574                         rxq->rx_nb_avail = 0;
1575                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1576                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1577                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1578
1579                         return 0;
1580                 }
1581
1582                 /* update tail pointer */
1583                 rte_wmb();
1584                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
1585         }
1586
1587         if (rxq->rx_tail >= rxq->nb_rx_desc)
1588                 rxq->rx_tail = 0;
1589
1590         /* received any packets this loop? */
1591         if (rxq->rx_nb_avail)
1592                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1593
1594         return 0;
1595 }
1596
1597 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1598 uint16_t
1599 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1600                            uint16_t nb_pkts)
1601 {
1602         uint16_t nb_rx;
1603
1604         if (unlikely(nb_pkts == 0))
1605                 return 0;
1606
1607         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1608                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1609
1610         /* request is relatively large, chunk it up */
1611         nb_rx = 0;
1612         while (nb_pkts) {
1613                 uint16_t ret, n;
1614
1615                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1616                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1617                 nb_rx = (uint16_t)(nb_rx + ret);
1618                 nb_pkts = (uint16_t)(nb_pkts - ret);
1619                 if (ret < n)
1620                         break;
1621         }
1622
1623         return nb_rx;
1624 }
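/*
 * Usage sketch (illustrative; port_id/queue_id are hypothetical): an
 * application never calls this handler directly, it is reached through
 * rte_eth_rx_burst() once the PMD selects the bulk-alloc path:
 *
 *      struct rte_mbuf *pkts[64];
 *      uint16_t n = rte_eth_rx_burst(port_id, queue_id, pkts, 64);
 *      // the 64-packet request is split by the loop above into chunks
 *      // of at most RTE_PMD_IXGBE_RX_MAX_BURST
 */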
1625
1626 uint16_t
1627 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1628                 uint16_t nb_pkts)
1629 {
1630         struct ixgbe_rx_queue *rxq;
1631         volatile union ixgbe_adv_rx_desc *rx_ring;
1632         volatile union ixgbe_adv_rx_desc *rxdp;
1633         struct ixgbe_rx_entry *sw_ring;
1634         struct ixgbe_rx_entry *rxe;
1635         struct rte_mbuf *rxm;
1636         struct rte_mbuf *nmb;
1637         union ixgbe_adv_rx_desc rxd;
1638         uint64_t dma_addr;
1639         uint32_t staterr;
1640         uint32_t pkt_info;
1641         uint16_t pkt_len;
1642         uint16_t rx_id;
1643         uint16_t nb_rx;
1644         uint16_t nb_hold;
1645         uint64_t pkt_flags;
1646         uint64_t vlan_flags;
1647
1648         nb_rx = 0;
1649         nb_hold = 0;
1650         rxq = rx_queue;
1651         rx_id = rxq->rx_tail;
1652         rx_ring = rxq->rx_ring;
1653         sw_ring = rxq->sw_ring;
1654         vlan_flags = rxq->vlan_flags;
1655         while (nb_rx < nb_pkts) {
1656                 /*
1657                  * The order of operations here is important as the DD status
1658                  * bit must not be read after any other descriptor fields.
1659                  * rx_ring and rxdp are pointing to volatile data so the order
1660                  * of accesses cannot be reordered by the compiler. If they were
1661                  * not volatile, they could be reordered which could lead to
1662                  * using invalid descriptor fields when read from rxd.
1663                  */
1664                 rxdp = &rx_ring[rx_id];
1665                 staterr = rxdp->wb.upper.status_error;
1666                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1667                         break;
1668                 rxd = *rxdp;
1669
1670                 /*
1671                  * End of packet.
1672                  *
1673                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1674                  * is likely to be invalid and to be dropped by the various
1675                  * validation checks performed by the network stack.
1676                  *
1677                  * Allocate a new mbuf to replenish the RX ring descriptor.
1678                  * If the allocation fails:
1679                  *    - arrange for that RX descriptor to be the first one
1680                  *      being parsed the next time the receive function is
1681                  *      invoked [on the same queue].
1682                  *
1683                  *    - Stop parsing the RX ring and return immediately.
1684                  *
1685                  * This policy does not drop the packet received in the RX
1686                  * descriptor for which the allocation of a new mbuf failed.
1687                  * Thus, it allows that packet to be retrieved later if
1688                  * mbufs have been freed in the meantime.
1689                  * As a side effect, holding RX descriptors instead of
1690                  * systematically giving them back to the NIC may lead to
1691                  * RX ring exhaustion situations.
1692                  * However, the NIC can gracefully prevent such situations
1693                  * from happening by sending specific "back-pressure" flow control
1694                  * frames to its peer(s).
1695                  */
1696                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1697                            "ext_err_stat=0x%08x pkt_len=%u",
1698                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1699                            (unsigned) rx_id, (unsigned) staterr,
1700                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1701
1702                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1703                 if (nmb == NULL) {
1704                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1705                                    "queue_id=%u", (unsigned) rxq->port_id,
1706                                    (unsigned) rxq->queue_id);
1707                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1708                         break;
1709                 }
1710
1711                 nb_hold++;
1712                 rxe = &sw_ring[rx_id];
1713                 rx_id++;
1714                 if (rx_id == rxq->nb_rx_desc)
1715                         rx_id = 0;
1716
1717                 /* Prefetch next mbuf while processing current one. */
1718                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1719
1720                 /*
1721                  * When next RX descriptor is on a cache-line boundary,
1722                  * prefetch the next 4 RX descriptors and the next 8 pointers
1723                  * to mbufs.
1724                  */
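                /*
                 * Illustrative arithmetic (not compiled): an advanced RX
                 * descriptor is 16 bytes, so a 64-byte cache line holds
                 * 64 / 16 = 4 descriptors and (rx_id & 0x3) == 0 marks the
                 * start of a new line; an ixgbe_rx_entry holds one mbuf
                 * pointer, so on a 64-bit build one line of sw_ring covers
                 * 8 entries.  A hypothetical compile-time check of the
                 * first assumption:
                 *
                 *      RTE_BUILD_BUG_ON(sizeof(union ixgbe_adv_rx_desc) != 16);
                 */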
1725                 if ((rx_id & 0x3) == 0) {
1726                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1727                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1728                 }
1729
1730                 rxm = rxe->mbuf;
1731                 rxe->mbuf = nmb;
1732                 dma_addr =
1733                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1734                 rxdp->read.hdr_addr = 0;
1735                 rxdp->read.pkt_addr = dma_addr;
1736
1737                 /*
1738                  * Initialize the returned mbuf.
1739                  * 1) setup generic mbuf fields:
1740                  *    - number of segments,
1741                  *    - next segment,
1742                  *    - packet length,
1743                  *    - RX port identifier.
1744                  * 2) integrate hardware offload data, if any:
1745                  *    - RSS flag & hash,
1746                  *    - IP checksum flag,
1747                  *    - VLAN TCI, if any,
1748                  *    - error flags.
1749                  */
1750                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1751                                       rxq->crc_len);
1752                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1753                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1754                 rxm->nb_segs = 1;
1755                 rxm->next = NULL;
1756                 rxm->pkt_len = pkt_len;
1757                 rxm->data_len = pkt_len;
1758                 rxm->port = rxq->port_id;
1759
1760                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1761                 /* Only valid if PKT_RX_VLAN_PKT is set in pkt_flags */
1762                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1763
1764                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1765                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1766                 pkt_flags = pkt_flags |
1767                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1768                 rxm->ol_flags = pkt_flags;
1769                 rxm->packet_type =
1770                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1771                                                        rxq->pkt_type_mask);
1772
1773                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1774                         rxm->hash.rss = rte_le_to_cpu_32(
1775                                                 rxd.wb.lower.hi_dword.rss);
1776                 else if (pkt_flags & PKT_RX_FDIR) {
1777                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1778                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1779                                         IXGBE_ATR_HASH_MASK;
1780                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1781                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1782                 }
1783                 /*
1784                  * Store the mbuf address into the next entry of the array
1785                  * of returned packets.
1786                  */
1787                 rx_pkts[nb_rx++] = rxm;
1788         }
1789         rxq->rx_tail = rx_id;
1790
1791         /*
1792          * If the number of free RX descriptors is greater than the RX free
1793          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1794          * register.
1795          * Update the RDT with the value of the last processed RX descriptor
1796          * minus 1, to guarantee that the RDT register is never equal to the
1797          * RDH register, which creates a "full" ring situation from the
1798          * hardware point of view...
1799          */
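        /*
         * Worked example (illustrative, hypothetical sizes): with
         * nb_rx_desc = 128 and rx_free_thresh = 32, once nb_hold exceeds 32
         * the tail written back is "last processed index minus 1":
         *
         *      rx_id == 0   ->  IXGBE_PCI_REG_WRITE(rdt_reg_addr, 127);
         *      rx_id == 40  ->  IXGBE_PCI_REG_WRITE(rdt_reg_addr, 39);
         *
         * so RDT can never become equal to RDH.
         */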
1800         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1801         if (nb_hold > rxq->rx_free_thresh) {
1802                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1803                            "nb_hold=%u nb_rx=%u",
1804                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1805                            (unsigned) rx_id, (unsigned) nb_hold,
1806                            (unsigned) nb_rx);
1807                 rx_id = (uint16_t) ((rx_id == 0) ?
1808                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1809                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1810                 nb_hold = 0;
1811         }
1812         rxq->nb_rx_hold = nb_hold;
1813         return nb_rx;
1814 }
1815
1816 /**
1817  * Detect an RSC descriptor.
1818  */
1819 static inline uint32_t
1820 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1821 {
1822         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1823                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1824 }
1825
1826 /**
1827  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1828  *
1829  * Fill the following info in the HEAD buffer of the Rx cluster:
1830  *    - RX port identifier
1831  *    - hardware offload data, if any:
1832  *      - RSS flag & hash
1833  *      - IP checksum flag
1834  *      - VLAN TCI, if any
1835  *      - error flags
1836  * @head HEAD of the packet cluster
1837  * @desc HW descriptor to get data from
1838  * @rxq Pointer to the Rx queue
1839  */
1840 static inline void
1841 ixgbe_fill_cluster_head_buf(
1842         struct rte_mbuf *head,
1843         union ixgbe_adv_rx_desc *desc,
1844         struct ixgbe_rx_queue *rxq,
1845         uint32_t staterr)
1846 {
1847         uint32_t pkt_info;
1848         uint64_t pkt_flags;
1849
1850         head->port = rxq->port_id;
1851
1852         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1853          * set in the pkt_flags field.
1854          */
1855         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1856         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1857         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1858         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1859         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1860         head->ol_flags = pkt_flags;
1861         head->packet_type =
1862                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1863
1864         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1865                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1866         else if (pkt_flags & PKT_RX_FDIR) {
1867                 head->hash.fdir.hash =
1868                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1869                                                           & IXGBE_ATR_HASH_MASK;
1870                 head->hash.fdir.id =
1871                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1872         }
1873 }
1874
1875 /**
1876  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1877  *
1878  * @rx_queue Rx queue handle
1879  * @rx_pkts table of received packets
1880  * @nb_pkts size of rx_pkts table
1881  * @bulk_alloc if TRUE, bulk allocation is used to refill the HW ring
1882  *
1883  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1884  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1885  *
1886  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1887  * 1) When non-EOP RSC completion arrives:
1888  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1889  *       segment's data length.
1890  *    b) Set the "next" pointer of the current segment to point to the segment
1891  *       at the NEXTP index.
1892  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1893  *       in the sw_rsc_ring.
1894  * 2) When EOP arrives we just update the cluster's total length and offload
1895  *    flags and deliver the cluster up to the upper layers. In our case - put it
1896  *    in the rx_pkts table.
1897  *
1898  * Returns the number of received packets/clusters (according to the "bulk
1899  * receive" interface).
1900  * receive" interface).
 */
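/*
 * Illustrative walk-through (not part of the driver; the indices are
 * hypothetical): assume one cluster arrives in descriptors 5, 9 and 14,
 * where 9 and 14 come from the NEXTP field.
 *
 *   desc 5 (non-EOP): sw_sc_ring[5].fbuf is NULL, so mbuf #5 becomes the
 *        HEAD; then, roughly:
 *              rxm->next = sw_ring[9].mbuf;    // 1b) link to NEXTP segment
 *              sw_sc_ring[9].fbuf = first_seg; // 1c) pass the HEAD on
 *   desc 9 (non-EOP): data_len is added to the HEAD (1a) and the HEAD
 *        pointer moves on to sw_sc_ring[14].fbuf.
 *   desc 14 (EOP): totals/offload flags are finalized via
 *        ixgbe_fill_cluster_head_buf() and the HEAD lands in rx_pkts[] (2).
 */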
1901 static inline uint16_t
1902 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1903                     bool bulk_alloc)
1904 {
1905         struct ixgbe_rx_queue *rxq = rx_queue;
1906         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1907         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1908         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1909         uint16_t rx_id = rxq->rx_tail;
1910         uint16_t nb_rx = 0;
1911         uint16_t nb_hold = rxq->nb_rx_hold;
1912         uint16_t prev_id = rxq->rx_tail;
1913
1914         while (nb_rx < nb_pkts) {
1915                 bool eop;
1916                 struct ixgbe_rx_entry *rxe;
1917                 struct ixgbe_scattered_rx_entry *sc_entry;
1918                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1919                 struct ixgbe_rx_entry *next_rxe = NULL;
1920                 struct rte_mbuf *first_seg;
1921                 struct rte_mbuf *rxm;
1922                 struct rte_mbuf *nmb;
1923                 union ixgbe_adv_rx_desc rxd;
1924                 uint16_t data_len;
1925                 uint16_t next_id;
1926                 volatile union ixgbe_adv_rx_desc *rxdp;
1927                 uint32_t staterr;
1928
1929 next_desc:
1930                 /*
1931                  * The code in this whole file uses the volatile pointer to
1932                  * ensure the read ordering of the status and the rest of the
1933                  * descriptor fields (on the compiler level only!!!). This is so
1934          * UGLY - why not just use the compiler barrier instead? DPDK
1935                  * even has the rte_compiler_barrier() for that.
1936                  *
1937                  * But most importantly this is just wrong because this doesn't
1938                  * ensure memory ordering in a general case at all. For
1939                  * instance, DPDK is supposed to work on Power CPUs where
1940                  * compiler barrier may just not be enough!
1941                  *
1942                  * I tried to write only this function properly to have a
1943                  * starting point (as a part of an LRO/RSC series) but the
1944                  * compiler cursed at me when I tried to cast away the
1945                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
1946                  * keeping it the way it is for now.
1947                  *
1948                  * The code in this file is broken in so many other places and
1949                  * will just not work on a big endian CPU anyway therefore the
1950                  * lines below will have to be revisited together with the rest
1951                  * of the ixgbe PMD.
1952                  *
1953                  * TODO:
1954                  *    - Get rid of "volatile" crap and let the compiler do its
1955                  *      job.
1956                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
1957                  *      memory ordering below.
1958                  */
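                /*
                 * Sketch of the TODO above (illustrative only, not what the
                 * code below currently does): with a real read barrier the
                 * check could look like
                 *
                 *      staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
                 *      if (!(staterr & IXGBE_RXDADV_STAT_DD))
                 *              break;
                 *      rte_rmb();      // order DD test before other field reads
                 *      rxd = *rxdp;
                 */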
1959                 rxdp = &rx_ring[rx_id];
1960                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
1961
1962                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
1963                         break;
1964
1965                 rxd = *rxdp;
1966
1967                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1968                                   "staterr=0x%x data_len=%u",
1969                            rxq->port_id, rxq->queue_id, rx_id, staterr,
1970                            rte_le_to_cpu_16(rxd.wb.upper.length));
1971
1972                 if (!bulk_alloc) {
1973                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1974                         if (nmb == NULL) {
1975                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1976                                                   "port_id=%u queue_id=%u",
1977                                            rxq->port_id, rxq->queue_id);
1978
1979                                 rte_eth_devices[rxq->port_id].data->
1980                                                         rx_mbuf_alloc_failed++;
1981                                 break;
1982                         }
1983                 } else if (nb_hold > rxq->rx_free_thresh) {
1984                         uint16_t next_rdt = rxq->rx_free_trigger;
1985
1986                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
1987                                 rte_wmb();
1988                                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
1989                                                     next_rdt);
1990                                 nb_hold -= rxq->rx_free_thresh;
1991                         } else {
1992                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1993                                                   "port_id=%u queue_id=%u",
1994                                            rxq->port_id, rxq->queue_id);
1995
1996                                 rte_eth_devices[rxq->port_id].data->
1997                                                         rx_mbuf_alloc_failed++;
1998                                 break;
1999                         }
2000                 }
2001
2002                 nb_hold++;
2003                 rxe = &sw_ring[rx_id];
2004                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2005
2006                 next_id = rx_id + 1;
2007                 if (next_id == rxq->nb_rx_desc)
2008                         next_id = 0;
2009
2010                 /* Prefetch next mbuf while processing current one. */
2011                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2012
2013                 /*
2014                  * When next RX descriptor is on a cache-line boundary,
2015                  * prefetch the next 4 RX descriptors and the next 4 pointers
2016                  * to mbufs.
2017                  */
2018                 if ((next_id & 0x3) == 0) {
2019                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2020                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2021                 }
2022
2023                 rxm = rxe->mbuf;
2024
2025                 if (!bulk_alloc) {
2026                         __le64 dma =
2027                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2028                         /*
2029                          * Update RX descriptor with the physical address of the
2030                          * new data buffer of the newly allocated mbuf.
2031                          */
2032                         rxe->mbuf = nmb;
2033
2034                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2035                         rxdp->read.hdr_addr = 0;
2036                         rxdp->read.pkt_addr = dma;
2037                 } else
2038                         rxe->mbuf = NULL;
2039
2040                 /*
2041                  * Set data length & data buffer address of mbuf.
2042                  */
2043                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2044                 rxm->data_len = data_len;
2045
2046                 if (!eop) {
2047                         uint16_t nextp_id;
2048                         /*
2049                          * Get next descriptor index:
2050                          *  - For RSC it's in the NEXTP field.
2051                          *  - For a scattered packet - it's just a following
2052                          *    descriptor.
2053                          */
2054                         if (ixgbe_rsc_count(&rxd))
2055                                 nextp_id =
2056                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2057                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2058                         else
2059                                 nextp_id = next_id;
2060
2061                         next_sc_entry = &sw_sc_ring[nextp_id];
2062                         next_rxe = &sw_ring[nextp_id];
2063                         rte_ixgbe_prefetch(next_rxe);
2064                 }
2065
2066                 sc_entry = &sw_sc_ring[rx_id];
2067                 first_seg = sc_entry->fbuf;
2068                 sc_entry->fbuf = NULL;
2069
2070                 /*
2071                  * If this is the first buffer of the received packet,
2072                  * set the pointer to the first mbuf of the packet and
2073                  * initialize its context.
2074                  * Otherwise, update the total length and the number of segments
2075                  * of the current scattered packet, and update the pointer to
2076                  * the last mbuf of the current packet.
2077                  */
2078                 if (first_seg == NULL) {
2079                         first_seg = rxm;
2080                         first_seg->pkt_len = data_len;
2081                         first_seg->nb_segs = 1;
2082                 } else {
2083                         first_seg->pkt_len += data_len;
2084                         first_seg->nb_segs++;
2085                 }
2086
2087                 prev_id = rx_id;
2088                 rx_id = next_id;
2089
2090                 /*
2091                  * If this is not the last buffer of the received packet, update
2092                  * the pointer to the first mbuf at the NEXTP entry in the
2093                  * sw_sc_ring and continue to parse the RX ring.
2094                  */
2095                 if (!eop && next_rxe) {
2096                         rxm->next = next_rxe->mbuf;
2097                         next_sc_entry->fbuf = first_seg;
2098                         goto next_desc;
2099                 }
2100
2101                 /*
2102                  * This is the last buffer of the received packet - return
2103                  * the current cluster to the user.
2104                  */
2105                 rxm->next = NULL;
2106
2107                 /* Initialize the first mbuf of the returned packet */
2108                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2109
2110                 /*
2111                  * Deal with the case when HW CRC strip is disabled.
2112                  * That can't happen when LRO is enabled, but still could
2113                  * happen for scattered RX mode.
2114                  */
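                /*
                 * Worked example (illustrative, hypothetical sizes): with a
                 * 4-byte CRC and a final segment holding only 2 data bytes,
                 * the branch below frees that last segment and trims the
                 * remaining 4 - 2 = 2 CRC bytes from the previous one:
                 *
                 *      lp->data_len -= rxq->crc_len - rxm->data_len;   // -= 2
                 */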
2115                 first_seg->pkt_len -= rxq->crc_len;
2116                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2117                         struct rte_mbuf *lp;
2118
2119                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2120                                 ;
2121
2122                         first_seg->nb_segs--;
2123                         lp->data_len -= rxq->crc_len - rxm->data_len;
2124                         lp->next = NULL;
2125                         rte_pktmbuf_free_seg(rxm);
2126                 } else
2127                         rxm->data_len -= rxq->crc_len;
2128
2129                 /* Prefetch data of first segment, if configured to do so. */
2130                 rte_packet_prefetch((char *)first_seg->buf_addr +
2131                         first_seg->data_off);
2132
2133                 /*
2134                  * Store the mbuf address into the next entry of the array
2135                  * of returned packets.
2136                  */
2137                 rx_pkts[nb_rx++] = first_seg;
2138         }
2139
2140         /*
2141          * Record index of the next RX descriptor to probe.
2142          */
2143         rxq->rx_tail = rx_id;
2144
2145         /*
2146          * If the number of free RX descriptors is greater than the RX free
2147          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2148          * register.
2149          * Update the RDT with the value of the last processed RX descriptor
2150          * minus 1, to guarantee that the RDT register is never equal to the
2151          * RDH register, which creates a "full" ring situation from the
2152          * hardware point of view...
2153          */
2154         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2155                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2156                            "nb_hold=%u nb_rx=%u",
2157                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2158
2159                 rte_wmb();
2160                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
2161                 nb_hold = 0;
2162         }
2163
2164         rxq->nb_rx_hold = nb_hold;
2165         return nb_rx;
2166 }
2167
2168 uint16_t
2169 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2170                                  uint16_t nb_pkts)
2171 {
2172         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2173 }
2174
2175 uint16_t
2176 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2177                                uint16_t nb_pkts)
2178 {
2179         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2180 }
2181
2182 /*********************************************************************
2183  *
2184  *  Queue management functions
2185  *
2186  **********************************************************************/
2187
2188 static void __attribute__((cold))
2189 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2190 {
2191         unsigned i;
2192
2193         if (txq->sw_ring != NULL) {
2194                 for (i = 0; i < txq->nb_tx_desc; i++) {
2195                         if (txq->sw_ring[i].mbuf != NULL) {
2196                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2197                                 txq->sw_ring[i].mbuf = NULL;
2198                         }
2199                 }
2200         }
2201 }
2202
2203 static void __attribute__((cold))
2204 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2205 {
2206         if (txq != NULL &&
2207             txq->sw_ring != NULL)
2208                 rte_free(txq->sw_ring);
2209 }
2210
2211 static void __attribute__((cold))
2212 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2213 {
2214         if (txq != NULL && txq->ops != NULL) {
2215                 txq->ops->release_mbufs(txq);
2216                 txq->ops->free_swring(txq);
2217                 rte_free(txq);
2218         }
2219 }
2220
2221 void __attribute__((cold))
2222 ixgbe_dev_tx_queue_release(void *txq)
2223 {
2224         ixgbe_tx_queue_release(txq);
2225 }
2226
2227 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2228 static void __attribute__((cold))
2229 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2230 {
2231         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2232         struct ixgbe_tx_entry *txe = txq->sw_ring;
2233         uint16_t prev, i;
2234
2235         /* Zero out HW ring memory */
2236         for (i = 0; i < txq->nb_tx_desc; i++) {
2237                 txq->tx_ring[i] = zeroed_desc;
2238         }
2239
2240         /* Initialize SW ring entries */
2241         prev = (uint16_t) (txq->nb_tx_desc - 1);
2242         for (i = 0; i < txq->nb_tx_desc; i++) {
2243                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2244
2245                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2246                 txe[i].mbuf = NULL;
2247                 txe[i].last_id = i;
2248                 txe[prev].next_id = i;
2249                 prev = i;
2250         }
2251
2252         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2253         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2254
2255         txq->tx_tail = 0;
2256         txq->nb_tx_used = 0;
2257         /*
2258          * Always allow 1 descriptor to be un-allocated to avoid
2259          * a H/W race condition
2260          */
2261         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2262         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2263         txq->ctx_curr = 0;
2264         memset((void *)&txq->ctx_cache, 0,
2265                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2266 }
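/*
 * Illustrative picture (not compiled; the ring size is hypothetical): for a
 * 4-entry ring the reset above links the SW entries into a circle and marks
 * every HW descriptor as already completed:
 *
 *      txe[3].next_id == 0, txe[0].next_id == 1, ...  (prev starts at 3)
 *      txe[0] -> txe[1] -> txe[2] -> txe[3] -> txe[0]
 *      txd->wb.status == IXGBE_TXD_STAT_DD for each descriptor
 */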
2267
2268 static const struct ixgbe_txq_ops def_txq_ops = {
2269         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2270         .free_swring = ixgbe_tx_free_swring,
2271         .reset = ixgbe_reset_tx_queue,
2272 };
2273
2274 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2275  * the queue parameters. Used in tx_queue_setup by primary process and then
2276  * in dev_init by secondary process when attaching to an existing ethdev.
2277  */
2278 void __attribute__((cold))
2279 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2280 {
2281         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2282         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2283                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2284                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2285 #ifdef RTE_IXGBE_INC_VECTOR
2286                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2287                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2288                                         ixgbe_txq_vec_setup(txq) == 0)) {
2289                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2290                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2291                 } else
2292 #endif
2293                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2294         } else {
2295                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2296                 PMD_INIT_LOG(DEBUG,
2297                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2298                                 (unsigned long)txq->txq_flags,
2299                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2300                 PMD_INIT_LOG(DEBUG,
2301                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2302                                 (unsigned long)txq->tx_rs_thresh,
2303                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2304                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2305         }
2306 }
2307
2308 int __attribute__((cold))
2309 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2310                          uint16_t queue_idx,
2311                          uint16_t nb_desc,
2312                          unsigned int socket_id,
2313                          const struct rte_eth_txconf *tx_conf)
2314 {
2315         const struct rte_memzone *tz;
2316         struct ixgbe_tx_queue *txq;
2317         struct ixgbe_hw     *hw;
2318         uint16_t tx_rs_thresh, tx_free_thresh;
2319
2320         PMD_INIT_FUNC_TRACE();
2321         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2322
2323         /*
2324          * Validate number of transmit descriptors.
2325          * It must not exceed hardware maximum, and must be multiple
2326          * of IXGBE_ALIGN.
2327          */
2328         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2329                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2330                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2331                 return -EINVAL;
2332         }
2333
2334         /*
2335          * The following two parameters control the setting of the RS bit on
2336          * transmit descriptors.
2337          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2338          * descriptors have been used.
2339          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2340          * descriptors are used or if the number of descriptors required
2341          * to transmit a packet is greater than the number of free TX
2342          * descriptors.
2343          * The following constraints must be satisfied:
2344          *  tx_rs_thresh must be greater than 0.
2345          *  tx_rs_thresh must be less than the size of the ring minus 2.
2346          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2347          *  tx_rs_thresh must be a divisor of the ring size.
2348          *  tx_free_thresh must be greater than 0.
2349          *  tx_free_thresh must be less than the size of the ring minus 3.
2350          * One descriptor in the TX ring is used as a sentinel to avoid a
2351          * H/W race condition, hence the maximum threshold constraints.
2352          * When set to zero use default values.
2353          */
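        /*
         * Example configuration (illustrative, hypothetical values): for a
         * ring of nb_desc = 512 the 32/32 defaults picked below satisfy all
         * of the constraints listed above:
         *
         *      tx_rs_thresh   = 32;    // divides 512, < 510, <= tx_free_thresh
         *      tx_free_thresh = 32;    // > 0, < 509
         */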
2354         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2355                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2356         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2357                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2358         if (tx_rs_thresh >= (nb_desc - 2)) {
2359                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2360                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2361                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2362                         (int)dev->data->port_id, (int)queue_idx);
2363                 return -(EINVAL);
2364         }
2365         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2366                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2367                         "(tx_rs_thresh=%u port=%d queue=%d)",
2368                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2369                         (int)dev->data->port_id, (int)queue_idx);
2370                 return -(EINVAL);
2371         }
2372         if (tx_free_thresh >= (nb_desc - 3)) {
2373                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2374                              "number of "
2375                              "TX descriptors minus 3. (tx_free_thresh=%u "
2376                              "port=%d queue=%d)",
2377                              (unsigned int)tx_free_thresh,
2378                              (int)dev->data->port_id, (int)queue_idx);
2379                 return -(EINVAL);
2380         }
2381         if (tx_rs_thresh > tx_free_thresh) {
2382                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2383                              "tx_free_thresh. (tx_free_thresh=%u "
2384                              "tx_rs_thresh=%u port=%d queue=%d)",
2385                              (unsigned int)tx_free_thresh,
2386                              (unsigned int)tx_rs_thresh,
2387                              (int)dev->data->port_id,
2388                              (int)queue_idx);
2389                 return -(EINVAL);
2390         }
2391         if ((nb_desc % tx_rs_thresh) != 0) {
2392                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2393                              "number of TX descriptors. (tx_rs_thresh=%u "
2394                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2395                              (int)dev->data->port_id, (int)queue_idx);
2396                 return -(EINVAL);
2397         }
2398
2399         /*
2400          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2401          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2402          * by the NIC and all descriptors are written back after the NIC
2403          * accumulates WTHRESH descriptors.
2404          */
2405         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2406                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2407                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2408                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2409                              (int)dev->data->port_id, (int)queue_idx);
2410                 return -(EINVAL);
2411         }
2412
2413         /* Free memory prior to re-allocation if needed... */
2414         if (dev->data->tx_queues[queue_idx] != NULL) {
2415                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2416                 dev->data->tx_queues[queue_idx] = NULL;
2417         }
2418
2419         /* First allocate the tx queue data structure */
2420         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2421                                  RTE_CACHE_LINE_SIZE, socket_id);
2422         if (txq == NULL)
2423                 return -ENOMEM;
2424
2425         /*
2426          * Allocate TX ring hardware descriptors. A memzone large enough to
2427          * handle the maximum ring size is allocated in order to allow for
2428          * resizing in later calls to the queue setup function.
2429          */
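        /*
         * Worked size, for reference: an advanced TX descriptor is 16 bytes,
         * so the memzone below is 16 B * IXGBE_MAX_RING_DESC (4096) = 64 KiB,
         * independent of the nb_desc actually requested.
         */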
2430         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2431                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2432                         IXGBE_ALIGN, socket_id);
2433         if (tz == NULL) {
2434                 ixgbe_tx_queue_release(txq);
2435                 return -ENOMEM;
2436         }
2437
2438         txq->nb_tx_desc = nb_desc;
2439         txq->tx_rs_thresh = tx_rs_thresh;
2440         txq->tx_free_thresh = tx_free_thresh;
2441         txq->pthresh = tx_conf->tx_thresh.pthresh;
2442         txq->hthresh = tx_conf->tx_thresh.hthresh;
2443         txq->wthresh = tx_conf->tx_thresh.wthresh;
2444         txq->queue_id = queue_idx;
2445         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2446                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2447         txq->port_id = dev->data->port_id;
2448         txq->txq_flags = tx_conf->txq_flags;
2449         txq->ops = &def_txq_ops;
2450         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2451
2452         /*
2453          * Use the VF tail register (VFTDT) when the device is a virtual function.
2454          */
2455         if (hw->mac.type == ixgbe_mac_82599_vf ||
2456             hw->mac.type == ixgbe_mac_X540_vf ||
2457             hw->mac.type == ixgbe_mac_X550_vf ||
2458             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2459             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2460                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2461         else
2462                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2463
2464         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2465         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2466
2467         /* Allocate software ring */
2468         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2469                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2470                                 RTE_CACHE_LINE_SIZE, socket_id);
2471         if (txq->sw_ring == NULL) {
2472                 ixgbe_tx_queue_release(txq);
2473                 return -ENOMEM;
2474         }
2475         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2476                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2477
2478         /* set up vector or scalar TX function as appropriate */
2479         ixgbe_set_tx_function(dev, txq);
2480
2481         txq->ops->reset(txq);
2482
2483         dev->data->tx_queues[queue_idx] = txq;
2484
2485
2486         return 0;
2487 }
2488
2489 /**
2490  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2491  *
2492  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2493  * in the sw_rsc_ring is not set to NULL but rather points to the next
2494  * mbuf of this RSC aggregation (that has not been completed yet and still
2495  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2496  * just free the first "nb_segs" segments of the cluster explicitly, one at
2497  * a time, with rte_pktmbuf_free_seg().
2498  *
2499  * @m scattered cluster head
2500  */
2501 static void __attribute__((cold))
2502 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2503 {
2504         uint8_t i, nb_segs = m->nb_segs;
2505         struct rte_mbuf *next_seg;
2506
2507         for (i = 0; i < nb_segs; i++) {
2508                 next_seg = m->next;
2509                 rte_pktmbuf_free_seg(m);
2510                 m = next_seg;
2511         }
2512 }
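/*
 * Illustrative example of the situation handled above: a cluster head with
 * nb_segs == 3 whose third segment's "next" still points at an mbuf that is
 * sitting on the HW ring.  The loop frees exactly those three segments, one
 * rte_pktmbuf_free_seg() call each, and leaves the in-flight mbuf untouched.
 */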
2513
2514 static void __attribute__((cold))
2515 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2516 {
2517         unsigned i;
2518
2519 #ifdef RTE_IXGBE_INC_VECTOR
2520         /* SSE Vector driver has a different way of releasing mbufs. */
2521         if (rxq->rx_using_sse) {
2522                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2523                 return;
2524         }
2525 #endif
2526
2527         if (rxq->sw_ring != NULL) {
2528                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2529                         if (rxq->sw_ring[i].mbuf != NULL) {
2530                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2531                                 rxq->sw_ring[i].mbuf = NULL;
2532                         }
2533                 }
2534                 if (rxq->rx_nb_avail) {
2535                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2536                                 struct rte_mbuf *mb;
2537
2538                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2539                                 rte_pktmbuf_free_seg(mb);
2540                         }
2541                         rxq->rx_nb_avail = 0;
2542                 }
2543         }
2544
2545         if (rxq->sw_sc_ring)
2546                 for (i = 0; i < rxq->nb_rx_desc; i++)
2547                         if (rxq->sw_sc_ring[i].fbuf) {
2548                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2549                                 rxq->sw_sc_ring[i].fbuf = NULL;
2550                         }
2551 }
2552
2553 static void __attribute__((cold))
2554 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2555 {
2556         if (rxq != NULL) {
2557                 ixgbe_rx_queue_release_mbufs(rxq);
2558                 rte_free(rxq->sw_ring);
2559                 rte_free(rxq->sw_sc_ring);
2560                 rte_free(rxq);
2561         }
2562 }
2563
2564 void __attribute__((cold))
2565 ixgbe_dev_rx_queue_release(void *rxq)
2566 {
2567         ixgbe_rx_queue_release(rxq);
2568 }
2569
2570 /*
2571  * Check if Rx Burst Bulk Alloc function can be used.
2572  * Return
2573  *        0: the preconditions are satisfied and the bulk allocation function
2574  *           can be used.
2575  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2576  *           function must be used.
2577  */
2578 static inline int __attribute__((cold))
2579 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2580 {
2581         int ret = 0;
2582
2583         /*
2584          * Make sure the following pre-conditions are satisfied:
2585          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2586          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2587          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2588          *   rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST)
2589          * Scattered packets are not supported.  This should be checked
2590          * outside of this function.
2591          */
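        /*
         * Worked example: nb_rx_desc = 512 with rx_free_thresh = 64 passes all
         * four checks below (64 >= RTE_PMD_IXGBE_RX_MAX_BURST (32), 64 < 512,
         * 512 % 64 == 0, 512 < 4096 - 32).
         */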
2592         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2593                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2594                              "rxq->rx_free_thresh=%d, "
2595                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2596                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2597                 ret = -EINVAL;
2598         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2599                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2600                              "rxq->rx_free_thresh=%d, "
2601                              "rxq->nb_rx_desc=%d",
2602                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2603                 ret = -EINVAL;
2604         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2605                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2606                              "rxq->nb_rx_desc=%d, "
2607                              "rxq->rx_free_thresh=%d",
2608                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2609                 ret = -EINVAL;
2610         } else if (!(rxq->nb_rx_desc <
2611                (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
2612                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2613                              "rxq->nb_rx_desc=%d, "
2614                              "IXGBE_MAX_RING_DESC=%d, "
2615                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2616                              rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
2617                              RTE_PMD_IXGBE_RX_MAX_BURST);
2618                 ret = -EINVAL;
2619         }
2620
2621         return ret;
2622 }
2623
2624 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2625 static void __attribute__((cold))
2626 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2627 {
2628         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2629         unsigned i;
2630         uint16_t len = rxq->nb_rx_desc;
2631
2632         /*
2633          * By default, the Rx queue setup function allocates enough memory for
2634          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2635          * extra memory at the end of the descriptor ring to be zero'd out. A
2636          * pre-condition for using the Rx burst bulk alloc function is that the
2637          * number of descriptors is strictly less than
2638          * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2639          * constraints here to see if we need to zero out memory after the end
2640          * of the H/W descriptor ring.
2641          */
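        /*
         * Worked example: with 512 descriptors and bulk alloc allowed, len
         * becomes 512 + RTE_PMD_IXGBE_RX_MAX_BURST (32) = 544, so 544
         * descriptor slots are zeroed and the 32 extra sw_ring entries are
         * later pointed at &rxq->fake_mbuf.
         */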
2642         if (adapter->rx_bulk_alloc_allowed)
2643                 /* zero out extra memory */
2644                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2645
2646         /*
2647          * Zero out HW ring memory. Zero out extra memory at the end of
2648          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2649          * reads extra memory as zeros.
2650          */
2651         for (i = 0; i < len; i++) {
2652                 rxq->rx_ring[i] = zeroed_desc;
2653         }
2654
2655         /*
2656          * initialize extra software ring entries. Space for these extra
2657          * entries is always allocated
2658          */
2659         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2660         for (i = rxq->nb_rx_desc; i < len; ++i) {
2661                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2662         }
2663
2664         rxq->rx_nb_avail = 0;
2665         rxq->rx_next_avail = 0;
2666         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2667         rxq->rx_tail = 0;
2668         rxq->nb_rx_hold = 0;
2669         rxq->pkt_first_seg = NULL;
2670         rxq->pkt_last_seg = NULL;
2671
2672 #ifdef RTE_IXGBE_INC_VECTOR
2673         rxq->rxrearm_start = 0;
2674         rxq->rxrearm_nb = 0;
2675 #endif
2676 }
2677
2678 int __attribute__((cold))
2679 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2680                          uint16_t queue_idx,
2681                          uint16_t nb_desc,
2682                          unsigned int socket_id,
2683                          const struct rte_eth_rxconf *rx_conf,
2684                          struct rte_mempool *mp)
2685 {
2686         const struct rte_memzone *rz;
2687         struct ixgbe_rx_queue *rxq;
2688         struct ixgbe_hw     *hw;
2689         uint16_t len;
2690         struct ixgbe_adapter *adapter =
2691                 (struct ixgbe_adapter *)dev->data->dev_private;
2692
2693         PMD_INIT_FUNC_TRACE();
2694         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2695
2696         /*
2697          * Validate number of receive descriptors.
2698          * It must not exceed the hardware maximum, must not be below the
2699          * minimum, and must be a multiple of IXGBE_RXD_ALIGN.
2700          */
2701         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2702                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2703                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2704                 return -EINVAL;
2705         }
2706
2707         /* Free memory prior to re-allocation if needed... */
2708         if (dev->data->rx_queues[queue_idx] != NULL) {
2709                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2710                 dev->data->rx_queues[queue_idx] = NULL;
2711         }
2712
2713         /* First allocate the rx queue data structure */
2714         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2715                                  RTE_CACHE_LINE_SIZE, socket_id);
2716         if (rxq == NULL)
2717                 return -ENOMEM;
2718         rxq->mb_pool = mp;
2719         rxq->nb_rx_desc = nb_desc;
2720         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2721         rxq->queue_id = queue_idx;
2722         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2723                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2724         rxq->port_id = dev->data->port_id;
2725         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2726                                                         0 : ETHER_CRC_LEN);
2727         rxq->drop_en = rx_conf->rx_drop_en;
2728         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2729
2730         /*
2731          * The packet type in RX descriptor is different for different NICs.
2732          * Some bits are used by X550 but reserved on other NICs,
2733          * so set a different mask per NIC family.
2734          */
2735         if (hw->mac.type == ixgbe_mac_X550 ||
2736             hw->mac.type == ixgbe_mac_X550EM_x ||
2737             hw->mac.type == ixgbe_mac_X550EM_a ||
2738             hw->mac.type == ixgbe_mac_X550_vf ||
2739             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2740             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2741                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2742         else
2743                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2744
2745         /*
2746          * Allocate RX ring hardware descriptors. A memzone large enough to
2747          * handle the maximum ring size is allocated in order to allow for
2748          * resizing in later calls to the queue setup function.
2749          */
2750         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2751                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2752         if (rz == NULL) {
2753                 ixgbe_rx_queue_release(rxq);
2754                 return -ENOMEM;
2755         }
2756
2757         /*
2758          * Zero init all the descriptors in the ring.
2759          */
2760         memset(rz->addr, 0, RX_RING_SZ);
2761
2762         /*
2763          * Use the VF receive registers (VFRDT/VFRDH) when the device is a virtual function.
2764          */
2765         if (hw->mac.type == ixgbe_mac_82599_vf ||
2766             hw->mac.type == ixgbe_mac_X540_vf ||
2767             hw->mac.type == ixgbe_mac_X550_vf ||
2768             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2769             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2770                 rxq->rdt_reg_addr =
2771                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2772                 rxq->rdh_reg_addr =
2773                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2774         } else {
2775                 rxq->rdt_reg_addr =
2776                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2777                 rxq->rdh_reg_addr =
2778                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2779         }
2780
2781         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2782         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2783
2784         /*
2785          * Certain constraints must be met in order to use the bulk buffer
2786          * allocation Rx burst function. If any Rx queue doesn't meet them,
2787          * the feature is disabled for the whole port.
2788          */
2789         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2790                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2791                                     "preconditions - canceling the feature for "
2792                                     "the whole port[%d]",
2793                              rxq->queue_id, rxq->port_id);
2794                 adapter->rx_bulk_alloc_allowed = false;
2795         }
2796
2797         /*
2798          * Allocate software ring. Allow for space at the end of the
2799          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2800          * function does not access an invalid memory region.
2801          */
2802         len = nb_desc;
2803         if (adapter->rx_bulk_alloc_allowed)
2804                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2805
2806         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2807                                           sizeof(struct ixgbe_rx_entry) * len,
2808                                           RTE_CACHE_LINE_SIZE, socket_id);
2809         if (!rxq->sw_ring) {
2810                 ixgbe_rx_queue_release(rxq);
2811                 return -ENOMEM;
2812         }
2813
2814         /*
2815          * Always allocate this ring, even when it is not going to be needed,
2816          * in order to simplify the code.
2817          *
2818          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2819          * be requested in ixgbe_dev_rx_init(), which is called later from
2820          * dev_start() flow.
2821          */
2822         rxq->sw_sc_ring =
2823                 rte_zmalloc_socket("rxq->sw_sc_ring",
2824                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2825                                    RTE_CACHE_LINE_SIZE, socket_id);
2826         if (!rxq->sw_sc_ring) {
2827                 ixgbe_rx_queue_release(rxq);
2828                 return -ENOMEM;
2829         }
2830
2831         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2832                             "dma_addr=0x%"PRIx64,
2833                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2834                      rxq->rx_ring_phys_addr);
2835
2836         if (!rte_is_power_of_2(nb_desc)) {
2837                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2838                                     "preconditions - canceling the feature for "
2839                                     "the whole port[%d]",
2840                              rxq->queue_id, rxq->port_id);
2841                 adapter->rx_vec_allowed = false;
2842         } else
2843                 ixgbe_rxq_vec_setup(rxq);
2844
2845         dev->data->rx_queues[queue_idx] = rxq;
2846
2847         ixgbe_reset_rx_queue(adapter, rxq);
2848
2849         return 0;
2850 }
2851
2852 uint32_t
2853 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2854 {
2855 #define IXGBE_RXQ_SCAN_INTERVAL 4
2856         volatile union ixgbe_adv_rx_desc *rxdp;
2857         struct ixgbe_rx_queue *rxq;
2858         uint32_t desc = 0;
2859
2860         if (rx_queue_id >= dev->data->nb_rx_queues) {
2861                 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2862                 return 0;
2863         }
2864
2865         rxq = dev->data->rx_queues[rx_queue_id];
2866         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2867
2868         while ((desc < rxq->nb_rx_desc) &&
2869                 (rxdp->wb.upper.status_error &
2870                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2871                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2872                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2873                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2874                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2875                                 desc - rxq->nb_rx_desc]);
2876         }
2877
2878         return desc;
2879 }
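/*
 * Illustrative, application-side sketch (not part of the driver build): the
 * scan above backs rte_eth_rx_queue_count().  The watermark value and helper
 * name are hypothetical, and the count is approximate since the DD bit is
 * only sampled every IXGBE_RXQ_SCAN_INTERVAL (4) descriptors.
 */
#if 0
static void
example_check_rx_backlog(uint8_t port_id, uint16_t queue_id)
{
        /* Descriptors the NIC has completed but the application not yet read */
        int used = rte_eth_rx_queue_count(port_id, queue_id);

        if (used > 256) {
                /* queue is filling up -- poll it more aggressively */
        }
}
#endif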
2880
2881 int
2882 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2883 {
2884         volatile union ixgbe_adv_rx_desc *rxdp;
2885         struct ixgbe_rx_queue *rxq = rx_queue;
2886         uint32_t desc;
2887
2888         if (unlikely(offset >= rxq->nb_rx_desc))
2889                 return 0;
2890         desc = rxq->rx_tail + offset;
2891         if (desc >= rxq->nb_rx_desc)
2892                 desc -= rxq->nb_rx_desc;
2893
2894         rxdp = &rxq->rx_ring[desc];
2895         return !!(rxdp->wb.upper.status_error &
2896                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2897 }
2898
2899 void __attribute__((cold))
2900 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2901 {
2902         unsigned i;
2903         struct ixgbe_adapter *adapter =
2904                 (struct ixgbe_adapter *)dev->data->dev_private;
2905
2906         PMD_INIT_FUNC_TRACE();
2907
2908         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2909                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2910
2911                 if (txq != NULL) {
2912                         txq->ops->release_mbufs(txq);
2913                         txq->ops->reset(txq);
2914                 }
2915         }
2916
2917         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2918                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2919
2920                 if (rxq != NULL) {
2921                         ixgbe_rx_queue_release_mbufs(rxq);
2922                         ixgbe_reset_rx_queue(adapter, rxq);
2923                 }
2924         }
2925 }
2926
2927 void
2928 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2929 {
2930         unsigned i;
2931
2932         PMD_INIT_FUNC_TRACE();
2933
2934         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2935                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2936                 dev->data->rx_queues[i] = NULL;
2937         }
2938         dev->data->nb_rx_queues = 0;
2939
2940         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2941                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2942                 dev->data->tx_queues[i] = NULL;
2943         }
2944         dev->data->nb_tx_queues = 0;
2945 }
2946
2947 /*********************************************************************
2948  *
2949  *  Device RX/TX init functions
2950  *
2951  **********************************************************************/
2952
2953 /**
2954  * Receive Side Scaling (RSS)
2955  * See section 7.1.2.8 in the following document:
2956  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2957  *
2958  * Principles:
2959  * The source and destination IP addresses of the IP header and the source
2960  * and destination ports of TCP/UDP headers, if any, of received packets are
2961  * hashed against a configurable random key to compute a 32-bit RSS hash result.
2962  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2963  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
2964  * RSS output index, which is used as the index of the RX queue where the
2965  * received packet is stored.
2966  * The following output is supplied in the RX write-back descriptor:
2967  *     - 32-bit result of the Microsoft RSS hash function,
2968  *     - 4-bit RSS type field.
2969  */
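/*
 * Worked example: a hash result of 0x12345678 has 0x78 (120) in its seven
 * LSBs, so RETA entry 120 selects the RX queue for that packet.
 */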
2970
2971 /*
2972  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2973  * Used as the default key.
2974  */
2975 static uint8_t rss_intel_key[40] = {
2976         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2977         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2978         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2979         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2980         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2981 };
2982
2983 static void
2984 ixgbe_rss_disable(struct rte_eth_dev *dev)
2985 {
2986         struct ixgbe_hw *hw;
2987         uint32_t mrqc;
2988         uint32_t mrqc_reg;
2989
2990         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2991         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2992         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2993         mrqc &= ~IXGBE_MRQC_RSSEN;
2994         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2995 }
2996
2997 static void
2998 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
2999 {
3000         uint8_t  *hash_key;
3001         uint32_t mrqc;
3002         uint32_t rss_key;
3003         uint64_t rss_hf;
3004         uint16_t i;
3005         uint32_t mrqc_reg;
3006         uint32_t rssrk_reg;
3007
3008         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3009         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3010
3011         hash_key = rss_conf->rss_key;
3012         if (hash_key != NULL) {
3013                 /* Fill in RSS hash key */
3014                 for (i = 0; i < 10; i++) {
3015                         rss_key  = hash_key[(i * 4)];
3016                         rss_key |= hash_key[(i * 4) + 1] << 8;
3017                         rss_key |= hash_key[(i * 4) + 2] << 16;
3018                         rss_key |= hash_key[(i * 4) + 3] << 24;
3019                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3020                 }
3021         }
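        /*
         * Worked example: if rss_key points at the default rss_intel_key
         * above, the first register written is RSSRK[0] = 0xDA565A6D, i.e.
         * bytes 0x6D, 0x5A, 0x56, 0xDA packed little-endian.
         */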
3022
3023         /* Set configured hashing protocols in MRQC register */
3024         rss_hf = rss_conf->rss_hf;
3025         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3026         if (rss_hf & ETH_RSS_IPV4)
3027                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3028         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3029                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3030         if (rss_hf & ETH_RSS_IPV6)
3031                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3032         if (rss_hf & ETH_RSS_IPV6_EX)
3033                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3034         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3035                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3036         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3037                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3038         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3039                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3040         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3041                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3042         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3043                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3044         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3045 }
3046
3047 int
3048 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3049                           struct rte_eth_rss_conf *rss_conf)
3050 {
3051         struct ixgbe_hw *hw;
3052         uint32_t mrqc;
3053         uint64_t rss_hf;
3054         uint32_t mrqc_reg;
3055
3056         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3057
3058         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3059                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3060                         "NIC.");
3061                 return -ENOTSUP;
3062         }
3063         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3064
3065         /*
3066          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3067          *     "RSS enabling cannot be done dynamically while it must be
3068          *      preceded by a software reset"
3069          * Before changing anything, first check that the update RSS operation
3070          * does not attempt to disable RSS, if RSS was enabled at
3071          * initialization time, or does not attempt to enable RSS, if RSS was
3072          * disabled at initialization time.
3073          */
3074         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3075         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3076         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3077                 if (rss_hf != 0) /* Enable RSS */
3078                         return -(EINVAL);
3079                 return 0; /* Nothing to do */
3080         }
3081         /* RSS enabled */
3082         if (rss_hf == 0) /* Disable RSS */
3083                 return -(EINVAL);
3084         ixgbe_hw_rss_hash_set(hw, rss_conf);
3085         return 0;
3086 }
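/*
 * Illustrative, application-side sketch (not part of the driver build):
 * narrowing the hash fields at runtime on a port where RSS was enabled at
 * configuration time.  Passing a conf whose rss_hf reduces to 0 is rejected
 * above with -EINVAL, because RSS cannot be toggled dynamically.
 */
#if 0
static int
example_restrict_rss_to_ipv4(uint8_t port_id)
{
        struct rte_eth_rss_conf conf = {
                .rss_key = NULL,        /* keep the currently programmed key */
                .rss_hf  = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
        };

        return rte_eth_dev_rss_hash_update(port_id, &conf);
}
#endif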
3087
3088 int
3089 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3090                             struct rte_eth_rss_conf *rss_conf)
3091 {
3092         struct ixgbe_hw *hw;
3093         uint8_t *hash_key;
3094         uint32_t mrqc;
3095         uint32_t rss_key;
3096         uint64_t rss_hf;
3097         uint16_t i;
3098         uint32_t mrqc_reg;
3099         uint32_t rssrk_reg;
3100
3101         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3102         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3103         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3104         hash_key = rss_conf->rss_key;
3105         if (hash_key != NULL) {
3106                 /* Return RSS hash key */
3107                 for (i = 0; i < 10; i++) {
3108                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3109                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3110                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3111                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3112                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3113                 }
3114         }
3115
3116         /* Get RSS functions configured in MRQC register */
3117         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3118         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3119                 rss_conf->rss_hf = 0;
3120                 return 0;
3121         }
3122         rss_hf = 0;
3123         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3124                 rss_hf |= ETH_RSS_IPV4;
3125         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3126                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3127         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3128                 rss_hf |= ETH_RSS_IPV6;
3129         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3130                 rss_hf |= ETH_RSS_IPV6_EX;
3131         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3132                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3133         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3134                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3135         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3136                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3137         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3138                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3139         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3140                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3141         rss_conf->rss_hf = rss_hf;
3142         return 0;
3143 }
3144
3145 static void
3146 ixgbe_rss_configure(struct rte_eth_dev *dev)
3147 {
3148         struct rte_eth_rss_conf rss_conf;
3149         struct ixgbe_hw *hw;
3150         uint32_t reta;
3151         uint16_t i;
3152         uint16_t j;
3153         uint16_t sp_reta_size;
3154         uint32_t reta_reg;
3155
3156         PMD_INIT_FUNC_TRACE();
3157         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3158
3159         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3160
3161         /*
3162          * Fill in redirection table
3163          * The byte-swap is needed because NIC registers are in
3164          * little-endian order.
3165          */
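        /*
         * Worked example: with 4 RX queues, the first four iterations build
         * reta == 0x00010203 and rte_bswap32() stores 0x03020100, i.e. RETA
         * entries 0..3 select queues 0..3 in order; the pattern then repeats
         * across the remaining entries.
         */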
3166         reta = 0;
3167         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3168                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3169
3170                 if (j == dev->data->nb_rx_queues)
3171                         j = 0;
3172                 reta = (reta << 8) | j;
3173                 if ((i & 3) == 3)
3174                         IXGBE_WRITE_REG(hw, reta_reg,
3175                                         rte_bswap32(reta));
3176         }
3177
3178         /*
3179          * Configure the RSS key and the RSS protocols used to compute
3180          * the RSS hash of input packets.
3181          */
3182         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3183         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3184                 ixgbe_rss_disable(dev);
3185                 return;
3186         }
3187         if (rss_conf.rss_key == NULL)
3188                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3189         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3190 }
3191
3192 #define NUM_VFTA_REGISTERS 128
3193 #define NIC_RX_BUFFER_SIZE 0x200
3194 #define X550_RX_BUFFER_SIZE 0x180
3195
3196 static void
3197 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3198 {
3199         struct rte_eth_vmdq_dcb_conf *cfg;
3200         struct ixgbe_hw *hw;
3201         enum rte_eth_nb_pools num_pools;
3202         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3203         uint16_t pbsize;
3204         uint8_t nb_tcs; /* number of traffic classes */
3205         int i;
3206
3207         PMD_INIT_FUNC_TRACE();
3208         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3209         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3210         num_pools = cfg->nb_queue_pools;
3211         /* Check we have a valid number of pools */
3212         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3213                 ixgbe_rss_disable(dev);
3214                 return;
3215         }
3216         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3217         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3218
3219         /*
3220          * RXPBSIZE
3221          * split rx buffer up into sections, each for 1 traffic class
3222          */
3223         switch (hw->mac.type) {
3224         case ixgbe_mac_X550:
3225         case ixgbe_mac_X550EM_x:
3226         case ixgbe_mac_X550EM_a:
3227                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3228                 break;
3229         default:
3230                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3231                 break;
3232         }
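        /*
         * Worked numbers, assuming the usual ETH_VMDQ_DCB_NUM_QUEUES of 128
         * and RXPBSIZE units of 1 KB: 16 pools give 128/16 = 8 TCs and
         * pbsize = 512/8 = 64 (KB per TC) on 82599-class parts, while
         * 32 pools give 4 TCs and 128 KB per TC.
         */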
3233         for (i = 0; i < nb_tcs; i++) {
3234                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3235
3236                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3237                 /* clear 10 bits. */
3238                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3239                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3240         }
3241         /* zero alloc all unused TCs */
3242         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3243                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3244
3245                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3246                 /* clear 10 bits. */
3247                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3248         }
3249
3250         /* MRQC: enable vmdq and dcb */
3251         mrqc = (num_pools == ETH_16_POOLS) ?
3252                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3253         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3254
3255         /* PFVTCTL: turn on virtualisation and set the default pool */
3256         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3257         if (cfg->enable_default_pool) {
3258                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3259         } else {
3260                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3261         }
3262
3263         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3264
3265         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3266         queue_mapping = 0;
3267         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3268                 /*
3269                  * mapping is done with 3 bits per priority,
3270                  * so shift by i*3 each time
3271                  */
3272                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3273
3274         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
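        /*
         * Worked example: dcb_tc = {0, 0, 0, 0, 1, 1, 1, 1} maps user
         * priorities 4..7 to TC1, giving queue_mapping =
         * (1 << 12) | (1 << 15) | (1 << 18) | (1 << 21) = 0x249000.
         */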
3275
3276         /* RTRPCS: DCB related */
3277         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3278
3279         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3280         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3281         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3282         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3283
3284         /* VFTA - enable all vlan filters */
3285         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3286                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3287         }
3288
3289         /* VFRE: pool enabling for receive - 16 or 32 */
3290         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3291                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3292
3293         /*
3294          * MPSAR - allow pools to read specific mac addresses
3295          * In this case, all pools should be able to read from mac addr 0
3296          */
3297         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3298         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3299
3300         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3301         for (i = 0; i < cfg->nb_pool_maps; i++) {
3302                 /* set vlan id in VF register and set the valid bit */
3303                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3304                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3305                 /*
3306                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3307                  * pools, we only need to use the first half of the register
3308                  * i.e. bits 0-31
3309                  */
3310                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3311         }
3312 }
3313
3314 /**
3315  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3316  * @dev: pointer to eth_dev structure
3317  * @dcb_config: pointer to ixgbe_dcb_config structure
3318  */
3319 static void
3320 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3321                        struct ixgbe_dcb_config *dcb_config)
3322 {
3323         uint32_t reg;
3324         uint32_t q;
3325         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3326
3327         PMD_INIT_FUNC_TRACE();
3328         if (hw->mac.type != ixgbe_mac_82598EB) {
3329                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3330                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3331                 reg |= IXGBE_RTTDCS_ARBDIS;
3332                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3333
3334                 /* Enable DCB for Tx with 8 TCs */
3335                 if (dcb_config->num_tcs.pg_tcs == 8) {
3336                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3337                 } else {
3338                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3339                 }
3340                 if (dcb_config->vt_mode)
3341                         reg |= IXGBE_MTQC_VT_ENA;
3342                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3343
3344                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3345                         /* Disable drop for all queues in VMDQ mode*/
3346                         for (q = 0; q < 128; q++)
3347                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3348                                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3349                 } else {
3350                         /* Enable drop for all queues in SRIOV mode */
3351                         for (q = 0; q < 128; q++)
3352                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3353                                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT) | IXGBE_QDE_ENABLE));
3354                 }
3355
3356                 /* Enable the Tx desc arbiter */
3357                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3358                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3359                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3360
3361                 /* Enable Security TX Buffer IFG for DCB */
3362                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3363                 reg |= IXGBE_SECTX_DCB;
3364                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3365         }
3366 }
3367
3368 /**
3369  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3370  * @dev: pointer to rte_eth_dev structure
3371  * @dcb_config: pointer to ixgbe_dcb_config structure
3372  */
3373 static void
3374 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3375                         struct ixgbe_dcb_config *dcb_config)
3376 {
3377         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3378                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3379         struct ixgbe_hw *hw =
3380                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3381
3382         PMD_INIT_FUNC_TRACE();
3383         if (hw->mac.type != ixgbe_mac_82598EB)
3384                 /*PF VF Transmit Enable*/
3385                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3386                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3387
3388         /*Configure general DCB TX parameters*/
3389         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3390 }
3391
3392 static void
3393 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3394                         struct ixgbe_dcb_config *dcb_config)
3395 {
3396         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3397                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3398         struct ixgbe_dcb_tc_config *tc;
3399         uint8_t i, j;
3400
3401         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3402         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3403                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3404                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3405         } else {
3406                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3407                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3408         }
3409         /* User Priority to Traffic Class mapping */
3410         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3411                 j = vmdq_rx_conf->dcb_tc[i];
3412                 tc = &dcb_config->tc_config[j];
3413                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3414                                                 (uint8_t)(1 << j);
3415         }
3416 }
3417
3418 static void
3419 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3420                         struct ixgbe_dcb_config *dcb_config)
3421 {
3422         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3423                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3424         struct ixgbe_dcb_tc_config *tc;
3425         uint8_t i, j;
3426
3427         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3428         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3429                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3430                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3431         } else {
3432                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3433                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3434         }
3435
3436         /* User Priority to Traffic Class mapping */
3437         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3438                 j = vmdq_tx_conf->dcb_tc[i];
3439                 tc = &dcb_config->tc_config[j];
3440                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3441                                                 (uint8_t)(1 << j);
3442         }
3443 }
3444
3445 static void
3446 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3447                 struct ixgbe_dcb_config *dcb_config)
3448 {
3449         struct rte_eth_dcb_rx_conf *rx_conf =
3450                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3451         struct ixgbe_dcb_tc_config *tc;
3452         uint8_t i, j;
3453
3454         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3455         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3456
3457         /* User Priority to Traffic Class mapping */
3458         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3459                 j = rx_conf->dcb_tc[i];
3460                 tc = &dcb_config->tc_config[j];
3461                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3462                                                 (uint8_t)(1 << j);
3463         }
3464 }
3465
3466 static void
3467 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3468                 struct ixgbe_dcb_config *dcb_config)
3469 {
3470         struct rte_eth_dcb_tx_conf *tx_conf =
3471                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3472         struct ixgbe_dcb_tc_config *tc;
3473         uint8_t i, j;
3474
3475         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3476         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3477
3478         /* User Priority to Traffic Class mapping */
3479         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3480                 j = tx_conf->dcb_tc[i];
3481                 tc = &dcb_config->tc_config[j];
3482                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3483                                                 (uint8_t)(1 << j);
3484         }
3485 }
3486
3487 /**
3488  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3489  * @hw: pointer to hardware structure
3490  * @dcb_config: pointer to ixgbe_dcb_config structure
3491  */
3492 static void
3493 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3494                struct ixgbe_dcb_config *dcb_config)
3495 {
3496         uint32_t reg;
3497         uint32_t vlanctrl;
3498         uint8_t i;
3499
3500         PMD_INIT_FUNC_TRACE();
3501         /*
3502          * Disable the arbiter before changing parameters
3503          * (always enable recycle mode; WSP)
3504          */
3505         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3506         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3507
3508         if (hw->mac.type != ixgbe_mac_82598EB) {
3509                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3510                 if (dcb_config->num_tcs.pg_tcs == 4) {
3511                         if (dcb_config->vt_mode)
3512                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3513                                         IXGBE_MRQC_VMDQRT4TCEN;
3514                         else {
3515                                 /* whether the mode is DCB or DCB_RSS, just
3516                                  * set MRQE to RSSxTCEN; RSS itself is
3517                                  * controlled by the RSS_FIELD bits
3518                                  */
3519                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3520                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3521                                         IXGBE_MRQC_RTRSS4TCEN;
3522                         }
3523                 }
3524                 if (dcb_config->num_tcs.pg_tcs == 8) {
3525                         if (dcb_config->vt_mode)
3526                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3527                                         IXGBE_MRQC_VMDQRT8TCEN;
3528                         else {
3529                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3530                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3531                                         IXGBE_MRQC_RTRSS8TCEN;
3532                         }
3533                 }
3534
3535                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3536         }
3537
3538         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3539         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3540         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3541         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3542
3543         /* VFTA - enable all vlan filters */
3544         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3545                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3546         }
3547
3548         /*
3549          * Configure Rx packet plane (recycle mode; WSP) and
3550          * enable arbiter
3551          */
3552         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3553         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3554 }
3555
3556 static void
3557 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3558                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3559 {
3560         switch (hw->mac.type) {
3561         case ixgbe_mac_82598EB:
3562                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3563                 break;
3564         case ixgbe_mac_82599EB:
3565         case ixgbe_mac_X540:
3566         case ixgbe_mac_X550:
3567         case ixgbe_mac_X550EM_x:
3568         case ixgbe_mac_X550EM_a:
3569                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3570                                                   tsa, map);
3571                 break;
3572         default:
3573                 break;
3574         }
3575 }
3576
3577 static void
3578 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3579                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3580 {
3581         switch (hw->mac.type) {
3582         case ixgbe_mac_82598EB:
3583                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3584                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3585                 break;
3586         case ixgbe_mac_82599EB:
3587         case ixgbe_mac_X540:
3588         case ixgbe_mac_X550:
3589         case ixgbe_mac_X550EM_x:
3590         case ixgbe_mac_X550EM_a:
3591                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3592                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3593                 break;
3594         default:
3595                 break;
3596         }
3597 }
3598
3599 #define DCB_RX_CONFIG  1
3600 #define DCB_TX_CONFIG  1
3601 #define DCB_TX_PB      1024
3602 /**
3603  * ixgbe_dcb_hw_configure - Enable DCB and configure
3604  * general DCB in VT mode and non-VT mode parameters
3605  * @dev: pointer to rte_eth_dev structure
3606  * @dcb_config: pointer to ixgbe_dcb_config structure
3607  */
3608 static int
3609 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3610                         struct ixgbe_dcb_config *dcb_config)
3611 {
3612         int     ret = 0;
3613         uint8_t i, pfc_en, nb_tcs;
3614         uint16_t pbsize, rx_buffer_size;
3615         uint8_t config_dcb_rx = 0;
3616         uint8_t config_dcb_tx = 0;
3617         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3618         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3619         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3620         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3621         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3622         struct ixgbe_dcb_tc_config *tc;
3623         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3624         struct ixgbe_hw *hw =
3625                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3626
3627         switch (dev->data->dev_conf.rxmode.mq_mode) {
3628         case ETH_MQ_RX_VMDQ_DCB:
3629                 dcb_config->vt_mode = true;
3630                 if (hw->mac.type != ixgbe_mac_82598EB) {
3631                         config_dcb_rx = DCB_RX_CONFIG;
3632                         /*
3633                          * get DCB and VT RX configuration parameters
3634                          * from rte_eth_conf
3635                          */
3636                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3637                         /*Configure general VMDQ and DCB RX parameters*/
3638                         ixgbe_vmdq_dcb_configure(dev);
3639                 }
3640                 break;
3641         case ETH_MQ_RX_DCB:
3642         case ETH_MQ_RX_DCB_RSS:
3643                 dcb_config->vt_mode = false;
3644                 config_dcb_rx = DCB_RX_CONFIG;
3645                 /* Get DCB RX configuration parameters from rte_eth_conf */
3646                 ixgbe_dcb_rx_config(dev, dcb_config);
3647                 /*Configure general DCB RX parameters*/
3648                 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3649                 break;
3650         default:
3651                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3652                 break;
3653         }
3654         switch (dev->data->dev_conf.txmode.mq_mode) {
3655         case ETH_MQ_TX_VMDQ_DCB:
3656                 dcb_config->vt_mode = true;
3657                 config_dcb_tx = DCB_TX_CONFIG;
3658                 /* get DCB and VT TX configuration parameters
3659                  * from rte_eth_conf
3660                  */
3661                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3662                 /*Configure general VMDQ and DCB TX parameters*/
3663                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3664                 break;
3665
3666         case ETH_MQ_TX_DCB:
3667                 dcb_config->vt_mode = false;
3668                 config_dcb_tx = DCB_TX_CONFIG;
3669                 /*get DCB TX configuration parameters from rte_eth_conf*/
3670                 ixgbe_dcb_tx_config(dev, dcb_config);
3671                 /*Configure general DCB TX parameters*/
3672                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3673                 break;
3674         default:
3675                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3676                 break;
3677         }
3678
3679         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3680         /* Unpack map */
3681         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3682         if (nb_tcs == ETH_4_TCS) {
3683                 /* Avoid un-configured priority mapping to TC0 */
3684                 uint8_t j = 4;
3685                 uint8_t mask = 0xFF;
3686
3687                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3688                         mask = (uint8_t)(mask & (~(1 << map[i])));
3689                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3690                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3691                                 map[j++] = i;
3692                         mask >>= 1;
3693                 }
3694                 /* Re-configure 4 TCs BW */
3695                 for (i = 0; i < nb_tcs; i++) {
3696                         tc = &dcb_config->tc_config[i];
3697                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3698                                                 (uint8_t)(100 / nb_tcs);
3699                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3700                                                 (uint8_t)(100 / nb_tcs);
3701                 }
3702                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3703                         tc = &dcb_config->tc_config[i];
3704                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3705                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3706                 }
3707         }
3708
3709         switch (hw->mac.type) {
3710         case ixgbe_mac_X550:
3711         case ixgbe_mac_X550EM_x:
3712         case ixgbe_mac_X550EM_a:
3713                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3714                 break;
3715         default:
3716                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3717                 break;
3718         }
3719
3720         if (config_dcb_rx) {
3721                 /* Set RX buffer size */
3722                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3723                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3724
3725                 for (i = 0; i < nb_tcs; i++) {
3726                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3727                 }
3728                 /* zero alloc all unused TCs */
3729                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3730                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3731                 }
3732         }
3733         if (config_dcb_tx) {
3734                 /* Only an equally distributed Tx packet buffer strategy
3735                  * is supported (see the sketch after this function).
3736                  */
3737                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3738                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3739
3740                 for (i = 0; i < nb_tcs; i++) {
3741                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3742                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3743                 }
3744                 /* Clear unused TCs, if any, to zero buffer size*/
3745                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3746                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3747                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3748                 }
3749         }
3750
3751         /*Calculates traffic class credits*/
3752         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3753                                 IXGBE_DCB_TX_CONFIG);
3754         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3755                                 IXGBE_DCB_RX_CONFIG);
3756
3757         if (config_dcb_rx) {
3758                 /* Unpack CEE standard containers */
3759                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3760                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3761                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3762                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3763                 /* Configure PG(ETS) RX */
3764                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3765         }
3766
3767         if (config_dcb_tx) {
3768                 /* Unpack CEE standard containers */
3769                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3770                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3771                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3772                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3773                 /* Configure PG(ETS) TX */
3774                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3775         }
3776
3777         /*Configure queue statistics registers*/
3778         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3779
3780         /* Check if the PFC is supported */
3781         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3782                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3783                 for (i = 0; i < nb_tcs; i++) {
3784                         /*
3785                         * high_water defaults to 3/4 and low_water to 1/4 of the per-TC buffer,
3786                         * e.g. 48 and 16 with 8 TCs (see the sketch after this function).
3787                         */
3788                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3789                         hw->fc.low_water[i] = pbsize / 4;
3790                         /* Enable pfc for this TC */
3791                         tc = &dcb_config->tc_config[i];
3792                         tc->pfc = ixgbe_dcb_pfc_enabled;
3793                 }
3794                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3795                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3796                         pfc_en &= 0x0F;
3797                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3798         }
3799
3800         return ret;
3801 }
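
/*
 * Illustrative sketch (not part of the driver): how ixgbe_dcb_hw_configure()
 * above partitions the packet buffers and derives the default PFC
 * watermarks.  The numeric comments assume the 512 KB Rx packet buffer of
 * non-X550 parts (NIC_RX_BUFFER_SIZE) and the base-code Tx constants; treat
 * them as an example rather than a guarantee.
 */
static void __attribute__((unused))
ixgbe_dcb_buffer_split_sketch(uint8_t nb_tcs)
{
        uint16_t pbsize, high_water, low_water;
        uint32_t rxpbsize, txpktsize, txpbthresh;

        if (nb_tcs == 0)
                return;

        /* Per-TC Rx packet buffer, in the KB units used by RXPBSIZE. */
        pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
        rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;

        /* Equally distributed Tx packet buffer and its threshold. */
        txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
        txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;

        /*
         * Default PFC watermarks are 3/4 and 1/4 of the per-TC buffer:
         * with nb_tcs == 8, pbsize == 64, so high_water == 48 and
         * low_water == 16, matching the comment in the function above.
         */
        high_water = (pbsize * 3) / 4;
        low_water = pbsize / 4;

        RTE_SET_USED(rxpbsize);
        RTE_SET_USED(txpbthresh);
        RTE_SET_USED(high_water);
        RTE_SET_USED(low_water);
}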
3802
3803 /**
3804  * ixgbe_configure_dcb - Configure DCB  Hardware
3805  * @dev: pointer to rte_eth_dev
3806  */
3807 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3808 {
3809         struct ixgbe_dcb_config *dcb_cfg =
3810                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3811         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3812
3813         PMD_INIT_FUNC_TRACE();
3814
3815         /* check support mq_mode for DCB */
3816         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3817             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3818             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3819                 return;
3820
3821         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
3822                 return;
3823
3824         /** Configure DCB hardware **/
3825         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3826 }
3827
3828 /*
3829  * VMDq is only supported on 10 GbE NICs.
3830  */
3831 static void
3832 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3833 {
3834         struct rte_eth_vmdq_rx_conf *cfg;
3835         struct ixgbe_hw *hw;
3836         enum rte_eth_nb_pools num_pools;
3837         uint32_t mrqc, vt_ctl, vlanctrl;
3838         uint32_t vmolr = 0;
3839         int i;
3840
3841         PMD_INIT_FUNC_TRACE();
3842         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3843         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3844         num_pools = cfg->nb_queue_pools;
3845
3846         ixgbe_rss_disable(dev);
3847
3848         /* MRQC: enable vmdq */
3849         mrqc = IXGBE_MRQC_VMDQEN;
3850         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3851
3852         /* PFVTCTL: turn on virtualisation and set the default pool */
3853         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3854         if (cfg->enable_default_pool)
3855                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3856         else
3857                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3858
3859         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3860
3861         for (i = 0; i < (int)num_pools; i++) {
3862                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3863                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3864         }
3865
3866         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3867         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3868         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3869         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3870
3871         /* VFTA - enable all vlan filters */
3872         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3873                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3874
3875         /* VFRE: pool enabling for receive - 64 */
3876         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3877         if (num_pools == ETH_64_POOLS)
3878                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3879
3880         /*
3881          * MPSAR - allow pools to read specific mac addresses
3882          * In this case, all pools should be able to read from mac addr 0
3883          */
3884         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3885         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3886
3887         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3888         for (i = 0; i < cfg->nb_pool_maps; i++) {
3889                 /* set vlan id in VF register and set the valid bit */
3890                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3891                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3892                 /*
3893                  * Put the allowed pools in the VLVFB register pair: bits 0-31 of
3894                  * VLVFB(2 * i) enable pools 0-31, and bits 0-31 of VLVFB(2 * i + 1)
3895                  * enable pools 32-63 (see the sketch after this function).
3896                  */
3897                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3898                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
3899                                         (cfg->pool_map[i].pools & UINT32_MAX));
3900                 else
3901                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
3902                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
3903
3904         }
3905
3906         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
3907         if (cfg->enable_loop_back) {
3908                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3909                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3910                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3911         }
3912
3913         IXGBE_WRITE_FLUSH(hw);
3914 }
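
/*
 * Illustrative sketch (not part of the driver): how a 64-bit pool bitmap
 * from rte_eth_vmdq_rx_conf maps onto the VLVF/VLVFB registers programmed
 * by ixgbe_vmdq_rx_hw_configure() above.  VLAN filter slot i owns a pair of
 * pool-enable registers: VLVFB(2 * i) covers pools 0-31 and VLVFB(2 * i + 1)
 * covers pools 32-63.
 */
static void __attribute__((unused))
ixgbe_vlvf_split_sketch(uint16_t vlan_id, uint64_t pools,
                        uint32_t *vlvf, uint32_t *vlvfb_lo, uint32_t *vlvfb_hi)
{
        /* Valid bit plus the 12-bit VLAN id, as written to VLVF(i). */
        *vlvf = IXGBE_VLVF_VIEN | (vlan_id & IXGBE_RXD_VLAN_ID_MASK);
        /* Enable bits for pools 0-31 belong in VLVFB(2 * i)... */
        *vlvfb_lo = (uint32_t)(pools & UINT32_MAX);
        /* ...and enable bits for pools 32-63 in VLVFB(2 * i + 1). */
        *vlvfb_hi = (uint32_t)((pools >> 32) & UINT32_MAX);
}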
3915
3916 /*
3917  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3918  * @hw: pointer to hardware structure
3919  */
3920 static void
3921 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3922 {
3923         uint32_t reg;
3924         uint32_t q;
3925
3926         PMD_INIT_FUNC_TRACE();
3927         /*PF VF Transmit Enable*/
3928         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3929         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3930
3931         /* Disable the Tx desc arbiter so that MTQC can be changed */
3932         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3933         reg |= IXGBE_RTTDCS_ARBDIS;
3934         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3935
3936         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3937         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3938
3939         /* Disable drop for all queues */
3940         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3941                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3942                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3943
3944         /* Enable the Tx desc arbiter */
3945         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3946         reg &= ~IXGBE_RTTDCS_ARBDIS;
3947         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3948
3949         IXGBE_WRITE_FLUSH(hw);
3950 }
3951
3952 static int __attribute__((cold))
3953 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
3954 {
3955         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
3956         uint64_t dma_addr;
3957         unsigned int i;
3958
3959         /* Initialize software ring entries */
3960         for (i = 0; i < rxq->nb_rx_desc; i++) {
3961                 volatile union ixgbe_adv_rx_desc *rxd;
3962                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
3963
3964                 if (mbuf == NULL) {
3965                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
3966                                      (unsigned) rxq->queue_id);
3967                         return -ENOMEM;
3968                 }
3969
3970                 rte_mbuf_refcnt_set(mbuf, 1);
3971                 mbuf->next = NULL;
3972                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
3973                 mbuf->nb_segs = 1;
3974                 mbuf->port = rxq->port_id;
3975
3976                 dma_addr =
3977                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
3978                 rxd = &rxq->rx_ring[i];
3979                 rxd->read.hdr_addr = 0;
3980                 rxd->read.pkt_addr = dma_addr;
3981                 rxe[i].mbuf = mbuf;
3982         }
3983
3984         return 0;
3985 }
3986
3987 static int
3988 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
3989 {
3990         struct ixgbe_hw *hw;
3991         uint32_t mrqc;
3992
3993         ixgbe_rss_configure(dev);
3994
3995         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3996
3997         /* MRQC: enable VF RSS */
3998         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
3999         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4000         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4001         case ETH_64_POOLS:
4002                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4003                 break;
4004
4005         case ETH_32_POOLS:
4006                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4007                 break;
4008
4009         default:
4010                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4011                 return -EINVAL;
4012         }
4013
4014         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4015
4016         return 0;
4017 }
4018
4019 static int
4020 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4021 {
4022         struct ixgbe_hw *hw =
4023                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4024
4025         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4026         case ETH_64_POOLS:
4027                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4028                         IXGBE_MRQC_VMDQEN);
4029                 break;
4030
4031         case ETH_32_POOLS:
4032                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4033                         IXGBE_MRQC_VMDQRT4TCEN);
4034                 break;
4035
4036         case ETH_16_POOLS:
4037                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4038                         IXGBE_MRQC_VMDQRT8TCEN);
4039                 break;
4040         default:
4041                 PMD_INIT_LOG(ERR,
4042                         "invalid pool number in IOV mode");
4043                 break;
4044         }
4045         return 0;
4046 }
4047
4048 static int
4049 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4050 {
4051         struct ixgbe_hw *hw =
4052                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4053
4054         if (hw->mac.type == ixgbe_mac_82598EB)
4055                 return 0;
4056
4057         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4058                 /*
4059                  * SRIOV inactive scheme
4060                  * any DCB/RSS w/o VMDq multi-queue setting
4061                  */
4062                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4063                 case ETH_MQ_RX_RSS:
4064                 case ETH_MQ_RX_DCB_RSS:
4065                 case ETH_MQ_RX_VMDQ_RSS:
4066                         ixgbe_rss_configure(dev);
4067                         break;
4068
4069                 case ETH_MQ_RX_VMDQ_DCB:
4070                         ixgbe_vmdq_dcb_configure(dev);
4071                         break;
4072
4073                 case ETH_MQ_RX_VMDQ_ONLY:
4074                         ixgbe_vmdq_rx_hw_configure(dev);
4075                         break;
4076
4077                 case ETH_MQ_RX_NONE:
4078                 default:
4079                         /* if mq_mode is none, disable rss mode.*/
4080                         ixgbe_rss_disable(dev);
4081                         break;
4082                 }
4083         } else {
4084                 /*
4085                  * SRIOV active scheme
4086                  * Support RSS together with VMDq & SRIOV
4087                  */
4088                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4089                 case ETH_MQ_RX_RSS:
4090                 case ETH_MQ_RX_VMDQ_RSS:
4091                         ixgbe_config_vf_rss(dev);
4092                         break;
4093                 case ETH_MQ_RX_VMDQ_DCB:
4094                         ixgbe_vmdq_dcb_configure(dev);
4095                         break;
4096                 /* FIXME if support DCB/RSS together with VMDq & SRIOV */
4097                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4098                         PMD_INIT_LOG(ERR,
4099                                 "Could not support DCB/RSS with VMDq & SRIOV");
4100                         return -1;
4101                 default:
4102                         ixgbe_config_vf_default(dev);
4103                         break;
4104                 }
4105         }
4106
4107         return 0;
4108 }
4109
4110 static int
4111 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4112 {
4113         struct ixgbe_hw *hw =
4114                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4115         uint32_t mtqc;
4116         uint32_t rttdcs;
4117
4118         if (hw->mac.type == ixgbe_mac_82598EB)
4119                 return 0;
4120
4121         /* disable arbiter before setting MTQC */
4122         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4123         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4124         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4125
4126         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4127                 /*
4128                  * SRIOV inactive scheme
4129                  * any DCB w/o VMDq multi-queue setting
4130                  */
4131                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4132                         ixgbe_vmdq_tx_hw_configure(hw);
4133                 else {
4134                         mtqc = IXGBE_MTQC_64Q_1PB;
4135                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4136                 }
4137         } else {
4138                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4139
4140                 /*
4141                  * SRIOV active scheme
4142                  * FIXME if support DCB together with VMDq & SRIOV
4143                  */
4144                 case ETH_64_POOLS:
4145                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4146                         break;
4147                 case ETH_32_POOLS:
4148                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4149                         break;
4150                 case ETH_16_POOLS:
4151                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4152                                 IXGBE_MTQC_8TC_8TQ;
4153                         break;
4154                 default:
4155                         mtqc = IXGBE_MTQC_64Q_1PB;
4156                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4157                 }
4158                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4159         }
4160
4161         /* re-enable arbiter */
4162         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4163         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4164
4165         return 0;
4166 }
4167
4168 /**
4169  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4170  *
4171  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4172  * spec rev. 3.0 chapter 8.2.3.8.13.
4173  *
4174  * @pool Memory pool of the Rx queue
4175  */
4176 static inline uint32_t
4177 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4178 {
4179         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4180
4181         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4182         uint16_t maxdesc =
4183                 IPV4_MAX_PKT_LEN /
4184                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4185
4186         if (maxdesc >= 16)
4187                 return IXGBE_RSCCTL_MAXDESC_16;
4188         else if (maxdesc >= 8)
4189                 return IXGBE_RSCCTL_MAXDESC_8;
4190         else if (maxdesc >= 4)
4191                 return IXGBE_RSCCTL_MAXDESC_4;
4192         else
4193                 return IXGBE_RSCCTL_MAXDESC_1;
4194 }
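
/*
 * Illustrative example (not part of the driver): for an assumed 2 KB mbuf
 * data room (2048 bytes left after RTE_PKTMBUF_HEADROOM on the default
 * mbuf size), ixgbe_get_rscctl_maxdesc() above computes 65535 / 2048 == 31
 * and therefore selects IXGBE_RSCCTL_MAXDESC_16, the largest encodable
 * value, which keeps MAXDESC * SRRCTL.BSIZEPKT below the 64 KB - 1 limit.
 */
static void __attribute__((unused))
ixgbe_rsc_maxdesc_example(void)
{
        const uint16_t buf_size = 2048;                 /* assumed data room */
        uint16_t maxdesc = IPV4_MAX_PKT_LEN / buf_size; /* 65535 / 2048 = 31 */

        /* 31 >= 16, so MAXDESC_16 is chosen... */
        RTE_VERIFY(maxdesc >= 16);
        /* ...and 16 full 2 KB buffers stay under the 64 KB - 1 limit. */
        RTE_VERIFY(16 * buf_size <= IPV4_MAX_PKT_LEN);
}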
4195
4196 /**
4197  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4198  * interrupt
4199  *
4200  * (Taken from FreeBSD tree)
4201  * (yes this is all very magic and confusing :)
4202  *
4203  * @dev port handle
4204  * @entry the register array entry
4205  * @vector the MSIX vector for this queue
4206  * @type RX/TX/MISC
4207  */
4208 static void
4209 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4210 {
4211         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4212         u32 ivar, index;
4213
4214         vector |= IXGBE_IVAR_ALLOC_VAL;
4215
4216         switch (hw->mac.type) {
4217
4218         case ixgbe_mac_82598EB:
4219                 if (type == -1)
4220                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4221                 else
4222                         entry += (type * 64);
4223                 index = (entry >> 2) & 0x1F;
4224                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4225                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4226                 ivar |= (vector << (8 * (entry & 0x3)));
4227                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4228                 break;
4229
4230         case ixgbe_mac_82599EB:
4231         case ixgbe_mac_X540:
4232                 if (type == -1) { /* MISC IVAR */
4233                         index = (entry & 1) * 8;
4234                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4235                         ivar &= ~(0xFF << index);
4236                         ivar |= (vector << index);
4237                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4238                 } else {        /* RX/TX IVARS */
4239                         index = (16 * (entry & 1)) + (8 * type);
4240                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4241                         ivar &= ~(0xFF << index);
4242                         ivar |= (vector << index);
4243                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4244                 }
4245
4246                 break;
4247
4248         default:
4249                 break;
4250         }
4251 }
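
/*
 * Illustrative example (not part of the driver): the 82599/X540 byte-lane
 * arithmetic used by ixgbe_set_ivar() above.  Each 32-bit IVAR register
 * holds four 8-bit entries covering the Rx and Tx causes of two queues, so
 * queue "entry" with cause "type" (0 = Rx, 1 = Tx) lands in register
 * IVAR(entry >> 1) at bit offset 16 * (entry & 1) + 8 * type.
 */
static void __attribute__((unused))
ixgbe_ivar_lane_example(void)
{
        const u8 entry = 5;     /* Rx queue 5, chosen arbitrarily */
        const s8 type = 0;      /* 0 selects the Rx cause */
        u32 reg_idx = entry >> 1;                       /* IVAR(2) */
        u32 lane = (16 * (entry & 1)) + (8 * type);     /* bits 23:16 */

        RTE_VERIFY(reg_idx == 2 && lane == 16);
}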
4252
4253 void __attribute__((cold))
4254 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4255 {
4256         uint16_t i, rx_using_sse;
4257         struct ixgbe_adapter *adapter =
4258                 (struct ixgbe_adapter *)dev->data->dev_private;
4259
4260         /*
4261          * In order to allow Vector Rx there are a few configuration
4262          * conditions to be met and Rx Bulk Allocation should be allowed.
4263          */
4264         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4265             !adapter->rx_bulk_alloc_allowed) {
4266                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4267                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4268                                     "not enabled",
4269                              dev->data->port_id);
4270
4271                 adapter->rx_vec_allowed = false;
4272         }
4273
4274         /*
4275          * Initialize the appropriate LRO callback.
4276          *
4277          * If all queues satisfy the bulk allocation preconditions
4278          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4279          * Otherwise use a single allocation version.
4280          */
4281         if (dev->data->lro) {
4282                 if (adapter->rx_bulk_alloc_allowed) {
4283                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4284                                            "allocation version");
4285                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4286                 } else {
4287                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4288                                            "allocation version");
4289                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4290                 }
4291         } else if (dev->data->scattered_rx) {
4292                 /*
4293                  * Set the non-LRO scattered callback: there are Vector and
4294                  * single allocation versions.
4295                  */
4296                 if (adapter->rx_vec_allowed) {
4297                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4298                                             "callback (port=%d).",
4299                                      dev->data->port_id);
4300
4301                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4302                 } else if (adapter->rx_bulk_alloc_allowed) {
4303                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4304                                            "allocation callback (port=%d).",
4305                                      dev->data->port_id);
4306                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4307                 } else {
4308                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4309                                             "single allocation) "
4310                                             "Scattered Rx callback "
4311                                             "(port=%d).",
4312                                      dev->data->port_id);
4313
4314                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4315                 }
4316         /*
4317          * Below we set "simple" callbacks according to port/queues parameters.
4318          * If parameters allow we are going to choose between the following
4319          * callbacks:
4320          *    - Vector
4321          *    - Bulk Allocation
4322          *    - Single buffer allocation (the simplest one)
4323          */
4324         } else if (adapter->rx_vec_allowed) {
4325                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4326                                     "burst size is no less than %d (port=%d).",
4327                              RTE_IXGBE_DESCS_PER_LOOP,
4328                              dev->data->port_id);
4329
4330                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4331         } else if (adapter->rx_bulk_alloc_allowed) {
4332                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4333                                     "satisfied. Rx Burst Bulk Alloc function "
4334                                     "will be used on port=%d.",
4335                              dev->data->port_id);
4336
4337                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4338         } else {
4339                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4340                                     "satisfied, or Scattered Rx is requested "
4341                                     "(port=%d).",
4342                              dev->data->port_id);
4343
4344                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4345         }
4346
4347         /* Propagate information about RX function choice through all queues. */
4348
4349         rx_using_sse =
4350                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4351                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4352
4353         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4354                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4355
4356                 rxq->rx_using_sse = rx_using_sse;
4357         }
4358 }
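
/*
 * Illustrative summary (not part of the driver) of the Rx burst callback
 * choice made by ixgbe_set_rx_function() above, from highest to lowest
 * priority within each mode: Vector, then Bulk Allocation, then the
 * single-buffer fallback.  The string names below are informal labels,
 * not DPDK symbols.
 */
static void __attribute__((unused))
ixgbe_rx_path_choice_sketch(int lro, int scattered, int vec_ok, int bulk_ok)
{
        const char *path;

        if (lro)
                path = bulk_ok ? "lro bulk alloc" : "lro single alloc";
        else if (scattered)
                path = vec_ok ? "vector scattered" :
                       bulk_ok ? "lro bulk alloc" : "lro single alloc";
        else if (vec_ok)
                path = "vector";
        else
                path = bulk_ok ? "bulk alloc" : "single alloc";

        RTE_SET_USED(path);
}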
4359
4360 /**
4361  * ixgbe_set_rsc - configure RSC related port HW registers
4362  *
4363  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4364  * of 82599 Spec (x540 configuration is virtually the same).
4365  *
4366  * @dev port handle
4367  *
4368  * Returns 0 in case of success or a non-zero error code
4369  */
4370 static int
4371 ixgbe_set_rsc(struct rte_eth_dev *dev)
4372 {
4373         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4374         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4375         struct rte_eth_dev_info dev_info = { 0 };
4376         bool rsc_capable = false;
4377         uint16_t i;
4378         uint32_t rdrxctl;
4379
4380         /* Sanity check */
4381         dev->dev_ops->dev_infos_get(dev, &dev_info);
4382         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4383                 rsc_capable = true;
4384
4385         if (!rsc_capable && rx_conf->enable_lro) {
4386                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4387                                    "support it");
4388                 return -EINVAL;
4389         }
4390
4391         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4392
4393         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4394                 /*
4395                  * According to chapter of 4.6.7.2.1 of the Spec Rev.
4396                  * 3.0 RSC configuration requires HW CRC stripping being
4397                  * enabled. If user requested both HW CRC stripping off
4398                  * and RSC on - return an error.
4399                  */
4400                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4401                                     "is disabled");
4402                 return -EINVAL;
4403         }
4404
4405         /* RFCTL configuration  */
4406         if (rsc_capable) {
4407                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4408
4409                 if (rx_conf->enable_lro)
4410                         /*
4411                          * Since NFS packets coalescing is not supported - clear
4412                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4413                          * enabled.
4414                          */
4415                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4416                                    IXGBE_RFCTL_NFSR_DIS);
4417                 else
4418                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4419
4420                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4421         }
4422
4423         /* If LRO hasn't been requested - we are done here. */
4424         if (!rx_conf->enable_lro)
4425                 return 0;
4426
4427         /* Set RDRXCTL.RSCACKC bit */
4428         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4429         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4430         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4431
4432         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4433         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4434                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4435                 uint32_t srrctl =
4436                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4437                 uint32_t rscctl =
4438                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4439                 uint32_t psrtype =
4440                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4441                 uint32_t eitr =
4442                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4443
4444                 /*
4445                  * ixgbe PMD doesn't support header-split at the moment.
4446                  *
4447                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4448                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4449                  * should be configured even if header split is not
4450                  * enabled. We will configure it to 128 bytes, following the
4451                  * recommendation in the spec.
4452                  */
4453                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4454                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4455                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4456
4457                 /*
4458                  * TODO: Consider setting the Receive Descriptor Minimum
4459                  * Threshold Size for an RSC case. This is not an obviously
4460                  * beneficial option, but one worth considering...
4461                  */
4462
4463                 rscctl |= IXGBE_RSCCTL_RSCEN;
4464                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4465                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4466
4467                 /*
4468                  * RSC: Set ITR interval corresponding to 2K ints/s.
4469                  *
4470                  * Full-sized RSC aggregations for a 10Gb/s link will
4471                  * arrive at about 20K aggregation/s rate.
4472                  *
4473                  * A 2K ints/s rate will cause only 10% of the
4474                  * aggregations to be closed due to interrupt timer
4475                  * expiration when streaming at wire speed.
4476                  *
4477                  * For a sparse streaming case this setting will yield
4478                  * at most 500us latency for a single RSC aggregation.
4479                  */
4480                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4481                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4482
4483                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4484                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4485                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4486                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4487
4488                 /*
4489                  * RSC requires the mapping of the queue to the
4490                  * interrupt vector.
4491                  */
4492                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4493         }
4494
4495         dev->data->lro = 1;
4496
4497         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4498
4499         return 0;
4500 }
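
/*
 * Illustrative arithmetic (not part of the driver) behind the 500 us EITR
 * interval programmed by ixgbe_set_rsc() above: a 500 us timer fires about
 * 2000 times per second, while full 64 KB RSC aggregations on a 10 Gb/s
 * link complete at roughly 19-20 K per second, so only about 10% of
 * aggregations are closed by timer expiration at wire speed.
 */
static void __attribute__((unused))
ixgbe_rsc_itr_sketch(void)
{
        const uint32_t itr_us = 500;
        uint32_t ints_per_sec = 1000000 / itr_us;       /* ~2000 */
        uint32_t aggs_per_sec =
                (uint32_t)((10000000000ULL / 8) / 65536); /* ~19073 */

        /* ~2000 of ~19073 aggregations/s: roughly one in ten. */
        RTE_SET_USED(ints_per_sec);
        RTE_SET_USED(aggs_per_sec);
}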
4501
4502 /*
4503  * Initializes Receive Unit.
4504  */
4505 int __attribute__((cold))
4506 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4507 {
4508         struct ixgbe_hw     *hw;
4509         struct ixgbe_rx_queue *rxq;
4510         uint64_t bus_addr;
4511         uint32_t rxctrl;
4512         uint32_t fctrl;
4513         uint32_t hlreg0;
4514         uint32_t maxfrs;
4515         uint32_t srrctl;
4516         uint32_t rdrxctl;
4517         uint32_t rxcsum;
4518         uint16_t buf_size;
4519         uint16_t i;
4520         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4521         int rc;
4522
4523         PMD_INIT_FUNC_TRACE();
4524         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4525
4526         /*
4527          * Make sure receives are disabled while setting
4528          * up the RX context (registers, descriptor rings, etc.).
4529          */
4530         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4531         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4532
4533         /* Enable receipt of broadcast frames */
4534         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4535         fctrl |= IXGBE_FCTRL_BAM;
4536         fctrl |= IXGBE_FCTRL_DPF;
4537         fctrl |= IXGBE_FCTRL_PMCF;
4538         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4539
4540         /*
4541          * Configure CRC stripping, if any.
4542          */
4543         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4544         if (rx_conf->hw_strip_crc)
4545                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4546         else
4547                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4548
4549         /*
4550          * Configure jumbo frame support, if any.
4551          */
4552         if (rx_conf->jumbo_frame == 1) {
4553                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4554                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4555                 maxfrs &= 0x0000FFFF;
4556                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4557                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4558         } else
4559                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4560
4561         /*
4562          * If loopback mode is configured for 82599, set LPBK bit.
4563          */
4564         if (hw->mac.type == ixgbe_mac_82599EB &&
4565                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4566                 hlreg0 |= IXGBE_HLREG0_LPBK;
4567         else
4568                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4569
4570         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4571
4572         /* Setup RX queues */
4573         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4574                 rxq = dev->data->rx_queues[i];
4575
4576                 /*
4577                  * Reset crc_len in case it was changed after queue setup by a
4578                  * call to configure.
4579                  */
4580                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4581
4582                 /* Setup the Base and Length of the Rx Descriptor Rings */
4583                 bus_addr = rxq->rx_ring_phys_addr;
4584                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4585                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4586                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4587                                 (uint32_t)(bus_addr >> 32));
4588                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4589                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4590                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4591                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4592
4593                 /* Configure the SRRCTL register */
4594 #ifdef RTE_HEADER_SPLIT_ENABLE
4595                 /*
4596                  * Configure Header Split
4597                  */
4598                 if (rx_conf->header_split) {
4599                         if (hw->mac.type == ixgbe_mac_82599EB) {
4600                                 /* Must setup the PSRTYPE register */
4601                                 uint32_t psrtype;
4602
4603                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4604                                         IXGBE_PSRTYPE_UDPHDR   |
4605                                         IXGBE_PSRTYPE_IPV4HDR  |
4606                                         IXGBE_PSRTYPE_IPV6HDR;
4607                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4608                         }
4609                         srrctl = ((rx_conf->split_hdr_size <<
4610                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4611                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4612                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4613                 } else
4614 #endif
4615                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4616
4617                 /* Set if packets are dropped when no descriptors available */
4618                 if (rxq->drop_en)
4619                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4620
4621                 /*
4622                  * Configure the RX buffer size in the BSIZEPACKET field of
4623                  * the SRRCTL register of the queue.
4624                  * The value is in 1 KB resolution. Valid values can be from
4625                  * 1 KB to 16 KB.
4626                  */
4627                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4628                         RTE_PKTMBUF_HEADROOM);
4629                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4630                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4631
4632                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4633
4634                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4635                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4636
4637                 /* Add the dual VLAN tag length so QinQ frames are accounted for */
4638                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4639                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4640                         dev->data->scattered_rx = 1;
4641         }
4642
4643         if (rx_conf->enable_scatter)
4644                 dev->data->scattered_rx = 1;
4645
4646         /*
4647          * Device configured with multiple RX queues.
4648          */
4649         ixgbe_dev_mq_rx_configure(dev);
4650
4651         /*
4652          * Setup the Checksum Register.
4653          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4654          * Enable IP/L4 checksum computation by hardware if requested to do so.
4655          */
4656         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4657         rxcsum |= IXGBE_RXCSUM_PCSD;
4658         if (rx_conf->hw_ip_checksum)
4659                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4660         else
4661                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4662
4663         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4664
4665         if (hw->mac.type == ixgbe_mac_82599EB ||
4666             hw->mac.type == ixgbe_mac_X540) {
4667                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4668                 if (rx_conf->hw_strip_crc)
4669                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4670                 else
4671                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4672                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4673                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4674         }
4675
4676         rc = ixgbe_set_rsc(dev);
4677         if (rc)
4678                 return rc;
4679
4680         ixgbe_set_rx_function(dev);
4681
4682         return 0;
4683 }
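
/*
 * Illustrative example (not part of the driver): the SRRCTL.BSIZEPKT
 * programming done by ixgbe_dev_rx_init() above, assuming a typical mbuf
 * with 2048 bytes of data room left after RTE_PKTMBUF_HEADROOM.  BSIZEPKT
 * is in 1 KB units, so the effective Rx buffer is 2 KB, and any frame that
 * (with two VLAN tags) exceeds it forces scattered Rx.
 */
static void __attribute__((unused))
ixgbe_srrctl_bsizepkt_example(void)
{
        const uint16_t buf_size = 2048;         /* assumed data room */
        uint32_t srrctl = (buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
                          IXGBE_SRRCTL_BSIZEPKT_MASK;   /* field value 2 */
        uint16_t effective = (uint16_t)((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT); /* 2048 */

        /* A 9000-byte jumbo frame cannot fit, so scattered Rx is enabled. */
        RTE_VERIFY(9000 + 2 * IXGBE_VLAN_TAG_SIZE > effective);
}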
4684
4685 /*
4686  * Initializes Transmit Unit.
4687  */
4688 void __attribute__((cold))
4689 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4690 {
4691         struct ixgbe_hw     *hw;
4692         struct ixgbe_tx_queue *txq;
4693         uint64_t bus_addr;
4694         uint32_t hlreg0;
4695         uint32_t txctrl;
4696         uint16_t i;
4697
4698         PMD_INIT_FUNC_TRACE();
4699         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4700
4701         /* Enable TX CRC (checksum offload requirement) and hw padding
4702          * (TSO requirement)
4703          */
4704         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4705         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4706         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4707
4708         /* Setup the Base and Length of the Tx Descriptor Rings */
4709         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4710                 txq = dev->data->tx_queues[i];
4711
4712                 bus_addr = txq->tx_ring_phys_addr;
4713                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4714                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4715                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4716                                 (uint32_t)(bus_addr >> 32));
4717                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4718                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4719                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4720                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4721                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4722
4723                 /*
4724                  * Disable Tx Head Writeback RO bit, since this hoses
4725                  * bookkeeping if things aren't delivered in order.
4726                  */
4727                 switch (hw->mac.type) {
4728                 case ixgbe_mac_82598EB:
4729                         txctrl = IXGBE_READ_REG(hw,
4730                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4731                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4732                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4733                                         txctrl);
4734                         break;
4735
4736                 case ixgbe_mac_82599EB:
4737                 case ixgbe_mac_X540:
4738                 case ixgbe_mac_X550:
4739                 case ixgbe_mac_X550EM_x:
4740                 case ixgbe_mac_X550EM_a:
4741                 default:
4742                         txctrl = IXGBE_READ_REG(hw,
4743                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4744                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4745                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4746                                         txctrl);
4747                         break;
4748                 }
4749         }
4750
4751         /* Device configured with multiple TX queues. */
4752         ixgbe_dev_mq_tx_configure(dev);
4753 }
4754
4755 /*
4756  * Set up link for 82599 loopback mode Tx->Rx.
4757  */
4758 static inline void __attribute__((cold))
4759 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4760 {
4761         PMD_INIT_FUNC_TRACE();
4762
4763         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4764                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4765                                 IXGBE_SUCCESS) {
4766                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4767                         /* ignore error */
4768                         return;
4769                 }
4770         }
4771
4772         /* Restart link */
4773         IXGBE_WRITE_REG(hw,
4774                         IXGBE_AUTOC,
4775                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4776         ixgbe_reset_pipeline_82599(hw);
4777
4778         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4779         msec_delay(50);
4780 }
4781
4782
4783 /*
4784  * Start Transmit and Receive Units.
4785  */
4786 int __attribute__((cold))
4787 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4788 {
4789         struct ixgbe_hw     *hw;
4790         struct ixgbe_tx_queue *txq;
4791         struct ixgbe_rx_queue *rxq;
4792         uint32_t txdctl;
4793         uint32_t dmatxctl;
4794         uint32_t rxctrl;
4795         uint16_t i;
4796         int ret = 0;
4797
4798         PMD_INIT_FUNC_TRACE();
4799         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4800
4801         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4802                 txq = dev->data->tx_queues[i];
4803                 /* Setup Transmit Threshold Registers */
4804                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4805                 txdctl |= txq->pthresh & 0x7F;
4806                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4807                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4808                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4809         }
4810
4811         if (hw->mac.type != ixgbe_mac_82598EB) {
4812                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4813                 dmatxctl |= IXGBE_DMATXCTL_TE;
4814                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4815         }
4816
4817         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4818                 txq = dev->data->tx_queues[i];
4819                 if (!txq->tx_deferred_start) {
4820                         ret = ixgbe_dev_tx_queue_start(dev, i);
4821                         if (ret < 0)
4822                                 return ret;
4823                 }
4824         }
4825
4826         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4827                 rxq = dev->data->rx_queues[i];
4828                 if (!rxq->rx_deferred_start) {
4829                         ret = ixgbe_dev_rx_queue_start(dev, i);
4830                         if (ret < 0)
4831                                 return ret;
4832                 }
4833         }
4834
4835         /* Enable Receive engine */
4836         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4837         if (hw->mac.type == ixgbe_mac_82598EB)
4838                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4839         rxctrl |= IXGBE_RXCTRL_RXEN;
4840         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4841
4842         /* If loopback mode is enabled for 82599, set up the link accordingly */
4843         if (hw->mac.type == ixgbe_mac_82599EB &&
4844                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4845                 ixgbe_setup_loopback_link_82599(hw);
4846
4847         return 0;
4848 }
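
/*
 * Illustrative example (not part of the driver): how ixgbe_dev_rxtx_start()
 * above packs the Tx thresholds into TXDCTL.  Each threshold is a 7-bit
 * field: PTHRESH in bits 6:0, HTHRESH in bits 14:8 and WTHRESH in bits
 * 22:16.
 */
static uint32_t __attribute__((unused))
ixgbe_txdctl_thresh_sketch(uint8_t pthresh, uint8_t hthresh, uint8_t wthresh)
{
        uint32_t txdctl = 0;

        txdctl |= pthresh & 0x7F;
        txdctl |= (uint32_t)(hthresh & 0x7F) << 8;
        txdctl |= (uint32_t)(wthresh & 0x7F) << 16;

        return txdctl;
}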
4849
4850 /*
4851  * Start Receive Units for specified queue.
4852  */
4853 int __attribute__((cold))
4854 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4855 {
4856         struct ixgbe_hw     *hw;
4857         struct ixgbe_rx_queue *rxq;
4858         uint32_t rxdctl;
4859         int poll_ms;
4860
4861         PMD_INIT_FUNC_TRACE();
4862         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4863
4864         if (rx_queue_id < dev->data->nb_rx_queues) {
4865                 rxq = dev->data->rx_queues[rx_queue_id];
4866
4867                 /* Allocate buffers for descriptor rings */
4868                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4869                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4870                                      rx_queue_id);
4871                         return -1;
4872                 }
4873                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4874                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4875                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4876
4877                 /* Wait until RX Enable ready */
4878                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4879                 do {
4880                         rte_delay_ms(1);
4881                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4882                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4883                 if (!poll_ms)
4884                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4885                                      rx_queue_id);
4886                 rte_wmb();
4887                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4888                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4889                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4890         } else
4891                 return -1;
4892
4893         return 0;
4894 }
4895
4896 /*
4897  * Stop Receive Units for specified queue.
4898  */
4899 int __attribute__((cold))
4900 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4901 {
4902         struct ixgbe_hw     *hw;
4903         struct ixgbe_adapter *adapter =
4904                 (struct ixgbe_adapter *)dev->data->dev_private;
4905         struct ixgbe_rx_queue *rxq;
4906         uint32_t rxdctl;
4907         int poll_ms;
4908
4909         PMD_INIT_FUNC_TRACE();
4910         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4911
4912         if (rx_queue_id < dev->data->nb_rx_queues) {
4913                 rxq = dev->data->rx_queues[rx_queue_id];
4914
4915                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4916                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4917                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4918
4919                 /* Wait until RX Enable bit clear */
4920                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4921                 do {
4922                         rte_delay_ms(1);
4923                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4924                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4925                 if (!poll_ms)
4926                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4927                                      rx_queue_id);
4928
4929                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4930
4931                 ixgbe_rx_queue_release_mbufs(rxq);
4932                 ixgbe_reset_rx_queue(adapter, rxq);
4933                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4934         } else
4935                 return -1;
4936
4937         return 0;
4938 }
4939
4940
4941 /*
4942  * Start Transmit Units for specified queue.
4943  */
4944 int __attribute__((cold))
4945 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4946 {
4947         struct ixgbe_hw     *hw;
4948         struct ixgbe_tx_queue *txq;
4949         uint32_t txdctl;
4950         int poll_ms;
4951
4952         PMD_INIT_FUNC_TRACE();
4953         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4954
4955         if (tx_queue_id < dev->data->nb_tx_queues) {
4956                 txq = dev->data->tx_queues[tx_queue_id];
4957                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4958                 txdctl |= IXGBE_TXDCTL_ENABLE;
4959                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4960
4961                 /* Wait until TX Enable ready */
4962                 if (hw->mac.type == ixgbe_mac_82599EB) {
4963                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4964                         do {
4965                                 rte_delay_ms(1);
4966                                 txdctl = IXGBE_READ_REG(hw,
4967                                         IXGBE_TXDCTL(txq->reg_idx));
4968                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4969                         if (!poll_ms)
4970                                 PMD_INIT_LOG(ERR, "Could not enable "
4971                                              "Tx Queue %d", tx_queue_id);
4972                 }
4973                 rte_wmb();
4974                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4975                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4976                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4977         } else
4978                 return -1;
4979
4980         return 0;
4981 }

/*
 * Stop Transmit Units for specified queue.
 */
int __attribute__((cold))
ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
        struct ixgbe_hw     *hw;
        struct ixgbe_tx_queue *txq;
        uint32_t txdctl;
        uint32_t txtdh, txtdt;
        int poll_ms;

        PMD_INIT_FUNC_TRACE();
        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        if (tx_queue_id >= dev->data->nb_tx_queues)
                return -1;

        txq = dev->data->tx_queues[tx_queue_id];

        /* Wait until the TX queue is empty */
        if (hw->mac.type == ixgbe_mac_82599EB) {
                poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
                do {
                        rte_delay_us(RTE_IXGBE_WAIT_100_US);
                        txtdh = IXGBE_READ_REG(hw,
                                               IXGBE_TDH(txq->reg_idx));
                        txtdt = IXGBE_READ_REG(hw,
                                               IXGBE_TDT(txq->reg_idx));
                } while (--poll_ms && (txtdh != txtdt));
                if (!poll_ms)
                        PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
                                     "when stopping.", tx_queue_id);
        }

        txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
        txdctl &= ~IXGBE_TXDCTL_ENABLE;
        IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);

        /* Wait until the TX Enable bit is cleared */
        if (hw->mac.type == ixgbe_mac_82599EB) {
                poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
                do {
                        rte_delay_ms(1);
                        txdctl = IXGBE_READ_REG(hw,
                                                IXGBE_TXDCTL(txq->reg_idx));
                } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
                if (!poll_ms)
                        PMD_INIT_LOG(ERR, "Could not disable "
                                     "Tx Queue %d", tx_queue_id);
        }

        if (txq->ops != NULL) {
                txq->ops->release_mbufs(txq);
                txq->ops->reset(txq);
        }
        dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;

        return 0;
}
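
/*
 * Both queue start and stop above use the same poll-with-timeout pattern:
 * write a control bit, then re-read the register roughly once per
 * millisecond until the expected state is observed or the poll budget
 * (RTE_IXGBE_REGISTER_POLL_WAIT_10_MS iterations) is exhausted. A minimal
 * sketch of that pattern; condition_met() and reg_offset are hypothetical
 * names used only for illustration:
 *
 *     poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
 *     do {
 *             rte_delay_ms(1);
 *             reg = IXGBE_READ_REG(hw, reg_offset);
 *     } while (--poll_ms && !condition_met(reg));
 *     if (!poll_ms)
 *             PMD_INIT_LOG(ERR, "register poll timed out");
 */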

void
ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
        struct rte_eth_rxq_info *qinfo)
{
        struct ixgbe_rx_queue *rxq;

        rxq = dev->data->rx_queues[queue_id];

        qinfo->mp = rxq->mb_pool;
        qinfo->scattered_rx = dev->data->scattered_rx;
        qinfo->nb_desc = rxq->nb_rx_desc;

        qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
        qinfo->conf.rx_drop_en = rxq->drop_en;
        qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
}

void
ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
        struct rte_eth_txq_info *qinfo)
{
        struct ixgbe_tx_queue *txq;

        txq = dev->data->tx_queues[queue_id];

        qinfo->nb_desc = txq->nb_tx_desc;

        qinfo->conf.tx_thresh.pthresh = txq->pthresh;
        qinfo->conf.tx_thresh.hthresh = txq->hthresh;
        qinfo->conf.tx_thresh.wthresh = txq->wthresh;

        qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
        qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
        qinfo->conf.txq_flags = txq->txq_flags;
        qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
}
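
/*
 * Illustrative sketch (not part of the driver): applications reach the two
 * helpers above through the generic ethdev queue-info API. port_id and
 * queue_id are placeholders for the application's own values.
 *
 *     struct rte_eth_rxq_info rx_info;
 *     struct rte_eth_txq_info tx_info;
 *
 *     if (rte_eth_rx_queue_info_get(port_id, queue_id, &rx_info) == 0)
 *             printf("Rx descriptors: %u\n", rx_info.nb_desc);
 *     if (rte_eth_tx_queue_info_get(port_id, queue_id, &tx_info) == 0)
 *             printf("Tx free threshold: %u\n", tx_info.conf.tx_free_thresh);
 */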

/*
 * [VF] Initializes Receive Unit.
 */
int __attribute__((cold))
ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
{
        struct ixgbe_hw     *hw;
        struct ixgbe_rx_queue *rxq;
        uint64_t bus_addr;
        uint32_t srrctl, psrtype = 0;
        uint16_t buf_size;
        uint16_t i;
        int ret;

        PMD_INIT_FUNC_TRACE();
        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
                PMD_INIT_LOG(ERR, "The number of Rx queues is invalid: "
                        "it must be a power of 2");
                return -1;
        }

        if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
                PMD_INIT_LOG(ERR, "The number of Rx queues is invalid: "
                        "it must be less than or equal to %d",
                        hw->mac.max_rx_queues);
                return -1;
        }

        /*
         * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
         * disables VF reception of packets if the PF MTU is > 1500.
         * This is done to deal with an 82599 limitation that forces the PF
         * and all VFs to share the same MTU.
         * The PF driver re-enables VF reception only when the VF driver
         * issues an IXGBE_VF_SET_LPE request.
         * In the meantime, the VF device cannot be used, even if the VF
         * driver and the Guest VM network stack are ready to accept packets
         * with a size up to the PF MTU.
         * As a workaround for this PF behaviour, always call
         * ixgbevf_rlpml_set_vf, even if jumbo frames are not used, so that
         * VF packet reception works in all cases.
         */
        ixgbevf_rlpml_set_vf(hw,
                (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);

        /* Setup RX queues */
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                rxq = dev->data->rx_queues[i];

                /* Allocate buffers for descriptor rings */
                ret = ixgbe_alloc_rx_queue_mbufs(rxq);
                if (ret)
                        return ret;

                /* Setup the Base and Length of the Rx Descriptor Rings */
                bus_addr = rxq->rx_ring_phys_addr;

                IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
                                (uint32_t)(bus_addr & 0x00000000ffffffffULL));
                IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
                                (uint32_t)(bus_addr >> 32));
                IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
                                rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
                IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
                IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);

                /* Configure the SRRCTL register */
#ifdef RTE_HEADER_SPLIT_ENABLE
                /*
                 * Configure Header Split
                 */
                if (dev->data->dev_conf.rxmode.header_split) {
                        srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
                                IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
                                IXGBE_SRRCTL_BSIZEHDR_MASK);
                        srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
                } else
#endif
                        srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;

                /* Set if packets are to be dropped when no descriptors are available */
                if (rxq->drop_en)
                        srrctl |= IXGBE_SRRCTL_DROP_EN;

                /*
                 * Configure the RX buffer size in the BSIZEPACKET field of
                 * the SRRCTL register of the queue.
                 * The value is in 1 KB resolution. Valid values can be from
                 * 1 KB to 16 KB.
                 */
                buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
                        RTE_PKTMBUF_HEADROOM);
                srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
                           IXGBE_SRRCTL_BSIZEPKT_MASK);
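                /*
                 * Worked example (assuming a pool created with the common
                 * RTE_MBUF_DEFAULT_BUF_SIZE, i.e. a 2048 + 128 byte data
                 * room): buf_size = 2176 - 128 = 2048, and
                 * 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT (10) = 2, i.e. a 2 KB
                 * buffer size in the 1 KB-granular BSIZEPACKET field.
                 */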

                /*
                 * VF modification: write the virtual function SRRCTL register
                 */
                IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);

                buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
                                       IXGBE_SRRCTL_BSIZEPKT_SHIFT);

                if (dev->data->dev_conf.rxmode.enable_scatter ||
                    /* Add room for two VLAN tags to support dual VLAN */
                    (dev->data->dev_conf.rxmode.max_rx_pkt_len +
                                2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
                        if (!dev->data->scattered_rx)
                                PMD_INIT_LOG(DEBUG, "forcing scatter mode");
                        dev->data->scattered_rx = 1;
                }
        }

#ifdef RTE_HEADER_SPLIT_ENABLE
        if (dev->data->dev_conf.rxmode.header_split)
                /* Must setup the PSRTYPE register */
                psrtype = IXGBE_PSRTYPE_TCPHDR |
                        IXGBE_PSRTYPE_UDPHDR   |
                        IXGBE_PSRTYPE_IPV4HDR  |
                        IXGBE_PSRTYPE_IPV6HDR;
#endif

        /* Set RQPL for VF RSS according to the max Rx queue count */
        psrtype |= (dev->data->nb_rx_queues >> 1) <<
                IXGBE_PSRTYPE_RQPL_SHIFT;
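        /*
         * Illustrative note: with, e.g., 4 VF Rx queues the statement above
         * writes 4 >> 1 = 2 into the RQPL field of VFPSRTYPE. This only
         * shows the arithmetic; the RQPL encoding itself is defined by the
         * 82599 datasheet.
         */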
        IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);

        ixgbe_set_rx_function(dev);

        return 0;
}
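
/*
 * Illustrative sketch (not part of the driver): per the checks above, an
 * application driving an ixgbevf port should configure a power-of-2 number
 * of Rx queues no larger than hw->mac.max_rx_queues. port_id and the
 * rte_eth_conf contents below are placeholders.
 *
 *     struct rte_eth_conf conf;
 *
 *     memset(&conf, 0, sizeof(conf));
 *     if (rte_eth_dev_configure(port_id, 4, 4, &conf) != 0)
 *             printf("cannot configure port %u\n", port_id);
 */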

/*
 * [VF] Initializes Transmit Unit.
 */
void __attribute__((cold))
ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
{
        struct ixgbe_hw     *hw;
        struct ixgbe_tx_queue *txq;
        uint64_t bus_addr;
        uint32_t txctrl;
        uint16_t i;

        PMD_INIT_FUNC_TRACE();
        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        /* Setup the Base and Length of the Tx Descriptor Rings */
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                txq = dev->data->tx_queues[i];
                bus_addr = txq->tx_ring_phys_addr;
                IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
                                (uint32_t)(bus_addr & 0x00000000ffffffffULL));
                IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
                                (uint32_t)(bus_addr >> 32));
                IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
                                txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
                /* Setup the HW Tx Head and Tail descriptor pointers */
                IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
                IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);

                /*
                 * Disable the Tx Head Writeback relaxed-ordering (RO) bit,
                 * since it breaks the driver's bookkeeping if writebacks
                 * are not delivered in order.
                 */
                txctrl = IXGBE_READ_REG(hw,
                                IXGBE_VFDCA_TXCTRL(i));
                txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
                IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
                                txctrl);
        }
}

/*
 * [VF] Start Transmit and Receive Units.
 */
void __attribute__((cold))
ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
{
        struct ixgbe_hw     *hw;
        struct ixgbe_tx_queue *txq;
        struct ixgbe_rx_queue *rxq;
        uint32_t txdctl;
        uint32_t rxdctl;
        uint16_t i;
        int poll_ms;

        PMD_INIT_FUNC_TRACE();
        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                txq = dev->data->tx_queues[i];
                /* Setup Transmit Threshold Registers */
                txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
                txdctl |= txq->pthresh & 0x7F;
                txdctl |= ((txq->hthresh & 0x7F) << 8);
                txdctl |= ((txq->wthresh & 0x7F) << 16);
                IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
                txdctl |= IXGBE_TXDCTL_ENABLE;
                IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);

                /* Wait until TX Enable ready */
                poll_ms = 10;
                do {
                        rte_delay_ms(1);
                        txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
                } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
                if (!poll_ms)
                        PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                rxq = dev->data->rx_queues[i];

                rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
                rxdctl |= IXGBE_RXDCTL_ENABLE;
                IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);

                /* Wait until RX Enable ready */
                poll_ms = 10;
                do {
                        rte_delay_ms(1);
                        rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
                } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
                if (!poll_ms)
                        PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
                rte_wmb();
                IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
        }
}
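
/*
 * Illustrative sketch (not part of the driver): ixgbevf_dev_rxtx_start() is
 * invoked from the VF dev_start callback, so from the application's point
 * of view the usual ethdev bring-up sequence is enough. port_id, the queue
 * counts, socket_id and mb_pool below are placeholders.
 *
 *     rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
 *     for (q = 0; q < nb_rxq; q++)
 *             rte_eth_rx_queue_setup(port_id, q, 512, socket_id, NULL, mb_pool);
 *     for (q = 0; q < nb_txq; q++)
 *             rte_eth_tx_queue_setup(port_id, q, 512, socket_id, NULL);
 *     rte_eth_dev_start(port_id);    // ends up starting the VF Rx/Tx units
 */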

/* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
int __attribute__((weak))
ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
{
        return -1;
}

uint16_t __attribute__((weak))
ixgbe_recv_pkts_vec(
        void __rte_unused *rx_queue,
        struct rte_mbuf __rte_unused **rx_pkts,
        uint16_t __rte_unused nb_pkts)
{
        return 0;
}

uint16_t __attribute__((weak))
ixgbe_recv_scattered_pkts_vec(
        void __rte_unused *rx_queue,
        struct rte_mbuf __rte_unused **rx_pkts,
        uint16_t __rte_unused nb_pkts)
{
        return 0;
}

int __attribute__((weak))
ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
{
        return -1;
}
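
/*
 * The stubs above rely on GCC/clang weak symbols: when the vector Rx files
 * are compiled in (CONFIG_RTE_IXGBE_INC_VECTOR=y), their strong definitions
 * override these at link time; otherwise the stubs satisfy the linker and
 * ixgbe_rx_vec_dev_conf_condition_check() returning -1 keeps the driver on
 * the scalar Rx path. A minimal, generic sketch of the pattern, with
 * hypothetical names that are not part of this driver:
 *
 *     // default.c
 *     int __attribute__((weak)) fast_path_supported(void) { return 0; }
 *
 *     // optimized.c (only built when the optimized path is enabled)
 *     int fast_path_supported(void) { return 1; }
 *
 * The linker picks the strong definition from optimized.c when that object
 * is present and silently falls back to the weak one otherwise.
 */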