deb_dpdk.git / drivers/net/ixgbe/ixgbe_rxtx.c (Imported Upstream version 16.11.2)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73
74 #include "ixgbe_logs.h"
75 #include "base/ixgbe_api.h"
76 #include "base/ixgbe_vf.h"
77 #include "ixgbe_ethdev.h"
78 #include "base/ixgbe_dcb.h"
79 #include "base/ixgbe_common.h"
80 #include "ixgbe_rxtx.h"
81
82 /* Bit mask of the offload flags that require building a TX context descriptor */
83 #define IXGBE_TX_OFFLOAD_MASK (                  \
84                 PKT_TX_VLAN_PKT |                \
85                 PKT_TX_IP_CKSUM |                \
86                 PKT_TX_L4_MASK |                 \
87                 PKT_TX_TCP_SEG |                 \
88                 PKT_TX_OUTER_IP_CKSUM)
89
90 #if 1
91 #define RTE_PMD_USE_PREFETCH
92 #endif
93
94 #ifdef RTE_PMD_USE_PREFETCH
95 /*
96  * Prefetch a cache line into all cache levels.
97  */
98 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
99 #else
100 #define rte_ixgbe_prefetch(p)   do {} while (0)
101 #endif
102
103 /*********************************************************************
104  *
105  *  TX functions
106  *
107  **********************************************************************/
108
109 /*
110  * Check for descriptors with their DD bit set and free mbufs.
111  * Return the total number of buffers freed.
112  */
113 static inline int __attribute__((always_inline))
114 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
115 {
116         struct ixgbe_tx_entry *txep;
117         uint32_t status;
118         int i, nb_free = 0;
119         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
120
121         /* check DD bit on threshold descriptor */
122         status = txq->tx_ring[txq->tx_next_dd].wb.status;
123         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
124                 return 0;
125
126         /*
127          * first buffer to free from S/W ring is at index
128          * tx_next_dd - (tx_rs_thresh-1)
129          */
130         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
131
132         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
133                 /* free buffers one at a time */
134                 m = __rte_pktmbuf_prefree_seg(txep->mbuf);
135                 txep->mbuf = NULL;
136
137                 if (unlikely(m == NULL))
138                         continue;
139
140                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
141                     (nb_free > 0 && m->pool != free[0]->pool)) {
142                         rte_mempool_put_bulk(free[0]->pool,
143                                              (void **)free, nb_free);
144                         nb_free = 0;
145                 }
146
147                 free[nb_free++] = m;
148         }
149
150         if (nb_free > 0)
151                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
152
153         /* buffers were freed, update counters */
154         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
155         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
156         if (txq->tx_next_dd >= txq->nb_tx_desc)
157                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
158
159         return txq->tx_rs_thresh;
160 }
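/*
 * Worked example of the threshold bookkeeping above (illustrative only,
 * numbers assumed): with nb_tx_desc = 512 and tx_rs_thresh = 32, tx_next_dd
 * starts at 31.  Each successful call frees the 32 mbufs at
 * sw_ring[tx_next_dd - 31 .. tx_next_dd] and advances tx_next_dd by 32
 * (31 -> 63 -> ... -> 511).  After the descriptor at index 511 is recycled,
 * tx_next_dd wraps back to tx_rs_thresh - 1 = 31, matching the wrap handling
 * above.
 */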
161
162 /* Populate 4 descriptors with data from 4 mbufs */
163 static inline void
164 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
165 {
166         uint64_t buf_dma_addr;
167         uint32_t pkt_len;
168         int i;
169
170         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
171                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
172                 pkt_len = (*pkts)->data_len;
173
174                 /* write data to descriptor */
175                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
176
177                 txdp->read.cmd_type_len =
178                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
179
180                 txdp->read.olinfo_status =
181                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
182
183                 rte_prefetch0(&(*pkts)->pool);
184         }
185 }
186
187 /* Populate 1 descriptor with data from 1 mbuf */
188 static inline void
189 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
190 {
191         uint64_t buf_dma_addr;
192         uint32_t pkt_len;
193
194         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
195         pkt_len = (*pkts)->data_len;
196
197         /* write data to descriptor */
198         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
199         txdp->read.cmd_type_len =
200                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
201         txdp->read.olinfo_status =
202                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
203         rte_prefetch0(&(*pkts)->pool);
204 }
205
206 /*
207  * Fill H/W descriptor ring with mbuf data.
208  * Copy mbuf pointers to the S/W ring.
209  */
210 static inline void
211 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
212                       uint16_t nb_pkts)
213 {
214         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
215         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
216         const int N_PER_LOOP = 4;
217         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
218         int mainpart, leftover;
219         int i, j;
220
221         /*
222          * Process most of the packets in chunks of N pkts.  Any
223          * leftover packets will get processed one at a time.
224          */
225         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
226         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
227         for (i = 0; i < mainpart; i += N_PER_LOOP) {
228                 /* Copy N mbuf pointers to the S/W ring */
229                 for (j = 0; j < N_PER_LOOP; ++j) {
230                         (txep + i + j)->mbuf = *(pkts + i + j);
231                 }
232                 tx4(txdp + i, pkts + i);
233         }
234
235         if (unlikely(leftover > 0)) {
236                 for (i = 0; i < leftover; ++i) {
237                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
238                         tx1(txdp + mainpart + i, pkts + mainpart + i);
239                 }
240         }
241 }
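/*
 * Illustrative arithmetic for the split above (values assumed): for
 * nb_pkts = 13 and N_PER_LOOP = 4, mainpart = 13 & ~3 = 12 and
 * leftover = 13 & 3 = 1, so twelve descriptors are filled by three tx4()
 * calls and the remaining one by a single tx1() call.
 */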
242
243 static inline uint16_t
244 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
245              uint16_t nb_pkts)
246 {
247         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
248         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
249         uint16_t n = 0;
250
251         /*
252          * Begin scanning the H/W ring for done descriptors when the
253          * number of available descriptors drops below tx_free_thresh.  For
254          * each done descriptor, free the associated buffer.
255          */
256         if (txq->nb_tx_free < txq->tx_free_thresh)
257                 ixgbe_tx_free_bufs(txq);
258
259         /* Only use descriptors that are available */
260         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
261         if (unlikely(nb_pkts == 0))
262                 return 0;
263
264         /* Use exactly nb_pkts descriptors */
265         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
266
267         /*
268          * At this point, we know there are enough descriptors in the
269          * ring to transmit all the packets.  This assumes that each
270          * mbuf contains a single segment, and that no new offloads
271          * are expected, which would require a new context descriptor.
272          */
273
274         /*
275          * See if we're going to wrap-around. If so, handle the top
276          * of the descriptor ring first, then do the bottom.  If not,
277          * the processing looks just like the "bottom" part anyway...
278          */
279         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
280                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
281                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
282
283                 /*
284                  * We know that the last descriptor in the ring will need to
285                  * have its RS bit set because tx_rs_thresh has to be
286                  * a divisor of the ring size
287                  */
288                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
289                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
290                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
291
292                 txq->tx_tail = 0;
293         }
294
295         /* Fill H/W descriptor ring with mbuf data */
296         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
297         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
298
299         /*
300          * Determine if RS bit should be set
301          * This is what we actually want:
302          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
303          * but instead of subtracting 1 and doing >=, we can just do
304          * greater than without subtracting.
305          */
306         if (txq->tx_tail > txq->tx_next_rs) {
307                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
308                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
309                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
310                                                 txq->tx_rs_thresh);
311                 if (txq->tx_next_rs >= txq->nb_tx_desc)
312                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
313         }
314
315         /*
316          * Check for wrap-around. This would only happen if we used
317          * up to the last descriptor in the ring, no more, no less.
318          */
319         if (txq->tx_tail >= txq->nb_tx_desc)
320                 txq->tx_tail = 0;
321
322         /* update tail pointer */
323         rte_wmb();
324         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
325
326         return nb_pkts;
327 }
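/*
 * Worked example of the RS logic above (illustrative, numbers assumed):
 * with nb_tx_desc = 512 and tx_rs_thresh = 32, tx_next_rs starts at 31.
 * Sending 40 packets from tx_tail = 0 leaves tx_tail = 40 > 31, so the RS
 * bit is set on descriptor 31 and tx_next_rs moves to 63.  No further RS
 * bit is set until a later burst pushes tx_tail past 63.
 */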
328
329 uint16_t
330 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
331                        uint16_t nb_pkts)
332 {
333         uint16_t nb_tx;
334
335         /* Transmit the whole burst in one call if it fits within TX_MAX_BURST pkts */
336         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
337                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
338
339         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
340         nb_tx = 0;
341         while (nb_pkts) {
342                 uint16_t ret, n;
343
344                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
345                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
346                 nb_tx = (uint16_t)(nb_tx + ret);
347                 nb_pkts = (uint16_t)(nb_pkts - ret);
348                 if (ret < n)
349                         break;
350         }
351
352         return nb_tx;
353 }
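/*
 * Illustrative sketch (not part of the driver): how an application typically
 * reaches this path through the generic rte_eth_tx_burst() API, resubmitting
 * the unsent tail of a burst.  The function name and the retry policy are
 * assumptions made for the example only.
 */
static inline uint16_t __attribute__((unused))
ixgbe_example_app_send(uint8_t port_id, uint16_t queue_id,
                       struct rte_mbuf **pkts, uint16_t nb_pkts)
{
        uint16_t sent = 0;

        while (sent < nb_pkts) {
                uint16_t n = rte_eth_tx_burst(port_id, queue_id,
                                              &pkts[sent], nb_pkts - sent);

                if (n == 0)
                        break;  /* TX ring full: caller decides to drop or retry */
                sent += n;
        }
        return sent;
}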
354
355 static inline void
356 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
357                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
358                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
359 {
360         uint32_t type_tucmd_mlhl;
361         uint32_t mss_l4len_idx = 0;
362         uint32_t ctx_idx;
363         uint32_t vlan_macip_lens;
364         union ixgbe_tx_offload tx_offload_mask;
365         uint32_t seqnum_seed = 0;
366
367         ctx_idx = txq->ctx_curr;
368         tx_offload_mask.data[0] = 0;
369         tx_offload_mask.data[1] = 0;
370         type_tucmd_mlhl = 0;
371
372         /* Specify which HW CTX to upload. */
373         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
374
375         if (ol_flags & PKT_TX_VLAN_PKT) {
376                 tx_offload_mask.vlan_tci |= ~0;
377         }
378
379         /* check if TCP segmentation is required for this packet */
380         if (ol_flags & PKT_TX_TCP_SEG) {
381                 /* implies IP cksum in IPv4 */
382                 if (ol_flags & PKT_TX_IP_CKSUM)
383                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
384                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
385                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
386                 else
387                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
388                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
389                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
390
391                 tx_offload_mask.l2_len |= ~0;
392                 tx_offload_mask.l3_len |= ~0;
393                 tx_offload_mask.l4_len |= ~0;
394                 tx_offload_mask.tso_segsz |= ~0;
395                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
396                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
397         } else { /* no TSO, check if hardware checksum is needed */
398                 if (ol_flags & PKT_TX_IP_CKSUM) {
399                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
400                         tx_offload_mask.l2_len |= ~0;
401                         tx_offload_mask.l3_len |= ~0;
402                 }
403
404                 switch (ol_flags & PKT_TX_L4_MASK) {
405                 case PKT_TX_UDP_CKSUM:
406                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
407                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
408                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
409                         tx_offload_mask.l2_len |= ~0;
410                         tx_offload_mask.l3_len |= ~0;
411                         break;
412                 case PKT_TX_TCP_CKSUM:
413                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
414                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
415                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
416                         tx_offload_mask.l2_len |= ~0;
417                         tx_offload_mask.l3_len |= ~0;
418                         break;
419                 case PKT_TX_SCTP_CKSUM:
420                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
421                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
422                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
423                         tx_offload_mask.l2_len |= ~0;
424                         tx_offload_mask.l3_len |= ~0;
425                         break;
426                 default:
427                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
428                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
429                         break;
430                 }
431         }
432
433         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
434                 tx_offload_mask.outer_l2_len |= ~0;
435                 tx_offload_mask.outer_l3_len |= ~0;
436                 tx_offload_mask.l2_len |= ~0;
437                 seqnum_seed |= tx_offload.outer_l3_len
438                                << IXGBE_ADVTXD_OUTER_IPLEN;
439                 seqnum_seed |= tx_offload.l2_len
440                                << IXGBE_ADVTXD_TUNNEL_LEN;
441         }
442
443         txq->ctx_cache[ctx_idx].flags = ol_flags;
444         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
445                 tx_offload_mask.data[0] & tx_offload.data[0];
446         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
447                 tx_offload_mask.data[1] & tx_offload.data[1];
448         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
449
450         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
451         vlan_macip_lens = tx_offload.l3_len;
452         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
453                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
454                                     IXGBE_ADVTXD_MACLEN_SHIFT);
455         else
456                 vlan_macip_lens |= (tx_offload.l2_len <<
457                                     IXGBE_ADVTXD_MACLEN_SHIFT);
458         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
459         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
460         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
461         ctx_txd->seqnum_seed     = seqnum_seed;
462 }
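/*
 * Illustrative packing example for vlan_macip_lens (values assumed): with
 * l2_len = 14, l3_len = 20 and vlan_tci = 100, the field becomes
 * 20 | (14 << IXGBE_ADVTXD_MACLEN_SHIFT) | (100 << IXGBE_ADVTXD_VLAN_SHIFT).
 * Assuming the usual shift values of 9 and 16, this is 0x00641c14: IPLEN in
 * bits 8:0, MACLEN in bits 15:9 and the VLAN tag in bits 31:16.
 */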
463
464 /*
465  * Check which hardware context can be used. Use the existing match
466  * or create a new context descriptor.
467  */
468 static inline uint32_t
469 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
470                    union ixgbe_tx_offload tx_offload)
471 {
472         /* Check for a match with the currently used context */
473         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
474                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
475                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
476                      & tx_offload.data[0])) &&
477                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
478                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
479                      & tx_offload.data[1]))))
480                 return txq->ctx_curr;
481
482         /* Otherwise, check for a match with the other (next) context */
483         txq->ctx_curr ^= 1;
484         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
485                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
486                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
487                      & tx_offload.data[0])) &&
488                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
489                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
490                      & tx_offload.data[1]))))
491                 return txq->ctx_curr;
492
493         /* No match: the caller must build a new context descriptor */
494         return IXGBE_CTX_NUM;
495 }
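/*
 * Illustrative behaviour of the two-entry context cache (scenario assumed):
 * a burst alternating between "VLAN only" and "VLAN + TCP checksum" packets
 * keeps hitting the two cached contexts, so no new context descriptors are
 * written.  A packet with a third, different offload combination misses both
 * entries, what_advctx_update() returns IXGBE_CTX_NUM, and the caller builds
 * a fresh context descriptor in the slot now selected by ctx_curr.
 */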
496
497 static inline uint32_t
498 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
499 {
500         uint32_t tmp = 0;
501
502         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
503                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
504         if (ol_flags & PKT_TX_IP_CKSUM)
505                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
506         if (ol_flags & PKT_TX_TCP_SEG)
507                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
508         return tmp;
509 }
510
511 static inline uint32_t
512 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
513 {
514         uint32_t cmdtype = 0;
515
516         if (ol_flags & PKT_TX_VLAN_PKT)
517                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
518         if (ol_flags & PKT_TX_TCP_SEG)
519                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
520         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
521                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
522         return cmdtype;
523 }
524
525 /* Default RS bit threshold values */
526 #ifndef DEFAULT_TX_RS_THRESH
527 #define DEFAULT_TX_RS_THRESH   32
528 #endif
529 #ifndef DEFAULT_TX_FREE_THRESH
530 #define DEFAULT_TX_FREE_THRESH 32
531 #endif
532
533 /* Reset transmit descriptors after they have been used */
534 static inline int
535 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
536 {
537         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
538         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
539         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
540         uint16_t nb_tx_desc = txq->nb_tx_desc;
541         uint16_t desc_to_clean_to;
542         uint16_t nb_tx_to_clean;
543         uint32_t status;
544
545         /* Determine the last descriptor needing to be cleaned */
546         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
547         if (desc_to_clean_to >= nb_tx_desc)
548                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
549
550         /* Check to make sure the last descriptor to clean is done */
551         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
552         status = txr[desc_to_clean_to].wb.status;
553         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
554                 PMD_TX_FREE_LOG(DEBUG,
555                                 "TX descriptor %4u is not done "
556                                 "(port=%d queue=%d)",
557                                 desc_to_clean_to,
558                                 txq->port_id, txq->queue_id);
559                 /* Failed to clean any descriptors, better luck next time */
560                 return -(1);
561         }
562
563         /* Figure out how many descriptors will be cleaned */
564         if (last_desc_cleaned > desc_to_clean_to)
565                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
566                                                         desc_to_clean_to);
567         else
568                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
569                                                 last_desc_cleaned);
570
571         PMD_TX_FREE_LOG(DEBUG,
572                         "Cleaning %4u TX descriptors: %4u to %4u "
573                         "(port=%d queue=%d)",
574                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
575                         txq->port_id, txq->queue_id);
576
577         /*
578          * The last descriptor to clean is done, so that means all the
579          * descriptors from the last descriptor that was cleaned
580          * up to the last descriptor with the RS bit set
581          * are done. Only reset the threshold descriptor.
582          */
583         txr[desc_to_clean_to].wb.status = 0;
584
585         /* Update the txq to reflect the last descriptor that was cleaned */
586         txq->last_desc_cleaned = desc_to_clean_to;
587         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
588
589         /* No Error */
590         return 0;
591 }
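/*
 * Worked example of the cleanup arithmetic above (illustrative, numbers
 * assumed, single-segment packets so last_id points back at the same slot):
 * with nb_tx_desc = 512, tx_rs_thresh = 32 and last_desc_cleaned = 500,
 * desc_to_clean_to is first 532 and then wraps to 20.  Since 500 > 20,
 * nb_tx_to_clean = (512 - 500) + 20 = 32 descriptors are credited back to
 * nb_tx_free once the DD bit is seen on descriptor 20.
 */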
592
593 uint16_t
594 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
595                 uint16_t nb_pkts)
596 {
597         struct ixgbe_tx_queue *txq;
598         struct ixgbe_tx_entry *sw_ring;
599         struct ixgbe_tx_entry *txe, *txn;
600         volatile union ixgbe_adv_tx_desc *txr;
601         volatile union ixgbe_adv_tx_desc *txd, *txp;
602         struct rte_mbuf     *tx_pkt;
603         struct rte_mbuf     *m_seg;
604         uint64_t buf_dma_addr;
605         uint32_t olinfo_status;
606         uint32_t cmd_type_len;
607         uint32_t pkt_len;
608         uint16_t slen;
609         uint64_t ol_flags;
610         uint16_t tx_id;
611         uint16_t tx_last;
612         uint16_t nb_tx;
613         uint16_t nb_used;
614         uint64_t tx_ol_req;
615         uint32_t ctx = 0;
616         uint32_t new_ctx;
617         union ixgbe_tx_offload tx_offload;
618
619         tx_offload.data[0] = 0;
620         tx_offload.data[1] = 0;
621         txq = tx_queue;
622         sw_ring = txq->sw_ring;
623         txr     = txq->tx_ring;
624         tx_id   = txq->tx_tail;
625         txe = &sw_ring[tx_id];
626         txp = NULL;
627
628         /* Determine if the descriptor ring needs to be cleaned. */
629         if (txq->nb_tx_free < txq->tx_free_thresh)
630                 ixgbe_xmit_cleanup(txq);
631
632         rte_prefetch0(&txe->mbuf->pool);
633
634         /* TX loop */
635         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
636                 new_ctx = 0;
637                 tx_pkt = *tx_pkts++;
638                 pkt_len = tx_pkt->pkt_len;
639
640                 /*
641                  * Determine how many (if any) context descriptors
642                  * are needed for offload functionality.
643                  */
644                 ol_flags = tx_pkt->ol_flags;
645
646                 /* If hardware offload required */
647                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
648                 if (tx_ol_req) {
649                         tx_offload.l2_len = tx_pkt->l2_len;
650                         tx_offload.l3_len = tx_pkt->l3_len;
651                         tx_offload.l4_len = tx_pkt->l4_len;
652                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
653                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
654                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
655                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
656
657                         /* Decide whether a new context must be built or an existing one reused. */
658                         ctx = what_advctx_update(txq, tx_ol_req,
659                                 tx_offload);
660                         /* Only allocate a context descriptor if required */
661                         new_ctx = (ctx == IXGBE_CTX_NUM);
662                         ctx = txq->ctx_curr;
663                 }
664
665                 /*
666                  * Keep track of how many descriptors are used in this loop.
667                  * This will always be the number of segments plus the number of
668                  * context descriptors required to transmit the packet.
669                  */
670                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
671
672                 if (txp != NULL &&
673                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
674                         /* set RS on the previous packet in the burst */
675                         txp->read.cmd_type_len |=
676                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
677
678                 /*
679                  * The number of descriptors that must be allocated for a
680                  * packet is the number of segments of that packet, plus 1
681                  * Context Descriptor for the hardware offload, if any.
682                  * Determine the last TX descriptor to allocate in the TX ring
683                  * for the packet, starting from the current position (tx_id)
684                  * in the ring.
685                  */
686                 tx_last = (uint16_t) (tx_id + nb_used - 1);
687
688                 /* Circular ring */
689                 if (tx_last >= txq->nb_tx_desc)
690                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
691
692                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
693                            " tx_first=%u tx_last=%u",
694                            (unsigned) txq->port_id,
695                            (unsigned) txq->queue_id,
696                            (unsigned) pkt_len,
697                            (unsigned) tx_id,
698                            (unsigned) tx_last);
699
700                 /*
701                  * Make sure there are enough TX descriptors available to
702                  * transmit the entire packet.
703                  * nb_used better be less than or equal to txq->tx_rs_thresh
704                  */
705                 if (nb_used > txq->nb_tx_free) {
706                         PMD_TX_FREE_LOG(DEBUG,
707                                         "Not enough free TX descriptors "
708                                         "nb_used=%4u nb_free=%4u "
709                                         "(port=%d queue=%d)",
710                                         nb_used, txq->nb_tx_free,
711                                         txq->port_id, txq->queue_id);
712
713                         if (ixgbe_xmit_cleanup(txq) != 0) {
714                                 /* Could not clean any descriptors */
715                                 if (nb_tx == 0)
716                                         return 0;
717                                 goto end_of_tx;
718                         }
719
720                         /* nb_used better be <= txq->tx_rs_thresh */
721                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
722                                 PMD_TX_FREE_LOG(DEBUG,
723                                         "The number of descriptors needed to "
724                                         "transmit the packet exceeds the "
725                                         "RS bit threshold. This will impact "
726                                         "performance. "
727                                         "nb_used=%4u nb_free=%4u "
728                                         "tx_rs_thresh=%4u. "
729                                         "(port=%d queue=%d)",
730                                         nb_used, txq->nb_tx_free,
731                                         txq->tx_rs_thresh,
732                                         txq->port_id, txq->queue_id);
733                                 /*
734                                  * Loop here until there are enough TX
735                                  * descriptors or until the ring cannot be
736                                  * cleaned.
737                                  */
738                                 while (nb_used > txq->nb_tx_free) {
739                                         if (ixgbe_xmit_cleanup(txq) != 0) {
740                                                 /*
741                                                  * Could not clean any
742                                                  * descriptors
743                                                  */
744                                                 if (nb_tx == 0)
745                                                         return 0;
746                                                 goto end_of_tx;
747                                         }
748                                 }
749                         }
750                 }
751
752                 /*
753                  * By now there are enough free TX descriptors to transmit
754                  * the packet.
755                  */
756
757                 /*
758                  * Set common flags of all TX Data Descriptors.
759                  *
760                  * The following bits must be set in all Data Descriptors:
761                  *   - IXGBE_ADVTXD_DTYP_DATA
762                  *   - IXGBE_ADVTXD_DCMD_DEXT
763                  *
764                  * The following bits must be set in the first Data Descriptor
765                  * and are ignored in the other ones:
766                  *   - IXGBE_ADVTXD_DCMD_IFCS
767                  *   - IXGBE_ADVTXD_MAC_1588
768                  *   - IXGBE_ADVTXD_DCMD_VLE
769                  *
770                  * The following bits must only be set in the last Data
771                  * Descriptor:
772                  *   - IXGBE_TXD_CMD_EOP
773                  *
774                  * The following bits can be set in any Data Descriptor, but
775                  * are only set in the last Data Descriptor:
776                  *   - IXGBE_TXD_CMD_RS
777                  */
778                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
779                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
780
781 #ifdef RTE_LIBRTE_IEEE1588
782                 if (ol_flags & PKT_TX_IEEE1588_TMST)
783                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
784 #endif
785
786                 olinfo_status = 0;
787                 if (tx_ol_req) {
788
789                         if (ol_flags & PKT_TX_TCP_SEG) {
790                                 /* When TSO is on, the paylen in the descriptor is
791                                  * not the packet length but the TCP payload length. */
792                                 pkt_len -= (tx_offload.l2_len +
793                                         tx_offload.l3_len + tx_offload.l4_len);
794                         }
795
796                         /*
797                          * Setup the TX Advanced Context Descriptor if required
798                          */
799                         if (new_ctx) {
800                                 volatile struct ixgbe_adv_tx_context_desc *
801                                     ctx_txd;
802
803                                 ctx_txd = (volatile struct
804                                     ixgbe_adv_tx_context_desc *)
805                                     &txr[tx_id];
806
807                                 txn = &sw_ring[txe->next_id];
808                                 rte_prefetch0(&txn->mbuf->pool);
809
810                                 if (txe->mbuf != NULL) {
811                                         rte_pktmbuf_free_seg(txe->mbuf);
812                                         txe->mbuf = NULL;
813                                 }
814
815                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
816                                         tx_offload);
817
818                                 txe->last_id = tx_last;
819                                 tx_id = txe->next_id;
820                                 txe = txn;
821                         }
822
823                         /*
824                          * Set up the TX Advanced Data Descriptor.
825                          * This path is taken whether a new context descriptor
826                          * was built or an existing one is being reused.
827                          */
828                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
829                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
830                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
831                 }
832
833                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
834
835                 m_seg = tx_pkt;
836                 do {
837                         txd = &txr[tx_id];
838                         txn = &sw_ring[txe->next_id];
839                         rte_prefetch0(&txn->mbuf->pool);
840
841                         if (txe->mbuf != NULL)
842                                 rte_pktmbuf_free_seg(txe->mbuf);
843                         txe->mbuf = m_seg;
844
845                         /*
846                          * Set up Transmit Data Descriptor.
847                          */
848                         slen = m_seg->data_len;
849                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
850                         txd->read.buffer_addr =
851                                 rte_cpu_to_le_64(buf_dma_addr);
852                         txd->read.cmd_type_len =
853                                 rte_cpu_to_le_32(cmd_type_len | slen);
854                         txd->read.olinfo_status =
855                                 rte_cpu_to_le_32(olinfo_status);
856                         txe->last_id = tx_last;
857                         tx_id = txe->next_id;
858                         txe = txn;
859                         m_seg = m_seg->next;
860                 } while (m_seg != NULL);
861
862                 /*
863                  * The last packet data descriptor needs End Of Packet (EOP)
864                  */
865                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
866                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
867                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
868
869                 /* Set RS bit only on threshold packets' last descriptor */
870                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
871                         PMD_TX_FREE_LOG(DEBUG,
872                                         "Setting RS bit on TXD id="
873                                         "%4u (port=%d queue=%d)",
874                                         tx_last, txq->port_id, txq->queue_id);
875
876                         cmd_type_len |= IXGBE_TXD_CMD_RS;
877
878                         /* Update txq RS bit counters */
879                         txq->nb_tx_used = 0;
880                         txp = NULL;
881                 } else
882                         txp = txd;
883
884                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
885         }
886
887 end_of_tx:
888         /* set RS on last packet in the burst */
889         if (txp != NULL)
890                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
891
892         rte_wmb();
893
894         /*
895          * Set the Transmit Descriptor Tail (TDT)
896          */
897         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
898                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
899                    (unsigned) tx_id, (unsigned) nb_tx);
900         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
901         txq->tx_tail = tx_id;
902
903         return nb_tx;
904 }
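/*
 * Illustrative sketch (not part of the driver): the mbuf fields a sender is
 * expected to fill so that ixgbe_xmit_pkts() builds a TSO context descriptor
 * for the packet.  The helper name and the 1460-byte segment size are
 * assumptions made for the example only.
 */
static inline void __attribute__((unused))
ixgbe_example_prepare_tso(struct rte_mbuf *m)
{
        /* Offload flags covered by IXGBE_TX_OFFLOAD_MASK above. */
        m->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM | PKT_TX_TCP_SEG;

        /* Header lengths and MSS consumed by ixgbe_set_xmit_ctx(). */
        m->l2_len = sizeof(struct ether_hdr);
        m->l3_len = sizeof(struct ipv4_hdr);
        m->l4_len = sizeof(struct tcp_hdr);
        m->tso_segsz = 1460;
}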
905
906 /*********************************************************************
907  *
908  *  RX functions
909  *
910  **********************************************************************/
911
912 #define IXGBE_PACKET_TYPE_ETHER                         0X00
913 #define IXGBE_PACKET_TYPE_IPV4                          0X01
914 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
915 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
916 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
917 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
918 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
919 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
920 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
921 #define IXGBE_PACKET_TYPE_IPV6                          0X04
922 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
923 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
924 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
925 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
926 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
927 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
928 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
929 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
930 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
931 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
932 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
933 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
934 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
935 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
936 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
937 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
938 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
939 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
940 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
941 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
942 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
943 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
944 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
945
946 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
947 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
948 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
949 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
950 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
951 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
952 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
953 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
954 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
955 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
956 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
957 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
958 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
959 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
960 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
961 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
962 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
963 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
964 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
965 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
966 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
967 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
968 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
969
970 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
971 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
972 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
973 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
974 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
975 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
976 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
977 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
978 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
979 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
980 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
981 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
982 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
983 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
984 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
985 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
986 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
987 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
988 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
989 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
990 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
991 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
992 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
993
994 #define IXGBE_PACKET_TYPE_MAX               0X80
995 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
996 #define IXGBE_PACKET_TYPE_SHIFT             0X04
997
998 /* @note: update ixgbe_dev_supported_ptypes_get() if anything changes here. */
999 static inline uint32_t
1000 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1001 {
1002         /**
1003          * Use two different tables, one for normal packets and one for
1004          * tunnel packets, to save space.
1005          */
1006         static const uint32_t
1007                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1008                 [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1009                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1010                         RTE_PTYPE_L3_IPV4,
1011                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1012                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1013                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1014                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1015                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1016                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1017                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1018                         RTE_PTYPE_L3_IPV4_EXT,
1019                 [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1020                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1021                 [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1022                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1023                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1024                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1025                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1026                         RTE_PTYPE_L3_IPV6,
1027                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1028                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1029                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1030                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1031                 [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1032                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1033                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1034                         RTE_PTYPE_L3_IPV6_EXT,
1035                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1036                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1037                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1038                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1039                 [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1040                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1041                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1042                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1043                         RTE_PTYPE_INNER_L3_IPV6,
1044                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1045                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1046                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1047                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1048                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1049                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1050                 [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1051                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1052                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1053                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1054                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1055                         RTE_PTYPE_INNER_L3_IPV6,
1056                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1057                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1058                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1059                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1060                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1061                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1062                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1063                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1064                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1065                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1066                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1067                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1068                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1069                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1070                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1071                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1072                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1073                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1074                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1075                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1076                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1077                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1078                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1079                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1080                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1081                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1082                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1083                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1084                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1085                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1086                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1087                         RTE_PTYPE_L2_ETHER |
1088                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1089                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1090         };
1091
1092         static const uint32_t
1093                 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1094                 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1095                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1096                         RTE_PTYPE_INNER_L2_ETHER,
1097                 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1098                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1099                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1100                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1101                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1102                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1103                 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1104                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1105                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1106                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1107                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1108                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1109                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1110                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1111                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1112                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1113                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1114                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1115                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1116                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1117                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1118                         RTE_PTYPE_INNER_L4_TCP,
1119                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1120                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1121                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1122                         RTE_PTYPE_INNER_L4_TCP,
1123                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1124                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1125                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1126                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1127                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1128                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1129                         RTE_PTYPE_INNER_L4_TCP,
1130                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1131                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1132                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1133                         RTE_PTYPE_INNER_L3_IPV4,
1134                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1135                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1136                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1137                         RTE_PTYPE_INNER_L4_UDP,
1138                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1139                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1140                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1141                         RTE_PTYPE_INNER_L4_UDP,
1142                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1143                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1144                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1145                         RTE_PTYPE_INNER_L4_SCTP,
1146                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1147                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1148                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1149                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1150                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1151                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1152                         RTE_PTYPE_INNER_L4_UDP,
1153                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1154                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1155                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1156                         RTE_PTYPE_INNER_L4_SCTP,
1157                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1158                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1159                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1160                         RTE_PTYPE_INNER_L3_IPV4,
1161                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1162                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1163                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1164                         RTE_PTYPE_INNER_L4_SCTP,
1165                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1166                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1167                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1168                         RTE_PTYPE_INNER_L4_SCTP,
1169                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1170                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1171                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1172                         RTE_PTYPE_INNER_L4_TCP,
1173                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1174                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1175                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1176                         RTE_PTYPE_INNER_L4_UDP,
1177
1178                 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1179                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1180                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1181                 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1182                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1183                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1184                         RTE_PTYPE_INNER_L3_IPV4,
1185                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1186                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1187                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1188                         RTE_PTYPE_INNER_L3_IPV4_EXT,
1189                 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1190                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1191                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1192                         RTE_PTYPE_INNER_L3_IPV6,
1193                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1194                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1195                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1196                         RTE_PTYPE_INNER_L3_IPV4,
1197                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1198                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1199                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1200                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1201                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1202                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1203                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1204                         RTE_PTYPE_INNER_L3_IPV4,
1205                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1206                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1207                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1208                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1209                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1210                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1211                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1212                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1213                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1214                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1215                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1216                         RTE_PTYPE_INNER_L3_IPV4,
1217                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1218                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1219                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1220                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1221                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1222                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1223                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1224                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1225                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1226                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1227                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1228                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1229                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1230                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1231                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1232                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1233                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1234                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1235                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1236                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1237                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1238                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1239                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1240                         RTE_PTYPE_INNER_L3_IPV4,
1241                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1242                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1243                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1244                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1245                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1246                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1247                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1248                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1249                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1250                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1251                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1252                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1253                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1254                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1255                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1256                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1257                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1258                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1259                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1260                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1261                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1262                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1263                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1264                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1265                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1266                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1267                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1268                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1269         };
1270
1271         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1272                 return RTE_PTYPE_UNKNOWN;
1273
1274         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1275
1276         /* For tunnel packet */
1277         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1278                 /* Remove the tunnel bit to save the space. */
1279                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1280                 return ptype_table_tn[pkt_info];
1281         }
1282
1283         /**
1284          * For x550, if the packet is not a tunnel packet,
1285          * the tunnel type bit should be set to 0.
1286          * Reuse the 82599 mask.
1287          */
1288         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1289
1290         return ptype_table[pkt_info];
1291 }
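
/*
 * A minimal sketch of the two-table dispatch used above: the tunnel bit in
 * the packet-info word selects which lookup table applies and is cleared
 * before indexing so that both tables stay compact.  The bit and mask
 * parameters are illustrative placeholders, not the hardware encoding.
 */
static inline uint32_t
example_ptype_dispatch(uint32_t info, uint32_t tunnel_bit, uint32_t mask,
		       const uint32_t *tunnel_tbl, const uint32_t *plain_tbl)
{
	if (info & tunnel_bit)
		return tunnel_tbl[info & mask & ~tunnel_bit];
	return plain_tbl[info & mask];
}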
1292
1293 static inline uint64_t
1294 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1295 {
1296         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1297                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1298                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1299                 PKT_RX_RSS_HASH, 0, 0, 0,
1300                 0, 0, 0,  PKT_RX_FDIR,
1301         };
1302 #ifdef RTE_LIBRTE_IEEE1588
1303         static uint64_t ip_pkt_etqf_map[8] = {
1304                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1305                 0, 0, 0, 0,
1306         };
1307
1308         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1309                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1310                                 ip_rss_types_map[pkt_info & 0XF];
1311         else
1312                 return ip_rss_types_map[pkt_info & 0XF];
1313 #else
1314         return ip_rss_types_map[pkt_info & 0XF];
1315 #endif
1316 }
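
/*
 * Sketch of the table lookup above, in isolation: the low 4 bits of the
 * descriptor's packet-info word index a 16-entry table of precomputed
 * offload-flag words, so classifying a packet costs one masked array load
 * instead of a chain of comparisons.  The map argument stands in for
 * ip_rss_types_map.
 */
static inline uint64_t
example_rss_flag_lookup(uint16_t pkt_info, const uint64_t map[16])
{
	return map[pkt_info & 0xF];
}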
1317
1318 static inline uint64_t
1319 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1320 {
1321         uint64_t pkt_flags;
1322
1323         /*
1324          * Check only whether a VLAN is present.
1325          * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1326          * that can be found from the rte_eth_rxmode.hw_ip_checksum flag.
1327          */
1328         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1329
1330 #ifdef RTE_LIBRTE_IEEE1588
1331         if (rx_status & IXGBE_RXD_STAT_TMST)
1332                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1333 #endif
1334         return pkt_flags;
1335 }
1336
1337 static inline uint64_t
1338 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1339 {
1340         uint64_t pkt_flags;
1341
1342         /*
1343          * Bit 31: IPE, IPv4 checksum error
1344          * Bit 30: L4E, L4 integrity error
1345          */
1346         static uint64_t error_to_pkt_flags_map[4] = {
1347                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1348                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1349                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1350                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1351         };
1352         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1353                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1354
1355         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1356             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1357                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1358         }
1359
1360         return pkt_flags;
1361 }
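
/*
 * The same pattern reduced to its essentials: two adjacent error bits are
 * shifted down and used as a 0..3 index into a table combining the IP and
 * L4 checksum flags.  The err_shift parameter is a placeholder for the
 * hardware-defined bit position (IXGBE_RXDADV_ERR_CKSUM_BIT above).
 */
static inline uint64_t
example_cksum_flag_lookup(uint32_t rx_status, unsigned int err_shift,
			  const uint64_t map[4])
{
	return map[(rx_status >> err_shift) & 0x3];
}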
1362
1363 /*
1364  * LOOK_AHEAD defines how many desc statuses to check beyond the
1365  * current descriptor.
1366  * It must be a preprocessor #define for optimal performance.
1367  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1368  * function only works with LOOK_AHEAD=8.
1369  */
1370 #define LOOK_AHEAD 8
1371 #if (LOOK_AHEAD != 8)
1372 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1373 #endif
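
/*
 * Worked example of the batching that LOOK_AHEAD implies: the scan loop
 * below advances in groups of LOOK_AHEAD descriptors, so with a maximum
 * burst of, e.g., 32 descriptors at most 32 / 8 = 4 groups are examined
 * per call, and the scan stops at the first group that is not fully done.
 */
static inline int
example_lookahead_groups(int max_burst)
{
	return max_burst / LOOK_AHEAD;
}
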
1374 static inline int
1375 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1376 {
1377         volatile union ixgbe_adv_rx_desc *rxdp;
1378         struct ixgbe_rx_entry *rxep;
1379         struct rte_mbuf *mb;
1380         uint16_t pkt_len;
1381         uint64_t pkt_flags;
1382         int nb_dd;
1383         uint32_t s[LOOK_AHEAD];
1384         uint32_t pkt_info[LOOK_AHEAD];
1385         int i, j, nb_rx = 0;
1386         uint32_t status;
1387         uint64_t vlan_flags = rxq->vlan_flags;
1388
1389         /* get references to current descriptor and S/W ring entry */
1390         rxdp = &rxq->rx_ring[rxq->rx_tail];
1391         rxep = &rxq->sw_ring[rxq->rx_tail];
1392
1393         status = rxdp->wb.upper.status_error;
1394         /* check to make sure there is at least 1 packet to receive */
1395         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1396                 return 0;
1397
1398         /*
1399          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1400          * reference packets that are ready to be received.
1401          */
1402         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1403              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1404                 /* Read desc statuses backwards to avoid race condition */
1405                 for (j = 0; j < LOOK_AHEAD; j++)
1406                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1407
1408                 rte_smp_rmb();
1409
1410                 /* Compute how many status bits were set */
1411                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1412                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1413                         ;
1414
1415                 for (j = 0; j < nb_dd; j++)
1416                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1417                                                        lo_dword.data);
1418
1419                 nb_rx += nb_dd;
1420
1421                 /* Translate descriptor info to mbuf format */
1422                 for (j = 0; j < nb_dd; ++j) {
1423                         mb = rxep[j].mbuf;
1424                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1425                                   rxq->crc_len;
1426                         mb->data_len = pkt_len;
1427                         mb->pkt_len = pkt_len;
1428                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1429
1430                         /* convert descriptor fields to rte mbuf flags */
1431                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1432                                 vlan_flags);
1433                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1434                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1435                                         ((uint16_t)pkt_info[j]);
1436                         mb->ol_flags = pkt_flags;
1437                         mb->packet_type =
1438                                 ixgbe_rxd_pkt_info_to_pkt_type
1439                                         (pkt_info[j], rxq->pkt_type_mask);
1440
1441                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1442                                 mb->hash.rss = rte_le_to_cpu_32(
1443                                     rxdp[j].wb.lower.hi_dword.rss);
1444                         else if (pkt_flags & PKT_RX_FDIR) {
1445                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1446                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1447                                     IXGBE_ATR_HASH_MASK;
1448                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1449                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1450                         }
1451                 }
1452
1453                 /* Move mbuf pointers from the S/W ring to the stage */
1454                 for (j = 0; j < LOOK_AHEAD; ++j) {
1455                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1456                 }
1457
1458                 /* stop if all requested packets could not be received */
1459                 if (nb_dd != LOOK_AHEAD)
1460                         break;
1461         }
1462
1463         /* clear software ring entries so we can cleanup correctly */
1464         for (i = 0; i < nb_rx; ++i) {
1465                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1466         }
1467
1468
1469         return nb_rx;
1470 }
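
/*
 * Minimal sketch of the "count consecutive completed descriptors" step in
 * ixgbe_rx_scan_hw_ring(): walk an array of status words and stop at the
 * first one whose DD bit is clear.  The dd_mask parameter stands in for
 * IXGBE_RXDADV_STAT_DD.
 */
static inline int
example_count_done(const uint32_t *status, int n, uint32_t dd_mask)
{
	int nb_dd = 0;

	while (nb_dd < n && (status[nb_dd] & dd_mask))
		nb_dd++;
	return nb_dd;
}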
1471
1472 static inline int
1473 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1474 {
1475         volatile union ixgbe_adv_rx_desc *rxdp;
1476         struct ixgbe_rx_entry *rxep;
1477         struct rte_mbuf *mb;
1478         uint16_t alloc_idx;
1479         __le64 dma_addr;
1480         int diag, i;
1481
1482         /* allocate buffers in bulk directly into the S/W ring */
1483         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1484         rxep = &rxq->sw_ring[alloc_idx];
1485         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1486                                     rxq->rx_free_thresh);
1487         if (unlikely(diag != 0))
1488                 return -ENOMEM;
1489
1490         rxdp = &rxq->rx_ring[alloc_idx];
1491         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1492                 /* populate the static rte mbuf fields */
1493                 mb = rxep[i].mbuf;
1494                 if (reset_mbuf) {
1495                         mb->next = NULL;
1496                         mb->nb_segs = 1;
1497                         mb->port = rxq->port_id;
1498                 }
1499
1500                 rte_mbuf_refcnt_set(mb, 1);
1501                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1502
1503                 /* populate the descriptors */
1504                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1505                 rxdp[i].read.hdr_addr = 0;
1506                 rxdp[i].read.pkt_addr = dma_addr;
1507         }
1508
1509         /* update state of internal queue structure */
1510         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1511         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1512                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1513
1514         /* no errors */
1515         return 0;
1516 }
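
/*
 * Sketch of the trigger arithmetic above: the refill trigger advances by
 * rx_free_thresh after every bulk allocation and wraps back to
 * (rx_free_thresh - 1) once it would pass the end of the ring, so refills
 * always cover the same fixed-size slices of the descriptor ring.
 */
static inline uint16_t
example_next_free_trigger(uint16_t trigger, uint16_t free_thresh,
			  uint16_t nb_desc)
{
	trigger = (uint16_t)(trigger + free_thresh);
	if (trigger >= nb_desc)
		trigger = (uint16_t)(free_thresh - 1);
	return trigger;
}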
1517
1518 static inline uint16_t
1519 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1520                          uint16_t nb_pkts)
1521 {
1522         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1523         int i;
1524
1525         /* how many packets are ready to return? */
1526         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1527
1528         /* copy mbuf pointers to the application's packet list */
1529         for (i = 0; i < nb_pkts; ++i)
1530                 rx_pkts[i] = stage[i];
1531
1532         /* update internal queue state */
1533         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1534         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1535
1536         return nb_pkts;
1537 }
1538
1539 static inline uint16_t
1540 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1541              uint16_t nb_pkts)
1542 {
1543         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1544         uint16_t nb_rx = 0;
1545
1546         /* Any previously recv'd pkts will be returned from the Rx stage */
1547         if (rxq->rx_nb_avail)
1548                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1549
1550         /* Scan the H/W ring for packets to receive */
1551         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1552
1553         /* update internal queue state */
1554         rxq->rx_next_avail = 0;
1555         rxq->rx_nb_avail = nb_rx;
1556         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1557
1558         /* if required, allocate new buffers to replenish descriptors */
1559         if (rxq->rx_tail > rxq->rx_free_trigger) {
1560                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1561
1562                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1563                         int i, j;
1564
1565                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1566                                    "queue_id=%u", (unsigned) rxq->port_id,
1567                                    (unsigned) rxq->queue_id);
1568
1569                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1570                                 rxq->rx_free_thresh;
1571
1572                         /*
1573                          * Need to rewind any previous receives if we cannot
1574                          * allocate new buffers to replenish the old ones.
1575                          */
1576                         rxq->rx_nb_avail = 0;
1577                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1578                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1579                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1580
1581                         return 0;
1582                 }
1583
1584                 /* update tail pointer */
1585                 rte_wmb();
1586                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
1587         }
1588
1589         if (rxq->rx_tail >= rxq->nb_rx_desc)
1590                 rxq->rx_tail = 0;
1591
1592         /* received any packets this loop? */
1593         if (rxq->rx_nb_avail)
1594                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1595
1596         return 0;
1597 }
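
/*
 * The rewind step from rx_recv_pkts() in isolation: when the bulk buffer
 * allocation fails, mbuf pointers already copied to the stage are written
 * back into the software ring starting at the rolled-back tail, so nothing
 * is leaked.  A plain pointer array stands in for the ixgbe_rx_entry ring,
 * and the slice is assumed not to wrap (as in the caller above).
 */
static inline void
example_rewind_stage(struct rte_mbuf **sw_ring, struct rte_mbuf **stage,
		     uint16_t tail, uint16_t nb_rx)
{
	uint16_t i;

	for (i = 0; i < nb_rx; i++)
		sw_ring[tail + i] = stage[i];
}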
1598
1599 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1600 uint16_t
1601 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1602                            uint16_t nb_pkts)
1603 {
1604         uint16_t nb_rx;
1605
1606         if (unlikely(nb_pkts == 0))
1607                 return 0;
1608
1609         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1610                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1611
1612         /* request is relatively large, chunk it up */
1613         nb_rx = 0;
1614         while (nb_pkts) {
1615                 uint16_t ret, n;
1616
1617                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1618                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1619                 nb_rx = (uint16_t)(nb_rx + ret);
1620                 nb_pkts = (uint16_t)(nb_pkts - ret);
1621                 if (ret < n)
1622                         break;
1623         }
1624
1625         return nb_rx;
1626 }
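
/*
 * The chunking pattern above, reduced to its essentials: a request larger
 * than the per-call maximum is served in fixed-size pieces and the loop
 * stops early when a piece comes back short (no more packets available).
 * The process() callback is a placeholder for rx_recv_pkts().
 */
static inline uint16_t
example_chunked_request(uint16_t total, uint16_t max_chunk,
			uint16_t (*process)(uint16_t n))
{
	uint16_t done = 0;

	while (total) {
		uint16_t n = (uint16_t)RTE_MIN(total, max_chunk);
		uint16_t ret = process(n);

		done = (uint16_t)(done + ret);
		total = (uint16_t)(total - ret);
		if (ret < n)
			break;
	}
	return done;
}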
1627
1628 uint16_t
1629 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1630                 uint16_t nb_pkts)
1631 {
1632         struct ixgbe_rx_queue *rxq;
1633         volatile union ixgbe_adv_rx_desc *rx_ring;
1634         volatile union ixgbe_adv_rx_desc *rxdp;
1635         struct ixgbe_rx_entry *sw_ring;
1636         struct ixgbe_rx_entry *rxe;
1637         struct rte_mbuf *rxm;
1638         struct rte_mbuf *nmb;
1639         union ixgbe_adv_rx_desc rxd;
1640         uint64_t dma_addr;
1641         uint32_t staterr;
1642         uint32_t pkt_info;
1643         uint16_t pkt_len;
1644         uint16_t rx_id;
1645         uint16_t nb_rx;
1646         uint16_t nb_hold;
1647         uint64_t pkt_flags;
1648         uint64_t vlan_flags;
1649
1650         nb_rx = 0;
1651         nb_hold = 0;
1652         rxq = rx_queue;
1653         rx_id = rxq->rx_tail;
1654         rx_ring = rxq->rx_ring;
1655         sw_ring = rxq->sw_ring;
1656         vlan_flags = rxq->vlan_flags;
1657         while (nb_rx < nb_pkts) {
1658                 /*
1659                  * The order of operations here is important as the DD status
1660                  * bit must not be read after any other descriptor fields.
1661                  * rx_ring and rxdp are pointing to volatile data so the order
1662                  * of accesses cannot be reordered by the compiler. If they were
1663                  * not volatile, they could be reordered which could lead to
1664                  * using invalid descriptor fields when read from rxd.
1665                  */
1666                 rxdp = &rx_ring[rx_id];
1667                 staterr = rxdp->wb.upper.status_error;
1668                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1669                         break;
1670                 rxd = *rxdp;
1671
1672                 /*
1673                  * End of packet.
1674                  *
1675                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1676                  * is likely to be invalid and to be dropped by the various
1677                  * validation checks performed by the network stack.
1678                  *
1679                  * Allocate a new mbuf to replenish the RX ring descriptor.
1680                  * If the allocation fails:
1681                  *    - arrange for that RX descriptor to be the first one
1682                  *      being parsed the next time the receive function is
1683                  *      invoked [on the same queue].
1684                  *
1685                  *    - Stop parsing the RX ring and return immediately.
1686                  *
1687                  * This policy does not drop the packet received in the RX
1688                  * descriptor for which the allocation of a new mbuf failed.
1689                  * Thus, it allows that packet to be retrieved later if
1690                  * mbufs have been freed in the meantime.
1691                  * As a side effect, holding RX descriptors instead of
1692                  * systematically giving them back to the NIC may lead to
1693                  * RX ring exhaustion situations.
1694                  * However, the NIC can gracefully prevent such situations
1695                  * from happening by sending "back-pressure" flow control
1696                  * frames to its peer(s).
1697                  */
1698                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1699                            "ext_err_stat=0x%08x pkt_len=%u",
1700                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1701                            (unsigned) rx_id, (unsigned) staterr,
1702                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1703
1704                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1705                 if (nmb == NULL) {
1706                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1707                                    "queue_id=%u", (unsigned) rxq->port_id,
1708                                    (unsigned) rxq->queue_id);
1709                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1710                         break;
1711                 }
1712
1713                 nb_hold++;
1714                 rxe = &sw_ring[rx_id];
1715                 rx_id++;
1716                 if (rx_id == rxq->nb_rx_desc)
1717                         rx_id = 0;
1718
1719                 /* Prefetch next mbuf while processing current one. */
1720                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1721
1722                 /*
1723                  * When next RX descriptor is on a cache-line boundary,
1724                  * prefetch the next 4 RX descriptors and the next 8 pointers
1725                  * to mbufs.
1726                  */
1727                 if ((rx_id & 0x3) == 0) {
1728                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1729                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1730                 }
1731
1732                 rxm = rxe->mbuf;
1733                 rxe->mbuf = nmb;
1734                 dma_addr =
1735                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1736                 rxdp->read.hdr_addr = 0;
1737                 rxdp->read.pkt_addr = dma_addr;
1738
1739                 /*
1740                  * Initialize the returned mbuf.
1741                  * 1) setup generic mbuf fields:
1742                  *    - number of segments,
1743                  *    - next segment,
1744                  *    - packet length,
1745                  *    - RX port identifier.
1746                  * 2) integrate hardware offload data, if any:
1747                  *    - RSS flag & hash,
1748                  *    - IP checksum flag,
1749                  *    - VLAN TCI, if any,
1750                  *    - error flags.
1751                  */
1752                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1753                                       rxq->crc_len);
1754                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1755                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1756                 rxm->nb_segs = 1;
1757                 rxm->next = NULL;
1758                 rxm->pkt_len = pkt_len;
1759                 rxm->data_len = pkt_len;
1760                 rxm->port = rxq->port_id;
1761
1762                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1763                 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
1764                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1765
1766                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1767                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1768                 pkt_flags = pkt_flags |
1769                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1770                 rxm->ol_flags = pkt_flags;
1771                 rxm->packet_type =
1772                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1773                                                        rxq->pkt_type_mask);
1774
1775                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1776                         rxm->hash.rss = rte_le_to_cpu_32(
1777                                                 rxd.wb.lower.hi_dword.rss);
1778                 else if (pkt_flags & PKT_RX_FDIR) {
1779                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1780                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1781                                         IXGBE_ATR_HASH_MASK;
1782                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1783                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1784                 }
1785                 /*
1786                  * Store the mbuf address into the next entry of the array
1787                  * of returned packets.
1788                  */
1789                 rx_pkts[nb_rx++] = rxm;
1790         }
1791         rxq->rx_tail = rx_id;
1792
1793         /*
1794          * If the number of free RX descriptors is greater than the RX free
1795          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1796          * register.
1797          * Update the RDT with the value of the last processed RX descriptor
1798          * minus 1, to guarantee that the RDT register is never equal to the
1799          * RDH register, which creates a "full" ring situtation from the
1800          * hardware point of view...
1801          */
1802         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1803         if (nb_hold > rxq->rx_free_thresh) {
1804                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1805                            "nb_hold=%u nb_rx=%u",
1806                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1807                            (unsigned) rx_id, (unsigned) nb_hold,
1808                            (unsigned) nb_rx);
1809                 rx_id = (uint16_t) ((rx_id == 0) ?
1810                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1811                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1812                 nb_hold = 0;
1813         }
1814         rxq->nb_rx_hold = nb_hold;
1815         return nb_rx;
1816 }
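
/*
 * The RDT update rule above, stated on its own: the tail written to the
 * hardware is always one descriptor behind the last one processed, so the
 * RDT register can never catch up with RDH and the ring is never reported
 * as "full" by the NIC.
 */
static inline uint16_t
example_rdt_value(uint16_t rx_id, uint16_t nb_rx_desc)
{
	return (uint16_t)((rx_id == 0) ? (nb_rx_desc - 1) : (rx_id - 1));
}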
1817
1818 /**
1819  * Detect an RSC descriptor.
1820  */
1821 static inline uint32_t
1822 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1823 {
1824         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1825                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1826 }
1827
1828 /**
1829  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1830  *
1831  * Fill the following info in the HEAD buffer of the Rx cluster:
1832  *    - RX port identifier
1833  *    - hardware offload data, if any:
1834  *      - RSS flag & hash
1835  *      - IP checksum flag
1836  *      - VLAN TCI, if any
1837  *      - error flags
1838  * @head HEAD of the packet cluster
1839  * @desc HW descriptor to get data from
1840  * @rxq Pointer to the Rx queue
1841  */
1842 static inline void
1843 ixgbe_fill_cluster_head_buf(
1844         struct rte_mbuf *head,
1845         union ixgbe_adv_rx_desc *desc,
1846         struct ixgbe_rx_queue *rxq,
1847         uint32_t staterr)
1848 {
1849         uint32_t pkt_info;
1850         uint64_t pkt_flags;
1851
1852         head->port = rxq->port_id;
1853
1854         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1855          * set in the pkt_flags field.
1856          */
1857         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1858         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1859         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1860         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1861         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1862         head->ol_flags = pkt_flags;
1863         head->packet_type =
1864                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1865
1866         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1867                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1868         else if (pkt_flags & PKT_RX_FDIR) {
1869                 head->hash.fdir.hash =
1870                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1871                                                           & IXGBE_ATR_HASH_MASK;
1872                 head->hash.fdir.id =
1873                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1874         }
1875 }
1876
1877 /**
1878  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1879  *
1880  * @rx_queue Rx queue handle
1881  * @rx_pkts table of received packets
1882  * @nb_pkts size of rx_pkts table
1883  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1884  *
1885  * Handles the Rx HW ring completions when the RSC feature is configured. Uses an
1886  * additional ring of ixgbe_scattered_rx_entry's that holds the relevant RSC info.
1887  *
1888  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1889  * 1) When non-EOP RSC completion arrives:
1890  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1891  *       segment's data length.
1892  *    b) Set the "next" pointer of the current segment to point to the segment
1893  *       at the NEXTP index.
1894  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1895  *       in the sw_sc_ring.
1896  * 2) When EOP arrives we just update the cluster's total length and offload
1897  *    flags and deliver the cluster up to the upper layers. In our case - put it
1898  *    in the rx_pkts table.
1899  *
1900  * Returns the number of received packets/clusters (according to the "bulk
1901  * receive" interface).
1902  */
1903 static inline uint16_t
1904 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1905                     bool bulk_alloc)
1906 {
1907         struct ixgbe_rx_queue *rxq = rx_queue;
1908         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1909         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1910         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1911         uint16_t rx_id = rxq->rx_tail;
1912         uint16_t nb_rx = 0;
1913         uint16_t nb_hold = rxq->nb_rx_hold;
1914         uint16_t prev_id = rxq->rx_tail;
1915
1916         while (nb_rx < nb_pkts) {
1917                 bool eop;
1918                 struct ixgbe_rx_entry *rxe;
1919                 struct ixgbe_scattered_rx_entry *sc_entry;
1920                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1921                 struct ixgbe_rx_entry *next_rxe = NULL;
1922                 struct rte_mbuf *first_seg;
1923                 struct rte_mbuf *rxm;
1924                 struct rte_mbuf *nmb;
1925                 union ixgbe_adv_rx_desc rxd;
1926                 uint16_t data_len;
1927                 uint16_t next_id;
1928                 volatile union ixgbe_adv_rx_desc *rxdp;
1929                 uint32_t staterr;
1930
1931 next_desc:
1932                 /*
1933                  * The code in this whole file uses the volatile pointer to
1934                  * ensure the read ordering of the status and the rest of the
1935                  * descriptor fields (on the compiler level only!!!). This is so
1936                  * UGLY - why not to just use the compiler barrier instead? DPDK
1937                  * even has the rte_compiler_barrier() for that.
1938                  *
1939                  * But most importantly this is just wrong because this doesn't
1940                  * ensure memory ordering in a general case at all. For
1941                  * instance, DPDK is supposed to work on Power CPUs where
1942                  * compiler barrier may just not be enough!
1943                  *
1944                  * I tried to write only this function properly to have a
1945                  * starting point (as a part of an LRO/RSC series) but the
1946                  * compiler cursed at me when I tried to cast away the
1947                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
1948                  * keeping it the way it is for now.
1949                  *
1950                  * The code in this file is broken in so many other places and
1951                  * will just not work on a big endian CPU anyway therefore the
1952                  * lines below will have to be revisited together with the rest
1953                  * of the ixgbe PMD.
1954                  *
1955                  * TODO:
1956                  *    - Get rid of "volatile" crap and let the compiler do its
1957                  *      job.
1958                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
1959                  *      memory ordering below.
1960                  */
1961                 rxdp = &rx_ring[rx_id];
1962                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
1963
1964                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
1965                         break;
1966
1967                 rxd = *rxdp;
1968
1969                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1970                                   "staterr=0x%x data_len=%u",
1971                            rxq->port_id, rxq->queue_id, rx_id, staterr,
1972                            rte_le_to_cpu_16(rxd.wb.upper.length));
1973
1974                 if (!bulk_alloc) {
1975                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1976                         if (nmb == NULL) {
1977                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1978                                                   "port_id=%u queue_id=%u",
1979                                            rxq->port_id, rxq->queue_id);
1980
1981                                 rte_eth_devices[rxq->port_id].data->
1982                                                         rx_mbuf_alloc_failed++;
1983                                 break;
1984                         }
1985                 } else if (nb_hold > rxq->rx_free_thresh) {
1986                         uint16_t next_rdt = rxq->rx_free_trigger;
1987
1988                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
1989                                 rte_wmb();
1990                                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
1991                                                     next_rdt);
1992                                 nb_hold -= rxq->rx_free_thresh;
1993                         } else {
1994                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1995                                                   "port_id=%u queue_id=%u",
1996                                            rxq->port_id, rxq->queue_id);
1997
1998                                 rte_eth_devices[rxq->port_id].data->
1999                                                         rx_mbuf_alloc_failed++;
2000                                 break;
2001                         }
2002                 }
2003
2004                 nb_hold++;
2005                 rxe = &sw_ring[rx_id];
2006                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2007
2008                 next_id = rx_id + 1;
2009                 if (next_id == rxq->nb_rx_desc)
2010                         next_id = 0;
2011
2012                 /* Prefetch next mbuf while processing current one. */
2013                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2014
2015                 /*
2016                  * When next RX descriptor is on a cache-line boundary,
2017                  * prefetch the next 4 RX descriptors and the next 8 pointers
2018                  * to mbufs.
2019                  */
2020                 if ((next_id & 0x3) == 0) {
2021                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2022                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2023                 }
2024
2025                 rxm = rxe->mbuf;
2026
2027                 if (!bulk_alloc) {
2028                         __le64 dma =
2029                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2030                         /*
2031                          * Update RX descriptor with the physical address of the
2032                          * new data buffer of the new allocated mbuf.
2033                          */
2034                         rxe->mbuf = nmb;
2035
2036                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2037                         rxdp->read.hdr_addr = 0;
2038                         rxdp->read.pkt_addr = dma;
2039                 } else
2040                         rxe->mbuf = NULL;
2041
2042                 /*
2043                  * Set data length & data buffer address of mbuf.
2044                  */
2045                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2046                 rxm->data_len = data_len;
2047
2048                 if (!eop) {
2049                         uint16_t nextp_id;
2050                         /*
2051                          * Get next descriptor index:
2052                          *  - For RSC it's in the NEXTP field.
2053                          *  - For a scattered packet - it's just a following
2054                          *    descriptor.
2055                          */
2056                         if (ixgbe_rsc_count(&rxd))
2057                                 nextp_id =
2058                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2059                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2060                         else
2061                                 nextp_id = next_id;
2062
2063                         next_sc_entry = &sw_sc_ring[nextp_id];
2064                         next_rxe = &sw_ring[nextp_id];
2065                         rte_ixgbe_prefetch(next_rxe);
2066                 }
2067
2068                 sc_entry = &sw_sc_ring[rx_id];
2069                 first_seg = sc_entry->fbuf;
2070                 sc_entry->fbuf = NULL;
2071
2072                 /*
2073                  * If this is the first buffer of the received packet,
2074                  * set the pointer to the first mbuf of the packet and
2075                  * initialize its context.
2076                  * Otherwise, update the total length and the number of segments
2077                  * of the current scattered packet, and update the pointer to
2078                  * the last mbuf of the current packet.
2079                  */
2080                 if (first_seg == NULL) {
2081                         first_seg = rxm;
2082                         first_seg->pkt_len = data_len;
2083                         first_seg->nb_segs = 1;
2084                 } else {
2085                         first_seg->pkt_len += data_len;
2086                         first_seg->nb_segs++;
2087                 }
2088
2089                 prev_id = rx_id;
2090                 rx_id = next_id;
2091
2092                 /*
2093                  * If this is not the last buffer of the received packet, update
2094                  * the pointer to the first mbuf at the NEXTP entry in the
2095                  * sw_sc_ring and continue to parse the RX ring.
2096                  */
2097                 if (!eop && next_rxe) {
2098                         rxm->next = next_rxe->mbuf;
2099                         next_sc_entry->fbuf = first_seg;
2100                         goto next_desc;
2101                 }
2102
2103                 /*
2104                  * This is the last buffer of the received packet - return
2105                  * the current cluster to the user.
2106                  */
2107                 rxm->next = NULL;
2108
2109                 /* Initialize the first mbuf of the returned packet */
2110                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2111
2112                 /*
2113                  * Deal with the case when HW CRC stripping is disabled.
2114                  * That can't happen when LRO is enabled, but still could
2115                  * happen for scattered RX mode.
2116                  */
2117                 first_seg->pkt_len -= rxq->crc_len;
2118                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2119                         struct rte_mbuf *lp;
2120
2121                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2122                                 ;
2123
2124                         first_seg->nb_segs--;
2125                         lp->data_len -= rxq->crc_len - rxm->data_len;
2126                         lp->next = NULL;
2127                         rte_pktmbuf_free_seg(rxm);
2128                 } else
2129                         rxm->data_len -= rxq->crc_len;
2130
2131                 /* Prefetch data of first segment, if configured to do so. */
2132                 rte_packet_prefetch((char *)first_seg->buf_addr +
2133                         first_seg->data_off);
2134
2135                 /*
2136                  * Store the mbuf address into the next entry of the array
2137                  * of returned packets.
2138                  */
2139                 rx_pkts[nb_rx++] = first_seg;
2140         }
2141
2142         /*
2143          * Record index of the next RX descriptor to probe.
2144          */
2145         rxq->rx_tail = rx_id;
2146
2147         /*
2148          * If the number of free RX descriptors is greater than the RX free
2149          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2150          * register.
2151          * Update the RDT with the value of the last processed RX descriptor
2152          * minus 1, to guarantee that the RDT register is never equal to the
2153          * RDH register, which creates a "full" ring situation from the
2154          * hardware point of view...
2155          */
2156         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2157                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2158                            "nb_hold=%u nb_rx=%u",
2159                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2160
2161                 rte_wmb();
2162                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
2163                 nb_hold = 0;
2164         }
2165
2166         rxq->nb_rx_hold = nb_hold;
2167         return nb_rx;
2168 }
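
/*
 * The cluster-accumulation step of the RSC path above, in isolation: a
 * segment either starts a new cluster (becoming its HEAD) or adds its
 * length and segment count to an existing HEAD.  Chaining rxm->next and
 * passing the HEAD on through the NEXTP entry are left to the caller, as
 * in ixgbe_recv_pkts_lro().
 */
static inline struct rte_mbuf *
example_rsc_accumulate(struct rte_mbuf *head, struct rte_mbuf *seg,
		       uint16_t data_len)
{
	if (head == NULL) {
		head = seg;
		head->pkt_len = data_len;
		head->nb_segs = 1;
	} else {
		head->pkt_len += data_len;
		head->nb_segs++;
	}
	return head;
}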
2169
2170 uint16_t
2171 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2172                                  uint16_t nb_pkts)
2173 {
2174         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2175 }
2176
2177 uint16_t
2178 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2179                                uint16_t nb_pkts)
2180 {
2181         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2182 }
2183
2184 /*********************************************************************
2185  *
2186  *  Queue management functions
2187  *
2188  **********************************************************************/
2189
2190 static void __attribute__((cold))
2191 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2192 {
2193         unsigned i;
2194
2195         if (txq->sw_ring != NULL) {
2196                 for (i = 0; i < txq->nb_tx_desc; i++) {
2197                         if (txq->sw_ring[i].mbuf != NULL) {
2198                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2199                                 txq->sw_ring[i].mbuf = NULL;
2200                         }
2201                 }
2202         }
2203 }
2204
2205 static void __attribute__((cold))
2206 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2207 {
2208         if (txq != NULL &&
2209             txq->sw_ring != NULL)
2210                 rte_free(txq->sw_ring);
2211 }
2212
2213 static void __attribute__((cold))
2214 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2215 {
2216         if (txq != NULL && txq->ops != NULL) {
2217                 txq->ops->release_mbufs(txq);
2218                 txq->ops->free_swring(txq);
2219                 rte_free(txq);
2220         }
2221 }
2222
2223 void __attribute__((cold))
2224 ixgbe_dev_tx_queue_release(void *txq)
2225 {
2226         ixgbe_tx_queue_release(txq);
2227 }
2228
2229 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2230 static void __attribute__((cold))
2231 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2232 {
2233         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2234         struct ixgbe_tx_entry *txe = txq->sw_ring;
2235         uint16_t prev, i;
2236
2237         /* Zero out HW ring memory */
2238         for (i = 0; i < txq->nb_tx_desc; i++) {
2239                 txq->tx_ring[i] = zeroed_desc;
2240         }
2241
2242         /* Initialize SW ring entries */
2243         prev = (uint16_t) (txq->nb_tx_desc - 1);
2244         for (i = 0; i < txq->nb_tx_desc; i++) {
2245                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2246
2247                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2248                 txe[i].mbuf = NULL;
2249                 txe[i].last_id = i;
2250                 txe[prev].next_id = i;
2251                 prev = i;
2252         }
2253
2254         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2255         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2256
2257         txq->tx_tail = 0;
2258         txq->nb_tx_used = 0;
2259         /*
2260          * Always allow one descriptor to remain unallocated to avoid
2261          * an H/W race condition.
2262          */
2263         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2264         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2265         txq->ctx_curr = 0;
2266         memset((void *)&txq->ctx_cache, 0,
2267                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2268 }
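
/*
 * The sw_ring linking pattern above, shown on a bare index array: each
 * entry's next index points at the following slot and the last entry wraps
 * back to slot 0, forming the circular chain that the Tx cleanup code
 * follows.
 */
static inline void
example_link_ring(uint16_t *next_id, uint16_t nb_desc)
{
	uint16_t prev = (uint16_t)(nb_desc - 1);
	uint16_t i;

	for (i = 0; i < nb_desc; i++) {
		next_id[prev] = i;
		prev = i;
	}
}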
2269
2270 static const struct ixgbe_txq_ops def_txq_ops = {
2271         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2272         .free_swring = ixgbe_tx_free_swring,
2273         .reset = ixgbe_reset_tx_queue,
2274 };
2275
2276 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2277  * the queue parameters. Used in tx_queue_setup by primary process and then
2278  * in dev_init by secondary process when attaching to an existing ethdev.
2279  */
2280 void __attribute__((cold))
2281 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2282 {
2283         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2284         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2285                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2286                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2287 #ifdef RTE_IXGBE_INC_VECTOR
2288                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2289                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2290                                         ixgbe_txq_vec_setup(txq) == 0)) {
2291                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2292                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2293                 } else
2294 #endif
2295                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2296         } else {
2297                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2298                 PMD_INIT_LOG(DEBUG,
2299                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2300                                 (unsigned long)txq->txq_flags,
2301                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2302                 PMD_INIT_LOG(DEBUG,
2303                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2304                                 (unsigned long)txq->tx_rs_thresh,
2305                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2306                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2307         }
2308 }
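
/*
 * The selection rule above as a standalone predicate: the simple Tx path is
 * eligible only when the queue requests no offloads beyond
 * IXGBE_SIMPLE_FLAGS and its RS threshold is at least the maximum burst
 * size handled by that path.  The vector path additionally requires
 * tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ, as checked above.
 */
static inline int
example_simple_tx_allowed(uint32_t txq_flags, uint16_t tx_rs_thresh)
{
	return (txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS &&
		tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST;
}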
2309
2310 int __attribute__((cold))
2311 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2312                          uint16_t queue_idx,
2313                          uint16_t nb_desc,
2314                          unsigned int socket_id,
2315                          const struct rte_eth_txconf *tx_conf)
2316 {
2317         const struct rte_memzone *tz;
2318         struct ixgbe_tx_queue *txq;
2319         struct ixgbe_hw     *hw;
2320         uint16_t tx_rs_thresh, tx_free_thresh;
2321
2322         PMD_INIT_FUNC_TRACE();
2323         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2324
2325         /*
2326          * Validate number of transmit descriptors.
2327          * It must not exceed hardware maximum, and must be multiple
2328          * of IXGBE_ALIGN.
2329          */
2330         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2331                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2332                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2333                 return -EINVAL;
2334         }
2335
2336         /*
2337          * The following two parameters control the setting of the RS bit on
2338          * transmit descriptors.
2339          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2340          * descriptors have been used.
2341          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2342          * descriptors are used or if the number of descriptors required
2343          * to transmit a packet is greater than the number of free TX
2344          * descriptors.
2345          * The following constraints must be satisfied:
2346          *  tx_rs_thresh must be greater than 0.
2347          *  tx_rs_thresh must be less than the size of the ring minus 2.
2348          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2349          *  tx_rs_thresh must be a divisor of the ring size.
2350          *  tx_free_thresh must be greater than 0.
2351          *  tx_free_thresh must be less than the size of the ring minus 3.
2352          * One descriptor in the TX ring is used as a sentinel to avoid a
2353          * H/W race condition, hence the maximum threshold constraints.
2354          * When set to zero use default values.
2355          */
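        /*
         * Worked example: with, e.g., nb_desc = 512, tx_rs_thresh = 32 and
         * tx_free_thresh = 32, every constraint above holds:
         * 32 > 0, 32 < 510, 32 <= 32, 512 % 32 == 0, 32 > 0 and 32 < 509.
         */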
2356         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2357                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2358         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2359                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2360         if (tx_rs_thresh >= (nb_desc - 2)) {
2361                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2362                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2363                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2364                         (int)dev->data->port_id, (int)queue_idx);
2365                 return -(EINVAL);
2366         }
2367         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2368                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less or equal than %u. "
2369                         "(tx_rs_thresh=%u port=%d queue=%d)",
2370                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2371                         (int)dev->data->port_id, (int)queue_idx);
2372                 return -(EINVAL);
2373         }
2374         if (tx_free_thresh >= (nb_desc - 3)) {
2375                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2376                              "number of TX descriptors minus 3. "
2377                              "(tx_free_thresh=%u "
2378                              "port=%d queue=%d)",
2379                              (unsigned int)tx_free_thresh,
2380                              (int)dev->data->port_id, (int)queue_idx);
2381                 return -(EINVAL);
2382         }
2383         if (tx_rs_thresh > tx_free_thresh) {
2384                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2385                              "tx_free_thresh. (tx_free_thresh=%u "
2386                              "tx_rs_thresh=%u port=%d queue=%d)",
2387                              (unsigned int)tx_free_thresh,
2388                              (unsigned int)tx_rs_thresh,
2389                              (int)dev->data->port_id,
2390                              (int)queue_idx);
2391                 return -(EINVAL);
2392         }
2393         if ((nb_desc % tx_rs_thresh) != 0) {
2394                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2395                              "number of TX descriptors. (tx_rs_thresh=%u "
2396                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2397                              (int)dev->data->port_id, (int)queue_idx);
2398                 return -(EINVAL);
2399         }
2400
2401         /*
2402          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2403          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2404          * by the NIC and all descriptors are written back after the NIC
2405          * accumulates WTHRESH descriptors.
2406          */
2407         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2408                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2409                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2410                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2411                              (int)dev->data->port_id, (int)queue_idx);
2412                 return -(EINVAL);
2413         }
2414
2415         /* Free memory prior to re-allocation if needed... */
2416         if (dev->data->tx_queues[queue_idx] != NULL) {
2417                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2418                 dev->data->tx_queues[queue_idx] = NULL;
2419         }
2420
2421         /* First allocate the tx queue data structure */
2422         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2423                                  RTE_CACHE_LINE_SIZE, socket_id);
2424         if (txq == NULL)
2425                 return -ENOMEM;
2426
2427         /*
2428          * Allocate TX ring hardware descriptors. A memzone large enough to
2429          * handle the maximum ring size is allocated in order to allow for
2430          * resizing in later calls to the queue setup function.
2431          */
2432         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2433                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2434                         IXGBE_ALIGN, socket_id);
2435         if (tz == NULL) {
2436                 ixgbe_tx_queue_release(txq);
2437                 return -ENOMEM;
2438         }
2439
2440         txq->nb_tx_desc = nb_desc;
2441         txq->tx_rs_thresh = tx_rs_thresh;
2442         txq->tx_free_thresh = tx_free_thresh;
2443         txq->pthresh = tx_conf->tx_thresh.pthresh;
2444         txq->hthresh = tx_conf->tx_thresh.hthresh;
2445         txq->wthresh = tx_conf->tx_thresh.wthresh;
2446         txq->queue_id = queue_idx;
2447         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2448                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2449         txq->port_id = dev->data->port_id;
2450         txq->txq_flags = tx_conf->txq_flags;
2451         txq->ops = &def_txq_ops;
2452         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2453
2454         /*
2455          * Use VFTDT as the TX tail register when running as a virtual function.
2456          */
2457         if (hw->mac.type == ixgbe_mac_82599_vf ||
2458             hw->mac.type == ixgbe_mac_X540_vf ||
2459             hw->mac.type == ixgbe_mac_X550_vf ||
2460             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2461             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2462                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2463         else
2464                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2465
2466         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2467         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2468
2469         /* Allocate software ring */
2470         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2471                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2472                                 RTE_CACHE_LINE_SIZE, socket_id);
2473         if (txq->sw_ring == NULL) {
2474                 ixgbe_tx_queue_release(txq);
2475                 return -ENOMEM;
2476         }
2477         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2478                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2479
2480         /* set up vector or scalar TX function as appropriate */
2481         ixgbe_set_tx_function(dev, txq);
2482
2483         txq->ops->reset(txq);
2484
2485         dev->data->tx_queues[queue_idx] = txq;
2486
2487
2488         return 0;
2489 }
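
/*
 * Usage sketch (illustrative only; port_id, descriptor count and thresholds
 * are assumptions): an application normally reaches this function through the
 * generic ethdev API, e.g.:
 *
 *     struct rte_eth_txconf txconf = {
 *             .tx_rs_thresh = 32,
 *             .tx_free_thresh = 32,
 *     };
 *
 *     if (rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *                                &txconf) != 0)
 *             rte_exit(EXIT_FAILURE, "Cannot set up TX queue\n");
 */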
2490
2491 /**
2492  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2493  *
2494  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2495  * in the sw_rsc_ring is not set to NULL but rather points to the next
2496  * mbuf of this RSC aggregation (that has not been completed yet and still
2497  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2498  * just free the first "nb_segs" segments of the cluster explicitly by calling
2499  * rte_pktmbuf_free_seg() on each of them.
2500  *
2501  * @m scattered cluster head
2502  */
2503 static void __attribute__((cold))
2504 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2505 {
2506         uint8_t i, nb_segs = m->nb_segs;
2507         struct rte_mbuf *next_seg;
2508
2509         for (i = 0; i < nb_segs; i++) {
2510                 next_seg = m->next;
2511                 rte_pktmbuf_free_seg(m);
2512                 m = next_seg;
2513         }
2514 }
2515
2516 static void __attribute__((cold))
2517 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2518 {
2519         unsigned i;
2520
2521 #ifdef RTE_IXGBE_INC_VECTOR
2522         /* SSE Vector driver has a different way of releasing mbufs. */
2523         if (rxq->rx_using_sse) {
2524                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2525                 return;
2526         }
2527 #endif
2528
2529         if (rxq->sw_ring != NULL) {
2530                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2531                         if (rxq->sw_ring[i].mbuf != NULL) {
2532                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2533                                 rxq->sw_ring[i].mbuf = NULL;
2534                         }
2535                 }
2536                 if (rxq->rx_nb_avail) {
2537                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2538                                 struct rte_mbuf *mb;
2539
2540                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2541                                 rte_pktmbuf_free_seg(mb);
2542                         }
2543                         rxq->rx_nb_avail = 0;
2544                 }
2545         }
2546
2547         if (rxq->sw_sc_ring)
2548                 for (i = 0; i < rxq->nb_rx_desc; i++)
2549                         if (rxq->sw_sc_ring[i].fbuf) {
2550                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2551                                 rxq->sw_sc_ring[i].fbuf = NULL;
2552                         }
2553 }
2554
2555 static void __attribute__((cold))
2556 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2557 {
2558         if (rxq != NULL) {
2559                 ixgbe_rx_queue_release_mbufs(rxq);
2560                 rte_free(rxq->sw_ring);
2561                 rte_free(rxq->sw_sc_ring);
2562                 rte_free(rxq);
2563         }
2564 }
2565
2566 void __attribute__((cold))
2567 ixgbe_dev_rx_queue_release(void *rxq)
2568 {
2569         ixgbe_rx_queue_release(rxq);
2570 }
2571
2572 /*
2573  * Check if Rx Burst Bulk Alloc function can be used.
2574  * Return
2575  *        0: the preconditions are satisfied and the bulk allocation function
2576  *           can be used.
2577  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2578  *           function must be used.
2579  */
2580 static inline int __attribute__((cold))
2581 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2582 {
2583         int ret = 0;
2584
2585         /*
2586          * Make sure the following pre-conditions are satisfied:
2587          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2588          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2589          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2590          *   rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST)
2591          * Scattered packets are not supported.  This should be checked
2592          * outside of this function.
2593          */
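        /*
         * For example (a sketch, assuming RTE_PMD_IXGBE_RX_MAX_BURST is 32
         * and IXGBE_MAX_RING_DESC is 4096): rx_free_thresh = 32 together with
         * nb_rx_desc = 512 satisfies every condition, since 32 >= 32,
         * 32 < 512, 512 % 32 == 0 and 512 < 4096 - 32.
         */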
2594         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2595                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2596                              "rxq->rx_free_thresh=%d, "
2597                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2598                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2599                 ret = -EINVAL;
2600         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2601                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2602                              "rxq->rx_free_thresh=%d, "
2603                              "rxq->nb_rx_desc=%d",
2604                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2605                 ret = -EINVAL;
2606         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2607                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2608                              "rxq->nb_rx_desc=%d, "
2609                              "rxq->rx_free_thresh=%d",
2610                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2611                 ret = -EINVAL;
2612         } else if (!(rxq->nb_rx_desc <
2613                (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
2614                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2615                              "rxq->nb_rx_desc=%d, "
2616                              "IXGBE_MAX_RING_DESC=%d, "
2617                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2618                              rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
2619                              RTE_PMD_IXGBE_RX_MAX_BURST);
2620                 ret = -EINVAL;
2621         }
2622
2623         return ret;
2624 }
2625
2626 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2627 static void __attribute__((cold))
2628 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2629 {
2630         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2631         unsigned i;
2632         uint16_t len = rxq->nb_rx_desc;
2633
2634         /*
2635          * By default, the Rx queue setup function allocates enough memory for
2636          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2637          * extra memory at the end of the descriptor ring to be zero'd out. A
2638          * pre-condition for using the Rx burst bulk alloc function is that the
2639          * number of descriptors is less than or equal to
2640          * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2641          * constraints here to see if we need to zero out memory after the end
2642          * of the H/W descriptor ring.
2643          */
2644         if (adapter->rx_bulk_alloc_allowed)
2645                 /* zero out extra memory */
2646                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2647
2648         /*
2649          * Zero out HW ring memory. Zero out extra memory at the end of
2650          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2651          * reads extra memory as zeros.
2652          */
2653         for (i = 0; i < len; i++) {
2654                 rxq->rx_ring[i] = zeroed_desc;
2655         }
2656
2657         /*
2658          * initialize extra software ring entries. Space for these extra
2659          * entries is always allocated
2660          */
2661         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2662         for (i = rxq->nb_rx_desc; i < len; ++i) {
2663                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2664         }
2665
2666         rxq->rx_nb_avail = 0;
2667         rxq->rx_next_avail = 0;
2668         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2669         rxq->rx_tail = 0;
2670         rxq->nb_rx_hold = 0;
2671         rxq->pkt_first_seg = NULL;
2672         rxq->pkt_last_seg = NULL;
2673
2674 #ifdef RTE_IXGBE_INC_VECTOR
2675         rxq->rxrearm_start = 0;
2676         rxq->rxrearm_nb = 0;
2677 #endif
2678 }
2679
2680 int __attribute__((cold))
2681 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2682                          uint16_t queue_idx,
2683                          uint16_t nb_desc,
2684                          unsigned int socket_id,
2685                          const struct rte_eth_rxconf *rx_conf,
2686                          struct rte_mempool *mp)
2687 {
2688         const struct rte_memzone *rz;
2689         struct ixgbe_rx_queue *rxq;
2690         struct ixgbe_hw     *hw;
2691         uint16_t len;
2692         struct ixgbe_adapter *adapter =
2693                 (struct ixgbe_adapter *)dev->data->dev_private;
2694
2695         PMD_INIT_FUNC_TRACE();
2696         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2697
2698         /*
2699          * Validate number of receive descriptors.
2700          * It must not exceed hardware maximum, and must be multiple
2701          * of IXGBE_ALIGN.
2702          */
2703         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2704                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2705                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2706                 return -EINVAL;
2707         }
2708
2709         /* Free memory prior to re-allocation if needed... */
2710         if (dev->data->rx_queues[queue_idx] != NULL) {
2711                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2712                 dev->data->rx_queues[queue_idx] = NULL;
2713         }
2714
2715         /* First allocate the rx queue data structure */
2716         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2717                                  RTE_CACHE_LINE_SIZE, socket_id);
2718         if (rxq == NULL)
2719                 return -ENOMEM;
2720         rxq->mb_pool = mp;
2721         rxq->nb_rx_desc = nb_desc;
2722         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2723         rxq->queue_id = queue_idx;
2724         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2725                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2726         rxq->port_id = dev->data->port_id;
2727         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2728                                                         0 : ETHER_CRC_LEN);
2729         rxq->drop_en = rx_conf->rx_drop_en;
2730         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2731
2732         /*
2733          * The packet type in the RX descriptor differs between NICs.
2734          * Some bits are used by x550 but reserved on other NICs,
2735          * so set a different mask for each NIC type.
2736          */
2737         if (hw->mac.type == ixgbe_mac_X550 ||
2738             hw->mac.type == ixgbe_mac_X550EM_x ||
2739             hw->mac.type == ixgbe_mac_X550EM_a ||
2740             hw->mac.type == ixgbe_mac_X550_vf ||
2741             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2742             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2743                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2744         else
2745                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2746
2747         /*
2748          * Allocate RX ring hardware descriptors. A memzone large enough to
2749          * handle the maximum ring size is allocated in order to allow for
2750          * resizing in later calls to the queue setup function.
2751          */
2752         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2753                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2754         if (rz == NULL) {
2755                 ixgbe_rx_queue_release(rxq);
2756                 return -ENOMEM;
2757         }
2758
2759         /*
2760          * Zero init all the descriptors in the ring.
2761          */
2762         memset(rz->addr, 0, RX_RING_SZ);
2763
2764         /*
2765          * Use VFRDT/VFRDH as RX tail/head registers when running as a VF.
2766          */
2767         if (hw->mac.type == ixgbe_mac_82599_vf ||
2768             hw->mac.type == ixgbe_mac_X540_vf ||
2769             hw->mac.type == ixgbe_mac_X550_vf ||
2770             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2771             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2772                 rxq->rdt_reg_addr =
2773                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2774                 rxq->rdh_reg_addr =
2775                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2776         } else {
2777                 rxq->rdt_reg_addr =
2778                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2779                 rxq->rdh_reg_addr =
2780                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2781         }
2782
2783         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2784         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2785
2786         /*
2787          * Certain constraints must be met in order to use the bulk buffer
2788          * allocation Rx burst function. If any of Rx queues doesn't meet them
2789          * the feature should be disabled for the whole port.
2790          */
2791         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2792                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2793                                     "preconditions - canceling the feature for "
2794                                     "the whole port[%d]",
2795                              rxq->queue_id, rxq->port_id);
2796                 adapter->rx_bulk_alloc_allowed = false;
2797         }
2798
2799         /*
2800          * Allocate software ring. Allow for space at the end of the
2801          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2802          * function does not access an invalid memory region.
2803          */
2804         len = nb_desc;
2805         if (adapter->rx_bulk_alloc_allowed)
2806                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2807
2808         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2809                                           sizeof(struct ixgbe_rx_entry) * len,
2810                                           RTE_CACHE_LINE_SIZE, socket_id);
2811         if (!rxq->sw_ring) {
2812                 ixgbe_rx_queue_release(rxq);
2813                 return -ENOMEM;
2814         }
2815
2816         /*
2817          * Always allocate even if it's not going to be needed in order to
2818          * simplify the code.
2819          *
2820          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2821          * be requested in ixgbe_dev_rx_init(), which is called later from
2822          * dev_start() flow.
2823          */
2824         rxq->sw_sc_ring =
2825                 rte_zmalloc_socket("rxq->sw_sc_ring",
2826                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2827                                    RTE_CACHE_LINE_SIZE, socket_id);
2828         if (!rxq->sw_sc_ring) {
2829                 ixgbe_rx_queue_release(rxq);
2830                 return -ENOMEM;
2831         }
2832
2833         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2834                             "dma_addr=0x%"PRIx64,
2835                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2836                      rxq->rx_ring_phys_addr);
2837
2838         if (!rte_is_power_of_2(nb_desc)) {
2839                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2840                                     "preconditions - canceling the feature for "
2841                                     "the whole port[%d]",
2842                              rxq->queue_id, rxq->port_id);
2843                 adapter->rx_vec_allowed = false;
2844         } else
2845                 ixgbe_rxq_vec_setup(rxq);
2846
2847         dev->data->rx_queues[queue_idx] = rxq;
2848
2849         ixgbe_reset_rx_queue(adapter, rxq);
2850
2851         return 0;
2852 }
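
/*
 * Usage sketch (illustrative only; pool name, sizes and port_id are
 * assumptions): the RX queue setup is normally driven through the generic
 * ethdev API with a pre-created mbuf pool, e.g.:
 *
 *     struct rte_mempool *mb_pool = rte_pktmbuf_pool_create("rx_pool",
 *             8192, 256, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
 *
 *     if (mb_pool == NULL ||
 *         rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *                                NULL, mb_pool) != 0)
 *             rte_exit(EXIT_FAILURE, "Cannot set up RX queue\n");
 */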
2853
2854 uint32_t
2855 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2856 {
2857 #define IXGBE_RXQ_SCAN_INTERVAL 4
2858         volatile union ixgbe_adv_rx_desc *rxdp;
2859         struct ixgbe_rx_queue *rxq;
2860         uint32_t desc = 0;
2861
2862         if (rx_queue_id >= dev->data->nb_rx_queues) {
2863                 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2864                 return 0;
2865         }
2866
2867         rxq = dev->data->rx_queues[rx_queue_id];
2868         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2869
2870         while ((desc < rxq->nb_rx_desc) &&
2871                 (rxdp->wb.upper.status_error &
2872                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2873                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2874                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2875                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2876                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2877                                 desc - rxq->nb_rx_desc]);
2878         }
2879
2880         return desc;
2881 }
2882
2883 int
2884 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2885 {
2886         volatile union ixgbe_adv_rx_desc *rxdp;
2887         struct ixgbe_rx_queue *rxq = rx_queue;
2888         uint32_t desc;
2889
2890         if (unlikely(offset >= rxq->nb_rx_desc))
2891                 return 0;
2892         desc = rxq->rx_tail + offset;
2893         if (desc >= rxq->nb_rx_desc)
2894                 desc -= rxq->nb_rx_desc;
2895
2896         rxdp = &rxq->rx_ring[desc];
2897         return !!(rxdp->wb.upper.status_error &
2898                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2899 }
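
/*
 * Usage sketch (illustrative only): both helpers above are reached through
 * the generic ethdev API, e.g.:
 *
 *     uint32_t used = rte_eth_rx_queue_count(port_id, 0);
 *     int done = rte_eth_rx_descriptor_done(port_id, 0, 16);
 */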
2900
2901 void __attribute__((cold))
2902 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2903 {
2904         unsigned i;
2905         struct ixgbe_adapter *adapter =
2906                 (struct ixgbe_adapter *)dev->data->dev_private;
2907
2908         PMD_INIT_FUNC_TRACE();
2909
2910         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2911                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2912
2913                 if (txq != NULL) {
2914                         txq->ops->release_mbufs(txq);
2915                         txq->ops->reset(txq);
2916                 }
2917         }
2918
2919         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2920                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2921
2922                 if (rxq != NULL) {
2923                         ixgbe_rx_queue_release_mbufs(rxq);
2924                         ixgbe_reset_rx_queue(adapter, rxq);
2925                 }
2926         }
2927 }
2928
2929 void
2930 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2931 {
2932         unsigned i;
2933
2934         PMD_INIT_FUNC_TRACE();
2935
2936         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2937                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2938                 dev->data->rx_queues[i] = NULL;
2939         }
2940         dev->data->nb_rx_queues = 0;
2941
2942         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2943                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2944                 dev->data->tx_queues[i] = NULL;
2945         }
2946         dev->data->nb_tx_queues = 0;
2947 }
2948
2949 /*********************************************************************
2950  *
2951  *  Device RX/TX init functions
2952  *
2953  **********************************************************************/
2954
2955 /**
2956  * Receive Side Scaling (RSS)
2957  * See section 7.1.2.8 in the following document:
2958  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2959  *
2960  * Principles:
2961  * The source and destination IP addresses of the IP header and the source
2962  * and destination ports of TCP/UDP headers, if any, of received packets are
2963  * hashed against a configurable random key to compute a 32-bit RSS hash result.
2964  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2965  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
2966  * RSS output index which is used as the RX queue index where to store the
2967  * received packets.
2968  * The following output is supplied in the RX write-back descriptor:
2969  *     - 32-bit result of the Microsoft RSS hash function,
2970  *     - 4-bit RSS type field.
2971  */
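
/*
 * For example (a sketch): a packet whose 32-bit RSS hash is 0x1234ABCD has
 * 0x4D (77) as its seven LSBs, so RETA entry 77 selects the RX queue that
 * receives it.
 */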
2972
2973 /*
2974  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2975  * Used as the default key.
2976  */
2977 static uint8_t rss_intel_key[40] = {
2978         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2979         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2980         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2981         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2982         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2983 };
2984
2985 static void
2986 ixgbe_rss_disable(struct rte_eth_dev *dev)
2987 {
2988         struct ixgbe_hw *hw;
2989         uint32_t mrqc;
2990         uint32_t mrqc_reg;
2991
2992         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2993         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2994         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2995         mrqc &= ~IXGBE_MRQC_RSSEN;
2996         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2997 }
2998
2999 static void
3000 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3001 {
3002         uint8_t  *hash_key;
3003         uint32_t mrqc;
3004         uint32_t rss_key;
3005         uint64_t rss_hf;
3006         uint16_t i;
3007         uint32_t mrqc_reg;
3008         uint32_t rssrk_reg;
3009
3010         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3011         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3012
3013         hash_key = rss_conf->rss_key;
3014         if (hash_key != NULL) {
3015                 /* Fill in RSS hash key */
3016                 for (i = 0; i < 10; i++) {
3017                         rss_key  = hash_key[(i * 4)];
3018                         rss_key |= hash_key[(i * 4) + 1] << 8;
3019                         rss_key |= hash_key[(i * 4) + 2] << 16;
3020                         rss_key |= hash_key[(i * 4) + 3] << 24;
3021                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3022                 }
3023         }
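
        /*
         * Example (a sketch): if the default rss_intel_key above is used,
         * the first RSSRK register gets bytes 0x6D, 0x5A, 0x56, 0xDA packed
         * little-endian, i.e. 0xDA565A6D.
         */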
3024
3025         /* Set configured hashing protocols in MRQC register */
3026         rss_hf = rss_conf->rss_hf;
3027         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3028         if (rss_hf & ETH_RSS_IPV4)
3029                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3030         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3031                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3032         if (rss_hf & ETH_RSS_IPV6)
3033                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3034         if (rss_hf & ETH_RSS_IPV6_EX)
3035                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3036         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3037                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3038         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3039                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3040         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3041                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3042         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3043                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3044         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3045                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3046         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3047 }
3048
3049 int
3050 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3051                           struct rte_eth_rss_conf *rss_conf)
3052 {
3053         struct ixgbe_hw *hw;
3054         uint32_t mrqc;
3055         uint64_t rss_hf;
3056         uint32_t mrqc_reg;
3057
3058         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3059
3060         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3061                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3062                         "NIC.");
3063                 return -ENOTSUP;
3064         }
3065         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3066
3067         /*
3068          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3069          *     "RSS enabling cannot be done dynamically while it must be
3070          *      preceded by a software reset"
3071          * Before changing anything, first check that the update RSS operation
3072          * does not attempt to disable RSS, if RSS was enabled at
3073          * initialization time, or does not attempt to enable RSS, if RSS was
3074          * disabled at initialization time.
3075          */
3076         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3077         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3078         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3079                 if (rss_hf != 0) /* Enable RSS */
3080                         return -(EINVAL);
3081                 return 0; /* Nothing to do */
3082         }
3083         /* RSS enabled */
3084         if (rss_hf == 0) /* Disable RSS */
3085                 return -(EINVAL);
3086         ixgbe_hw_rss_hash_set(hw, rss_conf);
3087         return 0;
3088 }
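
/*
 * Usage sketch (illustrative only; the selected hash fields are an example):
 * the hash update is reached through the generic ethdev API, e.g. to hash on
 * IPv4 and IPv4/TCP while keeping the current key:
 *
 *     struct rte_eth_rss_conf conf = {
 *             .rss_key = NULL,
 *             .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *     };
 *
 *     rte_eth_dev_rss_hash_update(port_id, &conf);
 */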
3089
3090 int
3091 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3092                             struct rte_eth_rss_conf *rss_conf)
3093 {
3094         struct ixgbe_hw *hw;
3095         uint8_t *hash_key;
3096         uint32_t mrqc;
3097         uint32_t rss_key;
3098         uint64_t rss_hf;
3099         uint16_t i;
3100         uint32_t mrqc_reg;
3101         uint32_t rssrk_reg;
3102
3103         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3104         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3105         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3106         hash_key = rss_conf->rss_key;
3107         if (hash_key != NULL) {
3108                 /* Return RSS hash key */
3109                 for (i = 0; i < 10; i++) {
3110                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3111                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3112                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3113                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3114                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3115                 }
3116         }
3117
3118         /* Get RSS functions configured in MRQC register */
3119         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3120         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3121                 rss_conf->rss_hf = 0;
3122                 return 0;
3123         }
3124         rss_hf = 0;
3125         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3126                 rss_hf |= ETH_RSS_IPV4;
3127         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3128                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3129         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3130                 rss_hf |= ETH_RSS_IPV6;
3131         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3132                 rss_hf |= ETH_RSS_IPV6_EX;
3133         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3134                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3135         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3136                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3137         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3138                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3139         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3140                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3141         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3142                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3143         rss_conf->rss_hf = rss_hf;
3144         return 0;
3145 }
3146
3147 static void
3148 ixgbe_rss_configure(struct rte_eth_dev *dev)
3149 {
3150         struct rte_eth_rss_conf rss_conf;
3151         struct ixgbe_hw *hw;
3152         uint32_t reta;
3153         uint16_t i;
3154         uint16_t j;
3155         uint16_t sp_reta_size;
3156         uint32_t reta_reg;
3157
3158         PMD_INIT_FUNC_TRACE();
3159         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3160
3161         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3162
3163         /*
3164          * Fill in redirection table
3165          * The byte-swap is needed because NIC registers are in
3166          * little-endian order.
3167          */
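        /*
         * For example (a sketch): with 4 RX queues the entries cycle through
         * 0, 1, 2, 3, and every fourth iteration one 32-bit RETA register is
         * written as rte_bswap32(0x00010203) = 0x03020100, i.e. four one-byte
         * entries.
         */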
3168         reta = 0;
3169         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3170                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3171
3172                 if (j == dev->data->nb_rx_queues)
3173                         j = 0;
3174                 reta = (reta << 8) | j;
3175                 if ((i & 3) == 3)
3176                         IXGBE_WRITE_REG(hw, reta_reg,
3177                                         rte_bswap32(reta));
3178         }
3179
3180         /*
3181          * Configure the RSS key and the RSS protocols used to compute
3182          * the RSS hash of input packets.
3183          */
3184         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3185         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3186                 ixgbe_rss_disable(dev);
3187                 return;
3188         }
3189         if (rss_conf.rss_key == NULL)
3190                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3191         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3192 }
3193
3194 #define NUM_VFTA_REGISTERS 128
3195 #define NIC_RX_BUFFER_SIZE 0x200
3196 #define X550_RX_BUFFER_SIZE 0x180
3197
3198 static void
3199 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3200 {
3201         struct rte_eth_vmdq_dcb_conf *cfg;
3202         struct ixgbe_hw *hw;
3203         enum rte_eth_nb_pools num_pools;
3204         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3205         uint16_t pbsize;
3206         uint8_t nb_tcs; /* number of traffic classes */
3207         int i;
3208
3209         PMD_INIT_FUNC_TRACE();
3210         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3211         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3212         num_pools = cfg->nb_queue_pools;
3213         /* Check we have a valid number of pools */
3214         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3215                 ixgbe_rss_disable(dev);
3216                 return;
3217         }
3218         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3219         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3220
3221         /*
3222          * RXPBSIZE
3223          * split rx buffer up into sections, each for 1 traffic class
3224          */
3225         switch (hw->mac.type) {
3226         case ixgbe_mac_X550:
3227         case ixgbe_mac_X550EM_x:
3228         case ixgbe_mac_X550EM_a:
3229                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3230                 break;
3231         default:
3232                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3233                 break;
3234         }
3235         for (i = 0; i < nb_tcs; i++) {
3236                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3237
3238                 /* clear the 10-bit RXPBSIZE field, then set the new value */
3239                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3240                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT);
3241                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3242         }
3243         /* zero alloc all unused TCs */
3244         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3245                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3246
3247                 /* clear the 10-bit RXPBSIZE field: zero alloc for this unused TC */
3248                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3249                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3250         }
3251
3252         /* MRQC: enable vmdq and dcb */
3253         mrqc = (num_pools == ETH_16_POOLS) ?
3254                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3255         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3256
3257         /* PFVTCTL: turn on virtualisation and set the default pool */
3258         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3259         if (cfg->enable_default_pool) {
3260                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3261         } else {
3262                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3263         }
3264
3265         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3266
3267         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3268         queue_mapping = 0;
3269         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3270                 /*
3271                  * mapping is done with 3 bits per priority,
3272                  * so shift by i*3 each time
3273                  */
3274                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3275
3276         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
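        /*
         * For example (a sketch): with dcb_tc[] = {0, 0, 1, 1, 2, 2, 3, 3}
         * the packed value written above is 0x006D2240 (three bits per user
         * priority).
         */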
3277
3278         /* RTRPCS: DCB related */
3279         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3280
3281         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3282         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3283         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3284         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3285
3286         /* VFTA - enable all vlan filters */
3287         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3288                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3289         }
3290
3291         /* VFRE: pool enabling for receive - 16 or 32 */
3292         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3293                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3294
3295         /*
3296          * MPSAR - allow pools to read specific mac addresses
3297          * In this case, all pools should be able to read from mac addr 0
3298          */
3299         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3300         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3301
3302         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3303         for (i = 0; i < cfg->nb_pool_maps; i++) {
3304                 /* set vlan id in VF register and set the valid bit */
3305                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3306                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3307                 /*
3308                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3309                  * pools, we only need to use the first half of the register
3310                  * i.e. bits 0-31
3311                  */
3312                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3313         }
3314 }
3315
3316 /**
3317  * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
3318  * @dev: pointer to eth_dev structure
3319  * @dcb_config: pointer to ixgbe_dcb_config structure
3320  */
3321 static void
3322 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3323                        struct ixgbe_dcb_config *dcb_config)
3324 {
3325         uint32_t reg;
3326         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3327
3328         PMD_INIT_FUNC_TRACE();
3329         if (hw->mac.type != ixgbe_mac_82598EB) {
3330                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3331                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3332                 reg |= IXGBE_RTTDCS_ARBDIS;
3333                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3334
3335                 /* Enable DCB for Tx with 8 TCs */
3336                 if (dcb_config->num_tcs.pg_tcs == 8) {
3337                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3338                 } else {
3339                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3340                 }
3341                 if (dcb_config->vt_mode)
3342                         reg |= IXGBE_MTQC_VT_ENA;
3343                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3344
3345                 /* Enable the Tx desc arbiter */
3346                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3347                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3348                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3349
3350                 /* Enable Security TX Buffer IFG for DCB */
3351                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3352                 reg |= IXGBE_SECTX_DCB;
3353                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3354         }
3355 }
3356
3357 /**
3358  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3359  * @dev: pointer to rte_eth_dev structure
3360  * @dcb_config: pointer to ixgbe_dcb_config structure
3361  */
3362 static void
3363 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3364                         struct ixgbe_dcb_config *dcb_config)
3365 {
3366         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3367                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3368         struct ixgbe_hw *hw =
3369                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3370
3371         PMD_INIT_FUNC_TRACE();
3372         if (hw->mac.type != ixgbe_mac_82598EB)
3373                 /* PF VF Transmit Enable */
3374                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3375                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3376
3377         /* Configure general DCB TX parameters */
3378         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3379 }
3380
3381 static void
3382 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3383                         struct ixgbe_dcb_config *dcb_config)
3384 {
3385         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3386                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3387         struct ixgbe_dcb_tc_config *tc;
3388         uint8_t i, j;
3389
3390         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3391         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3392                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3393                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3394         } else {
3395                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3396                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3397         }
3398         /* User Priority to Traffic Class mapping */
3399         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3400                 j = vmdq_rx_conf->dcb_tc[i];
3401                 tc = &dcb_config->tc_config[j];
3402                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3403                                                 (uint8_t)(1 << j);
3404         }
3405 }
3406
3407 static void
3408 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3409                         struct ixgbe_dcb_config *dcb_config)
3410 {
3411         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3412                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3413         struct ixgbe_dcb_tc_config *tc;
3414         uint8_t i, j;
3415
3416         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3417         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3418                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3419                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3420         } else {
3421                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3422                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3423         }
3424
3425         /* User Priority to Traffic Class mapping */
3426         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3427                 j = vmdq_tx_conf->dcb_tc[i];
3428                 tc = &dcb_config->tc_config[j];
3429                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3430                                                 (uint8_t)(1 << j);
3431         }
3432 }
3433
3434 static void
3435 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3436                 struct ixgbe_dcb_config *dcb_config)
3437 {
3438         struct rte_eth_dcb_rx_conf *rx_conf =
3439                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3440         struct ixgbe_dcb_tc_config *tc;
3441         uint8_t i, j;
3442
3443         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3444         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3445
3446         /* User Priority to Traffic Class mapping */
3447         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3448                 j = rx_conf->dcb_tc[i];
3449                 tc = &dcb_config->tc_config[j];
3450                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3451                                                 (uint8_t)(1 << j);
3452         }
3453 }
3454
3455 static void
3456 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3457                 struct ixgbe_dcb_config *dcb_config)
3458 {
3459         struct rte_eth_dcb_tx_conf *tx_conf =
3460                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3461         struct ixgbe_dcb_tc_config *tc;
3462         uint8_t i, j;
3463
3464         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3465         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3466
3467         /* User Priority to Traffic Class mapping */
3468         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3469                 j = tx_conf->dcb_tc[i];
3470                 tc = &dcb_config->tc_config[j];
3471                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3472                                                 (uint8_t)(1 << j);
3473         }
3474 }
3475
3476 /**
3477  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3478  * @dev: pointer to eth_dev structure
3479  * @dcb_config: pointer to ixgbe_dcb_config structure
3480  */
3481 static void
3482 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3483                        struct ixgbe_dcb_config *dcb_config)
3484 {
3485         uint32_t reg;
3486         uint32_t vlanctrl;
3487         uint8_t i;
3488         uint32_t q;
3489         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3490
3491         PMD_INIT_FUNC_TRACE();
3492         /*
3493          * Disable the arbiter before changing parameters
3494          * (always enable recycle mode; WSP)
3495          */
3496         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3497         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3498
3499         if (hw->mac.type != ixgbe_mac_82598EB) {
3500                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3501                 if (dcb_config->num_tcs.pg_tcs == 4) {
3502                         if (dcb_config->vt_mode)
3503                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3504                                         IXGBE_MRQC_VMDQRT4TCEN;
3505                         else {
3506                                 /* no matter the mode is DCB or DCB_RSS, just
3507                                  * set the MRQE to RSSXTCEN. RSS is controlled
3508                                  * by RSS_FIELD
3509                                  */
3510                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3511                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3512                                         IXGBE_MRQC_RTRSS4TCEN;
3513                         }
3514                 }
3515                 if (dcb_config->num_tcs.pg_tcs == 8) {
3516                         if (dcb_config->vt_mode)
3517                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3518                                         IXGBE_MRQC_VMDQRT8TCEN;
3519                         else {
3520                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3521                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3522                                         IXGBE_MRQC_RTRSS8TCEN;
3523                         }
3524                 }
3525
3526                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3527
3528                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3529                         /* Disable drop for all queues in VMDQ mode */
3530                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3531                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3532                                                 (IXGBE_QDE_WRITE |
3533                                                  (q << IXGBE_QDE_IDX_SHIFT)));
3534                 } else {
3535                         /* Enable drop for all queues in SRIOV mode */
3536                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3537                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3538                                                 (IXGBE_QDE_WRITE |
3539                                                  (q << IXGBE_QDE_IDX_SHIFT) |
3540                                                  IXGBE_QDE_ENABLE));
3541                 }
3542         }
3543
3544         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3545         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3546         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3547         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3548
3549         /* VFTA - enable all vlan filters */
3550         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3551                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3552         }
3553
3554         /*
3555          * Configure Rx packet plane (recycle mode; WSP) and
3556          * enable arbiter
3557          */
3558         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3559         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3560 }
3561
3562 static void
3563 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3564                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3565 {
3566         switch (hw->mac.type) {
3567         case ixgbe_mac_82598EB:
3568                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3569                 break;
3570         case ixgbe_mac_82599EB:
3571         case ixgbe_mac_X540:
3572         case ixgbe_mac_X550:
3573         case ixgbe_mac_X550EM_x:
3574         case ixgbe_mac_X550EM_a:
3575                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3576                                                   tsa, map);
3577                 break;
3578         default:
3579                 break;
3580         }
3581 }
3582
3583 static void
3584 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3585                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3586 {
3587         switch (hw->mac.type) {
3588         case ixgbe_mac_82598EB:
3589                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3590                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3591                 break;
3592         case ixgbe_mac_82599EB:
3593         case ixgbe_mac_X540:
3594         case ixgbe_mac_X550:
3595         case ixgbe_mac_X550EM_x:
3596         case ixgbe_mac_X550EM_a:
3597                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3598                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3599                 break;
3600         default:
3601                 break;
3602         }
3603 }
3604
3605 #define DCB_RX_CONFIG  1
3606 #define DCB_TX_CONFIG  1
3607 #define DCB_TX_PB      1024
3608 /**
3609  * ixgbe_dcb_hw_configure - Enable DCB and configure general DCB
3610  * parameters in both VT mode and non-VT mode
3611  * @dev: pointer to rte_eth_dev structure
3612  * @dcb_config: pointer to ixgbe_dcb_config structure
3613  */
3614 static int
3615 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3616                         struct ixgbe_dcb_config *dcb_config)
3617 {
3618         int     ret = 0;
3619         uint8_t i, pfc_en, nb_tcs;
3620         uint16_t pbsize, rx_buffer_size;
3621         uint8_t config_dcb_rx = 0;
3622         uint8_t config_dcb_tx = 0;
3623         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3624         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3625         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3626         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3627         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3628         struct ixgbe_dcb_tc_config *tc;
3629         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3630         struct ixgbe_hw *hw =
3631                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3632
3633         switch (dev->data->dev_conf.rxmode.mq_mode) {
3634         case ETH_MQ_RX_VMDQ_DCB:
3635                 dcb_config->vt_mode = true;
3636                 if (hw->mac.type != ixgbe_mac_82598EB) {
3637                         config_dcb_rx = DCB_RX_CONFIG;
3638                         /*
3639                          * get dcb and VT rx configuration parameters
3640                          * from rte_eth_conf
3641                          */
3642                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3643                         /* Configure general VMDQ and DCB RX parameters */
3644                         ixgbe_vmdq_dcb_configure(dev);
3645                 }
3646                 break;
3647         case ETH_MQ_RX_DCB:
3648         case ETH_MQ_RX_DCB_RSS:
3649                 dcb_config->vt_mode = false;
3650                 config_dcb_rx = DCB_RX_CONFIG;
3651                 /* Get dcb RX configuration parameters from rte_eth_conf */
3652                 ixgbe_dcb_rx_config(dev, dcb_config);
3653                 /* Configure general DCB RX parameters */
3654                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
3655                 break;
3656         default:
3657                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3658                 break;
3659         }
3660         switch (dev->data->dev_conf.txmode.mq_mode) {
3661         case ETH_MQ_TX_VMDQ_DCB:
3662                 dcb_config->vt_mode = true;
3663                 config_dcb_tx = DCB_TX_CONFIG;
3664                 /* get DCB and VT TX configuration parameters
3665                  * from rte_eth_conf
3666                  */
3667                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3668                 /* Configure general VMDQ and DCB TX parameters */
3669                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3670                 break;
3671
3672         case ETH_MQ_TX_DCB:
3673                 dcb_config->vt_mode = false;
3674                 config_dcb_tx = DCB_TX_CONFIG;
3675                 /* Get DCB TX configuration parameters from rte_eth_conf */
3676                 ixgbe_dcb_tx_config(dev, dcb_config);
3677                 /* Configure general DCB TX parameters */
3678                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3679                 break;
3680         default:
3681                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3682                 break;
3683         }
3684
3685         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3686         /* Unpack map */
3687         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3688         if (nb_tcs == ETH_4_TCS) {
3689                 /* Avoid mapping unconfigured user priorities to TC0 */
3690                 uint8_t j = 4;
3691                 uint8_t mask = 0xFF;
3692
3693                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3694                         mask = (uint8_t)(mask & (~(1 << map[i])));
3695                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3696                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3697                                 map[j++] = i;
3698                         mask >>= 1;
3699                 }
3700                 /* Re-configure 4 TCs BW */
3701                 for (i = 0; i < nb_tcs; i++) {
3702                         tc = &dcb_config->tc_config[i];
3703                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3704                                                 (uint8_t)(100 / nb_tcs);
3705                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3706                                                 (uint8_t)(100 / nb_tcs);
3707                 }
3708                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3709                         tc = &dcb_config->tc_config[i];
3710                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3711                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3712                 }
3713         } else {
3714                 /* Re-configure 8 TCs BW */
3715                 for (i = 0; i < nb_tcs; i++) {
3716                         tc = &dcb_config->tc_config[i];
3717                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3718                                 (uint8_t)(100 / nb_tcs + (i & 1));
3719                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3720                                 (uint8_t)(100 / nb_tcs + (i & 1));
3721                 }
3722         }
3723
3724         switch (hw->mac.type) {
3725         case ixgbe_mac_X550:
3726         case ixgbe_mac_X550EM_x:
3727         case ixgbe_mac_X550EM_a:
3728                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3729                 break;
3730         default:
3731                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3732                 break;
3733         }
3734
3735         if (config_dcb_rx) {
3736                 /* Set RX buffer size */
3737                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3738                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3739
3740                 for (i = 0; i < nb_tcs; i++) {
3741                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3742                 }
3743                 /* zero alloc all unused TCs */
3744                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3745                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3746                 }
3747         }
3748         if (config_dcb_tx) {
3749                 /* Only an equally distributed Tx packet buffer
3750                  * strategy is supported.
3751                  */
3752                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3753                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3754
3755                 for (i = 0; i < nb_tcs; i++) {
3756                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3757                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3758                 }
3759                 /* Clear unused TCs, if any, to zero buffer size*/
3760                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3761                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3762                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3763                 }
3764         }
3765
3766         /* Calculate traffic class credits */
3767         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3768                                 IXGBE_DCB_TX_CONFIG);
3769         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3770                                 IXGBE_DCB_RX_CONFIG);
3771
3772         if (config_dcb_rx) {
3773                 /* Unpack CEE standard containers */
3774                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3775                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3776                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3777                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3778                 /* Configure PG(ETS) RX */
3779                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3780         }
3781
3782         if (config_dcb_tx) {
3783                 /* Unpack CEE standard containers */
3784                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3785                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3786                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3787                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3788                 /* Configure PG(ETS) TX */
3789                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3790         }
3791
3792         /* Configure queue statistics registers */
3793         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3794
3795         /* Check if the PFC is supported */
3796         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3797                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3798                 for (i = 0; i < nb_tcs; i++) {
3799                         /*
3800                          * high_water defaults to 3/4 of the per-TC buffer
3801                          * and low_water to 1/4 (e.g. 48 KB and 16 KB with 8 TCs).
3802                          */
3803                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3804                         hw->fc.low_water[i] = pbsize / 4;
3805                         /* Enable pfc for this TC */
3806                         tc = &dcb_config->tc_config[i];
3807                         tc->pfc = ixgbe_dcb_pfc_enabled;
3808                 }
3809                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3810                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3811                         pfc_en &= 0x0F;
3812                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3813         }
3814
3815         return ret;
3816 }
3817
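/*
 * Worked example for the PFC watermark defaults above (illustrative
 * arithmetic only, assuming a 512 KB Rx packet buffer, i.e. the
 * NIC_RX_BUFFER_SIZE case, split across 8 TCs):
 *
 *	pbsize     = 512 KB / 8   = 64 KB per TC
 *	high_water = (64 * 3) / 4 = 48 KB
 *	low_water  =  64 / 4      = 16 KB
 *
 * which matches the 48 KB / 16 KB defaults noted in the PFC block above.
 */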
3818 /**
3819  * ixgbe_configure_dcb - Configure DCB hardware
3820  * @dev: pointer to rte_eth_dev
3821  */
3822 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3823 {
3824         struct ixgbe_dcb_config *dcb_cfg =
3825                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3826         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3827
3828         PMD_INIT_FUNC_TRACE();
3829
3830         /* check support mq_mode for DCB */
3831         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3832             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3833             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3834                 return;
3835
3836         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
3837                 return;
3838
3839         /* Configure DCB hardware */
3840         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3841 }
3842
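/*
 * A minimal application-side sketch of a configuration that takes the
 * DCB path above (illustrative only; port_id, nb_rxq and nb_txq are
 * placeholders and the 4-TC choice is an assumption):
 *
 *	struct rte_eth_conf conf;
 *
 *	memset(&conf, 0, sizeof(conf));
 *	conf.rxmode.mq_mode = ETH_MQ_RX_DCB;
 *	conf.txmode.mq_mode = ETH_MQ_TX_DCB;
 *	conf.rx_adv_conf.dcb_rx_conf.nb_tcs = ETH_4_TCS;
 *	conf.tx_adv_conf.dcb_tx_conf.nb_tcs = ETH_4_TCS;
 *	conf.dcb_capability_en = ETH_DCB_PFC_SUPPORT;
 *	rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 *
 * ixgbe_configure_dcb() then picks this configuration up from
 * dev->data->dev_conf.
 */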
3843 /*
3844  * VMDq is only supported on 10 GbE NICs.
3845  */
3846 static void
3847 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3848 {
3849         struct rte_eth_vmdq_rx_conf *cfg;
3850         struct ixgbe_hw *hw;
3851         enum rte_eth_nb_pools num_pools;
3852         uint32_t mrqc, vt_ctl, vlanctrl;
3853         uint32_t vmolr = 0;
3854         int i;
3855
3856         PMD_INIT_FUNC_TRACE();
3857         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3858         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3859         num_pools = cfg->nb_queue_pools;
3860
3861         ixgbe_rss_disable(dev);
3862
3863         /* MRQC: enable vmdq */
3864         mrqc = IXGBE_MRQC_VMDQEN;
3865         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3866
3867         /* PFVTCTL: turn on virtualisation and set the default pool */
3868         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3869         if (cfg->enable_default_pool)
3870                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3871         else
3872                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3873
3874         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3875
3876         for (i = 0; i < (int)num_pools; i++) {
3877                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3878                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3879         }
3880
3881         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3882         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3883         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3884         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3885
3886         /* VFTA - enable all vlan filters */
3887         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3888                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3889
3890         /* VFRE: pool enabling for receive - 64 */
3891         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3892         if (num_pools == ETH_64_POOLS)
3893                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3894
3895         /*
3896          * MPSAR - allow pools to read specific mac addresses
3897          * In this case, all pools should be able to read from mac addr 0
3898          */
3899         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3900         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3901
3902         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3903         for (i = 0; i < cfg->nb_pool_maps; i++) {
3904                 /* set vlan id in VF register and set the valid bit */
3905                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3906                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3907                 /*
3908                  * Put the allowed pools in the VLVFB register pair: pools
3909                  * 0-31 map to the even register and pools 32-63 to the
3910                  * odd one.
3911                  */
3912                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3913                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
3914                                         (cfg->pool_map[i].pools & UINT32_MAX));
3915                 else
3916                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
3917                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
3918
3919         }
3920
3921         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
3922         if (cfg->enable_loop_back) {
3923                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3924                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3925                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3926         }
3927
3928         IXGBE_WRITE_FLUSH(hw);
3929 }
3930
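/*
 * A minimal sketch of the rte_eth_conf fields consumed by
 * ixgbe_vmdq_rx_hw_configure() above (illustrative; the VLAN id and
 * pool choice are assumptions):
 *
 *	struct rte_eth_vmdq_rx_conf *vmdq =
 *		&conf.rx_adv_conf.vmdq_rx_conf;
 *
 *	conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
 *	vmdq->nb_queue_pools = ETH_64_POOLS;
 *	vmdq->enable_default_pool = 0;
 *	vmdq->nb_pool_maps = 1;
 *	vmdq->pool_map[0].vlan_id = 100;
 *	vmdq->pool_map[0].pools = 1ULL << 0;	(steer VLAN 100 to pool 0)
 */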
3931 /*
3932  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3933  * @hw: pointer to hardware structure
3934  */
3935 static void
3936 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3937 {
3938         uint32_t reg;
3939         uint32_t q;
3940
3941         PMD_INIT_FUNC_TRACE();
3942         /* PF/VF Transmit Enable */
3943         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3944         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3945
3946         /* Disable the Tx desc arbiter so that MTQC can be changed */
3947         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3948         reg |= IXGBE_RTTDCS_ARBDIS;
3949         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3950
3951         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3952         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3953
3954         /* Disable drop for all queues */
3955         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3956                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3957                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3958
3959         /* Enable the Tx desc arbiter */
3960         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3961         reg &= ~IXGBE_RTTDCS_ARBDIS;
3962         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3963
3964         IXGBE_WRITE_FLUSH(hw);
3965 }
3966
3967 static int __attribute__((cold))
3968 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
3969 {
3970         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
3971         uint64_t dma_addr;
3972         unsigned int i;
3973
3974         /* Initialize software ring entries */
3975         for (i = 0; i < rxq->nb_rx_desc; i++) {
3976                 volatile union ixgbe_adv_rx_desc *rxd;
3977                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
3978
3979                 if (mbuf == NULL) {
3980                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
3981                                      (unsigned) rxq->queue_id);
3982                         return -ENOMEM;
3983                 }
3984
3985                 rte_mbuf_refcnt_set(mbuf, 1);
3986                 mbuf->next = NULL;
3987                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
3988                 mbuf->nb_segs = 1;
3989                 mbuf->port = rxq->port_id;
3990
3991                 dma_addr =
3992                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
3993                 rxd = &rxq->rx_ring[i];
3994                 rxd->read.hdr_addr = 0;
3995                 rxd->read.pkt_addr = dma_addr;
3996                 rxe[i].mbuf = mbuf;
3997         }
3998
3999         return 0;
4000 }
4001
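/*
 * The loop above takes one mbuf per descriptor from rxq->mb_pool, so the
 * pool given to rte_eth_rx_queue_setup() must hold at least nb_rx_desc
 * mbufs per queue plus whatever the application keeps in flight.
 * A minimal sizing sketch, assuming default-sized mbufs (the 8192/256
 * figures are illustrative choices, not driver requirements):
 *
 *	struct rte_mempool *mp = rte_pktmbuf_pool_create("rx_pool",
 *			8192, 256, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
 *			rte_socket_id());
 */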
4002 static int
4003 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4004 {
4005         struct ixgbe_hw *hw;
4006         uint32_t mrqc;
4007
4008         ixgbe_rss_configure(dev);
4009
4010         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4011
4012         /* MRQC: enable VF RSS */
4013         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4014         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4015         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4016         case ETH_64_POOLS:
4017                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4018                 break;
4019
4020         case ETH_32_POOLS:
4021                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4022                 break;
4023
4024         default:
4025                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4026                 return -EINVAL;
4027         }
4028
4029         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4030
4031         return 0;
4032 }
4033
4034 static int
4035 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4036 {
4037         struct ixgbe_hw *hw =
4038                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4039
4040         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4041         case ETH_64_POOLS:
4042                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4043                         IXGBE_MRQC_VMDQEN);
4044                 break;
4045
4046         case ETH_32_POOLS:
4047                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4048                         IXGBE_MRQC_VMDQRT4TCEN);
4049                 break;
4050
4051         case ETH_16_POOLS:
4052                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4053                         IXGBE_MRQC_VMDQRT8TCEN);
4054                 break;
4055         default:
4056                 PMD_INIT_LOG(ERR,
4057                         "invalid pool number in IOV mode");
4058                 break;
4059         }
4060         return 0;
4061 }
4062
4063 static int
4064 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4065 {
4066         struct ixgbe_hw *hw =
4067                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4068
4069         if (hw->mac.type == ixgbe_mac_82598EB)
4070                 return 0;
4071
4072         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4073                 /*
4074                  * SRIOV inactive scheme
4075                  * any DCB/RSS w/o VMDq multi-queue setting
4076                  */
4077                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4078                 case ETH_MQ_RX_RSS:
4079                 case ETH_MQ_RX_DCB_RSS:
4080                 case ETH_MQ_RX_VMDQ_RSS:
4081                         ixgbe_rss_configure(dev);
4082                         break;
4083
4084                 case ETH_MQ_RX_VMDQ_DCB:
4085                         ixgbe_vmdq_dcb_configure(dev);
4086                         break;
4087
4088                 case ETH_MQ_RX_VMDQ_ONLY:
4089                         ixgbe_vmdq_rx_hw_configure(dev);
4090                         break;
4091
4092                 case ETH_MQ_RX_NONE:
4093                 default:
4094                         /* If mq_mode is none, disable RSS mode. */
4095                         ixgbe_rss_disable(dev);
4096                         break;
4097                 }
4098         } else {
4099                 /* SRIOV active scheme
4100                  * Support RSS together with SRIOV.
4101                  */
4102                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4103                 case ETH_MQ_RX_RSS:
4104                 case ETH_MQ_RX_VMDQ_RSS:
4105                         ixgbe_config_vf_rss(dev);
4106                         break;
4107                 case ETH_MQ_RX_VMDQ_DCB:
4108                 case ETH_MQ_RX_DCB:
4109                 /* In SRIOV, the configuration is the same as in the VMDq case */
4110                         ixgbe_vmdq_dcb_configure(dev);
4111                         break;
4112                 /* DCB/RSS together with SRIOV is not supported */
4113                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4114                 case ETH_MQ_RX_DCB_RSS:
4115                         PMD_INIT_LOG(ERR,
4116                                 "Could not support DCB/RSS with VMDq & SRIOV");
4117                         return -1;
4118                 default:
4119                         ixgbe_config_vf_default(dev);
4120                         break;
4121                 }
4122         }
4123
4124         return 0;
4125 }
4126
4127 static int
4128 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4129 {
4130         struct ixgbe_hw *hw =
4131                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4132         uint32_t mtqc;
4133         uint32_t rttdcs;
4134
4135         if (hw->mac.type == ixgbe_mac_82598EB)
4136                 return 0;
4137
4138         /* disable arbiter before setting MTQC */
4139         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4140         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4141         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4142
4143         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4144                 /*
4145                  * SRIOV inactive scheme
4146                  * any DCB w/o VMDq multi-queue setting
4147                  */
4148                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4149                         ixgbe_vmdq_tx_hw_configure(hw);
4150                 else {
4151                         mtqc = IXGBE_MTQC_64Q_1PB;
4152                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4153                 }
4154         } else {
4155                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4156
4157                 /*
4158                  * SRIOV active scheme
4159                  * FIXME: add support for DCB together with VMDq & SRIOV
4160                  */
4161                 case ETH_64_POOLS:
4162                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4163                         break;
4164                 case ETH_32_POOLS:
4165                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4166                         break;
4167                 case ETH_16_POOLS:
4168                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4169                                 IXGBE_MTQC_8TC_8TQ;
4170                         break;
4171                 default:
4172                         mtqc = IXGBE_MTQC_64Q_1PB;
4173                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4174                 }
4175                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4176         }
4177
4178         /* re-enable arbiter */
4179         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4180         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4181
4182         return 0;
4183 }
4184
4185 /**
4186  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4187  *
4188  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4189  * spec rev. 3.0 chapter 8.2.3.8.13.
4190  *
4191  * @pool Memory pool of the Rx queue
4192  */
4193 static inline uint32_t
4194 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4195 {
4196         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4197
4198         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4199         uint16_t maxdesc =
4200                 IPV4_MAX_PKT_LEN /
4201                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4202
4203         if (maxdesc >= 16)
4204                 return IXGBE_RSCCTL_MAXDESC_16;
4205         else if (maxdesc >= 8)
4206                 return IXGBE_RSCCTL_MAXDESC_8;
4207         else if (maxdesc >= 4)
4208                 return IXGBE_RSCCTL_MAXDESC_4;
4209         else
4210                 return IXGBE_RSCCTL_MAXDESC_1;
4211 }
4212
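/*
 * Worked examples for the MAXDESC computation above (illustrative
 * arithmetic, with IPV4_MAX_PKT_LEN = 65535):
 *
 *	2048 bytes of data room after headroom: 65535 / 2048 = 31 -> MAXDESC_16
 *	8192 bytes of data room after headroom: 65535 / 8192 =  7 -> MAXDESC_4
 *
 * i.e. the larger the per-mbuf buffer, the fewer descriptors a single
 * RSC aggregation is allowed to span.
 */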
4213 /**
4214  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4215  * interrupt
4216  *
4217  * (Taken from FreeBSD tree)
4218  * (yes this is all very magic and confusing :)
4219  *
4220  * @dev port handle
4221  * @entry the register array entry
4222  * @vector the MSIX vector for this queue
4223  * @type RX/TX/MISC
4224  */
4225 static void
4226 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4227 {
4228         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4229         u32 ivar, index;
4230
4231         vector |= IXGBE_IVAR_ALLOC_VAL;
4232
4233         switch (hw->mac.type) {
4234
4235         case ixgbe_mac_82598EB:
4236                 if (type == -1)
4237                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4238                 else
4239                         entry += (type * 64);
4240                 index = (entry >> 2) & 0x1F;
4241                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4242                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4243                 ivar |= (vector << (8 * (entry & 0x3)));
4244                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4245                 break;
4246
4247         case ixgbe_mac_82599EB:
4248         case ixgbe_mac_X540:
4249                 if (type == -1) { /* MISC IVAR */
4250                         index = (entry & 1) * 8;
4251                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4252                         ivar &= ~(0xFF << index);
4253                         ivar |= (vector << index);
4254                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4255                 } else {        /* RX/TX IVARS */
4256                         index = (16 * (entry & 1)) + (8 * type);
4257                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4258                         ivar &= ~(0xFF << index);
4259                         ivar |= (vector << index);
4260                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4261                 }
4262
4263                 break;
4264
4265         default:
4266                 break;
4267         }
4268 }
4269
4270 void __attribute__((cold))
4271 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4272 {
4273         uint16_t i, rx_using_sse;
4274         struct ixgbe_adapter *adapter =
4275                 (struct ixgbe_adapter *)dev->data->dev_private;
4276
4277         /*
4278          * In order to allow Vector Rx there are a few configuration
4279          * conditions to be met and Rx Bulk Allocation should be allowed.
4280          */
4281         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4282             !adapter->rx_bulk_alloc_allowed) {
4283                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4284                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4285                                     "not enabled",
4286                              dev->data->port_id);
4287
4288                 adapter->rx_vec_allowed = false;
4289         }
4290
4291         /*
4292          * Initialize the appropriate LRO callback.
4293          *
4294          * If all queues satisfy the bulk allocation preconditions
4295          * (hw->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4296          * Otherwise use a single allocation version.
4297          */
4298         if (dev->data->lro) {
4299                 if (adapter->rx_bulk_alloc_allowed) {
4300                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4301                                            "allocation version");
4302                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4303                 } else {
4304                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4305                                            "allocation version");
4306                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4307                 }
4308         } else if (dev->data->scattered_rx) {
4309                 /*
4310                  * Set the non-LRO scattered callback: there are Vector and
4311                  * single allocation versions.
4312                  */
4313                 if (adapter->rx_vec_allowed) {
4314                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4315                                             "callback (port=%d).",
4316                                      dev->data->port_id);
4317
4318                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4319                 } else if (adapter->rx_bulk_alloc_allowed) {
4320                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4321                                            "allocation callback (port=%d).",
4322                                      dev->data->port_id);
4323                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4324                 } else {
4325                         PMD_INIT_LOG(DEBUG, "Using Regualr (non-vector, "
4326                                             "single allocation) "
4327                                             "Scattered Rx callback "
4328                                             "(port=%d).",
4329                                      dev->data->port_id);
4330
4331                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4332                 }
4333         /*
4334          * Below we set "simple" callbacks according to port/queues parameters.
4335          * If parameters allow we are going to choose between the following
4336          * callbacks:
4337          *    - Vector
4338          *    - Bulk Allocation
4339          *    - Single buffer allocation (the simplest one)
4340          */
4341         } else if (adapter->rx_vec_allowed) {
4342                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4343                                     "burst size no less than %d (port=%d).",
4344                              RTE_IXGBE_DESCS_PER_LOOP,
4345                              dev->data->port_id);
4346
4347                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4348         } else if (adapter->rx_bulk_alloc_allowed) {
4349                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4350                                     "satisfied. Rx Burst Bulk Alloc function "
4351                                     "will be used on port=%d.",
4352                              dev->data->port_id);
4353
4354                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4355         } else {
4356                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4357                                     "satisfied, or Scattered Rx is requested "
4358                                     "(port=%d).",
4359                              dev->data->port_id);
4360
4361                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4362         }
4363
4364         /* Propagate information about RX function choice through all queues. */
4365
4366         rx_using_sse =
4367                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4368                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4369
4370         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4371                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4372
4373                 rxq->rx_using_sse = rx_using_sse;
4374         }
4375 }
4376
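/*
 * Whichever callback is selected above is what the application reaches
 * through rte_eth_rx_burst(). A minimal usage sketch (the burst size of
 * 32 is only a common choice that comfortably satisfies the vector Rx
 * minimum logged above):
 *
 *	struct rte_mbuf *pkts[32];
 *	uint16_t nb_rx;
 *
 *	nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
 */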
4377 /**
4378  * ixgbe_set_rsc - configure RSC related port HW registers
4379  *
4380  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4381  * of 82599 Spec (x540 configuration is virtually the same).
4382  *
4383  * @dev port handle
4384  *
4385  * Returns 0 in case of success or a non-zero error code
4386  */
4387 static int
4388 ixgbe_set_rsc(struct rte_eth_dev *dev)
4389 {
4390         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4391         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4392         struct rte_eth_dev_info dev_info = { 0 };
4393         bool rsc_capable = false;
4394         uint16_t i;
4395         uint32_t rdrxctl;
4396
4397         /* Sanity check */
4398         dev->dev_ops->dev_infos_get(dev, &dev_info);
4399         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4400                 rsc_capable = true;
4401
4402         if (!rsc_capable && rx_conf->enable_lro) {
4403                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4404                                    "support it");
4405                 return -EINVAL;
4406         }
4407
4408         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4409
4410         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4411                 /*
4412                  * According to chapter 4.6.7.2.1 of the Spec Rev.
4413                  * 3.0, RSC requires HW CRC stripping to be
4414                  * enabled. If the user requested HW CRC stripping off
4415                  * and RSC on, return an error.
4416                  */
4417                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4418                                     "is disabled");
4419                 return -EINVAL;
4420         }
4421
4422         /* RFCTL configuration  */
4423         if (rsc_capable) {
4424                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4425
4426                 if (rx_conf->enable_lro)
4427                         /*
4428                          * Since NFS packet coalescing is not supported, clear
4429                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4430                          * enabled.
4431                          */
4432                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4433                                    IXGBE_RFCTL_NFSR_DIS);
4434                 else
4435                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4436
4437                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4438         }
4439
4440         /* If LRO hasn't been requested - we are done here. */
4441         if (!rx_conf->enable_lro)
4442                 return 0;
4443
4444         /* Set RDRXCTL.RSCACKC bit */
4445         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4446         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4447         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4448
4449         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4450         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4451                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4452                 uint32_t srrctl =
4453                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4454                 uint32_t rscctl =
4455                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4456                 uint32_t psrtype =
4457                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4458                 uint32_t eitr =
4459                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4460
4461                 /*
4462                  * ixgbe PMD doesn't support header-split at the moment.
4463                  *
4464                  * Following chapter 4.6.7.2.1 of the 82599/x540
4465                  * Spec, if RSC is enabled the SRRCTL[n].BSIZEHEADER
4466                  * field must be configured even if header split is not
4467                  * enabled. We configure it to 128 bytes, following the
4468                  * recommendation in the spec.
4469                  */
4470                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4471                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4472                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4473
4474                 /*
4475                  * TODO: Consider setting the Receive Descriptor Minimum
4476                  * Threshold Size for the RSC case. This is not an obviously
4477                  * beneficial option, but one worth considering...
4478                  */
4479
4480                 rscctl |= IXGBE_RSCCTL_RSCEN;
4481                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4482                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4483
4484                 /*
4485                  * RSC: Set ITR interval corresponding to 2K ints/s.
4486                  *
4487                  * Full-sized RSC aggregations for a 10Gb/s link will
4488                  * arrive at about 20K aggregation/s rate.
4489                  *
4490                  * A 2K ints/s rate (i.e. a 500 us ITR interval) will cause
4491                  * only about 10% of the aggregations to be closed by the
4492                  * interrupt timer expiring when streaming at wire speed.
4493                  *
4494                  * For a sparse streaming case this setting will yield
4495                  * at most 500us latency for a single RSC aggregation.
4496                  */
4497                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4498                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4499
4500                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4501                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4502                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4503                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4504
4505                 /*
4506                  * RSC requires the mapping of the queue to the
4507                  * interrupt vector.
4508                  */
4509                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4510         }
4511
4512         dev->data->lro = 1;
4513
4514         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4515
4516         return 0;
4517 }
4518
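/*
 * A minimal application-side sketch of requesting LRO, matching the
 * checks in ixgbe_set_rsc() above (illustrative only):
 *
 *	conf.rxmode.enable_lro = 1;
 *	conf.rxmode.hw_strip_crc = 1;	(required by the CRC check above)
 *
 * Hardware support is reported through the DEV_RX_OFFLOAD_TCP_LRO
 * capability queried at the top of ixgbe_set_rsc().
 */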
4519 /*
4520  * Initializes Receive Unit.
4521  */
4522 int __attribute__((cold))
4523 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4524 {
4525         struct ixgbe_hw     *hw;
4526         struct ixgbe_rx_queue *rxq;
4527         uint64_t bus_addr;
4528         uint32_t rxctrl;
4529         uint32_t fctrl;
4530         uint32_t hlreg0;
4531         uint32_t maxfrs;
4532         uint32_t srrctl;
4533         uint32_t rdrxctl;
4534         uint32_t rxcsum;
4535         uint16_t buf_size;
4536         uint16_t i;
4537         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4538         int rc;
4539
4540         PMD_INIT_FUNC_TRACE();
4541         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4542
4543         /*
4544          * Make sure receives are disabled while setting
4545          * up the RX context (registers, descriptor rings, etc.).
4546          */
4547         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4548         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4549
4550         /* Enable receipt of broadcast frames */
4551         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4552         fctrl |= IXGBE_FCTRL_BAM;
4553         fctrl |= IXGBE_FCTRL_DPF;
4554         fctrl |= IXGBE_FCTRL_PMCF;
4555         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4556
4557         /*
4558          * Configure CRC stripping, if any.
4559          */
4560         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4561         if (rx_conf->hw_strip_crc)
4562                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4563         else
4564                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4565
4566         /*
4567          * Configure jumbo frame support, if any.
4568          */
4569         if (rx_conf->jumbo_frame == 1) {
4570                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4571                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4572                 maxfrs &= 0x0000FFFF;
4573                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4574                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4575         } else
4576                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4577
4578         /*
4579          * If loopback mode is configured for 82599, set LPBK bit.
4580          */
4581         if (hw->mac.type == ixgbe_mac_82599EB &&
4582                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4583                 hlreg0 |= IXGBE_HLREG0_LPBK;
4584         else
4585                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4586
4587         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4588
4589         /* Setup RX queues */
4590         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4591                 rxq = dev->data->rx_queues[i];
4592
4593                 /*
4594                  * Reset crc_len in case it was changed after queue setup by a
4595                  * call to configure.
4596                  */
4597                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4598
4599                 /* Setup the Base and Length of the Rx Descriptor Rings */
4600                 bus_addr = rxq->rx_ring_phys_addr;
4601                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4602                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4603                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4604                                 (uint32_t)(bus_addr >> 32));
4605                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4606                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4607                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4608                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4609
4610                 /* Configure the SRRCTL register */
4611 #ifdef RTE_HEADER_SPLIT_ENABLE
4612                 /*
4613                  * Configure Header Split
4614                  */
4615                 if (rx_conf->header_split) {
4616                         if (hw->mac.type == ixgbe_mac_82599EB) {
4617                                 /* Must setup the PSRTYPE register */
4618                                 uint32_t psrtype;
4619
4620                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4621                                         IXGBE_PSRTYPE_UDPHDR   |
4622                                         IXGBE_PSRTYPE_IPV4HDR  |
4623                                         IXGBE_PSRTYPE_IPV6HDR;
4624                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4625                         }
4626                         srrctl = ((rx_conf->split_hdr_size <<
4627                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4628                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4629                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4630                 } else
4631 #endif
4632                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4633
4634                 /* Set if packets are dropped when no descriptors available */
4635                 if (rxq->drop_en)
4636                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4637
4638                 /*
4639                  * Configure the RX buffer size in the BSIZEPACKET field of
4640                  * the SRRCTL register of the queue.
4641                  * The value is in 1 KB resolution. Valid values can be from
4642                  * 1 KB to 16 KB.
4643                  */
4644                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4645                         RTE_PKTMBUF_HEADROOM);
4646                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4647                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4648
4649                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4650
4651                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4652                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4653
4654                 /* Account for dual VLAN tags when checking the buffer size */
4655                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4656                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4657                         dev->data->scattered_rx = 1;
4658         }
4659
4660         if (rx_conf->enable_scatter)
4661                 dev->data->scattered_rx = 1;
4662
4663         /*
4664          * Device configured with multiple RX queues.
4665          */
4666         ixgbe_dev_mq_rx_configure(dev);
4667
4668         /*
4669          * Setup the Checksum Register.
4670          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4671          * Enable IP/L4 checksum computation by hardware if requested to do so.
4672          */
4673         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4674         rxcsum |= IXGBE_RXCSUM_PCSD;
4675         if (rx_conf->hw_ip_checksum)
4676                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4677         else
4678                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4679
4680         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4681
4682         if (hw->mac.type == ixgbe_mac_82599EB ||
4683             hw->mac.type == ixgbe_mac_X540) {
4684                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4685                 if (rx_conf->hw_strip_crc)
4686                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4687                 else
4688                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4689                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4690                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4691         }
4692
4693         rc = ixgbe_set_rsc(dev);
4694         if (rc)
4695                 return rc;
4696
4697         ixgbe_set_rx_function(dev);
4698
4699         return 0;
4700 }
4701
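/*
 * Worked example for the SRRCTL.BSIZEPKT / scattered Rx logic above
 * (illustrative arithmetic, assuming 2048 bytes of mbuf data room after
 * headroom):
 *
 *	BSIZEPKT field = 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT = 2  (2 KB)
 *
 * scattered_rx is then set whenever max_rx_pkt_len plus two VLAN tags
 * (2 * IXGBE_VLAN_TAG_SIZE) exceeds 2048, e.g. for a 9000-byte jumbo
 * frame configuration with default-sized mbufs.
 */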
4702 /*
4703  * Initializes Transmit Unit.
4704  */
4705 void __attribute__((cold))
4706 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4707 {
4708         struct ixgbe_hw     *hw;
4709         struct ixgbe_tx_queue *txq;
4710         uint64_t bus_addr;
4711         uint32_t hlreg0;
4712         uint32_t txctrl;
4713         uint16_t i;
4714
4715         PMD_INIT_FUNC_TRACE();
4716         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4717
4718         /* Enable TX CRC (checksum offload requirement) and hw padding
4719          * (TSO requirement)
4720          */
4721         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4722         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4723         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4724
4725         /* Setup the Base and Length of the Tx Descriptor Rings */
4726         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4727                 txq = dev->data->tx_queues[i];
4728
4729                 bus_addr = txq->tx_ring_phys_addr;
4730                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4731                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4732                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4733                                 (uint32_t)(bus_addr >> 32));
4734                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4735                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4736                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4737                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4738                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4739
4740                 /*
4741                  * Disable Tx Head Writeback RO bit, since this hoses
4742                  * bookkeeping if things aren't delivered in order.
4743                  */
4744                 switch (hw->mac.type) {
4745                 case ixgbe_mac_82598EB:
4746                         txctrl = IXGBE_READ_REG(hw,
4747                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4748                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4749                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4750                                         txctrl);
4751                         break;
4752
4753                 case ixgbe_mac_82599EB:
4754                 case ixgbe_mac_X540:
4755                 case ixgbe_mac_X550:
4756                 case ixgbe_mac_X550EM_x:
4757                 case ixgbe_mac_X550EM_a:
4758                 default:
4759                         txctrl = IXGBE_READ_REG(hw,
4760                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4761                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4762                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4763                                         txctrl);
4764                         break;
4765                 }
4766         }
4767
4768         /* Device configured with multiple TX queues. */
4769         ixgbe_dev_mq_tx_configure(dev);
4770 }
4771
4772 /*
4773  * Set up link for 82599 loopback mode Tx->Rx.
4774  */
4775 static inline void __attribute__((cold))
4776 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4777 {
4778         PMD_INIT_FUNC_TRACE();
4779
4780         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4781                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4782                                 IXGBE_SUCCESS) {
4783                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4784                         /* ignore error */
4785                         return;
4786                 }
4787         }
4788
4789         /* Restart link */
4790         IXGBE_WRITE_REG(hw,
4791                         IXGBE_AUTOC,
4792                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4793         ixgbe_reset_pipeline_82599(hw);
4794
4795         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4796         msec_delay(50);
4797 }
4798
4799
4800 /*
4801  * Start Transmit and Receive Units.
4802  */
4803 int __attribute__((cold))
4804 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4805 {
4806         struct ixgbe_hw     *hw;
4807         struct ixgbe_tx_queue *txq;
4808         struct ixgbe_rx_queue *rxq;
4809         uint32_t txdctl;
4810         uint32_t dmatxctl;
4811         uint32_t rxctrl;
4812         uint16_t i;
4813         int ret = 0;
4814
4815         PMD_INIT_FUNC_TRACE();
4816         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4817
4818         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4819                 txq = dev->data->tx_queues[i];
4820                 /* Setup Transmit Threshold Registers */
4821                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4822                 txdctl |= txq->pthresh & 0x7F;
4823                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4824                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4825                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4826         }
4827
4828         if (hw->mac.type != ixgbe_mac_82598EB) {
4829                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4830                 dmatxctl |= IXGBE_DMATXCTL_TE;
4831                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4832         }
4833
4834         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4835                 txq = dev->data->tx_queues[i];
4836                 if (!txq->tx_deferred_start) {
4837                         ret = ixgbe_dev_tx_queue_start(dev, i);
4838                         if (ret < 0)
4839                                 return ret;
4840                 }
4841         }
4842
4843         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4844                 rxq = dev->data->rx_queues[i];
4845                 if (!rxq->rx_deferred_start) {
4846                         ret = ixgbe_dev_rx_queue_start(dev, i);
4847                         if (ret < 0)
4848                                 return ret;
4849                 }
4850         }
4851
4852         /* Enable Receive engine */
4853         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4854         if (hw->mac.type == ixgbe_mac_82598EB)
4855                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4856         rxctrl |= IXGBE_RXCTRL_RXEN;
4857         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4858
4859         /* If loopback mode is enabled for 82599, set up the link accordingly */
4860         if (hw->mac.type == ixgbe_mac_82599EB &&
4861                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4862                 ixgbe_setup_loopback_link_82599(hw);
4863
4864         return 0;
4865 }
4866
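/*
 * Queues marked as deferred-start are skipped above and must be started
 * explicitly by the application. A minimal sketch (queue index and
 * descriptor count are placeholders):
 *
 *	rxconf.rx_deferred_start = 1;
 *	rte_eth_rx_queue_setup(port_id, q, 128, rte_socket_id(), &rxconf, mp);
 *	rte_eth_dev_start(port_id);
 *	rte_eth_dev_rx_queue_start(port_id, q);
 *
 * The last call lands in ixgbe_dev_rx_queue_start() below.
 */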
4867 /*
4868  * Start Receive Units for specified queue.
4869  */
4870 int __attribute__((cold))
4871 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4872 {
4873         struct ixgbe_hw     *hw;
4874         struct ixgbe_rx_queue *rxq;
4875         uint32_t rxdctl;
4876         int poll_ms;
4877
4878         PMD_INIT_FUNC_TRACE();
4879         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4880
4881         if (rx_queue_id < dev->data->nb_rx_queues) {
4882                 rxq = dev->data->rx_queues[rx_queue_id];
4883
4884                 /* Allocate buffers for descriptor rings */
4885                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4886                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4887                                      rx_queue_id);
4888                         return -1;
4889                 }
4890                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4891                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4892                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4893
4894                 /* Wait until RX Enable ready */
4895                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4896                 do {
4897                         rte_delay_ms(1);
4898                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4899                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4900                 if (!poll_ms)
4901                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4902                                      rx_queue_id);
4903                 rte_wmb();
4904                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4905                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4906                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4907         } else
4908                 return -1;
4909
4910         return 0;
4911 }
4912
4913 /*
4914  * Stop Receive Units for specified queue.
4915  */
4916 int __attribute__((cold))
4917 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4918 {
4919         struct ixgbe_hw     *hw;
4920         struct ixgbe_adapter *adapter =
4921                 (struct ixgbe_adapter *)dev->data->dev_private;
4922         struct ixgbe_rx_queue *rxq;
4923         uint32_t rxdctl;
4924         int poll_ms;
4925
4926         PMD_INIT_FUNC_TRACE();
4927         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4928
4929         if (rx_queue_id < dev->data->nb_rx_queues) {
4930                 rxq = dev->data->rx_queues[rx_queue_id];
4931
4932                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4933                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4934                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4935
4936                 /* Wait until RX Enable bit clear */
4937                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4938                 do {
4939                         rte_delay_ms(1);
4940                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4941                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4942                 if (!poll_ms)
4943                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4944                                      rx_queue_id);
4945
4946                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4947
4948                 ixgbe_rx_queue_release_mbufs(rxq);
4949                 ixgbe_reset_rx_queue(adapter, rxq);
4950                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4951         } else
4952                 return -1;
4953
4954         return 0;
4955 }
4956
4957
4958 /*
4959  * Start Transmit Units for specified queue.
4960  */
4961 int __attribute__((cold))
4962 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4963 {
4964         struct ixgbe_hw     *hw;
4965         struct ixgbe_tx_queue *txq;
4966         uint32_t txdctl;
4967         int poll_ms;
4968
4969         PMD_INIT_FUNC_TRACE();
4970         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4971
4972         if (tx_queue_id < dev->data->nb_tx_queues) {
4973                 txq = dev->data->tx_queues[tx_queue_id];
4974                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4975                 txdctl |= IXGBE_TXDCTL_ENABLE;
4976                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4977
4978                 /* Wait until TX Enable ready */
4979                 if (hw->mac.type == ixgbe_mac_82599EB) {
4980                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4981                         do {
4982                                 rte_delay_ms(1);
4983                                 txdctl = IXGBE_READ_REG(hw,
4984                                         IXGBE_TXDCTL(txq->reg_idx));
4985                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4986                         if (!poll_ms)
4987                                 PMD_INIT_LOG(ERR, "Could not enable "
4988                                              "Tx Queue %d", tx_queue_id);
4989                 }
4990                 rte_wmb();
4991                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4992                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4993                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4994         } else
4995                 return -1;
4996
4997         return 0;
4998 }
4999
5000 /*
5001  * Stop Transmit Units for specified queue.
5002  */
5003 int __attribute__((cold))
5004 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5005 {
5006         struct ixgbe_hw     *hw;
5007         struct ixgbe_tx_queue *txq;
5008         uint32_t txdctl;
5009         uint32_t txtdh, txtdt;
5010         int poll_ms;
5011
5012         PMD_INIT_FUNC_TRACE();
5013         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5014
5015         if (tx_queue_id >= dev->data->nb_tx_queues)
5016                 return -1;
5017
5018         txq = dev->data->tx_queues[tx_queue_id];
5019
5020         /* Wait until TX queue is empty */
5021         if (hw->mac.type == ixgbe_mac_82599EB) {
5022                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5023                 do {
5024                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5025                         txtdh = IXGBE_READ_REG(hw,
5026                                                IXGBE_TDH(txq->reg_idx));
5027                         txtdt = IXGBE_READ_REG(hw,
5028                                                IXGBE_TDT(txq->reg_idx));
5029                 } while (--poll_ms && (txtdh != txtdt));
5030                 if (!poll_ms)
5031                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
5032                                      "when stopping.", tx_queue_id);
5033         }
5034
5035         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5036         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5037         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5038
5039         /* Wait until TX Enable bit clear */
5040         if (hw->mac.type == ixgbe_mac_82599EB) {
5041                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5042                 do {
5043                         rte_delay_ms(1);
5044                         txdctl = IXGBE_READ_REG(hw,
5045                                                 IXGBE_TXDCTL(txq->reg_idx));
5046                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5047                 if (!poll_ms)
5048                         PMD_INIT_LOG(ERR, "Could not disable "
5049                                      "Tx Queue %d", tx_queue_id);
5050         }
5051
5052         if (txq->ops != NULL) {
5053                 txq->ops->release_mbufs(txq);
5054                 txq->ops->reset(txq);
5055         }
5056         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5057
5058         return 0;
5059 }
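
/*
 * Illustrative sketch (not part of the driver): the Tx start/stop hooks
 * above also back deferred-start queues. Roughly, an application would:
 *
 *     struct rte_eth_txconf txconf = dev_info.default_txconf;
 *
 *     txconf.tx_deferred_start = 1;
 *     rte_eth_tx_queue_setup(port_id, queue_id, nb_txd, socket_id, &txconf);
 *     rte_eth_dev_start(port_id);
 *     ...
 *     rte_eth_dev_tx_queue_start(port_id, queue_id);
 *
 * dev_info, port_id, queue_id, nb_txd and socket_id are assumed to have
 * been obtained/chosen by the application beforehand.
 */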
5060
5061 void
5062 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5063         struct rte_eth_rxq_info *qinfo)
5064 {
5065         struct ixgbe_rx_queue *rxq;
5066
5067         rxq = dev->data->rx_queues[queue_id];
5068
5069         qinfo->mp = rxq->mb_pool;
5070         qinfo->scattered_rx = dev->data->scattered_rx;
5071         qinfo->nb_desc = rxq->nb_rx_desc;
5072
5073         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5074         qinfo->conf.rx_drop_en = rxq->drop_en;
5075         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5076 }
5077
5078 void
5079 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5080         struct rte_eth_txq_info *qinfo)
5081 {
5082         struct ixgbe_tx_queue *txq;
5083
5084         txq = dev->data->tx_queues[queue_id];
5085
5086         qinfo->nb_desc = txq->nb_tx_desc;
5087
5088         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5089         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5090         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5091
5092         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5093         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5094         qinfo->conf.txq_flags = txq->txq_flags;
5095         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5096 }
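
/*
 * Illustrative sketch (not part of the driver): the two callbacks above
 * are what rte_eth_rx_queue_info_get()/rte_eth_tx_queue_info_get() invoke,
 * e.g.:
 *
 *     struct rte_eth_rxq_info rx_qinfo;
 *
 *     if (rte_eth_rx_queue_info_get(port_id, queue_id, &rx_qinfo) == 0)
 *             printf("Rx queue %u: %u descriptors\n", queue_id,
 *                    rx_qinfo.nb_desc);
 *
 * port_id and queue_id are application-chosen values.
 */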
5097
5098 /*
5099  * [VF] Initializes Receive Unit.
5100  */
5101 int __attribute__((cold))
5102 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5103 {
5104         struct ixgbe_hw     *hw;
5105         struct ixgbe_rx_queue *rxq;
5106         uint64_t bus_addr;
5107         uint32_t srrctl, psrtype = 0;
5108         uint16_t buf_size;
5109         uint16_t i;
5110         int ret;
5111
5112         PMD_INIT_FUNC_TRACE();
5113         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5114
5115         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5116                 PMD_INIT_LOG(ERR, "Invalid number of Rx queues; "
5117                         "it must be a power of 2");
5118                 return -1;
5119         }
5120
5121         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5122                 PMD_INIT_LOG(ERR, "Invalid number of Rx queues; "
5123                         "it must be less than or equal to %d",
5124                         hw->mac.max_rx_queues);
5125                 return -1;
5126         }
5127
5128         /*
5129          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5130          * disables VF packet reception if the PF MTU is > 1500, to cope with
5131          * an 82599 limitation that forces the PF and all VFs to share the
5132          * same MTU. The PF driver re-enables VF packet reception only once
5133          * the VF driver issues an IXGBE_VF_SET_LPE request.
5134          * Until then, the VF device cannot be used, even if the VF driver
5135          * and the Guest VM network stack are ready to accept packets with a
5136          * size up to the PF MTU.
5137          * As a workaround for this PF behaviour, force the call to
5138          * ixgbevf_rlpml_set_vf (which issues IXGBE_VF_SET_LPE) even if jumbo
5139          * frames are not used, so that VF packet reception works in all
5140          * cases.
5141          */
5142         ixgbevf_rlpml_set_vf(hw,
5143                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5144
5145         /* Setup RX queues */
5146         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5147                 rxq = dev->data->rx_queues[i];
5148
5149                 /* Allocate buffers for descriptor rings */
5150                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5151                 if (ret)
5152                         return ret;
5153
5154                 /* Setup the Base and Length of the Rx Descriptor Rings */
5155                 bus_addr = rxq->rx_ring_phys_addr;
5156
5157                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5158                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5159                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5160                                 (uint32_t)(bus_addr >> 32));
5161                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5162                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5163                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5164                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5165
5166
5167                 /* Configure the SRRCTL register */
5168 #ifdef RTE_HEADER_SPLIT_ENABLE
5169                 /*
5170                  * Configure Header Split
5171                  */
5172                 if (dev->data->dev_conf.rxmode.header_split) {
5173                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5174                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5175                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5176                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5177                 } else
5178 #endif
5179                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5180
5181                 /* Set if packets are dropped when no descriptors available */
5182                 if (rxq->drop_en)
5183                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5184
5185                 /*
5186                  * Configure the RX buffer size in the BSIZEPACKET field of
5187                  * the SRRCTL register of the queue.
5188                  * The value is in 1 KB resolution. Valid values can be from
5189                  * 1 KB to 16 KB.
5190                  */
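                /*
                 * For example, with the default 128-byte RTE_PKTMBUF_HEADROOM
                 * and a mempool created with RTE_MBUF_DEFAULT_BUF_SIZE
                 * (2048 + 128 bytes of data room), buf_size below evaluates
                 * to 2048 and 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT yields 2,
                 * i.e. 2 KB receive buffers.
                 */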
5191                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5192                         RTE_PKTMBUF_HEADROOM);
5193                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5194                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5195
5196                 /*
5197                  * VF modification to write virtual function SRRCTL register
5198                  */
5199                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5200
5201                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5202                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5203
5204                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5205                     /* account for two VLAN tags to support double (QinQ) VLAN */
5206                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5207                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5208                         if (!dev->data->scattered_rx)
5209                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5210                         dev->data->scattered_rx = 1;
5211                 }
5212         }
5213
5214 #ifdef RTE_HEADER_SPLIT_ENABLE
5215         if (dev->data->dev_conf.rxmode.header_split)
5216                 /* Must setup the PSRTYPE register */
5217                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5218                         IXGBE_PSRTYPE_UDPHDR   |
5219                         IXGBE_PSRTYPE_IPV4HDR  |
5220                         IXGBE_PSRTYPE_IPV6HDR;
5221 #endif
5222
5223         /* Set RQPL for VF RSS according to max Rx queue */
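        /*
         * nb_rx_queues was checked above to be a power of two no greater
         * than hw->mac.max_rx_queues, so nb_rx_queues >> 1 yields 0, 1 or 2
         * for 1, 2 or 4 queues, matching the encoding expected by the RQPL
         * (RSS queues per pool) field.
         */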
5224         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5225                 IXGBE_PSRTYPE_RQPL_SHIFT;
5226         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5227
5228         ixgbe_set_rx_function(dev);
5229
5230         return 0;
5231 }
5232
5233 /*
5234  * [VF] Initializes Transmit Unit.
5235  */
5236 void __attribute__((cold))
5237 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5238 {
5239         struct ixgbe_hw     *hw;
5240         struct ixgbe_tx_queue *txq;
5241         uint64_t bus_addr;
5242         uint32_t txctrl;
5243         uint16_t i;
5244
5245         PMD_INIT_FUNC_TRACE();
5246         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5247
5248         /* Setup the Base and Length of the Tx Descriptor Rings */
5249         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5250                 txq = dev->data->tx_queues[i];
5251                 bus_addr = txq->tx_ring_phys_addr;
5252                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5253                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5254                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5255                                 (uint32_t)(bus_addr >> 32));
5256                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5257                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5258                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5259                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5260                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5261
5262                 /*
5263                  * Disable the Tx Head Writeback RO bit, since it breaks
5264                  * internal bookkeeping if write-backs arrive out of order.
5265                  */
5266                 txctrl = IXGBE_READ_REG(hw,
5267                                 IXGBE_VFDCA_TXCTRL(i));
5268                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5269                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5270                                 txctrl);
5271         }
5272 }
5273
5274 /*
5275  * [VF] Start Transmit and Receive Units.
5276  */
5277 void __attribute__((cold))
5278 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5279 {
5280         struct ixgbe_hw     *hw;
5281         struct ixgbe_tx_queue *txq;
5282         struct ixgbe_rx_queue *rxq;
5283         uint32_t txdctl;
5284         uint32_t rxdctl;
5285         uint16_t i;
5286         int poll_ms;
5287
5288         PMD_INIT_FUNC_TRACE();
5289         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5290
5291         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5292                 txq = dev->data->tx_queues[i];
5293                 /* Setup Transmit Threshold Registers */
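                /*
                 * VFTXDCTL packs the prefetch, host and write-back thresholds
                 * into bits [6:0], [14:8] and [22:16] respectively, which is
                 * what the masks and shifts below implement.
                 */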
5294                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5295                 txdctl |= txq->pthresh & 0x7F;
5296                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5297                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5298                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5299         }
5300
5301         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5302
5303                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5304                 txdctl |= IXGBE_TXDCTL_ENABLE;
5305                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5306
5307                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5308                 /* Wait until TX Enable ready */
5309                 do {
5310                         rte_delay_ms(1);
5311                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5312                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5313                 if (!poll_ms)
5314                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5315         }
5316         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5317
5318                 rxq = dev->data->rx_queues[i];
5319
5320                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5321                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5322                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5323
5324                 /* Wait until RX Enable ready */
5325                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5326                 do {
5327                         rte_delay_ms(1);
5328                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5329                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5330                 if (!poll_ms)
5331                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5332                 rte_wmb();
5333                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5334
5335         }
5336 }
5337
5338 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
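/*
 * When the vector PMD is built, the strong definitions in
 * ixgbe_rxtx_vec_*.c override these weak symbols; otherwise the stubs
 * simply report that the vector paths are unavailable so that
 * ixgbe_set_rx_function() selects the scalar receive routines.
 */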
5339 int __attribute__((weak))
5340 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5341 {
5342         return -1;
5343 }
5344
5345 uint16_t __attribute__((weak))
5346 ixgbe_recv_pkts_vec(
5347         void __rte_unused *rx_queue,
5348         struct rte_mbuf __rte_unused **rx_pkts,
5349         uint16_t __rte_unused nb_pkts)
5350 {
5351         return 0;
5352 }
5353
5354 uint16_t __attribute__((weak))
5355 ixgbe_recv_scattered_pkts_vec(
5356         void __rte_unused *rx_queue,
5357         struct rte_mbuf __rte_unused **rx_pkts,
5358         uint16_t __rte_unused nb_pkts)
5359 {
5360         return 0;
5361 }
5362
5363 int __attribute__((weak))
5364 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5365 {
5366         return -1;
5367 }