/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
 *   Copyright 2014 6WIND S.A.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <stdarg.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_ip.h>

#include "ixgbe_logs.h"
#include "base/ixgbe_api.h"
#include "base/ixgbe_vf.h"
#include "ixgbe_ethdev.h"
#include "base/ixgbe_dcb.h"
#include "base/ixgbe_common.h"
#include "ixgbe_rxtx.h"

/* Bit mask of the mbuf offload flags relevant for building a TX context */
#define IXGBE_TX_OFFLOAD_MASK (                  \
                PKT_TX_VLAN_PKT |                \
                PKT_TX_IP_CKSUM |                \
                PKT_TX_L4_MASK |                 \
                PKT_TX_TCP_SEG |                 \
                PKT_TX_OUTER_IP_CKSUM)

#if 1
#define RTE_PMD_USE_PREFETCH
#endif

#ifdef RTE_PMD_USE_PREFETCH
/*
 * Prefetch a cache line into all cache levels.
 */
#define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
#else
#define rte_ixgbe_prefetch(p)   do {} while (0)
#endif

/*********************************************************************
 *
 *  TX functions
 *
 **********************************************************************/

/*
 * Check for descriptors with their DD bit set and free mbufs.
 * Return the total number of buffers freed.
 */
static inline int __attribute__((always_inline))
ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
{
        struct ixgbe_tx_entry *txep;
        uint32_t status;
        int i, nb_free = 0;
        struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];

        /* check DD bit on threshold descriptor */
        status = txq->tx_ring[txq->tx_next_dd].wb.status;
        if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
                return 0;

        /*
         * first buffer to free from S/W ring is at index
         * tx_next_dd - (tx_rs_thresh-1)
         */
        txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);

        for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
                /* free buffers one at a time */
                m = __rte_pktmbuf_prefree_seg(txep->mbuf);
                txep->mbuf = NULL;

                if (unlikely(m == NULL))
                        continue;

                if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
                    (nb_free > 0 && m->pool != free[0]->pool)) {
                        rte_mempool_put_bulk(free[0]->pool,
                                             (void **)free, nb_free);
                        nb_free = 0;
                }

                free[nb_free++] = m;
        }

        if (nb_free > 0)
                rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);

        /* buffers were freed, update counters */
        txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
        txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
        if (txq->tx_next_dd >= txq->nb_tx_desc)
                txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);

        return txq->tx_rs_thresh;
}

/* Populate 4 descriptors with data from 4 mbufs */
static inline void
tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
{
        uint64_t buf_dma_addr;
        uint32_t pkt_len;
        int i;

        for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
                buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
                pkt_len = (*pkts)->data_len;

                /* write data to descriptor */
                txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);

                txdp->read.cmd_type_len =
                        rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);

                txdp->read.olinfo_status =
                        rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);

                rte_prefetch0(&(*pkts)->pool);
        }
}

/* Populate 1 descriptor with data from 1 mbuf */
static inline void
tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
{
        uint64_t buf_dma_addr;
        uint32_t pkt_len;

        buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
        pkt_len = (*pkts)->data_len;

        /* write data to descriptor */
        txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
        txdp->read.cmd_type_len =
                        rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
        txdp->read.olinfo_status =
                        rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
        rte_prefetch0(&(*pkts)->pool);
}

/*
 * Fill H/W descriptor ring with mbuf data.
 * Copy mbuf pointers to the S/W ring.
 */
static inline void
ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
                      uint16_t nb_pkts)
{
        volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
        struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
        const int N_PER_LOOP = 4;
        const int N_PER_LOOP_MASK = N_PER_LOOP-1;
        int mainpart, leftover;
        int i, j;

        /*
         * Process most of the packets in chunks of N pkts.  Any
         * leftover packets will get processed one at a time.
         */
        mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
        leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
        for (i = 0; i < mainpart; i += N_PER_LOOP) {
                /* Copy N mbuf pointers to the S/W ring */
                for (j = 0; j < N_PER_LOOP; ++j) {
                        (txep + i + j)->mbuf = *(pkts + i + j);
                }
                tx4(txdp + i, pkts + i);
        }

        if (unlikely(leftover > 0)) {
                for (i = 0; i < leftover; ++i) {
                        (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
                        tx1(txdp + mainpart + i, pkts + mainpart + i);
                }
        }
}

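/*
 * Simple TX path used by ixgbe_xmit_pkts_simple(): every mbuf is a single
 * segment with no offloads, so each packet consumes exactly one data
 * descriptor and no context descriptors are needed.
 */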
static inline uint16_t
tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
             uint16_t nb_pkts)
{
        struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
        volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
        uint16_t n = 0;

        /*
         * Begin scanning the H/W ring for done descriptors when the
         * number of available descriptors drops below tx_free_thresh.  For
         * each done descriptor, free the associated buffer.
         */
        if (txq->nb_tx_free < txq->tx_free_thresh)
                ixgbe_tx_free_bufs(txq);

        /* Only use descriptors that are available */
        nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
        if (unlikely(nb_pkts == 0))
                return 0;

        /* Use exactly nb_pkts descriptors */
        txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);

        /*
         * At this point, we know there are enough descriptors in the
         * ring to transmit all the packets.  This assumes that each
         * mbuf contains a single segment, and that no new offloads
         * are expected, which would require a new context descriptor.
         */

        /*
         * See if we're going to wrap-around. If so, handle the top
         * of the descriptor ring first, then do the bottom.  If not,
         * the processing looks just like the "bottom" part anyway...
         */
        if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
                n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
                ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);

                /*
                 * We know that the last descriptor in the ring will need to
                 * have its RS bit set because tx_rs_thresh has to be
                 * a divisor of the ring size
                 */
                tx_r[txq->tx_next_rs].read.cmd_type_len |=
                        rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
                txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);

                txq->tx_tail = 0;
        }

        /* Fill H/W descriptor ring with mbuf data */
        ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
        txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));

        /*
         * Determine if RS bit should be set
         * This is what we actually want:
         *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
         * but instead of subtracting 1 and doing >=, we can just do
         * greater than without subtracting.
         */
        if (txq->tx_tail > txq->tx_next_rs) {
                tx_r[txq->tx_next_rs].read.cmd_type_len |=
                        rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
                txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
                                                txq->tx_rs_thresh);
                if (txq->tx_next_rs >= txq->nb_tx_desc)
                        txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
        }

        /*
         * Check for wrap-around. This would only happen if we used
         * up to the last descriptor in the ring, no more, no less.
         */
        if (txq->tx_tail >= txq->nb_tx_desc)
                txq->tx_tail = 0;

        /* update tail pointer */
        rte_wmb();
        IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);

        return nb_pkts;
}

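/*
 * Burst-split wrapper for the simple TX path: bursts larger than
 * RTE_PMD_IXGBE_TX_MAX_BURST are transmitted in chunks of at most
 * RTE_PMD_IXGBE_TX_MAX_BURST packets.
 */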
uint16_t
ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
                       uint16_t nb_pkts)
{
        uint16_t nb_tx;

        /* Transmit the whole burst at once if it fits within TX_MAX_BURST */
        if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
                return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);

        /* transmit more than the max burst, in chunks of TX_MAX_BURST */
        nb_tx = 0;
        while (nb_pkts) {
                uint16_t ret, n;

                n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
                ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
                nb_tx = (uint16_t)(nb_tx + ret);
                nb_pkts = (uint16_t)(nb_pkts - ret);
                if (ret < n)
                        break;
        }

        return nb_tx;
}

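/*
 * Write a TX context descriptor describing the requested offloads
 * (VLAN insertion, checksum, TSO, outer IP checksum) and record the
 * result in the per-queue context cache so later packets can reuse it.
 */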
static inline void
ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
                volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
                uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
{
        uint32_t type_tucmd_mlhl;
        uint32_t mss_l4len_idx = 0;
        uint32_t ctx_idx;
        uint32_t vlan_macip_lens;
        union ixgbe_tx_offload tx_offload_mask;
        uint32_t seqnum_seed = 0;

        ctx_idx = txq->ctx_curr;
        tx_offload_mask.data[0] = 0;
        tx_offload_mask.data[1] = 0;
        type_tucmd_mlhl = 0;

        /* Specify which HW CTX to upload. */
        mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);

        if (ol_flags & PKT_TX_VLAN_PKT) {
                tx_offload_mask.vlan_tci |= ~0;
        }

        /* check if TCP segmentation is required for this packet */
        if (ol_flags & PKT_TX_TCP_SEG) {
                /* implies IP cksum in IPv4 */
                if (ol_flags & PKT_TX_IP_CKSUM)
                        type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
                                IXGBE_ADVTXD_TUCMD_L4T_TCP |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
                else
                        type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
                                IXGBE_ADVTXD_TUCMD_L4T_TCP |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;

                tx_offload_mask.l2_len |= ~0;
                tx_offload_mask.l3_len |= ~0;
                tx_offload_mask.l4_len |= ~0;
                tx_offload_mask.tso_segsz |= ~0;
                mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
                mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
        } else { /* no TSO, check if hardware checksum is needed */
                if (ol_flags & PKT_TX_IP_CKSUM) {
                        type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
                        tx_offload_mask.l2_len |= ~0;
                        tx_offload_mask.l3_len |= ~0;
                }

                switch (ol_flags & PKT_TX_L4_MASK) {
                case PKT_TX_UDP_CKSUM:
                        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
                        mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
                        tx_offload_mask.l2_len |= ~0;
                        tx_offload_mask.l3_len |= ~0;
                        break;
                case PKT_TX_TCP_CKSUM:
                        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
                        mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
                        tx_offload_mask.l2_len |= ~0;
                        tx_offload_mask.l3_len |= ~0;
                        break;
                case PKT_TX_SCTP_CKSUM:
                        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
                        mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
                        tx_offload_mask.l2_len |= ~0;
                        tx_offload_mask.l3_len |= ~0;
                        break;
                default:
                        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
                        break;
                }
        }

        if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
                tx_offload_mask.outer_l2_len |= ~0;
                tx_offload_mask.outer_l3_len |= ~0;
                tx_offload_mask.l2_len |= ~0;
                seqnum_seed |= tx_offload.outer_l3_len
                               << IXGBE_ADVTXD_OUTER_IPLEN;
                seqnum_seed |= tx_offload.l2_len
                               << IXGBE_ADVTXD_TUNNEL_LEN;
        }

        txq->ctx_cache[ctx_idx].flags = ol_flags;
        txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
                tx_offload_mask.data[0] & tx_offload.data[0];
        txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
                tx_offload_mask.data[1] & tx_offload.data[1];
        txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;

        ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
        vlan_macip_lens = tx_offload.l3_len;
        if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
                vlan_macip_lens |= (tx_offload.outer_l2_len <<
                                    IXGBE_ADVTXD_MACLEN_SHIFT);
        else
                vlan_macip_lens |= (tx_offload.l2_len <<
                                    IXGBE_ADVTXD_MACLEN_SHIFT);
        vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
        ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
        ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
        ctx_txd->seqnum_seed     = seqnum_seed;
}

/*
 * Check which hardware context can be used. Use the existing match
 * or create a new context descriptor.
 */
static inline uint32_t
what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
                   union ixgbe_tx_offload tx_offload)
{
        /* Check whether the currently-used context matches */
        if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
                   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
                    (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
                     & tx_offload.data[0])) &&
                   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
                    (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
                     & tx_offload.data[1]))))
                return txq->ctx_curr;

        /* Otherwise, check whether the other context slot matches */
        txq->ctx_curr ^= 1;
        if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
                   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
                    (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
                     & tx_offload.data[0])) &&
                   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
                    (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
                     & tx_offload.data[1]))))
                return txq->ctx_curr;

        /* Neither context matches: a new context descriptor is needed */
        return IXGBE_CTX_NUM;
}

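/*
 * Translate mbuf offload flags into the checksum-insertion bits
 * (POPTS IXSM/TXSM) of the advanced data descriptor.
 */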
static inline uint32_t
tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
{
        uint32_t tmp = 0;

        if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
                tmp |= IXGBE_ADVTXD_POPTS_TXSM;
        if (ol_flags & PKT_TX_IP_CKSUM)
                tmp |= IXGBE_ADVTXD_POPTS_IXSM;
        if (ol_flags & PKT_TX_TCP_SEG)
                tmp |= IXGBE_ADVTXD_POPTS_TXSM;
        return tmp;
}

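/*
 * Translate mbuf offload flags into the command bits (VLAN insertion,
 * TSO, outer IP checksum) of the advanced data descriptor.
 */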
static inline uint32_t
tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
{
        uint32_t cmdtype = 0;

        if (ol_flags & PKT_TX_VLAN_PKT)
                cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
        if (ol_flags & PKT_TX_TCP_SEG)
                cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
        if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
                cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
        return cmdtype;
}

/* Default RS bit threshold values */
#ifndef DEFAULT_TX_RS_THRESH
#define DEFAULT_TX_RS_THRESH   32
#endif
#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif

/* Reset transmit descriptors after they have been used */
static inline int
ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
{
        struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
        volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
        uint16_t last_desc_cleaned = txq->last_desc_cleaned;
        uint16_t nb_tx_desc = txq->nb_tx_desc;
        uint16_t desc_to_clean_to;
        uint16_t nb_tx_to_clean;
        uint32_t status;

        /* Determine the last descriptor needing to be cleaned */
        desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
        if (desc_to_clean_to >= nb_tx_desc)
                desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);

        /* Check to make sure the last descriptor to clean is done */
        desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
        status = txr[desc_to_clean_to].wb.status;
        if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
                PMD_TX_FREE_LOG(DEBUG,
                                "TX descriptor %4u is not done "
                                "(port=%d queue=%d)",
                                desc_to_clean_to,
                                txq->port_id, txq->queue_id);
                /* Failed to clean any descriptors, better luck next time */
                return -(1);
        }

        /* Figure out how many descriptors will be cleaned */
        if (last_desc_cleaned > desc_to_clean_to)
                nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
                                                        desc_to_clean_to);
        else
                nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
                                                last_desc_cleaned);

        PMD_TX_FREE_LOG(DEBUG,
                        "Cleaning %4u TX descriptors: %4u to %4u "
                        "(port=%d queue=%d)",
                        nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
                        txq->port_id, txq->queue_id);

        /*
         * The last descriptor to clean is done, so that means all the
         * descriptors from the last descriptor that was cleaned
         * up to the last descriptor with the RS bit set
         * are done. Only reset the threshold descriptor.
         */
        txr[desc_to_clean_to].wb.status = 0;

        /* Update the txq to reflect the last descriptor that was cleaned */
        txq->last_desc_cleaned = desc_to_clean_to;
        txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);

        /* No Error */
        return 0;
}

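/*
 * Full-featured TX burst function: handles multi-segment mbufs and
 * per-packet offloads, inserting context descriptors when the requested
 * offloads differ from the cached contexts.
 */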
uint16_t
ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                uint16_t nb_pkts)
{
        struct ixgbe_tx_queue *txq;
        struct ixgbe_tx_entry *sw_ring;
        struct ixgbe_tx_entry *txe, *txn;
        volatile union ixgbe_adv_tx_desc *txr;
        volatile union ixgbe_adv_tx_desc *txd, *txp;
        struct rte_mbuf     *tx_pkt;
        struct rte_mbuf     *m_seg;
        uint64_t buf_dma_addr;
        uint32_t olinfo_status;
        uint32_t cmd_type_len;
        uint32_t pkt_len;
        uint16_t slen;
        uint64_t ol_flags;
        uint16_t tx_id;
        uint16_t tx_last;
        uint16_t nb_tx;
        uint16_t nb_used;
        uint64_t tx_ol_req;
        uint32_t ctx = 0;
        uint32_t new_ctx;
        union ixgbe_tx_offload tx_offload;

        tx_offload.data[0] = 0;
        tx_offload.data[1] = 0;
        txq = tx_queue;
        sw_ring = txq->sw_ring;
        txr     = txq->tx_ring;
        tx_id   = txq->tx_tail;
        txe = &sw_ring[tx_id];
        txp = NULL;

        /* Determine if the descriptor ring needs to be cleaned. */
        if (txq->nb_tx_free < txq->tx_free_thresh)
                ixgbe_xmit_cleanup(txq);

        rte_prefetch0(&txe->mbuf->pool);

        /* TX loop */
        for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
                new_ctx = 0;
                tx_pkt = *tx_pkts++;
                pkt_len = tx_pkt->pkt_len;

                /*
                 * Determine how many (if any) context descriptors
                 * are needed for offload functionality.
                 */
                ol_flags = tx_pkt->ol_flags;

                /* If hardware offload required */
                tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
                if (tx_ol_req) {
                        tx_offload.l2_len = tx_pkt->l2_len;
                        tx_offload.l3_len = tx_pkt->l3_len;
                        tx_offload.l4_len = tx_pkt->l4_len;
                        tx_offload.vlan_tci = tx_pkt->vlan_tci;
                        tx_offload.tso_segsz = tx_pkt->tso_segsz;
                        tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
                        tx_offload.outer_l3_len = tx_pkt->outer_l3_len;

                        /* Decide whether a new context descriptor must be
                         * built or an existing one can be reused.
                         */
                        ctx = what_advctx_update(txq, tx_ol_req,
                                tx_offload);
                        /* Only allocate a context descriptor if required */
                        new_ctx = (ctx == IXGBE_CTX_NUM);
                        ctx = txq->ctx_curr;
                }

                /*
                 * Keep track of how many descriptors are used in this loop.
                 * This will always be the number of segments + the number of
                 * context descriptors required to transmit the packet.
                 */
                nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);

                if (txp != NULL &&
                                nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
                        /* set RS on the previous packet in the burst */
                        txp->read.cmd_type_len |=
                                rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);

                /*
                 * The number of descriptors that must be allocated for a
                 * packet is the number of segments of that packet, plus 1
                 * Context Descriptor for the hardware offload, if any.
                 * Determine the last TX descriptor to allocate in the TX ring
                 * for the packet, starting from the current position (tx_id)
                 * in the ring.
                 */
                tx_last = (uint16_t) (tx_id + nb_used - 1);

                /* Circular ring */
                if (tx_last >= txq->nb_tx_desc)
                        tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);

                PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
                           " tx_first=%u tx_last=%u",
                           (unsigned) txq->port_id,
                           (unsigned) txq->queue_id,
                           (unsigned) pkt_len,
                           (unsigned) tx_id,
                           (unsigned) tx_last);

                /*
                 * Make sure there are enough TX descriptors available to
                 * transmit the entire packet.
                 * nb_used better be less than or equal to txq->tx_rs_thresh
                 */
                if (nb_used > txq->nb_tx_free) {
                        PMD_TX_FREE_LOG(DEBUG,
                                        "Not enough free TX descriptors "
                                        "nb_used=%4u nb_free=%4u "
                                        "(port=%d queue=%d)",
                                        nb_used, txq->nb_tx_free,
                                        txq->port_id, txq->queue_id);

                        if (ixgbe_xmit_cleanup(txq) != 0) {
                                /* Could not clean any descriptors */
                                if (nb_tx == 0)
                                        return 0;
                                goto end_of_tx;
                        }

                        /* nb_used better be <= txq->tx_rs_thresh */
                        if (unlikely(nb_used > txq->tx_rs_thresh)) {
                                PMD_TX_FREE_LOG(DEBUG,
                                        "The number of descriptors needed to "
                                        "transmit the packet exceeds the "
                                        "RS bit threshold. This will impact "
                                        "performance. "
                                        "nb_used=%4u nb_free=%4u "
                                        "tx_rs_thresh=%4u. "
                                        "(port=%d queue=%d)",
                                        nb_used, txq->nb_tx_free,
                                        txq->tx_rs_thresh,
                                        txq->port_id, txq->queue_id);
                                /*
                                 * Loop here until there are enough TX
                                 * descriptors or until the ring cannot be
                                 * cleaned.
                                 */
                                while (nb_used > txq->nb_tx_free) {
                                        if (ixgbe_xmit_cleanup(txq) != 0) {
                                                /*
                                                 * Could not clean any
                                                 * descriptors
                                                 */
                                                if (nb_tx == 0)
                                                        return 0;
                                                goto end_of_tx;
                                        }
                                }
                        }
                }

                /*
                 * By now there are enough free TX descriptors to transmit
                 * the packet.
                 */

                /*
                 * Set common flags of all TX Data Descriptors.
                 *
                 * The following bits must be set in all Data Descriptors:
                 *   - IXGBE_ADVTXD_DTYP_DATA
                 *   - IXGBE_ADVTXD_DCMD_DEXT
                 *
                 * The following bits must be set in the first Data Descriptor
                 * and are ignored in the other ones:
                 *   - IXGBE_ADVTXD_DCMD_IFCS
                 *   - IXGBE_ADVTXD_MAC_1588
                 *   - IXGBE_ADVTXD_DCMD_VLE
                 *
                 * The following bits must only be set in the last Data
                 * Descriptor:
                 *   - IXGBE_TXD_CMD_EOP
                 *
                 * The following bits can be set in any Data Descriptor, but
                 * are only set in the last Data Descriptor:
                 *   - IXGBE_TXD_CMD_RS
                 */
                cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
                        IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;

#ifdef RTE_LIBRTE_IEEE1588
                if (ol_flags & PKT_TX_IEEE1588_TMST)
                        cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
#endif

                olinfo_status = 0;
                if (tx_ol_req) {

                        if (ol_flags & PKT_TX_TCP_SEG) {
                                /* when TSO is on, the paylen in the descriptor
                                 * is not the packet length but the TCP payload
                                 * length */
                                pkt_len -= (tx_offload.l2_len +
                                        tx_offload.l3_len + tx_offload.l4_len);
                        }

                        /*
                         * Setup the TX Advanced Context Descriptor if required
                         */
                        if (new_ctx) {
                                volatile struct ixgbe_adv_tx_context_desc *
                                    ctx_txd;

                                ctx_txd = (volatile struct
                                    ixgbe_adv_tx_context_desc *)
                                    &txr[tx_id];

                                txn = &sw_ring[txe->next_id];
                                rte_prefetch0(&txn->mbuf->pool);

                                if (txe->mbuf != NULL) {
                                        rte_pktmbuf_free_seg(txe->mbuf);
                                        txe->mbuf = NULL;
                                }

                                ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
                                        tx_offload);

                                txe->last_id = tx_last;
                                tx_id = txe->next_id;
                                txe = txn;
                        }

                        /*
                         * Set up the TX Advanced Data Descriptor.
                         * This path is taken whether a new context descriptor
                         * was built or an existing one is reused.
                         */
                        cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
                        olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
                        olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
                }

                olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);

                m_seg = tx_pkt;
                do {
                        txd = &txr[tx_id];
                        txn = &sw_ring[txe->next_id];
                        rte_prefetch0(&txn->mbuf->pool);

                        if (txe->mbuf != NULL)
                                rte_pktmbuf_free_seg(txe->mbuf);
                        txe->mbuf = m_seg;

                        /*
                         * Set up Transmit Data Descriptor.
                         */
                        slen = m_seg->data_len;
                        buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
                        txd->read.buffer_addr =
                                rte_cpu_to_le_64(buf_dma_addr);
                        txd->read.cmd_type_len =
                                rte_cpu_to_le_32(cmd_type_len | slen);
                        txd->read.olinfo_status =
                                rte_cpu_to_le_32(olinfo_status);
                        txe->last_id = tx_last;
                        tx_id = txe->next_id;
                        txe = txn;
                        m_seg = m_seg->next;
                } while (m_seg != NULL);

                /*
                 * The last packet data descriptor needs End Of Packet (EOP)
                 */
                cmd_type_len |= IXGBE_TXD_CMD_EOP;
                txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
                txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);

                /* Set RS bit only on threshold packets' last descriptor */
                if (txq->nb_tx_used >= txq->tx_rs_thresh) {
                        PMD_TX_FREE_LOG(DEBUG,
                                        "Setting RS bit on TXD id="
                                        "%4u (port=%d queue=%d)",
                                        tx_last, txq->port_id, txq->queue_id);

                        cmd_type_len |= IXGBE_TXD_CMD_RS;

                        /* Update txq RS bit counters */
                        txq->nb_tx_used = 0;
                        txp = NULL;
                } else
                        txp = txd;

                txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
        }

end_of_tx:
        /* set RS on last packet in the burst */
        if (txp != NULL)
                txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);

        rte_wmb();

        /*
         * Set the Transmit Descriptor Tail (TDT)
         */
        PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
                   (unsigned) txq->port_id, (unsigned) txq->queue_id,
                   (unsigned) tx_id, (unsigned) nb_tx);
        IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
        txq->tx_tail = tx_id;

        return nb_tx;
}

/*********************************************************************
 *
 *  RX functions
 *
 **********************************************************************/

#define IXGBE_PACKET_TYPE_ETHER                         0X00
#define IXGBE_PACKET_TYPE_IPV4                          0X01
#define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
#define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
#define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
#define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
#define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
#define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
#define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
#define IXGBE_PACKET_TYPE_IPV6                          0X04
#define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
#define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
#define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
#define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
#define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
#define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
#define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
#define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
#define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
#define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
#define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
#define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
#define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
#define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
#define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F

#define IXGBE_PACKET_TYPE_NVGRE                   0X00
#define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
#define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
#define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
#define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
#define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
#define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
#define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
#define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
#define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D

#define IXGBE_PACKET_TYPE_VXLAN                   0X80
#define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
#define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
#define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
#define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
#define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
#define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
#define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
#define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
#define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD

#define IXGBE_PACKET_TYPE_MAX               0X80
#define IXGBE_PACKET_TYPE_TN_MAX            0X100
#define IXGBE_PACKET_TYPE_SHIFT             0X04

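/*
 * Translate the packet type field of the RX descriptor (pkt_info) into an
 * rte_mbuf packet type, using lookup tables indexed by the masked value.
 */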
/* @note: update ixgbe_dev_supported_ptypes_get() if anything changes here. */
static inline uint32_t
ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
{
        /**
         * Use two different tables, one for normal packets and one for
         * tunnel packets, to save space.
         */
        static const uint32_t
                ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
                [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
                [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4,
                [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
                [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
                [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
                [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT,
                [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
                [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
                [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
                [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6,
                [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
                [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
                [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
                [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6_EXT,
                [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
                [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
                [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
                [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6,
                [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
                [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
                [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
                [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6,
                [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
                [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
                [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
                [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6_EXT,
                [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
                [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
                [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
                [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6_EXT,
                [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
                [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
                [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
                        RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
                        RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
        };

        static const uint32_t
                ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
                [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
                [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
                        RTE_PTYPE_INNER_L4_TCP,
                [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
                        RTE_PTYPE_INNER_L4_TCP,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
                        RTE_PTYPE_INNER_L4_TCP,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
                        RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                        RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
                        RTE_PTYPE_INNER_L4_UDP,
                [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
                        RTE_PTYPE_INNER_L4_UDP,
                [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
                        RTE_PTYPE_INNER_L4_SCTP,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
                        RTE_PTYPE_INNER_L4_UDP,
                [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
                        RTE_PTYPE_INNER_L4_SCTP,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
                        RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                        RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
                        RTE_PTYPE_INNER_L4_SCTP,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
                        RTE_PTYPE_INNER_L4_SCTP,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
                        RTE_PTYPE_INNER_L4_TCP,
                [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                        RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
                        RTE_PTYPE_INNER_L4_UDP,

                [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
                [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4_EXT,
                [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV6,
                [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV6_EXT,
                [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4,
                [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
                [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
                [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                        RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                        RTE_PTYPE_INNER_L3_IPV4,
1217                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1218                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1219                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1220                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1221                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1222                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1223                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1224                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1225                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1226                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1227                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1228                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1229                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1230                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1231                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1232                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1233                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1234                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1235                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1236                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1237                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1238                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1239                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1240                         RTE_PTYPE_INNER_L3_IPV4,
1241                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1242                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1243                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1244                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1245                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1246                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1247                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1248                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1249                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1250                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1251                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1252                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1253                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1254                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1255                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1256                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1257                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1258                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1259                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1260                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1261                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1262                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1263                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1264                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1265                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1266                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1267                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1268                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1269         };
1270
1271         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1272                 return RTE_PTYPE_UNKNOWN;
1273
1274         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1275
1276         /* For tunnel packet */
1277         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1278                 /* Remove the tunnel bit to save the space. */
1279                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1280                 return ptype_table_tn[pkt_info];
1281         }
1282
1283         /**
1284          * For x550, if the packet is not tunnelled, the tunnel type bit
1285          * is reported as 0 by the hardware, so the 82599 mask can be
1286          * reused here.
1287          */
1288         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1289
1290         return ptype_table[pkt_info];
1291 }
1292
1293 static inline uint64_t
1294 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1295 {
1296         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1297                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1298                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1299                 PKT_RX_RSS_HASH, 0, 0, 0,
1300                 0, 0, 0,  PKT_RX_FDIR,
1301         };
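        /*
         * Indexed by the RSS-type field in the low 4 bits of pkt_info;
         * entry 15 corresponds to a flow-director match, hence PKT_RX_FDIR
         * rather than PKT_RX_RSS_HASH.
         */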
1302 #ifdef RTE_LIBRTE_IEEE1588
1303         static uint64_t ip_pkt_etqf_map[8] = {
1304                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1305                 0, 0, 0, 0,
1306         };
1307
1308         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1309                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1310                                 ip_rss_types_map[pkt_info & 0XF];
1311         else
1312                 return ip_rss_types_map[pkt_info & 0XF];
1313 #else
1314         return ip_rss_types_map[pkt_info & 0XF];
1315 #endif
1316 }
1317
1318 static inline uint64_t
1319 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1320 {
1321         uint64_t pkt_flags;
1322
1323         /*
1324          * Check only whether a VLAN tag is present.
1325          * Whether the NIC computed the L3/L4 Rx checksum is not checked
1326          * here; that is indicated by the rte_eth_rxmode.hw_ip_checksum flag.
1327          */
1328         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1329
1330 #ifdef RTE_LIBRTE_IEEE1588
1331         if (rx_status & IXGBE_RXD_STAT_TMST)
1332                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1333 #endif
1334         return pkt_flags;
1335 }
1336
1337 static inline uint64_t
1338 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1339 {
1340         uint64_t pkt_flags;
1341
1342         /*
1343          * Bit 31: IPE, IPv4 checksum error
1344          * Bit 30: L4I, L4 integrity error
1345          */
1346         static uint64_t error_to_pkt_flags_map[4] = {
1347                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1348                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1349                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1350                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1351         };
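        /*
         * The two error bits form the table index: e.g. IPE set with L4I
         * clear yields index 2, i.e.
         * PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD.
         */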
1352         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1353                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1354
1355         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1356             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1357                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1358         }
1359
1360         return pkt_flags;
1361 }
1362
1363 /*
1364  * LOOK_AHEAD defines how many desc statuses to check beyond the
1365  * current descriptor.
1366  * It must be a #define (compile-time constant) for optimal performance.
1367  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1368  * function only works with LOOK_AHEAD=8.
1369  */
1370 #define LOOK_AHEAD 8
1371 #if (LOOK_AHEAD != 8)
1372 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1373 #endif
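/*
 * Scan up to RTE_PMD_IXGBE_RX_MAX_BURST descriptors, LOOK_AHEAD at a time,
 * starting at rx_tail.  Descriptors whose DD bit is set are translated into
 * mbufs (length, VLAN, offload flags, packet type, RSS/FDIR hash) and their
 * mbuf pointers are parked in rxq->rx_stage; the consumed sw_ring entries
 * are cleared.  Returns the number of packets staged.
 */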
1374 static inline int
1375 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1376 {
1377         volatile union ixgbe_adv_rx_desc *rxdp;
1378         struct ixgbe_rx_entry *rxep;
1379         struct rte_mbuf *mb;
1380         uint16_t pkt_len;
1381         uint64_t pkt_flags;
1382         int nb_dd;
1383         uint32_t s[LOOK_AHEAD];
1384         uint32_t pkt_info[LOOK_AHEAD];
1385         int i, j, nb_rx = 0;
1386         uint32_t status;
1387         uint64_t vlan_flags = rxq->vlan_flags;
1388
1389         /* get references to current descriptor and S/W ring entry */
1390         rxdp = &rxq->rx_ring[rxq->rx_tail];
1391         rxep = &rxq->sw_ring[rxq->rx_tail];
1392
1393         status = rxdp->wb.upper.status_error;
1394         /* check to make sure there is at least 1 packet to receive */
1395         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1396                 return 0;
1397
1398         /*
1399          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1400          * reference packets that are ready to be received.
1401          */
1402         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1403              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1404                 /* Read desc statuses; rte_smp_rmb() below orders them before the data reads */
1405                 for (j = 0; j < LOOK_AHEAD; j++)
1406                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1407
1408                 rte_smp_rmb();
1409
1410                 /* Compute how many status bits were set */
1411                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1412                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1413                         ;
1414
1415                 for (j = 0; j < nb_dd; j++)
1416                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1417                                                        lo_dword.data);
1418
1419                 nb_rx += nb_dd;
1420
1421                 /* Translate descriptor info to mbuf format */
1422                 for (j = 0; j < nb_dd; ++j) {
1423                         mb = rxep[j].mbuf;
1424                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1425                                   rxq->crc_len;
1426                         mb->data_len = pkt_len;
1427                         mb->pkt_len = pkt_len;
1428                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1429
1430                         /* convert descriptor fields to rte mbuf flags */
1431                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1432                                 vlan_flags);
1433                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1434                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1435                                         ((uint16_t)pkt_info[j]);
1436                         mb->ol_flags = pkt_flags;
1437                         mb->packet_type =
1438                                 ixgbe_rxd_pkt_info_to_pkt_type
1439                                         (pkt_info[j], rxq->pkt_type_mask);
1440
1441                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1442                                 mb->hash.rss = rte_le_to_cpu_32(
1443                                     rxdp[j].wb.lower.hi_dword.rss);
1444                         else if (pkt_flags & PKT_RX_FDIR) {
1445                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1446                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1447                                     IXGBE_ATR_HASH_MASK;
1448                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1449                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1450                         }
1451                 }
1452
1453                 /* Move mbuf pointers from the S/W ring to the stage */
1454                 for (j = 0; j < LOOK_AHEAD; ++j) {
1455                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1456                 }
1457
1458                 /* stop scanning if this group of LOOK_AHEAD descriptors was not all done */
1459                 if (nb_dd != LOOK_AHEAD)
1460                         break;
1461         }
1462
1463         /* clear software ring entries so we can cleanup correctly */
1464         for (i = 0; i < nb_rx; ++i) {
1465                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1466         }
1467
1468
1469         return nb_rx;
1470 }
1471
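/*
 * Refill rx_free_thresh S/W ring entries and descriptors with one bulk
 * mempool get, ending at the current rx_free_trigger index.  The trigger
 * then advances by rx_free_thresh and wraps back to rx_free_thresh - 1 at
 * the end of the ring; e.g. (assuming nb_rx_desc = 128 and
 * rx_free_thresh = 32) it moves 31 -> 63 -> 95 -> 127 -> 31.
 */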
1472 static inline int
1473 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1474 {
1475         volatile union ixgbe_adv_rx_desc *rxdp;
1476         struct ixgbe_rx_entry *rxep;
1477         struct rte_mbuf *mb;
1478         uint16_t alloc_idx;
1479         __le64 dma_addr;
1480         int diag, i;
1481
1482         /* allocate buffers in bulk directly into the S/W ring */
1483         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1484         rxep = &rxq->sw_ring[alloc_idx];
1485         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1486                                     rxq->rx_free_thresh);
1487         if (unlikely(diag != 0))
1488                 return -ENOMEM;
1489
1490         rxdp = &rxq->rx_ring[alloc_idx];
1491         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1492                 /* populate the static rte mbuf fields */
1493                 mb = rxep[i].mbuf;
1494                 if (reset_mbuf) {
1495                         mb->next = NULL;
1496                         mb->nb_segs = 1;
1497                         mb->port = rxq->port_id;
1498                 }
1499
1500                 rte_mbuf_refcnt_set(mb, 1);
1501                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1502
1503                 /* populate the descriptors */
1504                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1505                 rxdp[i].read.hdr_addr = 0;
1506                 rxdp[i].read.pkt_addr = dma_addr;
1507         }
1508
1509         /* update state of internal queue structure */
1510         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1511         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1512                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1513
1514         /* no errors */
1515         return 0;
1516 }
1517
1518 static inline uint16_t
1519 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1520                          uint16_t nb_pkts)
1521 {
1522         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1523         int i;
1524
1525         /* how many packets are ready to return? */
1526         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1527
1528         /* copy mbuf pointers to the application's packet list */
1529         for (i = 0; i < nb_pkts; ++i)
1530                 rx_pkts[i] = stage[i];
1531
1532         /* update internal queue state */
1533         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1534         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1535
1536         return nb_pkts;
1537 }
1538
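/*
 * Burst receive using the scan/stage mechanism above: return packets that
 * are already staged, otherwise scan the H/W ring, stage what is ready,
 * replenish descriptors once rx_tail passes rx_free_trigger (rolling the
 * tail back if the bulk allocation fails) and hand at most nb_pkts mbufs
 * to the caller.
 */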
1539 static inline uint16_t
1540 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1541              uint16_t nb_pkts)
1542 {
1543         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1544         uint16_t nb_rx = 0;
1545
1546         /* Any previously recv'd pkts will be returned from the Rx stage */
1547         if (rxq->rx_nb_avail)
1548                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1549
1550         /* Scan the H/W ring for packets to receive */
1551         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1552
1553         /* update internal queue state */
1554         rxq->rx_next_avail = 0;
1555         rxq->rx_nb_avail = nb_rx;
1556         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1557
1558         /* if required, allocate new buffers to replenish descriptors */
1559         if (rxq->rx_tail > rxq->rx_free_trigger) {
1560                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1561
1562                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1563                         int i, j;
1564
1565                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1566                                    "queue_id=%u", (unsigned) rxq->port_id,
1567                                    (unsigned) rxq->queue_id);
1568
1569                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1570                                 rxq->rx_free_thresh;
1571
1572                         /*
1573                          * Need to rewind any previous receives if we cannot
1574                          * allocate new buffers to replenish the old ones.
1575                          */
1576                         rxq->rx_nb_avail = 0;
1577                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1578                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1579                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1580
1581                         return 0;
1582                 }
1583
1584                 /* update tail pointer */
1585                 rte_wmb();
1586                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
1587         }
1588
1589         if (rxq->rx_tail >= rxq->nb_rx_desc)
1590                 rxq->rx_tail = 0;
1591
1592         /* received any packets this loop? */
1593         if (rxq->rx_nb_avail)
1594                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1595
1596         return 0;
1597 }
1598
1599 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
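/*
 * Application-side illustration (not part of this PMD; port_id and the
 * queue number are placeholders): a burst received with
 *
 *     struct rte_mbuf *pkts[64];
 *     uint16_t nb = rte_eth_rx_burst(port_id, 0, pkts, 64);
 *
 * ends up in the handler below once the bulk-allocation Rx path has been
 * selected for the device; bursts larger than RTE_PMD_IXGBE_RX_MAX_BURST
 * are split into smaller chunks here.
 */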
1600 uint16_t
1601 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1602                            uint16_t nb_pkts)
1603 {
1604         uint16_t nb_rx;
1605
1606         if (unlikely(nb_pkts == 0))
1607                 return 0;
1608
1609         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1610                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1611
1612         /* request is relatively large, chunk it up */
1613         nb_rx = 0;
1614         while (nb_pkts) {
1615                 uint16_t ret, n;
1616
1617                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1618                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1619                 nb_rx = (uint16_t)(nb_rx + ret);
1620                 nb_pkts = (uint16_t)(nb_pkts - ret);
1621                 if (ret < n)
1622                         break;
1623         }
1624
1625         return nb_rx;
1626 }
1627
1628 uint16_t
1629 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1630                 uint16_t nb_pkts)
1631 {
1632         struct ixgbe_rx_queue *rxq;
1633         volatile union ixgbe_adv_rx_desc *rx_ring;
1634         volatile union ixgbe_adv_rx_desc *rxdp;
1635         struct ixgbe_rx_entry *sw_ring;
1636         struct ixgbe_rx_entry *rxe;
1637         struct rte_mbuf *rxm;
1638         struct rte_mbuf *nmb;
1639         union ixgbe_adv_rx_desc rxd;
1640         uint64_t dma_addr;
1641         uint32_t staterr;
1642         uint32_t pkt_info;
1643         uint16_t pkt_len;
1644         uint16_t rx_id;
1645         uint16_t nb_rx;
1646         uint16_t nb_hold;
1647         uint64_t pkt_flags;
1648         uint64_t vlan_flags;
1649
1650         nb_rx = 0;
1651         nb_hold = 0;
1652         rxq = rx_queue;
1653         rx_id = rxq->rx_tail;
1654         rx_ring = rxq->rx_ring;
1655         sw_ring = rxq->sw_ring;
1656         vlan_flags = rxq->vlan_flags;
1657         while (nb_rx < nb_pkts) {
1658                 /*
1659                  * The order of operations here is important as the DD status
1660                  * bit must not be read after any other descriptor fields.
1661                  * rx_ring and rxdp are pointing to volatile data so the order
1662                  * of accesses cannot be reordered by the compiler. If they were
1663                  * not volatile, they could be reordered which could lead to
1664                  * using invalid descriptor fields when read from rxd.
1665                  */
1666                 rxdp = &rx_ring[rx_id];
1667                 staterr = rxdp->wb.upper.status_error;
1668                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1669                         break;
1670                 rxd = *rxdp;
1671
1672                 /*
1673                  * End of packet.
1674                  *
1675                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1676                  * is likely to be invalid and to be dropped by the various
1677                  * validation checks performed by the network stack.
1678                  *
1679                  * Allocate a new mbuf to replenish the RX ring descriptor.
1680                  * If the allocation fails:
1681                  *    - arrange for that RX descriptor to be the first one
1682                  *      being parsed the next time the receive function is
1683                  *      invoked [on the same queue].
1684                  *
1685                  *    - Stop parsing the RX ring and return immediately.
1686                  *
1687                  * This policy does not drop the packet received in the RX
1688                  * descriptor for which the allocation of a new mbuf failed.
1689                  * Thus, it allows that packet to be retrieved later, once
1690                  * mbufs have been freed in the meantime.
1691                  * As a side effect, holding RX descriptors instead of
1692                  * systematically giving them back to the NIC may lead to
1693                  * RX ring exhaustion situations.
1694                  * However, the NIC can gracefully prevent such situations
1695                  * from happening by sending specific "back-pressure" flow
1696                  * control frames to its peer(s).
1697                  */
1698                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1699                            "ext_err_stat=0x%08x pkt_len=%u",
1700                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1701                            (unsigned) rx_id, (unsigned) staterr,
1702                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1703
1704                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1705                 if (nmb == NULL) {
1706                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1707                                    "queue_id=%u", (unsigned) rxq->port_id,
1708                                    (unsigned) rxq->queue_id);
1709                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1710                         break;
1711                 }
1712
1713                 nb_hold++;
1714                 rxe = &sw_ring[rx_id];
1715                 rx_id++;
1716                 if (rx_id == rxq->nb_rx_desc)
1717                         rx_id = 0;
1718
1719                 /* Prefetch next mbuf while processing current one. */
1720                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1721
1722                 /*
1723                  * When next RX descriptor is on a cache-line boundary,
1724                  * prefetch the next 4 RX descriptors and the next 8 pointers
1725                  * to mbufs.
1726                  */
1727                 if ((rx_id & 0x3) == 0) {
1728                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1729                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1730                 }
1731
1732                 rxm = rxe->mbuf;
1733                 rxe->mbuf = nmb;
1734                 dma_addr =
1735                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1736                 rxdp->read.hdr_addr = 0;
1737                 rxdp->read.pkt_addr = dma_addr;
1738
1739                 /*
1740                  * Initialize the returned mbuf.
1741                  * 1) setup generic mbuf fields:
1742                  *    - number of segments,
1743                  *    - next segment,
1744                  *    - packet length,
1745                  *    - RX port identifier.
1746                  * 2) integrate hardware offload data, if any:
1747                  *    - RSS flag & hash,
1748                  *    - IP checksum flag,
1749                  *    - VLAN TCI, if any,
1750                  *    - error flags.
1751                  */
1752                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1753                                       rxq->crc_len);
1754                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1755                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1756                 rxm->nb_segs = 1;
1757                 rxm->next = NULL;
1758                 rxm->pkt_len = pkt_len;
1759                 rxm->data_len = pkt_len;
1760                 rxm->port = rxq->port_id;
1761
1762                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1763                 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
1764                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1765
1766                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1767                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1768                 pkt_flags = pkt_flags |
1769                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1770                 rxm->ol_flags = pkt_flags;
1771                 rxm->packet_type =
1772                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1773                                                        rxq->pkt_type_mask);
1774
1775                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1776                         rxm->hash.rss = rte_le_to_cpu_32(
1777                                                 rxd.wb.lower.hi_dword.rss);
1778                 else if (pkt_flags & PKT_RX_FDIR) {
1779                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1780                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1781                                         IXGBE_ATR_HASH_MASK;
1782                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1783                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1784                 }
1785                 /*
1786                  * Store the mbuf address into the next entry of the array
1787                  * of returned packets.
1788                  */
1789                 rx_pkts[nb_rx++] = rxm;
1790         }
1791         rxq->rx_tail = rx_id;
1792
1793         /*
1794          * If the number of free RX descriptors is greater than the RX free
1795          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1796          * register.
1797          * Update the RDT with the value of the last processed RX descriptor
1798          * minus 1, to guarantee that the RDT register is never equal to the
1799          * RDH register, which creates a "full" ring situation from the
1800          * hardware point of view...
1801          */
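        /*
         * E.g. with rx_free_thresh = 32 (a common configuration value) the
         * tail register is written roughly once per 32 received packets
         * instead of once per packet.
         */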
1802         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1803         if (nb_hold > rxq->rx_free_thresh) {
1804                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1805                            "nb_hold=%u nb_rx=%u",
1806                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1807                            (unsigned) rx_id, (unsigned) nb_hold,
1808                            (unsigned) nb_rx);
1809                 rx_id = (uint16_t) ((rx_id == 0) ?
1810                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1811                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1812                 nb_hold = 0;
1813         }
1814         rxq->nb_rx_hold = nb_hold;
1815         return nb_rx;
1816 }
1817
1818 /**
1819  * Return the RSC count of a descriptor; a non-zero value identifies an RSC descriptor.
1820  */
1821 static inline uint32_t
1822 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1823 {
1824         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1825                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1826 }
1827
1828 /**
1829  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1830  *
1831  * Fill the following info in the HEAD buffer of the Rx cluster:
1832  *    - RX port identifier
1833  *    - hardware offload data, if any:
1834  *      - RSS flag & hash
1835  *      - IP checksum flag
1836  *      - VLAN TCI, if any
1837  *      - error flags
1838  * @head HEAD of the packet cluster
1839  * @desc HW descriptor to get data from
1840  * @rxq Pointer to the Rx queue
1841  */
1842 static inline void
1843 ixgbe_fill_cluster_head_buf(
1844         struct rte_mbuf *head,
1845         union ixgbe_adv_rx_desc *desc,
1846         struct ixgbe_rx_queue *rxq,
1847         uint32_t staterr)
1848 {
1849         uint32_t pkt_info;
1850         uint64_t pkt_flags;
1851
1852         head->port = rxq->port_id;
1853
1854         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1855          * set in the pkt_flags field.
1856          */
1857         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1858         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1859         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1860         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1861         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1862         head->ol_flags = pkt_flags;
1863         head->packet_type =
1864                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1865
1866         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1867                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1868         else if (pkt_flags & PKT_RX_FDIR) {
1869                 head->hash.fdir.hash =
1870                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1871                                                           & IXGBE_ATR_HASH_MASK;
1872                 head->hash.fdir.id =
1873                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1874         }
1875 }
1876
1877 /**
1878  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1879  *
1880  * @rx_queue Rx queue handle
1881  * @rx_pkts table of received packets
1882  * @nb_pkts size of rx_pkts table
1883  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1884  *
1885  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1886  * additional ring of ixgbe_scattered_rx_entry's that holds the relevant RSC info.
1887  *
1888  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1889  * 1) When non-EOP RSC completion arrives:
1890  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1891  *       segment's data length.
1892  *    b) Set the "next" pointer of the current segment to point to the segment
1893  *       at the NEXTP index.
1894  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1895  *       in the sw_sc_ring.
1896  * 2) When EOP arrives we just update the cluster's total length and offload
1897  *    flags and deliver the cluster up to the upper layers. In our case - put it
1898  *    in the rx_pkts table.
1899  *
1900  * Returns the number of received packets/clusters (according to the "bulk
1901  * receive" interface).
1902  */
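/*
 * Illustration (hypothetical descriptor indices): if descriptors 5 and 9
 * carry the first two segments of one RSC cluster and descriptor 5's NEXTP
 * field is 9, the loop below points mbuf(5)->next at sw_ring[9].mbuf and
 * passes the HEAD mbuf forward via sw_sc_ring[9].fbuf; only when the EOP
 * segment arrives is the assembled cluster placed in rx_pkts[].
 */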
1903 static inline uint16_t
1904 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1905                     bool bulk_alloc)
1906 {
1907         struct ixgbe_rx_queue *rxq = rx_queue;
1908         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1909         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1910         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1911         uint16_t rx_id = rxq->rx_tail;
1912         uint16_t nb_rx = 0;
1913         uint16_t nb_hold = rxq->nb_rx_hold;
1914         uint16_t prev_id = rxq->rx_tail;
1915
1916         while (nb_rx < nb_pkts) {
1917                 bool eop;
1918                 struct ixgbe_rx_entry *rxe;
1919                 struct ixgbe_scattered_rx_entry *sc_entry;
1920                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1921                 struct ixgbe_rx_entry *next_rxe = NULL;
1922                 struct rte_mbuf *first_seg;
1923                 struct rte_mbuf *rxm;
1924                 struct rte_mbuf *nmb;
1925                 union ixgbe_adv_rx_desc rxd;
1926                 uint16_t data_len;
1927                 uint16_t next_id;
1928                 volatile union ixgbe_adv_rx_desc *rxdp;
1929                 uint32_t staterr;
1930
1931 next_desc:
1932                 /*
1933                  * The code in this whole file uses the volatile pointer to
1934                  * ensure the read ordering of the status and the rest of the
1935                  * descriptor fields (on the compiler level only!!!). This is so
1936                  * UGLY - why not just use the compiler barrier instead? DPDK
1937                  * even has the rte_compiler_barrier() for that.
1938                  *
1939                  * But most importantly this is just wrong because this doesn't
1940                  * ensure memory ordering in a general case at all. For
1941                  * instance, DPDK is supposed to work on Power CPUs where
1942                  * compiler barrier may just not be enough!
1943                  *
1944                  * I tried to write only this function properly to have a
1945                  * starting point (as a part of an LRO/RSC series) but the
1946                  * compiler cursed at me when I tried to cast away the
1947                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
1948                  * keeping it the way it is for now.
1949                  *
1950                  * The code in this file is broken in so many other places and
1951                  * will just not work on a big endian CPU anyway therefore the
1952                  * lines below will have to be revisited together with the rest
1953                  * of the ixgbe PMD.
1954                  *
1955                  * TODO:
1956                  *    - Get rid of "volatile" crap and let the compiler do its
1957                  *      job.
1958                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
1959                  *      memory ordering below.
1960                  */
1961                 rxdp = &rx_ring[rx_id];
1962                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
1963
1964                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
1965                         break;
1966
1967                 rxd = *rxdp;
1968
1969                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1970                                   "staterr=0x%x data_len=%u",
1971                            rxq->port_id, rxq->queue_id, rx_id, staterr,
1972                            rte_le_to_cpu_16(rxd.wb.upper.length));
1973
1974                 if (!bulk_alloc) {
1975                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1976                         if (nmb == NULL) {
1977                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1978                                                   "port_id=%u queue_id=%u",
1979                                            rxq->port_id, rxq->queue_id);
1980
1981                                 rte_eth_devices[rxq->port_id].data->
1982                                                         rx_mbuf_alloc_failed++;
1983                                 break;
1984                         }
1985                 } else if (nb_hold > rxq->rx_free_thresh) {
1986                         uint16_t next_rdt = rxq->rx_free_trigger;
1987
1988                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
1989                                 rte_wmb();
1990                                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
1991                                                     next_rdt);
1992                                 nb_hold -= rxq->rx_free_thresh;
1993                         } else {
1994                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1995                                                   "port_id=%u queue_id=%u",
1996                                            rxq->port_id, rxq->queue_id);
1997
1998                                 rte_eth_devices[rxq->port_id].data->
1999                                                         rx_mbuf_alloc_failed++;
2000                                 break;
2001                         }
2002                 }
2003
2004                 nb_hold++;
2005                 rxe = &sw_ring[rx_id];
2006                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2007
2008                 next_id = rx_id + 1;
2009                 if (next_id == rxq->nb_rx_desc)
2010                         next_id = 0;
2011
2012                 /* Prefetch next mbuf while processing current one. */
2013                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2014
2015                 /*
2016                  * When next RX descriptor is on a cache-line boundary,
2017                  * prefetch the next 4 RX descriptors and the next 8 pointers
2018                  * to mbufs.
2019                  */
2020                 if ((next_id & 0x3) == 0) {
2021                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2022                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2023                 }
2024
2025                 rxm = rxe->mbuf;
2026
2027                 if (!bulk_alloc) {
2028                         __le64 dma =
2029                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2030                         /*
2031                          * Update RX descriptor with the physical address of the
2032                          * new data buffer of the new allocated mbuf.
2033                          */
2034                         rxe->mbuf = nmb;
2035
2036                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2037                         rxdp->read.hdr_addr = 0;
2038                         rxdp->read.pkt_addr = dma;
2039                 } else
2040                         rxe->mbuf = NULL;
2041
2042                 /*
2043                  * Set the data length of the mbuf.
2044                  */
2045                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2046                 rxm->data_len = data_len;
2047
2048                 if (!eop) {
2049                         uint16_t nextp_id;
2050                         /*
2051                          * Get next descriptor index:
2052                          *  - For RSC it's in the NEXTP field.
2053                          *  - For a scattered packet - it's just a following
2054                          *    descriptor.
2055                          */
2056                         if (ixgbe_rsc_count(&rxd))
2057                                 nextp_id =
2058                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2059                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2060                         else
2061                                 nextp_id = next_id;
2062
2063                         next_sc_entry = &sw_sc_ring[nextp_id];
2064                         next_rxe = &sw_ring[nextp_id];
2065                         rte_ixgbe_prefetch(next_rxe);
2066                 }
2067
2068                 sc_entry = &sw_sc_ring[rx_id];
2069                 first_seg = sc_entry->fbuf;
2070                 sc_entry->fbuf = NULL;
2071
2072                 /*
2073                  * If this is the first buffer of the received packet,
2074                  * set the pointer to the first mbuf of the packet and
2075                  * initialize its context.
2076                  * Otherwise, update the total length and the number of segments
2077                  * of the current scattered packet, and update the pointer to
2078                  * the last mbuf of the current packet.
2079                  */
2080                 if (first_seg == NULL) {
2081                         first_seg = rxm;
2082                         first_seg->pkt_len = data_len;
2083                         first_seg->nb_segs = 1;
2084                 } else {
2085                         first_seg->pkt_len += data_len;
2086                         first_seg->nb_segs++;
2087                 }
2088
2089                 prev_id = rx_id;
2090                 rx_id = next_id;
2091
2092                 /*
2093                  * If this is not the last buffer of the received packet, update
2094                  * the pointer to the first mbuf at the NEXTP entry in the
2095                  * sw_sc_ring and continue to parse the RX ring.
2096                  */
2097                 if (!eop && next_rxe) {
2098                         rxm->next = next_rxe->mbuf;
2099                         next_sc_entry->fbuf = first_seg;
2100                         goto next_desc;
2101                 }
2102
2103                 /*
2104                  * This is the last buffer of the received packet - return
2105                  * the current cluster to the user.
2106                  */
2107                 rxm->next = NULL;
2108
2109                 /* Initialize the first mbuf of the returned packet */
2110                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2111
2112                 /*
2113                  * Deal with the case when HW CRC strip is disabled.
2114                  * That can't happen when LRO is enabled, but still could
2115                  * happen for scattered RX mode.
2116                  */
2117                 first_seg->pkt_len -= rxq->crc_len;
2118                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2119                         struct rte_mbuf *lp;
2120
2121                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2122                                 ;
2123
2124                         first_seg->nb_segs--;
2125                         lp->data_len -= rxq->crc_len - rxm->data_len;
2126                         lp->next = NULL;
2127                         rte_pktmbuf_free_seg(rxm);
2128                 } else
2129                         rxm->data_len -= rxq->crc_len;
2130
2131                 /* Prefetch data of first segment, if configured to do so. */
2132                 rte_packet_prefetch((char *)first_seg->buf_addr +
2133                         first_seg->data_off);
2134
2135                 /*
2136                  * Store the mbuf address into the next entry of the array
2137                  * of returned packets.
2138                  */
2139                 rx_pkts[nb_rx++] = first_seg;
2140         }
2141
2142         /*
2143          * Record index of the next RX descriptor to probe.
2144          */
2145         rxq->rx_tail = rx_id;
2146
2147         /*
2148          * If the number of free RX descriptors is greater than the RX free
2149          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2150          * register.
2151          * Update the RDT with the value of the last processed RX descriptor
2152          * minus 1, to guarantee that the RDT register is never equal to the
2153          * RDH register, which creates a "full" ring situation from the
2154          * hardware point of view...
2155          */
2156         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2157                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2158                            "nb_hold=%u nb_rx=%u",
2159                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2160
2161                 rte_wmb();
2162                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
2163                 nb_hold = 0;
2164         }
2165
2166         rxq->nb_rx_hold = nb_hold;
2167         return nb_rx;
2168 }
2169
2170 uint16_t
2171 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2172                                  uint16_t nb_pkts)
2173 {
2174         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2175 }
2176
2177 uint16_t
2178 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2179                                uint16_t nb_pkts)
2180 {
2181         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2182 }
2183
2184 /*********************************************************************
2185  *
2186  *  Queue management functions
2187  *
2188  **********************************************************************/
2189
2190 static void __attribute__((cold))
2191 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2192 {
2193         unsigned i;
2194
2195         if (txq->sw_ring != NULL) {
2196                 for (i = 0; i < txq->nb_tx_desc; i++) {
2197                         if (txq->sw_ring[i].mbuf != NULL) {
2198                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2199                                 txq->sw_ring[i].mbuf = NULL;
2200                         }
2201                 }
2202         }
2203 }
2204
2205 static void __attribute__((cold))
2206 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2207 {
2208         if (txq != NULL &&
2209             txq->sw_ring != NULL)
2210                 rte_free(txq->sw_ring);
2211 }
2212
2213 static void __attribute__((cold))
2214 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2215 {
2216         if (txq != NULL && txq->ops != NULL) {
2217                 txq->ops->release_mbufs(txq);
2218                 txq->ops->free_swring(txq);
2219                 rte_free(txq);
2220         }
2221 }
2222
2223 void __attribute__((cold))
2224 ixgbe_dev_tx_queue_release(void *txq)
2225 {
2226         ixgbe_tx_queue_release(txq);
2227 }
2228
2229 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2230 static void __attribute__((cold))
2231 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2232 {
2233         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2234         struct ixgbe_tx_entry *txe = txq->sw_ring;
2235         uint16_t prev, i;
2236
2237         /* Zero out HW ring memory */
2238         for (i = 0; i < txq->nb_tx_desc; i++) {
2239                 txq->tx_ring[i] = zeroed_desc;
2240         }
2241
2242         /* Initialize SW ring entries */
2243         prev = (uint16_t) (txq->nb_tx_desc - 1);
2244         for (i = 0; i < txq->nb_tx_desc; i++) {
2245                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2246
2247                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2248                 txe[i].mbuf = NULL;
2249                 txe[i].last_id = i;
2250                 txe[prev].next_id = i;
2251                 prev = i;
2252         }
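        /*
         * The loop above links the SW ring entries into a circle through
         * next_id (entry i points at i + 1 and the last entry points back
         * at entry 0); the transmit and clean-up code follows these links
         * when releasing transmitted mbufs.
         */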
2253
2254         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2255         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2256
2257         txq->tx_tail = 0;
2258         txq->nb_tx_used = 0;
2259         /*
2260          * Always allow 1 descriptor to be un-allocated to avoid
2261          * a H/W race condition
2262          */
2263         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2264         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2265         txq->ctx_curr = 0;
2266         memset((void *)&txq->ctx_cache, 0,
2267                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2268 }
2269
2270 static const struct ixgbe_txq_ops def_txq_ops = {
2271         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2272         .free_swring = ixgbe_tx_free_swring,
2273         .reset = ixgbe_reset_tx_queue,
2274 };
2275
2276 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2277  * the queue parameters. Used in tx_queue_setup by primary process and then
2278  * in dev_init by secondary process when attaching to an existing ethdev.
2279  */
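/*
 * Summary of the selection below: the simple Tx path is used only when
 * txq_flags match IXGBE_SIMPLE_FLAGS and tx_rs_thresh is at least
 * RTE_PMD_IXGBE_TX_MAX_BURST; the vector variant additionally requires
 * tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ and (in the primary
 * process) a successful ixgbe_txq_vec_setup().  Everything else falls
 * back to the full-featured ixgbe_xmit_pkts().
 */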
2280 void __attribute__((cold))
2281 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2282 {
2283         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2284         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2285                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2286                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2287 #ifdef RTE_IXGBE_INC_VECTOR
2288                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2289                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2290                                         ixgbe_txq_vec_setup(txq) == 0)) {
2291                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2292                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2293                 } else
2294 #endif
2295                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2296         } else {
2297                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2298                 PMD_INIT_LOG(DEBUG,
2299                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2300                                 (unsigned long)txq->txq_flags,
2301                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2302                 PMD_INIT_LOG(DEBUG,
2303                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2304                                 (unsigned long)txq->tx_rs_thresh,
2305                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2306                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2307         }
2308 }
2309
2310 int __attribute__((cold))
2311 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2312                          uint16_t queue_idx,
2313                          uint16_t nb_desc,
2314                          unsigned int socket_id,
2315                          const struct rte_eth_txconf *tx_conf)
2316 {
2317         const struct rte_memzone *tz;
2318         struct ixgbe_tx_queue *txq;
2319         struct ixgbe_hw     *hw;
2320         uint16_t tx_rs_thresh, tx_free_thresh;
2321
2322         PMD_INIT_FUNC_TRACE();
2323         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2324
2325         /*
2326          * Validate number of transmit descriptors.
2327          * It must not exceed hardware maximum, and must be multiple
2328          * of IXGBE_ALIGN.
2329          */
2330         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2331                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2332                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2333                 return -EINVAL;
2334         }
2335
2336         /*
2337          * The following two parameters control the setting of the RS bit on
2338          * transmit descriptors.
2339          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2340          * descriptors have been used.
2341          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2342          * descriptors are used or if the number of descriptors required
2343          * to transmit a packet is greater than the number of free TX
2344          * descriptors.
2345          * The following constraints must be satisfied:
2346          *  tx_rs_thresh must be greater than 0.
2347          *  tx_rs_thresh must be less than the size of the ring minus 2.
2348          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2349          *  tx_rs_thresh must be a divisor of the ring size.
2350          *  tx_free_thresh must be greater than 0.
2351          *  tx_free_thresh must be less than the size of the ring minus 3.
2352          * One descriptor in the TX ring is used as a sentinel to avoid a
2353          * H/W race condition, hence the maximum threshold constraints.
2354          * When set to zero use default values.
2355          */
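        /*
         * Worked example (assuming the PMD defaults of 32 for
         * DEFAULT_TX_RS_THRESH and DEFAULT_TX_FREE_THRESH): with
         * nb_desc = 512, tx_rs_thresh = 32 and tx_free_thresh = 32 pass
         * every check below, since 32 < 510, 32 <= 32, 32 < 509 and
         * 512 % 32 == 0.
         */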
2356         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2357                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2358         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2359                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2360         if (tx_rs_thresh >= (nb_desc - 2)) {
2361                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2362                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2363                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2364                         (int)dev->data->port_id, (int)queue_idx);
2365                 return -(EINVAL);
2366         }
2367         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2368                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2369                         "(tx_rs_thresh=%u port=%d queue=%d)",
2370                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2371                         (int)dev->data->port_id, (int)queue_idx);
2372                 return -(EINVAL);
2373         }
2374         if (tx_free_thresh >= (nb_desc - 3)) {
2375                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2376                              "number of "
2377                              "TX descriptors minus 3. (tx_free_thresh=%u "
2378                              "port=%d queue=%d)",
2379                              (unsigned int)tx_free_thresh,
2380                              (int)dev->data->port_id, (int)queue_idx);
2381                 return -(EINVAL);
2382         }
2383         if (tx_rs_thresh > tx_free_thresh) {
2384                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2385                              "tx_free_thresh. (tx_free_thresh=%u "
2386                              "tx_rs_thresh=%u port=%d queue=%d)",
2387                              (unsigned int)tx_free_thresh,
2388                              (unsigned int)tx_rs_thresh,
2389                              (int)dev->data->port_id,
2390                              (int)queue_idx);
2391                 return -(EINVAL);
2392         }
2393         if ((nb_desc % tx_rs_thresh) != 0) {
2394                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2395                              "number of TX descriptors. (tx_rs_thresh=%u "
2396                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2397                              (int)dev->data->port_id, (int)queue_idx);
2398                 return -(EINVAL);
2399         }
2400
2401         /*
2402          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2403          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2404          * by the NIC and all descriptors are written back after the NIC
2405          * accumulates WTHRESH descriptors.
2406          */
2407         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2408                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2409                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2410                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2411                              (int)dev->data->port_id, (int)queue_idx);
2412                 return -(EINVAL);
2413         }
2414
2415         /* Free memory prior to re-allocation if needed... */
2416         if (dev->data->tx_queues[queue_idx] != NULL) {
2417                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2418                 dev->data->tx_queues[queue_idx] = NULL;
2419         }
2420
2421         /* First allocate the tx queue data structure */
2422         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2423                                  RTE_CACHE_LINE_SIZE, socket_id);
2424         if (txq == NULL)
2425                 return -ENOMEM;
2426
2427         /*
2428          * Allocate TX ring hardware descriptors. A memzone large enough to
2429          * handle the maximum ring size is allocated in order to allow for
2430          * resizing in later calls to the queue setup function.
2431          */
2432         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2433                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2434                         IXGBE_ALIGN, socket_id);
2435         if (tz == NULL) {
2436                 ixgbe_tx_queue_release(txq);
2437                 return -ENOMEM;
2438         }
2439
2440         txq->nb_tx_desc = nb_desc;
2441         txq->tx_rs_thresh = tx_rs_thresh;
2442         txq->tx_free_thresh = tx_free_thresh;
2443         txq->pthresh = tx_conf->tx_thresh.pthresh;
2444         txq->hthresh = tx_conf->tx_thresh.hthresh;
2445         txq->wthresh = tx_conf->tx_thresh.wthresh;
2446         txq->queue_id = queue_idx;
2447         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2448                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2449         txq->port_id = dev->data->port_id;
2450         txq->txq_flags = tx_conf->txq_flags;
2451         txq->ops = &def_txq_ops;
2452         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2453
2454         /*
2455          * Use the VFTDT register for the TX tail pointer when running as a virtual function.
2456          */
2457         if (hw->mac.type == ixgbe_mac_82599_vf ||
2458             hw->mac.type == ixgbe_mac_X540_vf ||
2459             hw->mac.type == ixgbe_mac_X550_vf ||
2460             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2461             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2462                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2463         else
2464                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2465
2466         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2467         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2468
2469         /* Allocate software ring */
2470         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2471                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2472                                 RTE_CACHE_LINE_SIZE, socket_id);
2473         if (txq->sw_ring == NULL) {
2474                 ixgbe_tx_queue_release(txq);
2475                 return -ENOMEM;
2476         }
2477         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2478                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2479
2480         /* set up vector or scalar TX function as appropriate */
2481         ixgbe_set_tx_function(dev, txq);
2482
2483         txq->ops->reset(txq);
2484
2485         dev->data->tx_queues[queue_idx] = txq;
2486
2487
2488         return 0;
2489 }
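
/*
 * Illustrative sketch (not part of the driver): how an application typically
 * reaches ixgbe_dev_tx_queue_setup() through the generic ethdev API. The
 * descriptor count and thresholds below are hypothetical but satisfy the
 * constraints validated above (512 is a multiple of tx_rs_thresh, wthresh is
 * 0, and tx_rs_thresh <= tx_free_thresh < 512 - 3).
 *
 *     struct rte_eth_txconf txconf = {
 *             .tx_thresh = { .pthresh = 32, .hthresh = 0, .wthresh = 0 },
 *             .tx_rs_thresh = 32,
 *             .tx_free_thresh = 32,
 *     };
 *     int ret = rte_eth_tx_queue_setup(port_id, 0, 512,
 *                                      rte_eth_dev_socket_id(port_id),
 *                                      &txconf);
 *     if (ret != 0)
 *             rte_exit(EXIT_FAILURE, "tx queue setup failed\n");
 */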
2490
2491 /**
2492  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2493  *
2494  * The "next" pointer of the last segment of a not-yet-completed RSC cluster
2495  * in the sw_sc_ring is not set to NULL but rather points to the next
2496  * mbuf of this RSC aggregation (one that has not been completed yet and
2497  * still resides on the HW ring). So, instead of calling rte_pktmbuf_free(),
2498  * we just free the first "nb_segs" segments of the cluster explicitly by
2499  * calling rte_pktmbuf_free_seg() on each of them.
2500  *
2501  * @m scattered cluster head
2502  */
2503 static void __attribute__((cold))
2504 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2505 {
2506         uint8_t i, nb_segs = m->nb_segs;
2507         struct rte_mbuf *next_seg;
2508
2509         for (i = 0; i < nb_segs; i++) {
2510                 next_seg = m->next;
2511                 rte_pktmbuf_free_seg(m);
2512                 m = next_seg;
2513         }
2514 }
2515
2516 static void __attribute__((cold))
2517 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2518 {
2519         unsigned i;
2520
2521 #ifdef RTE_IXGBE_INC_VECTOR
2522         /* SSE Vector driver has a different way of releasing mbufs. */
2523         if (rxq->rx_using_sse) {
2524                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2525                 return;
2526         }
2527 #endif
2528
2529         if (rxq->sw_ring != NULL) {
2530                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2531                         if (rxq->sw_ring[i].mbuf != NULL) {
2532                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2533                                 rxq->sw_ring[i].mbuf = NULL;
2534                         }
2535                 }
2536                 if (rxq->rx_nb_avail) {
2537                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2538                                 struct rte_mbuf *mb;
2539
2540                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2541                                 rte_pktmbuf_free_seg(mb);
2542                         }
2543                         rxq->rx_nb_avail = 0;
2544                 }
2545         }
2546
2547         if (rxq->sw_sc_ring)
2548                 for (i = 0; i < rxq->nb_rx_desc; i++)
2549                         if (rxq->sw_sc_ring[i].fbuf) {
2550                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2551                                 rxq->sw_sc_ring[i].fbuf = NULL;
2552                         }
2553 }
2554
2555 static void __attribute__((cold))
2556 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2557 {
2558         if (rxq != NULL) {
2559                 ixgbe_rx_queue_release_mbufs(rxq);
2560                 rte_free(rxq->sw_ring);
2561                 rte_free(rxq->sw_sc_ring);
2562                 rte_free(rxq);
2563         }
2564 }
2565
2566 void __attribute__((cold))
2567 ixgbe_dev_rx_queue_release(void *rxq)
2568 {
2569         ixgbe_rx_queue_release(rxq);
2570 }
2571
2572 /*
2573  * Check if Rx Burst Bulk Alloc function can be used.
2574  * Return
2575  *        0: the preconditions are satisfied and the bulk allocation function
2576  *           can be used.
2577  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2578  *           function must be used.
2579  */
2580 static inline int __attribute__((cold))
2581 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2582 {
2583         int ret = 0;
2584
2585         /*
2586          * Make sure the following pre-conditions are satisfied:
2587          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2588          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2589          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2590          *   rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST)
2591          * Scattered packets are not supported.  This should be checked
2592          * outside of this function.
2593          */
2594         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2595                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2596                              "rxq->rx_free_thresh=%d, "
2597                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2598                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2599                 ret = -EINVAL;
2600         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2601                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2602                              "rxq->rx_free_thresh=%d, "
2603                              "rxq->nb_rx_desc=%d",
2604                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2605                 ret = -EINVAL;
2606         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2607                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2608                              "rxq->nb_rx_desc=%d, "
2609                              "rxq->rx_free_thresh=%d",
2610                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2611                 ret = -EINVAL;
2612         } else if (!(rxq->nb_rx_desc <
2613                (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
2614                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2615                              "rxq->nb_rx_desc=%d, "
2616                              "IXGBE_MAX_RING_DESC=%d, "
2617                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2618                              rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
2619                              RTE_PMD_IXGBE_RX_MAX_BURST);
2620                 ret = -EINVAL;
2621         }
2622
2623         return ret;
2624 }
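
/*
 * Worked example (assumption: RTE_PMD_IXGBE_RX_MAX_BURST is 32 and
 * IXGBE_MAX_RING_DESC is 4096, as defined in ixgbe_rxtx.h): a queue created
 * with nb_rx_desc = 128 and rx_free_thresh = 32 passes all four checks above
 * (32 >= 32, 32 < 128, 128 % 32 == 0, 128 < 4096 - 32), so the bulk
 * allocation Rx path stays enabled. Setting rx_free_thresh = 16 would fail
 * the first check and silently fall back to the default Rx burst function.
 */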
2625
2626 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2627 static void __attribute__((cold))
2628 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2629 {
2630         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2631         unsigned i;
2632         uint16_t len = rxq->nb_rx_desc;
2633
2634         /*
2635          * By default, the Rx queue setup function allocates enough memory for
2636          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2637          * extra memory at the end of the descriptor ring to be zeroed out. A
2638          * pre-condition for using the Rx burst bulk alloc function is that the
2639          * number of descriptors is less than or equal to
2640          * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2641          * constraints here to see if we need to zero out memory after the end
2642          * of the H/W descriptor ring.
2643          */
2644         if (adapter->rx_bulk_alloc_allowed)
2645                 /* zero out extra memory */
2646                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2647
2648         /*
2649          * Zero out HW ring memory. Zero out extra memory at the end of
2650          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2651          * reads extra memory as zeros.
2652          */
2653         for (i = 0; i < len; i++) {
2654                 rxq->rx_ring[i] = zeroed_desc;
2655         }
2656
2657         /*
2658          * initialize extra software ring entries. Space for these extra
2659          * entries is always allocated
2660          */
2661         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2662         for (i = rxq->nb_rx_desc; i < len; ++i) {
2663                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2664         }
2665
2666         rxq->rx_nb_avail = 0;
2667         rxq->rx_next_avail = 0;
2668         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2669         rxq->rx_tail = 0;
2670         rxq->nb_rx_hold = 0;
2671         rxq->pkt_first_seg = NULL;
2672         rxq->pkt_last_seg = NULL;
2673
2674 #ifdef RTE_IXGBE_INC_VECTOR
2675         rxq->rxrearm_start = 0;
2676         rxq->rxrearm_nb = 0;
2677 #endif
2678 }
2679
2680 int __attribute__((cold))
2681 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2682                          uint16_t queue_idx,
2683                          uint16_t nb_desc,
2684                          unsigned int socket_id,
2685                          const struct rte_eth_rxconf *rx_conf,
2686                          struct rte_mempool *mp)
2687 {
2688         const struct rte_memzone *rz;
2689         struct ixgbe_rx_queue *rxq;
2690         struct ixgbe_hw     *hw;
2691         uint16_t len;
2692         struct ixgbe_adapter *adapter =
2693                 (struct ixgbe_adapter *)dev->data->dev_private;
2694
2695         PMD_INIT_FUNC_TRACE();
2696         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2697
2698         /*
2699          * Validate number of receive descriptors.
2700          * It must not exceed hardware maximum, and must be multiple
2701          * of IXGBE_ALIGN.
2702          */
2703         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2704                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2705                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2706                 return -EINVAL;
2707         }
2708
2709         /* Free memory prior to re-allocation if needed... */
2710         if (dev->data->rx_queues[queue_idx] != NULL) {
2711                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2712                 dev->data->rx_queues[queue_idx] = NULL;
2713         }
2714
2715         /* First allocate the rx queue data structure */
2716         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2717                                  RTE_CACHE_LINE_SIZE, socket_id);
2718         if (rxq == NULL)
2719                 return -ENOMEM;
2720         rxq->mb_pool = mp;
2721         rxq->nb_rx_desc = nb_desc;
2722         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2723         rxq->queue_id = queue_idx;
2724         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2725                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2726         rxq->port_id = dev->data->port_id;
2727         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2728                                                         0 : ETHER_CRC_LEN);
2729         rxq->drop_en = rx_conf->rx_drop_en;
2730         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2731
2732         /*
2733          * The packet type in RX descriptor is different for different NICs.
2734          * Some bits are used for x550 but reserved for other NICs.
2735          * So set different masks for different NICs.
2736          */
2737         if (hw->mac.type == ixgbe_mac_X550 ||
2738             hw->mac.type == ixgbe_mac_X550EM_x ||
2739             hw->mac.type == ixgbe_mac_X550EM_a ||
2740             hw->mac.type == ixgbe_mac_X550_vf ||
2741             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2742             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2743                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2744         else
2745                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2746
2747         /*
2748          * Allocate RX ring hardware descriptors. A memzone large enough to
2749          * handle the maximum ring size is allocated in order to allow for
2750          * resizing in later calls to the queue setup function.
2751          */
2752         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2753                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2754         if (rz == NULL) {
2755                 ixgbe_rx_queue_release(rxq);
2756                 return -ENOMEM;
2757         }
2758
2759         /*
2760          * Zero init all the descriptors in the ring.
2761          */
2762         memset(rz->addr, 0, RX_RING_SZ);
2763
2764         /*
2765          * Use the VFRDT/VFRDH registers when running as a virtual function.
2766          */
2767         if (hw->mac.type == ixgbe_mac_82599_vf ||
2768             hw->mac.type == ixgbe_mac_X540_vf ||
2769             hw->mac.type == ixgbe_mac_X550_vf ||
2770             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2771             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2772                 rxq->rdt_reg_addr =
2773                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2774                 rxq->rdh_reg_addr =
2775                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2776         } else {
2777                 rxq->rdt_reg_addr =
2778                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2779                 rxq->rdh_reg_addr =
2780                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2781         }
2782
2783         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2784         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2785
2786         /*
2787          * Certain constraints must be met in order to use the bulk buffer
2788          * allocation Rx burst function. If any of the Rx queues does not meet
2789          * them, the feature is disabled for the whole port.
2790          */
2791         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2792                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2793                                     "preconditions - canceling the feature for "
2794                                     "the whole port[%d]",
2795                              rxq->queue_id, rxq->port_id);
2796                 adapter->rx_bulk_alloc_allowed = false;
2797         }
2798
2799         /*
2800          * Allocate software ring. Allow for space at the end of the
2801          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2802          * function does not access an invalid memory region.
2803          */
2804         len = nb_desc;
2805         if (adapter->rx_bulk_alloc_allowed)
2806                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2807
2808         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2809                                           sizeof(struct ixgbe_rx_entry) * len,
2810                                           RTE_CACHE_LINE_SIZE, socket_id);
2811         if (!rxq->sw_ring) {
2812                 ixgbe_rx_queue_release(rxq);
2813                 return -ENOMEM;
2814         }
2815
2816         /*
2817          * Always allocate even if it's not going to be needed in order to
2818          * simplify the code.
2819          *
2820          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2821          * be requested in ixgbe_dev_rx_init(), which is called later from
2822          * dev_start() flow.
2823          */
2824         rxq->sw_sc_ring =
2825                 rte_zmalloc_socket("rxq->sw_sc_ring",
2826                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2827                                    RTE_CACHE_LINE_SIZE, socket_id);
2828         if (!rxq->sw_sc_ring) {
2829                 ixgbe_rx_queue_release(rxq);
2830                 return -ENOMEM;
2831         }
2832
2833         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2834                             "dma_addr=0x%"PRIx64,
2835                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2836                      rxq->rx_ring_phys_addr);
2837
2838         if (!rte_is_power_of_2(nb_desc)) {
2839                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2840                                     "preconditions - canceling the feature for "
2841                                     "the whole port[%d]",
2842                              rxq->queue_id, rxq->port_id);
2843                 adapter->rx_vec_allowed = false;
2844         } else
2845                 ixgbe_rxq_vec_setup(rxq);
2846
2847         dev->data->rx_queues[queue_idx] = rxq;
2848
2849         ixgbe_reset_rx_queue(adapter, rxq);
2850
2851         return 0;
2852 }
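
/*
 * Illustrative sketch (not part of the driver): a typical application-side
 * call chain that ends up in ixgbe_dev_rx_queue_setup(). The pool name, pool
 * sizes and descriptor count are hypothetical.
 *
 *     struct rte_mempool *mp = rte_pktmbuf_pool_create("rx_pool", 8192,
 *                     256, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
 *     if (mp == NULL)
 *             rte_exit(EXIT_FAILURE, "mbuf pool creation failed\n");
 *     int ret = rte_eth_rx_queue_setup(port_id, 0, 128,
 *                                      rte_eth_dev_socket_id(port_id),
 *                                      NULL, mp);
 *
 * 128 descriptors satisfy the alignment and min/max checks above and are a
 * power of two, so both the vector and bulk-alloc Rx paths remain eligible
 * on such a queue.
 */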
2853
2854 uint32_t
2855 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2856 {
2857 #define IXGBE_RXQ_SCAN_INTERVAL 4
2858         volatile union ixgbe_adv_rx_desc *rxdp;
2859         struct ixgbe_rx_queue *rxq;
2860         uint32_t desc = 0;
2861
2862         if (rx_queue_id >= dev->data->nb_rx_queues) {
2863                 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2864                 return 0;
2865         }
2866
2867         rxq = dev->data->rx_queues[rx_queue_id];
2868         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2869
2870         while ((desc < rxq->nb_rx_desc) &&
2871                 (rxdp->wb.upper.status_error &
2872                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2873                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2874                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2875                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2876                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2877                                 desc - rxq->nb_rx_desc]);
2878         }
2879
2880         return desc;
2881 }
2882
2883 int
2884 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2885 {
2886         volatile union ixgbe_adv_rx_desc *rxdp;
2887         struct ixgbe_rx_queue *rxq = rx_queue;
2888         uint32_t desc;
2889
2890         if (unlikely(offset >= rxq->nb_rx_desc))
2891                 return 0;
2892         desc = rxq->rx_tail + offset;
2893         if (desc >= rxq->nb_rx_desc)
2894                 desc -= rxq->nb_rx_desc;
2895
2896         rxdp = &rxq->rx_ring[desc];
2897         return !!(rxdp->wb.upper.status_error &
2898                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2899 }
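
/*
 * Usage note (a sketch, not part of the driver): both helpers above are
 * normally reached through the ethdev wrappers, e.g. to poll how far the
 * hardware has progressed on queue 0 of a hypothetical port_id:
 *
 *     int filled = rte_eth_rx_queue_count(port_id, 0);
 *     int done = rte_eth_rx_descriptor_done(port_id, 0, 0);
 *
 * The queue count is scanned in steps of IXGBE_RXQ_SCAN_INTERVAL (4)
 * descriptors, so the reported value is an estimate rather than an exact
 * fill level.
 */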
2900
2901 void __attribute__((cold))
2902 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2903 {
2904         unsigned i;
2905         struct ixgbe_adapter *adapter =
2906                 (struct ixgbe_adapter *)dev->data->dev_private;
2907
2908         PMD_INIT_FUNC_TRACE();
2909
2910         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2911                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2912
2913                 if (txq != NULL) {
2914                         txq->ops->release_mbufs(txq);
2915                         txq->ops->reset(txq);
2916                 }
2917         }
2918
2919         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2920                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2921
2922                 if (rxq != NULL) {
2923                         ixgbe_rx_queue_release_mbufs(rxq);
2924                         ixgbe_reset_rx_queue(adapter, rxq);
2925                 }
2926         }
2927 }
2928
2929 void
2930 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2931 {
2932         unsigned i;
2933
2934         PMD_INIT_FUNC_TRACE();
2935
2936         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2937                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2938                 dev->data->rx_queues[i] = NULL;
2939         }
2940         dev->data->nb_rx_queues = 0;
2941
2942         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2943                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2944                 dev->data->tx_queues[i] = NULL;
2945         }
2946         dev->data->nb_tx_queues = 0;
2947 }
2948
2949 /*********************************************************************
2950  *
2951  *  Device RX/TX init functions
2952  *
2953  **********************************************************************/
2954
2955 /**
2956  * Receive Side Scaling (RSS)
2957  * See section 7.1.2.8 in the following document:
2958  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2959  *
2960  * Principles:
2961  * The source and destination IP addresses of the IP header and the source
2962  * and destination ports of TCP/UDP headers, if any, of received packets are
2963  * hashed against a configurable random key to compute a 32-bit RSS hash result.
2964  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2965  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
2966  * RSS output index which is used as the index of the RX queue in which to
2967  * store the received packets.
2968  * The following output is supplied in the RX write-back descriptor:
2969  *     - 32-bit result of the Microsoft RSS hash function,
2970  *     - 4-bit RSS type field.
2971  */
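
/*
 * Worked example of the scheme above: a packet whose 32-bit RSS hash is
 * 0x1234abcd has 0x4d (77) in its seven LSBs, so RETA entry 77 selects the
 * RX queue, and 0x1234abcd is reported back in the RX write-back descriptor.
 */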
2972
2973 /*
2974  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2975  * Used as the default key.
2976  */
2977 static uint8_t rss_intel_key[40] = {
2978         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2979         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2980         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2981         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2982         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2983 };
2984
2985 static void
2986 ixgbe_rss_disable(struct rte_eth_dev *dev)
2987 {
2988         struct ixgbe_hw *hw;
2989         uint32_t mrqc;
2990         uint32_t mrqc_reg;
2991
2992         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2993         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2994         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2995         mrqc &= ~IXGBE_MRQC_RSSEN;
2996         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2997 }
2998
2999 static void
3000 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3001 {
3002         uint8_t  *hash_key;
3003         uint32_t mrqc;
3004         uint32_t rss_key;
3005         uint64_t rss_hf;
3006         uint16_t i;
3007         uint32_t mrqc_reg;
3008         uint32_t rssrk_reg;
3009
3010         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3011         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3012
3013         hash_key = rss_conf->rss_key;
3014         if (hash_key != NULL) {
3015                 /* Fill in RSS hash key */
3016                 for (i = 0; i < 10; i++) {
3017                         rss_key  = hash_key[(i * 4)];
3018                         rss_key |= hash_key[(i * 4) + 1] << 8;
3019                         rss_key |= hash_key[(i * 4) + 2] << 16;
3020                         rss_key |= hash_key[(i * 4) + 3] << 24;
3021                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3022                 }
3023         }
3024
3025         /* Set configured hashing protocols in MRQC register */
3026         rss_hf = rss_conf->rss_hf;
3027         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3028         if (rss_hf & ETH_RSS_IPV4)
3029                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3030         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3031                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3032         if (rss_hf & ETH_RSS_IPV6)
3033                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3034         if (rss_hf & ETH_RSS_IPV6_EX)
3035                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3036         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3037                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3038         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3039                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3040         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3041                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3042         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3043                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3044         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3045                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3046         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3047 }
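
/*
 * Worked example of the key packing above: the first four bytes of
 * rss_intel_key (0x6d, 0x5a, 0x56, 0xda) are assembled least-significant
 * byte first, so RSSRK(0) is written with 0xda565a6d; the 40-byte key fills
 * ten such 32-bit registers.
 */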
3048
3049 int
3050 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3051                           struct rte_eth_rss_conf *rss_conf)
3052 {
3053         struct ixgbe_hw *hw;
3054         uint32_t mrqc;
3055         uint64_t rss_hf;
3056         uint32_t mrqc_reg;
3057
3058         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3059
3060         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3061                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3062                         "NIC.");
3063                 return -ENOTSUP;
3064         }
3065         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3066
3067         /*
3068          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3069          *     "RSS enabling cannot be done dynamically while it must be
3070          *      preceded by a software reset"
3071          * Before changing anything, first check that the update RSS operation
3072          * does not attempt to disable RSS, if RSS was enabled at
3073          * initialization time, or does not attempt to enable RSS, if RSS was
3074          * disabled at initialization time.
3075          */
3076         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3077         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3078         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3079                 if (rss_hf != 0) /* Enable RSS */
3080                         return -(EINVAL);
3081                 return 0; /* Nothing to do */
3082         }
3083         /* RSS enabled */
3084         if (rss_hf == 0) /* Disable RSS */
3085                 return -(EINVAL);
3086         ixgbe_hw_rss_hash_set(hw, rss_conf);
3087         return 0;
3088 }
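
/*
 * Illustrative sketch (not part of the driver): updating the hash functions
 * at runtime through the ethdev API, keeping the key already programmed in
 * hardware (rss_key == NULL). This only succeeds if RSS was enabled when the
 * port was configured, as enforced above.
 *
 *     struct rte_eth_rss_conf conf = {
 *             .rss_key = NULL,
 *             .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP |
 *                       ETH_RSS_NONFRAG_IPV4_UDP,
 *     };
 *     int ret = rte_eth_dev_rss_hash_update(port_id, &conf);
 */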
3089
3090 int
3091 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3092                             struct rte_eth_rss_conf *rss_conf)
3093 {
3094         struct ixgbe_hw *hw;
3095         uint8_t *hash_key;
3096         uint32_t mrqc;
3097         uint32_t rss_key;
3098         uint64_t rss_hf;
3099         uint16_t i;
3100         uint32_t mrqc_reg;
3101         uint32_t rssrk_reg;
3102
3103         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3104         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3105         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3106         hash_key = rss_conf->rss_key;
3107         if (hash_key != NULL) {
3108                 /* Return RSS hash key */
3109                 for (i = 0; i < 10; i++) {
3110                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3111                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3112                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3113                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3114                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3115                 }
3116         }
3117
3118         /* Get RSS functions configured in MRQC register */
3119         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3120         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3121                 rss_conf->rss_hf = 0;
3122                 return 0;
3123         }
3124         rss_hf = 0;
3125         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3126                 rss_hf |= ETH_RSS_IPV4;
3127         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3128                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3129         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3130                 rss_hf |= ETH_RSS_IPV6;
3131         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3132                 rss_hf |= ETH_RSS_IPV6_EX;
3133         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3134                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3135         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3136                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3137         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3138                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3139         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3140                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3141         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3142                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3143         rss_conf->rss_hf = rss_hf;
3144         return 0;
3145 }
3146
3147 static void
3148 ixgbe_rss_configure(struct rte_eth_dev *dev)
3149 {
3150         struct rte_eth_rss_conf rss_conf;
3151         struct ixgbe_hw *hw;
3152         uint32_t reta;
3153         uint16_t i;
3154         uint16_t j;
3155         uint16_t sp_reta_size;
3156         uint32_t reta_reg;
3157
3158         PMD_INIT_FUNC_TRACE();
3159         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3160
3161         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3162
3163         /*
3164          * Fill in redirection table
3165          * The byte-swap is needed because NIC registers are in
3166          * little-endian order.
3167          */
3168         reta = 0;
3169         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3170                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3171
3172                 if (j == dev->data->nb_rx_queues)
3173                         j = 0;
3174                 reta = (reta << 8) | j;
3175                 if ((i & 3) == 3)
3176                         IXGBE_WRITE_REG(hw, reta_reg,
3177                                         rte_bswap32(reta));
3178         }
3179
3180         /*
3181          * Configure the RSS key and the RSS protocols used to compute
3182          * the RSS hash of input packets.
3183          */
3184         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3185         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3186                 ixgbe_rss_disable(dev);
3187                 return;
3188         }
3189         if (rss_conf.rss_key == NULL)
3190                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3191         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3192 }
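
/*
 * Worked example of the RETA fill above (hypothetical port with 4 RX
 * queues): the queue indices cycle 0,1,2,3,0,1,... and every group of four
 * entries is accumulated into one 32-bit value, 0x00010203, which
 * rte_bswap32() turns into 0x03020100 before it is written to the RETA
 * register.
 */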
3193
3194 #define NUM_VFTA_REGISTERS 128
3195 #define NIC_RX_BUFFER_SIZE 0x200
3196 #define X550_RX_BUFFER_SIZE 0x180
3197
3198 static void
3199 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3200 {
3201         struct rte_eth_vmdq_dcb_conf *cfg;
3202         struct ixgbe_hw *hw;
3203         enum rte_eth_nb_pools num_pools;
3204         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3205         uint16_t pbsize;
3206         uint8_t nb_tcs; /* number of traffic classes */
3207         int i;
3208
3209         PMD_INIT_FUNC_TRACE();
3210         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3211         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3212         num_pools = cfg->nb_queue_pools;
3213         /* Check we have a valid number of pools */
3214         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3215                 ixgbe_rss_disable(dev);
3216                 return;
3217         }
3218         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3219         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3220
3221         /*
3222          * RXPBSIZE
3223          * split rx buffer up into sections, each for 1 traffic class
3224          */
3225         switch (hw->mac.type) {
3226         case ixgbe_mac_X550:
3227         case ixgbe_mac_X550EM_x:
3228         case ixgbe_mac_X550EM_a:
3229                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3230                 break;
3231         default:
3232                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3233                 break;
3234         }
3235         for (i = 0; i < nb_tcs; i++) {
3236                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3237
3238                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3239                 /* clear 10 bits. */
3240                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3241                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3242         }
3243         /* zero alloc all unused TCs */
3244         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3245                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3246
3247                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3248                 /* clear 10 bits. */
3249                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3250         }
3251
3252         /* MRQC: enable vmdq and dcb */
3253         mrqc = (num_pools == ETH_16_POOLS) ?
3254                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3255         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3256
3257         /* PFVTCTL: turn on virtualisation and set the default pool */
3258         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3259         if (cfg->enable_default_pool) {
3260                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3261         } else {
3262                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3263         }
3264
3265         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3266
3267         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3268         queue_mapping = 0;
3269         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3270                 /*
3271                  * mapping is done with 3 bits per priority,
3272                  * so shift by i*3 each time
3273                  */
3274                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3275
3276         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3277
3278         /* RTRPCS: DCB related */
3279         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3280
3281         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3282         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3283         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3284         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3285
3286         /* VFTA - enable all vlan filters */
3287         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3288                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3289         }
3290
3291         /* VFRE: pool enabling for receive - 16 or 32 */
3292         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3293                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3294
3295         /*
3296          * MPSAR - allow pools to read specific mac addresses
3297          * In this case, all pools should be able to read from mac addr 0
3298          */
3299         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3300         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3301
3302         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3303         for (i = 0; i < cfg->nb_pool_maps; i++) {
3304                 /* set vlan id in VF register and set the valid bit */
3305                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3306                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3307                 /*
3308                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3309                  * pools, we only need to use the first half of the register
3310                  * i.e. bits 0-31
3311                  */
3312                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3313         }
3314 }
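
/*
 * Worked example of the RTRUP2TC packing above: with a hypothetical
 * cfg->dcb_tc[] of {0, 0, 0, 0, 1, 1, 1, 1} (priorities 4-7 mapped to TC1),
 * each priority contributes 3 bits at offset i * 3, so queue_mapping becomes
 * (1 << 12) | (1 << 15) | (1 << 18) | (1 << 21) = 0x00249000.
 */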
3315
3316 /**
3317  * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
3318  * @dev: pointer to eth_dev structure
3319  * @dcb_config: pointer to ixgbe_dcb_config structure
3320  */
3321 static void
3322 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3323                        struct ixgbe_dcb_config *dcb_config)
3324 {
3325         uint32_t reg;
3326         uint32_t q;
3327         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3328
3329         PMD_INIT_FUNC_TRACE();
3330         if (hw->mac.type != ixgbe_mac_82598EB) {
3331                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3332                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3333                 reg |= IXGBE_RTTDCS_ARBDIS;
3334                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3335
3336                 /* Enable DCB for Tx with 8 TCs */
3337                 if (dcb_config->num_tcs.pg_tcs == 8) {
3338                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3339                 } else {
3340                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3341                 }
3342                 if (dcb_config->vt_mode)
3343                         reg |= IXGBE_MTQC_VT_ENA;
3344                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3345
3346                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3347                         /* Disable drop for all queues in VMDQ mode*/
3348                         for (q = 0; q < 128; q++)
3349                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3350                                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3351                 } else {
3352                         /* Enable drop for all queues in SRIOV mode */
3353                         for (q = 0; q < 128; q++)
3354                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3355                                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT) | IXGBE_QDE_ENABLE));
3356                 }
3357
3358                 /* Enable the Tx desc arbiter */
3359                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3360                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3361                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3362
3363                 /* Enable Security TX Buffer IFG for DCB */
3364                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3365                 reg |= IXGBE_SECTX_DCB;
3366                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3367         }
3368 }
3369
3370 /**
3371  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3372  * @dev: pointer to rte_eth_dev structure
3373  * @dcb_config: pointer to ixgbe_dcb_config structure
3374  */
3375 static void
3376 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3377                         struct ixgbe_dcb_config *dcb_config)
3378 {
3379         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3380                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3381         struct ixgbe_hw *hw =
3382                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3383
3384         PMD_INIT_FUNC_TRACE();
3385         if (hw->mac.type != ixgbe_mac_82598EB)
3386                 /*PF VF Transmit Enable*/
3387                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3388                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3389
3390         /*Configure general DCB TX parameters*/
3391         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3392 }
3393
3394 static void
3395 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3396                         struct ixgbe_dcb_config *dcb_config)
3397 {
3398         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3399                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3400         struct ixgbe_dcb_tc_config *tc;
3401         uint8_t i, j;
3402
3403         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3404         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3405                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3406                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3407         } else {
3408                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3409                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3410         }
3411         /* User Priority to Traffic Class mapping */
3412         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3413                 j = vmdq_rx_conf->dcb_tc[i];
3414                 tc = &dcb_config->tc_config[j];
3415                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3416                                                 (uint8_t)(1 << j);
3417         }
3418 }
3419
3420 static void
3421 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3422                         struct ixgbe_dcb_config *dcb_config)
3423 {
3424         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3425                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3426         struct ixgbe_dcb_tc_config *tc;
3427         uint8_t i, j;
3428
3429         /* convert rte_eth_conf.tx_adv_conf to struct ixgbe_dcb_config */
3430         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3431                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3432                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3433         } else {
3434                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3435                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3436         }
3437
3438         /* User Priority to Traffic Class mapping */
3439         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3440                 j = vmdq_tx_conf->dcb_tc[i];
3441                 tc = &dcb_config->tc_config[j];
3442                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3443                                                 (uint8_t)(1 << j);
3444         }
3445 }
3446
3447 static void
3448 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3449                 struct ixgbe_dcb_config *dcb_config)
3450 {
3451         struct rte_eth_dcb_rx_conf *rx_conf =
3452                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3453         struct ixgbe_dcb_tc_config *tc;
3454         uint8_t i, j;
3455
3456         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3457         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3458
3459         /* User Priority to Traffic Class mapping */
3460         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3461                 j = rx_conf->dcb_tc[i];
3462                 tc = &dcb_config->tc_config[j];
3463                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3464                                                 (uint8_t)(1 << j);
3465         }
3466 }
3467
3468 static void
3469 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3470                 struct ixgbe_dcb_config *dcb_config)
3471 {
3472         struct rte_eth_dcb_tx_conf *tx_conf =
3473                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3474         struct ixgbe_dcb_tc_config *tc;
3475         uint8_t i, j;
3476
3477         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3478         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3479
3480         /* User Priority to Traffic Class mapping */
3481         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3482                 j = tx_conf->dcb_tc[i];
3483                 tc = &dcb_config->tc_config[j];
3484                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3485                                                 (uint8_t)(1 << j);
3486         }
3487 }
3488
3489 /**
3490  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3491  * @hw: pointer to hardware structure
3492  * @dcb_config: pointer to ixgbe_dcb_config structure
3493  */
3494 static void
3495 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3496                struct ixgbe_dcb_config *dcb_config)
3497 {
3498         uint32_t reg;
3499         uint32_t vlanctrl;
3500         uint8_t i;
3501
3502         PMD_INIT_FUNC_TRACE();
3503         /*
3504          * Disable the arbiter before changing parameters
3505          * (always enable recycle mode; WSP)
3506          */
3507         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3508         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3509
3510         if (hw->mac.type != ixgbe_mac_82598EB) {
3511                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3512                 if (dcb_config->num_tcs.pg_tcs == 4) {
3513                         if (dcb_config->vt_mode)
3514                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3515                                         IXGBE_MRQC_VMDQRT4TCEN;
3516                         else {
3517                                 /* Whether the mode is DCB or DCB_RSS, just
3518                                  * set MRQE to RSSXTCEN; RSS itself is
3519                                  * controlled by the RSS_FIELD bits.
3520                                  */
3521                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3522                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3523                                         IXGBE_MRQC_RTRSS4TCEN;
3524                         }
3525                 }
3526                 if (dcb_config->num_tcs.pg_tcs == 8) {
3527                         if (dcb_config->vt_mode)
3528                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3529                                         IXGBE_MRQC_VMDQRT8TCEN;
3530                         else {
3531                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3532                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3533                                         IXGBE_MRQC_RTRSS8TCEN;
3534                         }
3535                 }
3536
3537                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3538         }
3539
3540         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3541         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3542         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3543         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3544
3545         /* VFTA - enable all vlan filters */
3546         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3547                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3548         }
3549
3550         /*
3551          * Configure Rx packet plane (recycle mode; WSP) and
3552          * enable arbiter
3553          */
3554         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3555         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3556 }
3557
3558 static void
3559 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3560                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3561 {
3562         switch (hw->mac.type) {
3563         case ixgbe_mac_82598EB:
3564                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3565                 break;
3566         case ixgbe_mac_82599EB:
3567         case ixgbe_mac_X540:
3568         case ixgbe_mac_X550:
3569         case ixgbe_mac_X550EM_x:
3570         case ixgbe_mac_X550EM_a:
3571                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3572                                                   tsa, map);
3573                 break;
3574         default:
3575                 break;
3576         }
3577 }
3578
3579 static void
3580 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3581                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3582 {
3583         switch (hw->mac.type) {
3584         case ixgbe_mac_82598EB:
3585                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3586                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3587                 break;
3588         case ixgbe_mac_82599EB:
3589         case ixgbe_mac_X540:
3590         case ixgbe_mac_X550:
3591         case ixgbe_mac_X550EM_x:
3592         case ixgbe_mac_X550EM_a:
3593                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3594                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3595                 break;
3596         default:
3597                 break;
3598         }
3599 }
3600
3601 #define DCB_RX_CONFIG  1
3602 #define DCB_TX_CONFIG  1
3603 #define DCB_TX_PB      1024
3604 /**
3605  * ixgbe_dcb_hw_configure - Enable DCB and configure
3606  * general DCB in VT mode and non-VT mode parameters
3607  * @dev: pointer to rte_eth_dev structure
3608  * @dcb_config: pointer to ixgbe_dcb_config structure
3609  */
3610 static int
3611 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3612                         struct ixgbe_dcb_config *dcb_config)
3613 {
3614         int     ret = 0;
3615         uint8_t i, pfc_en, nb_tcs;
3616         uint16_t pbsize, rx_buffer_size;
3617         uint8_t config_dcb_rx = 0;
3618         uint8_t config_dcb_tx = 0;
3619         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3620         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3621         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3622         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3623         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3624         struct ixgbe_dcb_tc_config *tc;
3625         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3626         struct ixgbe_hw *hw =
3627                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3628
3629         switch (dev->data->dev_conf.rxmode.mq_mode) {
3630         case ETH_MQ_RX_VMDQ_DCB:
3631                 dcb_config->vt_mode = true;
3632                 if (hw->mac.type != ixgbe_mac_82598EB) {
3633                         config_dcb_rx = DCB_RX_CONFIG;
3634                         /*
3635                          *get dcb and VT rx configuration parameters
3636                          *from rte_eth_conf
3637                          */
3638                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3639                         /*Configure general VMDQ and DCB RX parameters*/
3640                         ixgbe_vmdq_dcb_configure(dev);
3641                 }
3642                 break;
3643         case ETH_MQ_RX_DCB:
3644         case ETH_MQ_RX_DCB_RSS:
3645                 dcb_config->vt_mode = false;
3646                 config_dcb_rx = DCB_RX_CONFIG;
3647                 /* Get dcb RX configuration parameters from rte_eth_conf */
3648                 ixgbe_dcb_rx_config(dev, dcb_config);
3649                 /*Configure general DCB RX parameters*/
3650                 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3651                 break;
3652         default:
3653                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3654                 break;
3655         }
3656         switch (dev->data->dev_conf.txmode.mq_mode) {
3657         case ETH_MQ_TX_VMDQ_DCB:
3658                 dcb_config->vt_mode = true;
3659                 config_dcb_tx = DCB_TX_CONFIG;
3660                 /* get DCB and VT TX configuration parameters
3661                  * from rte_eth_conf
3662                  */
3663                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3664                 /*Configure general VMDQ and DCB TX parameters*/
3665                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3666                 break;
3667
3668         case ETH_MQ_TX_DCB:
3669                 dcb_config->vt_mode = false;
3670                 config_dcb_tx = DCB_TX_CONFIG;
3671                 /*get DCB TX configuration parameters from rte_eth_conf*/
3672                 ixgbe_dcb_tx_config(dev, dcb_config);
3673                 /*Configure general DCB TX parameters*/
3674                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3675                 break;
3676         default:
3677                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3678                 break;
3679         }
3680
3681         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3682         /* Unpack map */
3683         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3684         if (nb_tcs == ETH_4_TCS) {
3685                 /* Avoid un-configured priority mapping to TC0 */
3686                 uint8_t j = 4;
3687                 uint8_t mask = 0xFF;
3688
3689                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3690                         mask = (uint8_t)(mask & (~(1 << map[i])));
3691                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3692                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3693                                 map[j++] = i;
3694                         mask >>= 1;
3695                 }
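                     /*
                      * Worked example (assumed input, for illustration only): if
                      * the unpacked map left every priority on TC0, i.e.
                      * map = {0,0,0,0,0,0,0,0}, the mask above ends up as 0xFE and
                      * the loop rewrites the tail to map = {0,0,0,0,1,2,3,4}, so
                      * the unconfigured priorities 4-7 are spread over the unused
                      * TC indexes instead of all piling onto TC0.
                      */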
3696                 /* Re-configure 4 TCs BW */
3697                 for (i = 0; i < nb_tcs; i++) {
3698                         tc = &dcb_config->tc_config[i];
3699                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3700                                                 (uint8_t)(100 / nb_tcs);
3701                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3702                                                 (uint8_t)(100 / nb_tcs);
3703                 }
3704                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3705                         tc = &dcb_config->tc_config[i];
3706                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3707                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3708                 }
3709         }
3710
3711         switch (hw->mac.type) {
3712         case ixgbe_mac_X550:
3713         case ixgbe_mac_X550EM_x:
3714         case ixgbe_mac_X550EM_a:
3715                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3716                 break;
3717         default:
3718                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3719                 break;
3720         }
3721
3722         if (config_dcb_rx) {
3723                 /* Set RX buffer size */
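                     /*
                      * Example (assuming the default 512 KB Rx packet buffer that
                      * the 48 KB/16 KB watermark comment further down implies):
                      * with 4 TCs each active TC gets a 128 KB RXPBSIZE below and
                      * the unused packet buffers are cleared to zero.
                      */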
3724                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3725                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3726
3727                 for (i = 0; i < nb_tcs; i++) {
3728                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3729                 }
3730                 /* zero alloc all unused TCs */
3731                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3732                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3733                 }
3734         }
3735         if (config_dcb_tx) {
3736                 /* Only an equally distributed Tx packet buffer
3737                  * strategy is supported.
3738                  */
3739                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3740                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
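                     /*
                      * Sketch of the resulting values, assuming the usual 160 KB
                      * total Tx packet buffer (IXGBE_TXPBSIZE_MAX) and a 10 KB
                      * maximum Tx packet (IXGBE_TXPKT_SIZE_MAX): with 4 TCs,
                      * txpktsize is 40 KB per TC and txpbthresh is 40 - 10 = 30
                      * (in KB units).
                      */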
3741
3742                 for (i = 0; i < nb_tcs; i++) {
3743                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3744                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3745                 }
3746                 /* Clear unused TCs, if any, to zero buffer size*/
3747                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3748                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3749                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3750                 }
3751         }
3752
3753         /* Calculate traffic class credits */
3754         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3755                                 IXGBE_DCB_TX_CONFIG);
3756         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3757                                 IXGBE_DCB_RX_CONFIG);
3758
3759         if (config_dcb_rx) {
3760                 /* Unpack CEE standard containers */
3761                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3762                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3763                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3764                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3765                 /* Configure PG(ETS) RX */
3766                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3767         }
3768
3769         if (config_dcb_tx) {
3770                 /* Unpack CEE standard containers */
3771                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3772                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3773                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3774                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3775                 /* Configure PG(ETS) TX */
3776                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3777         }
3778
3779         /*Configure queue statistics registers*/
3780         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3781
3782         /* Check if the PFC is supported */
3783         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3784                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3785                 for (i = 0; i < nb_tcs; i++) {
3786                         /*
3787                          * high_water is 3/4 and low_water is 1/4 of the
3788                          * per-TC buffer (e.g. 48 KB / 16 KB with 8 TCs).
3789                          */
3790                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3791                         hw->fc.low_water[i] = pbsize / 4;
3792                         /* Enable pfc for this TC */
3793                         tc = &dcb_config->tc_config[i];
3794                         tc->pfc = ixgbe_dcb_pfc_enabled;
3795                 }
3796                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3797                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3798                         pfc_en &= 0x0F;
3799                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3800         }
3801
3802         return ret;
3803 }
3804
3805 /**
3806  * ixgbe_configure_dcb - Configure DCB hardware
3807  * @dev: pointer to rte_eth_dev
3808  */
3809 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3810 {
3811         struct ixgbe_dcb_config *dcb_cfg =
3812                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3813         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3814
3815         PMD_INIT_FUNC_TRACE();
3816
3817         /* check that the configured mq_mode supports DCB */
3818         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3819             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3820             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3821                 return;
3822
3823         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
3824                 return;
3825
3826         /* Configure DCB hardware */
3827         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3828 }
3829
3830 /*
3831  * VMDq is only supported on 10 GbE NICs.
3832  */
3833 static void
3834 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3835 {
3836         struct rte_eth_vmdq_rx_conf *cfg;
3837         struct ixgbe_hw *hw;
3838         enum rte_eth_nb_pools num_pools;
3839         uint32_t mrqc, vt_ctl, vlanctrl;
3840         uint32_t vmolr = 0;
3841         int i;
3842
3843         PMD_INIT_FUNC_TRACE();
3844         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3845         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3846         num_pools = cfg->nb_queue_pools;
3847
3848         ixgbe_rss_disable(dev);
3849
3850         /* MRQC: enable vmdq */
3851         mrqc = IXGBE_MRQC_VMDQEN;
3852         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3853
3854         /* PFVTCTL: turn on virtualisation and set the default pool */
3855         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3856         if (cfg->enable_default_pool)
3857                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3858         else
3859                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3860
3861         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3862
3863         for (i = 0; i < (int)num_pools; i++) {
3864                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3865                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3866         }
3867
3868         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3869         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3870         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3871         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3872
3873         /* VFTA - enable all vlan filters */
3874         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3875                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3876
3877         /* VFRE: pool enabling for receive - 64 */
3878         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3879         if (num_pools == ETH_64_POOLS)
3880                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3881
3882         /*
3883          * MPSAR - allow pools to read specific mac addresses
3884          * In this case, all pools should be able to read from mac addr 0
3885          */
3886         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3887         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3888
3889         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3890         for (i = 0; i < cfg->nb_pool_maps; i++) {
3891                 /* set vlan id in VF register and set the valid bit */
3892                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3893                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3894                 /*
3895                  * Put the allowed pools in VFB reg. As we only have 16 or 64
3896                  * pools, we only need to use the first half of the register
3897                  * i.e. bits 0-31
3898                  */
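                     /*
                      * For example (hypothetical configuration): pool_map[i].pools
                      * == 0x5 enables pools 0 and 2, has an all-zero upper half
                      * and is therefore written to the even register VLVFB(i * 2).
                      */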
3899                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3900                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
3901                                         (cfg->pool_map[i].pools & UINT32_MAX));
3902                 else
3903                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
3904                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
3905
3906         }
3907
3908         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
3909         if (cfg->enable_loop_back) {
3910                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3911                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3912                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3913         }
3914
3915         IXGBE_WRITE_FLUSH(hw);
3916 }
3917
3918 /*
3919  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3920  * @hw: pointer to hardware structure
3921  */
3922 static void
3923 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3924 {
3925         uint32_t reg;
3926         uint32_t q;
3927
3928         PMD_INIT_FUNC_TRACE();
3929         /*PF VF Transmit Enable*/
3930         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3931         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3932
3933         /* Disable the Tx desc arbiter so that MTQC can be changed */
3934         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3935         reg |= IXGBE_RTTDCS_ARBDIS;
3936         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3937
3938         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3939         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3940
3941         /* Disable drop for all queues */
3942         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3943                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3944                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
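             /*
              * Each write above latches the drop-enable setting for queue q via
              * the WRITE strobe; the enable bit itself is left clear, which is
              * what disables descriptor drop for that queue.
              */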
3945
3946         /* Enable the Tx desc arbiter */
3947         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3948         reg &= ~IXGBE_RTTDCS_ARBDIS;
3949         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3950
3951         IXGBE_WRITE_FLUSH(hw);
3952 }
3953
3954 static int __attribute__((cold))
3955 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
3956 {
3957         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
3958         uint64_t dma_addr;
3959         unsigned int i;
3960
3961         /* Initialize software ring entries */
3962         for (i = 0; i < rxq->nb_rx_desc; i++) {
3963                 volatile union ixgbe_adv_rx_desc *rxd;
3964                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
3965
3966                 if (mbuf == NULL) {
3967                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
3968                                      (unsigned) rxq->queue_id);
3969                         return -ENOMEM;
3970                 }
3971
3972                 rte_mbuf_refcnt_set(mbuf, 1);
3973                 mbuf->next = NULL;
3974                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
3975                 mbuf->nb_segs = 1;
3976                 mbuf->port = rxq->port_id;
3977
3978                 dma_addr =
3979                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
3980                 rxd = &rxq->rx_ring[i];
3981                 rxd->read.hdr_addr = 0;
3982                 rxd->read.pkt_addr = dma_addr;
3983                 rxe[i].mbuf = mbuf;
3984         }
3985
3986         return 0;
3987 }
3988
3989 static int
3990 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
3991 {
3992         struct ixgbe_hw *hw;
3993         uint32_t mrqc;
3994
3995         ixgbe_rss_configure(dev);
3996
3997         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3998
3999         /* MRQC: enable VF RSS */
4000         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4001         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4002         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4003         case ETH_64_POOLS:
4004                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4005                 break;
4006
4007         case ETH_32_POOLS:
4008                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4009                 break;
4010
4011         default:
4012                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4013                 return -EINVAL;
4014         }
4015
4016         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4017
4018         return 0;
4019 }
4020
4021 static int
4022 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4023 {
4024         struct ixgbe_hw *hw =
4025                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4026
4027         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4028         case ETH_64_POOLS:
4029                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4030                         IXGBE_MRQC_VMDQEN);
4031                 break;
4032
4033         case ETH_32_POOLS:
4034                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4035                         IXGBE_MRQC_VMDQRT4TCEN);
4036                 break;
4037
4038         case ETH_16_POOLS:
4039                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4040                         IXGBE_MRQC_VMDQRT8TCEN);
4041                 break;
4042         default:
4043                 PMD_INIT_LOG(ERR,
4044                         "invalid pool number in IOV mode");
4045                 break;
4046         }
4047         return 0;
4048 }
4049
4050 static int
4051 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4052 {
4053         struct ixgbe_hw *hw =
4054                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4055
4056         if (hw->mac.type == ixgbe_mac_82598EB)
4057                 return 0;
4058
4059         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4060                 /*
4061                  * SRIOV inactive scheme
4062                  * any DCB/RSS w/o VMDq multi-queue setting
4063                  */
4064                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4065                 case ETH_MQ_RX_RSS:
4066                 case ETH_MQ_RX_DCB_RSS:
4067                 case ETH_MQ_RX_VMDQ_RSS:
4068                         ixgbe_rss_configure(dev);
4069                         break;
4070
4071                 case ETH_MQ_RX_VMDQ_DCB:
4072                         ixgbe_vmdq_dcb_configure(dev);
4073                         break;
4074
4075                 case ETH_MQ_RX_VMDQ_ONLY:
4076                         ixgbe_vmdq_rx_hw_configure(dev);
4077                         break;
4078
4079                 case ETH_MQ_RX_NONE:
4080                 default:
4081                         /* if mq_mode is none, disable rss mode.*/
4082                         ixgbe_rss_disable(dev);
4083                         break;
4084                 }
4085         } else {
4086                 /*
4087                  * SRIOV active scheme
4088                  * Support RSS together with VMDq & SRIOV
4089                  */
4090                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4091                 case ETH_MQ_RX_RSS:
4092                 case ETH_MQ_RX_VMDQ_RSS:
4093                         ixgbe_config_vf_rss(dev);
4094                         break;
4095                 case ETH_MQ_RX_VMDQ_DCB:
4096                         ixgbe_vmdq_dcb_configure(dev);
4097                         break;
4098                 /* FIXME if support DCB/RSS together with VMDq & SRIOV */
4099                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4100                         PMD_INIT_LOG(ERR,
4101                                 "Could not support DCB/RSS with VMDq & SRIOV");
4102                         return -1;
4103                 default:
4104                         ixgbe_config_vf_default(dev);
4105                         break;
4106                 }
4107         }
4108
4109         return 0;
4110 }
4111
4112 static int
4113 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4114 {
4115         struct ixgbe_hw *hw =
4116                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4117         uint32_t mtqc;
4118         uint32_t rttdcs;
4119
4120         if (hw->mac.type == ixgbe_mac_82598EB)
4121                 return 0;
4122
4123         /* disable arbiter before setting MTQC */
4124         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4125         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4126         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4127
4128         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4129                 /*
4130                  * SRIOV inactive scheme
4131                  * any DCB w/o VMDq multi-queue setting
4132                  */
4133                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4134                         ixgbe_vmdq_tx_hw_configure(hw);
4135                 else {
4136                         mtqc = IXGBE_MTQC_64Q_1PB;
4137                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4138                 }
4139         } else {
4140                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4141
4142                 /*
4143                  * SRIOV active scheme
4144                  * FIXME if support DCB together with VMDq & SRIOV
4145                  */
4146                 case ETH_64_POOLS:
4147                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4148                         break;
4149                 case ETH_32_POOLS:
4150                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4151                         break;
4152                 case ETH_16_POOLS:
4153                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4154                                 IXGBE_MTQC_8TC_8TQ;
4155                         break;
4156                 default:
4157                         mtqc = IXGBE_MTQC_64Q_1PB;
4158                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4159                 }
4160                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4161         }
4162
4163         /* re-enable arbiter */
4164         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4165         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4166
4167         return 0;
4168 }
4169
4170 /**
4171  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4172  *
4173  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4174  * spec rev. 3.0 chapter 8.2.3.8.13.
4175  *
4176  * @pool Memory pool of the Rx queue
4177  */
4178 static inline uint32_t
4179 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4180 {
4181         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4182
4183         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4184         uint16_t maxdesc =
4185                 IPV4_MAX_PKT_LEN /
4186                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
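             /*
              * Example, assuming the common 2 KB mbuf data room (2048 usable
              * bytes once the headroom is subtracted): 65535 / 2048 = 31, so
              * MAXDESC is capped at the hardware maximum of 16 descriptors per
              * RSC aggregation.
              */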
4187
4188         if (maxdesc >= 16)
4189                 return IXGBE_RSCCTL_MAXDESC_16;
4190         else if (maxdesc >= 8)
4191                 return IXGBE_RSCCTL_MAXDESC_8;
4192         else if (maxdesc >= 4)
4193                 return IXGBE_RSCCTL_MAXDESC_4;
4194         else
4195                 return IXGBE_RSCCTL_MAXDESC_1;
4196 }
4197
4198 /**
4199  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4200  * interrupt
4201  *
4202  * (Taken from FreeBSD tree)
4203  * (yes this is all very magic and confusing :)
4204  *
4205  * @dev port handle
4206  * @entry the register array entry
4207  * @vector the MSIX vector for this queue
4208  * @type RX/TX/MISC
4209  */
4210 static void
4211 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4212 {
4213         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4214         u32 ivar, index;
4215
4216         vector |= IXGBE_IVAR_ALLOC_VAL;
4217
4218         switch (hw->mac.type) {
4219
4220         case ixgbe_mac_82598EB:
4221                 if (type == -1)
4222                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4223                 else
4224                         entry += (type * 64);
4225                 index = (entry >> 2) & 0x1F;
4226                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4227                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4228                 ivar |= (vector << (8 * (entry & 0x3)));
4229                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4230                 break;
4231
4232         case ixgbe_mac_82599EB:
4233         case ixgbe_mac_X540:
4234                 if (type == -1) { /* MISC IVAR */
4235                         index = (entry & 1) * 8;
4236                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4237                         ivar &= ~(0xFF << index);
4238                         ivar |= (vector << index);
4239                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4240                 } else {        /* RX/TX IVARS */
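                             /*
                              * Each 32-bit IVAR register on these MACs covers one
                              * even/odd queue pair: Rx bytes at bit offsets 0 and
                              * 16, Tx bytes at offsets 8 and 24. For example, Rx
                              * queue 5 (type 0) lands in bits 23:16 of IVAR(2).
                              */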
4241                         index = (16 * (entry & 1)) + (8 * type);
4242                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4243                         ivar &= ~(0xFF << index);
4244                         ivar |= (vector << index);
4245                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4246                 }
4247
4248                 break;
4249
4250         default:
4251                 break;
4252         }
4253 }
4254
4255 void __attribute__((cold))
4256 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4257 {
4258         uint16_t i, rx_using_sse;
4259         struct ixgbe_adapter *adapter =
4260                 (struct ixgbe_adapter *)dev->data->dev_private;
4261
4262         /*
4263          * Vector Rx can only be used if a few configuration conditions
4264          * are met and Rx Bulk Allocation is allowed.
4265          */
4266         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4267             !adapter->rx_bulk_alloc_allowed) {
4268                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4269                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4270                                     "not enabled",
4271                              dev->data->port_id);
4272
4273                 adapter->rx_vec_allowed = false;
4274         }
4275
4276         /*
4277          * Initialize the appropriate LRO callback.
4278          *
4279          * If all queues satisfy the bulk allocation preconditions
4280          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4281          * Otherwise use a single allocation version.
4282          */
4283         if (dev->data->lro) {
4284                 if (adapter->rx_bulk_alloc_allowed) {
4285                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4286                                            "allocation version");
4287                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4288                 } else {
4289                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4290                                            "allocation version");
4291                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4292                 }
4293         } else if (dev->data->scattered_rx) {
4294                 /*
4295                  * Set the non-LRO scattered callback: there are Vector,
4296                  * bulk allocation and single allocation versions.
4297                  */
4298                 if (adapter->rx_vec_allowed) {
4299                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4300                                             "callback (port=%d).",
4301                                      dev->data->port_id);
4302
4303                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4304                 } else if (adapter->rx_bulk_alloc_allowed) {
4305                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4306                                            "allocation callback (port=%d).",
4307                                      dev->data->port_id);
4308                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4309                 } else {
4310                         PMD_INIT_LOG(DEBUG, "Using Regualr (non-vector, "
4311                                             "single allocation) "
4312                                             "Scattered Rx callback "
4313                                             "(port=%d).",
4314                                      dev->data->port_id);
4315
4316                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4317                 }
4318         /*
4319          * Below we set "simple" callbacks according to port/queues parameters.
4320          * If parameters allow we are going to choose between the following
4321          * callbacks:
4322          *    - Vector
4323          *    - Bulk Allocation
4324          *    - Single buffer allocation (the simplest one)
4325          */
4326         } else if (adapter->rx_vec_allowed) {
4327                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4328                                     "burst size no less than %d (port=%d).",
4329                              RTE_IXGBE_DESCS_PER_LOOP,
4330                              dev->data->port_id);
4331
4332                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4333         } else if (adapter->rx_bulk_alloc_allowed) {
4334                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4335                                     "satisfied. Rx Burst Bulk Alloc function "
4336                                     "will be used on port=%d.",
4337                              dev->data->port_id);
4338
4339                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4340         } else {
4341                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4342                                     "satisfied, or Scattered Rx is requested "
4343                                     "(port=%d).",
4344                              dev->data->port_id);
4345
4346                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4347         }
4348
4349         /* Propagate information about RX function choice through all queues. */
4350
4351         rx_using_sse =
4352                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4353                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4354
4355         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4356                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4357
4358                 rxq->rx_using_sse = rx_using_sse;
4359         }
4360 }
4361
4362 /**
4363  * ixgbe_set_rsc - configure RSC related port HW registers
4364  *
4365  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4366  * of 82599 Spec (x540 configuration is virtually the same).
4367  *
4368  * @dev port handle
4369  *
4370  * Returns 0 in case of success or a non-zero error code
4371  */
4372 static int
4373 ixgbe_set_rsc(struct rte_eth_dev *dev)
4374 {
4375         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4376         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4377         struct rte_eth_dev_info dev_info = { 0 };
4378         bool rsc_capable = false;
4379         uint16_t i;
4380         uint32_t rdrxctl;
4381
4382         /* Sanity check */
4383         dev->dev_ops->dev_infos_get(dev, &dev_info);
4384         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4385                 rsc_capable = true;
4386
4387         if (!rsc_capable && rx_conf->enable_lro) {
4388                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4389                                    "support it");
4390                 return -EINVAL;
4391         }
4392
4393         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4394
4395         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4396                 /*
4397                  * According to chapter 4.6.7.2.1 of the Spec Rev.
4398                  * 3.0 RSC configuration requires HW CRC stripping being
4399                  * enabled. If user requested both HW CRC stripping off
4400                  * and RSC on - return an error.
4401                  */
4402                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4403                                     "is disabled");
4404                 return -EINVAL;
4405         }
4406
4407         /* RFCTL configuration  */
4408         if (rsc_capable) {
4409                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4410
4411                 if (rx_conf->enable_lro)
4412                         /*
4413                          * Since NFS packet coalescing is not supported - clear
4414                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4415                          * enabled.
4416                          */
4417                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4418                                    IXGBE_RFCTL_NFSR_DIS);
4419                 else
4420                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4421
4422                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4423         }
4424
4425         /* If LRO hasn't been requested - we are done here. */
4426         if (!rx_conf->enable_lro)
4427                 return 0;
4428
4429         /* Set RDRXCTL.RSCACKC bit */
4430         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4431         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4432         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4433
4434         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4435         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4436                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4437                 uint32_t srrctl =
4438                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4439                 uint32_t rscctl =
4440                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4441                 uint32_t psrtype =
4442                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4443                 uint32_t eitr =
4444                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4445
4446                 /*
4447                  * ixgbe PMD doesn't support header-split at the moment.
4448                  *
4449                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4450                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4451                  * should be configured even if header split is not
4452                  * enabled. We will configure it to 128 bytes following the
4453                  * recommendation in the spec.
4454                  */
4455                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4456                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4457                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4458
4459                 /*
4460                  * TODO: Consider setting the Receive Descriptor Minimum
4461                  * Threshold Size for an RSC case. This is not an obviously
4462                  * beneficial option but one worth considering...
4463                  */
4464
4465                 rscctl |= IXGBE_RSCCTL_RSCEN;
4466                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4467                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4468
4469                 /*
4470                  * RSC: Set ITR interval corresponding to 2K ints/s.
4471                  *
4472                  * Full-sized RSC aggregations for a 10Gb/s link will
4473                  * arrive at about 20K aggregation/s rate.
4474                  *
4475                  * A 2K ints/s rate will cause only 10% of the
4476                  * aggregations to be closed due to the interrupt timer
4477                  * expiration for a streaming at wire-speed case.
4478                  *
4479                  * For a sparse streaming case this setting will yield
4480                  * at most 500us latency for a single RSC aggregation.
4481                  */
4482                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4483                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4484
4485                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4486                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4487                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4488                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4489
4490                 /*
4491                  * RSC requires the mapping of the queue to the
4492                  * interrupt vector.
4493                  */
4494                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4495         }
4496
4497         dev->data->lro = 1;
4498
4499         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4500
4501         return 0;
4502 }
4503
4504 /*
4505  * Initializes Receive Unit.
4506  */
4507 int __attribute__((cold))
4508 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4509 {
4510         struct ixgbe_hw     *hw;
4511         struct ixgbe_rx_queue *rxq;
4512         uint64_t bus_addr;
4513         uint32_t rxctrl;
4514         uint32_t fctrl;
4515         uint32_t hlreg0;
4516         uint32_t maxfrs;
4517         uint32_t srrctl;
4518         uint32_t rdrxctl;
4519         uint32_t rxcsum;
4520         uint16_t buf_size;
4521         uint16_t i;
4522         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4523         int rc;
4524
4525         PMD_INIT_FUNC_TRACE();
4526         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4527
4528         /*
4529          * Make sure receives are disabled while setting
4530          * up the RX context (registers, descriptor rings, etc.).
4531          */
4532         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4533         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4534
4535         /* Enable receipt of broadcast frames */
4536         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4537         fctrl |= IXGBE_FCTRL_BAM;
4538         fctrl |= IXGBE_FCTRL_DPF;
4539         fctrl |= IXGBE_FCTRL_PMCF;
4540         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4541
4542         /*
4543          * Configure CRC stripping, if any.
4544          */
4545         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4546         if (rx_conf->hw_strip_crc)
4547                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4548         else
4549                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4550
4551         /*
4552          * Configure jumbo frame support, if any.
4553          */
4554         if (rx_conf->jumbo_frame == 1) {
4555                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4556                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4557                 maxfrs &= 0x0000FFFF;
4558                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4559                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4560         } else
4561                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
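             /*
              * Illustration: with jumbo frames enabled and max_rx_pkt_len set to
              * 9000, the low 16 bits of MAXFRS are preserved and 9000 is written
              * into the upper 16 bits (the MFS field), i.e. the largest frame the
              * MAC will accept.
              */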
4562
4563         /*
4564          * If loopback mode is configured for 82599, set LPBK bit.
4565          */
4566         if (hw->mac.type == ixgbe_mac_82599EB &&
4567                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4568                 hlreg0 |= IXGBE_HLREG0_LPBK;
4569         else
4570                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4571
4572         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4573
4574         /* Setup RX queues */
4575         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4576                 rxq = dev->data->rx_queues[i];
4577
4578                 /*
4579                  * Reset crc_len in case it was changed after queue setup by a
4580                  * call to configure.
4581                  */
4582                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4583
4584                 /* Setup the Base and Length of the Rx Descriptor Rings */
4585                 bus_addr = rxq->rx_ring_phys_addr;
4586                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4587                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4588                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4589                                 (uint32_t)(bus_addr >> 32));
4590                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4591                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4592                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4593                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4594
4595                 /* Configure the SRRCTL register */
4596 #ifdef RTE_HEADER_SPLIT_ENABLE
4597                 /*
4598                  * Configure Header Split
4599                  */
4600                 if (rx_conf->header_split) {
4601                         if (hw->mac.type == ixgbe_mac_82599EB) {
4602                                 /* Must setup the PSRTYPE register */
4603                                 uint32_t psrtype;
4604
4605                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4606                                         IXGBE_PSRTYPE_UDPHDR   |
4607                                         IXGBE_PSRTYPE_IPV4HDR  |
4608                                         IXGBE_PSRTYPE_IPV6HDR;
4609                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4610                         }
4611                         srrctl = ((rx_conf->split_hdr_size <<
4612                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4613                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4614                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4615                 } else
4616 #endif
4617                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4618
4619                 /* Set if packets are dropped when no descriptors available */
4620                 if (rxq->drop_en)
4621                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4622
4623                 /*
4624                  * Configure the RX buffer size in the BSIZEPACKET field of
4625                  * the SRRCTL register of the queue.
4626                  * The value is in 1 KB resolution. Valid values can be from
4627                  * 1 KB to 16 KB.
4628                  */
4629                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4630                         RTE_PKTMBUF_HEADROOM);
4631                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4632                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4633
4634                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4635
4636                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4637                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4638
4639                 /* It adds dual VLAN length for supporting dual VLAN */
4640                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4641                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4642                         dev->data->scattered_rx = 1;
4643         }
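             /*
              * Worked example for the loop above, assuming a 2 KB mbuf data room:
              * buf_size starts as 2048, BSIZEPKT is programmed as 2048 >> 10 = 2
              * (2 KB) and reads back as 2048. A standard 1518-byte frame plus two
              * VLAN tags (1526 bytes) fits, so scattered Rx is not forced on.
              */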
4644
4645         if (rx_conf->enable_scatter)
4646                 dev->data->scattered_rx = 1;
4647
4648         /*
4649          * Device configured with multiple RX queues.
4650          */
4651         ixgbe_dev_mq_rx_configure(dev);
4652
4653         /*
4654          * Setup the Checksum Register.
4655          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4656          * Enable IP/L4 checksum computation by hardware if requested to do so.
4657          */
4658         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4659         rxcsum |= IXGBE_RXCSUM_PCSD;
4660         if (rx_conf->hw_ip_checksum)
4661                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4662         else
4663                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4664
4665         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4666
4667         if (hw->mac.type == ixgbe_mac_82599EB ||
4668             hw->mac.type == ixgbe_mac_X540) {
4669                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4670                 if (rx_conf->hw_strip_crc)
4671                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4672                 else
4673                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4674                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4675                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4676         }
4677
4678         rc = ixgbe_set_rsc(dev);
4679         if (rc)
4680                 return rc;
4681
4682         ixgbe_set_rx_function(dev);
4683
4684         return 0;
4685 }
4686
4687 /*
4688  * Initializes Transmit Unit.
4689  */
4690 void __attribute__((cold))
4691 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4692 {
4693         struct ixgbe_hw     *hw;
4694         struct ixgbe_tx_queue *txq;
4695         uint64_t bus_addr;
4696         uint32_t hlreg0;
4697         uint32_t txctrl;
4698         uint16_t i;
4699
4700         PMD_INIT_FUNC_TRACE();
4701         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4702
4703         /* Enable TX CRC (checksum offload requirement) and hw padding
4704          * (TSO requirement)
4705          */
4706         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4707         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4708         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4709
4710         /* Setup the Base and Length of the Tx Descriptor Rings */
4711         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4712                 txq = dev->data->tx_queues[i];
4713
4714                 bus_addr = txq->tx_ring_phys_addr;
4715                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4716                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4717                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4718                                 (uint32_t)(bus_addr >> 32));
4719                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4720                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4721                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4722                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4723                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4724
4725                 /*
4726                  * Disable Tx Head Writeback RO bit, since this hoses
4727                  * bookkeeping if things aren't delivered in order.
4728                  */
4729                 switch (hw->mac.type) {
4730                 case ixgbe_mac_82598EB:
4731                         txctrl = IXGBE_READ_REG(hw,
4732                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4733                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4734                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4735                                         txctrl);
4736                         break;
4737
4738                 case ixgbe_mac_82599EB:
4739                 case ixgbe_mac_X540:
4740                 case ixgbe_mac_X550:
4741                 case ixgbe_mac_X550EM_x:
4742                 case ixgbe_mac_X550EM_a:
4743                 default:
4744                         txctrl = IXGBE_READ_REG(hw,
4745                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4746                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4747                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4748                                         txctrl);
4749                         break;
4750                 }
4751         }
4752
4753         /* Device configured with multiple TX queues. */
4754         ixgbe_dev_mq_tx_configure(dev);
4755 }
4756
4757 /*
4758  * Set up link for 82599 loopback mode Tx->Rx.
4759  */
4760 static inline void __attribute__((cold))
4761 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4762 {
4763         PMD_INIT_FUNC_TRACE();
4764
4765         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4766                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4767                                 IXGBE_SUCCESS) {
4768                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4769                         /* ignore error */
4770                         return;
4771                 }
4772         }
4773
4774         /* Restart link */
4775         IXGBE_WRITE_REG(hw,
4776                         IXGBE_AUTOC,
4777                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4778         ixgbe_reset_pipeline_82599(hw);
4779
4780         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4781         msec_delay(50);
4782 }
4783
4784
4785 /*
4786  * Start Transmit and Receive Units.
4787  */
4788 int __attribute__((cold))
4789 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4790 {
4791         struct ixgbe_hw     *hw;
4792         struct ixgbe_tx_queue *txq;
4793         struct ixgbe_rx_queue *rxq;
4794         uint32_t txdctl;
4795         uint32_t dmatxctl;
4796         uint32_t rxctrl;
4797         uint16_t i;
4798         int ret = 0;
4799
4800         PMD_INIT_FUNC_TRACE();
4801         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4802
4803         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4804                 txq = dev->data->tx_queues[i];
4805                 /* Setup Transmit Threshold Registers */
4806                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4807                 txdctl |= txq->pthresh & 0x7F;
4808                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4809                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4810                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4811         }
4812
4813         if (hw->mac.type != ixgbe_mac_82598EB) {
4814                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4815                 dmatxctl |= IXGBE_DMATXCTL_TE;
4816                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4817         }
4818
4819         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4820                 txq = dev->data->tx_queues[i];
4821                 if (!txq->tx_deferred_start) {
4822                         ret = ixgbe_dev_tx_queue_start(dev, i);
4823                         if (ret < 0)
4824                                 return ret;
4825                 }
4826         }
4827
4828         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4829                 rxq = dev->data->rx_queues[i];
4830                 if (!rxq->rx_deferred_start) {
4831                         ret = ixgbe_dev_rx_queue_start(dev, i);
4832                         if (ret < 0)
4833                                 return ret;
4834                 }
4835         }
4836
4837         /* Enable Receive engine */
4838         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4839         if (hw->mac.type == ixgbe_mac_82598EB)
4840                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4841         rxctrl |= IXGBE_RXCTRL_RXEN;
4842         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4843
4844         /* If loopback mode is enabled for 82599, set up the link accordingly */
4845         if (hw->mac.type == ixgbe_mac_82599EB &&
4846                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4847                 ixgbe_setup_loopback_link_82599(hw);
4848
4849         return 0;
4850 }
4851
4852 /*
4853  * Start Receive Units for specified queue.
4854  */
4855 int __attribute__((cold))
4856 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4857 {
4858         struct ixgbe_hw     *hw;
4859         struct ixgbe_rx_queue *rxq;
4860         uint32_t rxdctl;
4861         int poll_ms;
4862
4863         PMD_INIT_FUNC_TRACE();
4864         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4865
4866         if (rx_queue_id < dev->data->nb_rx_queues) {
4867                 rxq = dev->data->rx_queues[rx_queue_id];
4868
4869                 /* Allocate buffers for descriptor rings */
4870                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4871                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4872                                      rx_queue_id);
4873                         return -1;
4874                 }
4875                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4876                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4877                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4878
4879                 /* Wait until RX Enable ready */
4880                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4881                 do {
4882                         rte_delay_ms(1);
4883                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4884                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4885                 if (!poll_ms)
4886                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4887                                      rx_queue_id);
4888                 rte_wmb();
4889                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4890                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4891                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4892         } else
4893                 return -1;
4894
4895         return 0;
4896 }
4897
4898 /*
4899  * Stop Receive Units for specified queue.
4900  */
4901 int __attribute__((cold))
4902 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4903 {
4904         struct ixgbe_hw     *hw;
4905         struct ixgbe_adapter *adapter =
4906                 (struct ixgbe_adapter *)dev->data->dev_private;
4907         struct ixgbe_rx_queue *rxq;
4908         uint32_t rxdctl;
4909         int poll_ms;
4910
4911         PMD_INIT_FUNC_TRACE();
4912         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4913
4914         if (rx_queue_id < dev->data->nb_rx_queues) {
4915                 rxq = dev->data->rx_queues[rx_queue_id];
4916
4917                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4918                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4919                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4920
4921                 /* Wait until RX Enable bit clear */
4922                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4923                 do {
4924                         rte_delay_ms(1);
4925                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4926                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4927                 if (!poll_ms)
4928                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4929                                      rx_queue_id);
4930
4931                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4932
4933                 ixgbe_rx_queue_release_mbufs(rxq);
4934                 ixgbe_reset_rx_queue(adapter, rxq);
4935                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4936         } else
4937                 return -1;
4938
4939         return 0;
4940 }
4941
4942
4943 /*
4944  * Start Transmit Units for specified queue.
4945  */
4946 int __attribute__((cold))
4947 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4948 {
4949         struct ixgbe_hw     *hw;
4950         struct ixgbe_tx_queue *txq;
4951         uint32_t txdctl;
4952         int poll_ms;
4953
4954         PMD_INIT_FUNC_TRACE();
4955         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4956
4957         if (tx_queue_id < dev->data->nb_tx_queues) {
4958                 txq = dev->data->tx_queues[tx_queue_id];
4959                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4960                 txdctl |= IXGBE_TXDCTL_ENABLE;
4961                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4962
4963                 /* Wait until TX Enable ready */
4964                 if (hw->mac.type == ixgbe_mac_82599EB) {
4965                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4966                         do {
4967                                 rte_delay_ms(1);
4968                                 txdctl = IXGBE_READ_REG(hw,
4969                                         IXGBE_TXDCTL(txq->reg_idx));
4970                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4971                         if (!poll_ms)
4972                                 PMD_INIT_LOG(ERR, "Could not enable "
4973                                              "Tx Queue %d", tx_queue_id);
4974                 }
4975                 rte_wmb();
4976                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4977                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4978                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4979         } else
4980                 return -1;
4981
4982         return 0;
4983 }
4984
4985 /*
4986  * Stop Transmit Units for specified queue.
4987  */
4988 int __attribute__((cold))
4989 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4990 {
4991         struct ixgbe_hw     *hw;
4992         struct ixgbe_tx_queue *txq;
4993         uint32_t txdctl;
4994         uint32_t txtdh, txtdt;
4995         int poll_ms;
4996
4997         PMD_INIT_FUNC_TRACE();
4998         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4999
5000         if (tx_queue_id >= dev->data->nb_tx_queues)
5001                 return -1;
5002
5003         txq = dev->data->tx_queues[tx_queue_id];
5004
5005         /* Wait until TX queue is empty */
5006         if (hw->mac.type == ixgbe_mac_82599EB) {
5007                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5008                 do {
5009                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5010                         txtdh = IXGBE_READ_REG(hw,
5011                                                IXGBE_TDH(txq->reg_idx));
5012                         txtdt = IXGBE_READ_REG(hw,
5013                                                IXGBE_TDT(txq->reg_idx));
5014                 } while (--poll_ms && (txtdh != txtdt));
5015                 if (!poll_ms)
5016                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
5017                                      "when stopping.", tx_queue_id);
5018         }
5019
5020         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5021         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5022         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5023
5024         /* Wait until TX Enable bit clear */
5025         if (hw->mac.type == ixgbe_mac_82599EB) {
5026                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5027                 do {
5028                         rte_delay_ms(1);
5029                         txdctl = IXGBE_READ_REG(hw,
5030                                                 IXGBE_TXDCTL(txq->reg_idx));
5031                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5032                 if (!poll_ms)
5033                         PMD_INIT_LOG(ERR, "Could not disable "
5034                                      "Tx Queue %d", tx_queue_id);
5035         }
5036
5037         if (txq->ops != NULL) {
5038                 txq->ops->release_mbufs(txq);
5039                 txq->ops->reset(txq);
5040         }
5041         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5042
5043         return 0;
5044 }
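
/*
 * Summary of the stop sequence above: on 82599, wait for the ring to
 * drain (TDH == TDT), clear TXDCTL.ENABLE and poll until the hardware
 * acknowledges it, then free any mbufs still held by the software ring
 * and reset the queue state before marking the queue stopped.
 */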
5045
5046 void
5047 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5048         struct rte_eth_rxq_info *qinfo)
5049 {
5050         struct ixgbe_rx_queue *rxq;
5051
5052         rxq = dev->data->rx_queues[queue_id];
5053
5054         qinfo->mp = rxq->mb_pool;
5055         qinfo->scattered_rx = dev->data->scattered_rx;
5056         qinfo->nb_desc = rxq->nb_rx_desc;
5057
5058         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5059         qinfo->conf.rx_drop_en = rxq->drop_en;
5060         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5061 }
5062
5063 void
5064 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5065         struct rte_eth_txq_info *qinfo)
5066 {
5067         struct ixgbe_tx_queue *txq;
5068
5069         txq = dev->data->tx_queues[queue_id];
5070
5071         qinfo->nb_desc = txq->nb_tx_desc;
5072
5073         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5074         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5075         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5076
5077         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5078         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5079         qinfo->conf.txq_flags = txq->txq_flags;
5080         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5081 }
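
/*
 * The two info helpers above back the generic
 * rte_eth_rx_queue_info_get()/rte_eth_tx_queue_info_get() calls; they
 * only report the software configuration cached in the queue structures
 * and never touch the hardware.
 */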
5082
5083 /*
5084  * [VF] Initializes Receive Unit.
5085  */
5086 int __attribute__((cold))
5087 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5088 {
5089         struct ixgbe_hw     *hw;
5090         struct ixgbe_rx_queue *rxq;
5091         uint64_t bus_addr;
5092         uint32_t srrctl, psrtype = 0;
5093         uint16_t buf_size;
5094         uint16_t i;
5095         int ret;
5096
5097         PMD_INIT_FUNC_TRACE();
5098         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5099
5100         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5101                 PMD_INIT_LOG(ERR, "Invalid number of Rx queues, "
5102                         "it must be a power of 2");
5103                 return -1;
5104         }
5105
5106         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5107                 PMD_INIT_LOG(ERR, "Invalid number of Rx queues, "
5108                         "it must be less than or equal to %d",
5109                         hw->mac.max_rx_queues);
5110                 return -1;
5111         }
5112
5113         /*
5114          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5115          * disables VF packet reception if the PF MTU is > 1500.
5116          * This is done to deal with an 82599 limitation that forces
5117          * the PF and all VFs to share the same MTU.
5118          * The PF driver then re-enables VF packet reception once
5119          * the VF driver issues an IXGBE_VF_SET_LPE request.
5120          * In the meantime, the VF device cannot be used, even if the VF driver
5121          * and the guest VM network stack are ready to accept packets with a
5122          * size up to the PF MTU.
5123          * As a workaround to this PF behaviour, always call
5124          * ixgbevf_rlpml_set_vf, even if jumbo frames are not used. This way,
5125          * VF packet reception works in all cases.
5126          */
5127         ixgbevf_rlpml_set_vf(hw,
5128                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5129
5130         /* Setup RX queues */
5131         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5132                 rxq = dev->data->rx_queues[i];
5133
5134                 /* Allocate buffers for descriptor rings */
5135                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5136                 if (ret)
5137                         return ret;
5138
5139                 /* Setup the Base and Length of the Rx Descriptor Rings */
5140                 bus_addr = rxq->rx_ring_phys_addr;
5141
5142                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5143                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5144                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5145                                 (uint32_t)(bus_addr >> 32));
5146                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5147                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5148                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5149                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5150
5151
5152                 /* Configure the SRRCTL register */
5153 #ifdef RTE_HEADER_SPLIT_ENABLE
5154                 /*
5155                  * Configure Header Split
5156                  */
5157                 if (dev->data->dev_conf.rxmode.header_split) {
5158                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5159                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5160                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5161                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5162                 } else
5163 #endif
5164                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5165
5166                 /* Set if packets are dropped when no Rx descriptors are available */
5167                 if (rxq->drop_en)
5168                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5169
5170                 /*
5171                  * Configure the RX buffer size in the BSIZEPACKET field of
5172                  * the SRRCTL register of the queue.
5173                  * The value is in 1 KB resolution. Valid values can be from
5174                  * 1 KB to 16 KB.
5175                  */
5176                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5177                         RTE_PKTMBUF_HEADROOM);
5178                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5179                            IXGBE_SRRCTL_BSIZEPKT_MASK);
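                /*
                 * Example, assuming default mbuf sizing: a pool created with
                 * RTE_MBUF_DEFAULT_BUF_SIZE has a 2176-byte data room; minus
                 * the 128-byte RTE_PKTMBUF_HEADROOM this leaves 2048 bytes, so
                 * buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT (10) programs a
                 * 2 KB BSIZEPACKET.
                 */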
5180
5181                 /*
5182                  * Write the per-queue SRRCTL through the VF register set (VFSRRCTL).
5183                  */
5184                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5185
5186                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5187                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5188
5189                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5190                     /* Account for two VLAN tags (QinQ) on top of the max packet length */
5191                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5192                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5193                         if (!dev->data->scattered_rx)
5194                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5195                         dev->data->scattered_rx = 1;
5196                 }
5197         }
5198
5199 #ifdef RTE_HEADER_SPLIT_ENABLE
5200         if (dev->data->dev_conf.rxmode.header_split)
5201                 /* Must setup the PSRTYPE register */
5202                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5203                         IXGBE_PSRTYPE_UDPHDR   |
5204                         IXGBE_PSRTYPE_IPV4HDR  |
5205                         IXGBE_PSRTYPE_IPV6HDR;
5206 #endif
5207
5208         /* Set RQPL for VF RSS according to the number of Rx queues */
5209         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5210                 IXGBE_PSRTYPE_RQPL_SHIFT;
5211         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5212
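        /*
         * Select the Rx burst function: if scattered_rx was forced above,
         * ixgbe_set_rx_function() picks a receive routine that chains
         * several descriptors per packet.
         */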
5213         ixgbe_set_rx_function(dev);
5214
5215         return 0;
5216 }
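
/*
 * Caller note: the VF start path (ixgbevf_dev_start() in ixgbe_ethdev.c)
 * is expected to run ixgbevf_dev_tx_init(), then this function, and
 * finally ixgbevf_dev_rxtx_start() to enable the rings configured here.
 */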
5217
5218 /*
5219  * [VF] Initializes Transmit Unit.
5220  */
5221 void __attribute__((cold))
5222 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5223 {
5224         struct ixgbe_hw     *hw;
5225         struct ixgbe_tx_queue *txq;
5226         uint64_t bus_addr;
5227         uint32_t txctrl;
5228         uint16_t i;
5229
5230         PMD_INIT_FUNC_TRACE();
5231         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5232
5233         /* Setup the Base and Length of the Tx Descriptor Rings */
5234         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5235                 txq = dev->data->tx_queues[i];
5236                 bus_addr = txq->tx_ring_phys_addr;
5237                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5238                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5239                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5240                                 (uint32_t)(bus_addr >> 32));
5241                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5242                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5243                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5244                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5245                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5246
5247                 /*
5248                  * Disable the Tx head write-back relaxed-ordering (RO) bit,
5249                  * since out-of-order write-backs break descriptor bookkeeping.
5250                  */
5251                 txctrl = IXGBE_READ_REG(hw,
5252                                 IXGBE_VFDCA_TXCTRL(i));
5253                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5254                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5255                                 txctrl);
5256         }
5257 }
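
/*
 * Only the ring base/length and head/tail pointers are programmed here;
 * the prefetch/host/write-back thresholds and the queue enable bits are
 * applied later in ixgbevf_dev_rxtx_start().
 */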
5258
5259 /*
5260  * [VF] Start Transmit and Receive Units.
5261  */
5262 void __attribute__((cold))
5263 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5264 {
5265         struct ixgbe_hw     *hw;
5266         struct ixgbe_tx_queue *txq;
5267         struct ixgbe_rx_queue *rxq;
5268         uint32_t txdctl;
5269         uint32_t rxdctl;
5270         uint16_t i;
5271         int poll_ms;
5272
5273         PMD_INIT_FUNC_TRACE();
5274         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5275
5276         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5277                 txq = dev->data->tx_queues[i];
5278                 /* Setup Transmit Threshold Registers */
5279                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5280                 txdctl |= txq->pthresh & 0x7F;
5281                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5282                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5283                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5284         }
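
        /*
         * In TXDCTL the prefetch, host and write-back thresholds occupy
         * bits 6:0, 14:8 and 22:16, which is what the 0x7F masks and the
         * 8/16-bit shifts above encode.
         */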
5285
5286         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5287
5288                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5289                 txdctl |= IXGBE_TXDCTL_ENABLE;
5290                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5291
5292                 poll_ms = 10;
5293                 /* Wait until TX Enable ready */
5294                 do {
5295                         rte_delay_ms(1);
5296                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5297                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5298                 if (!poll_ms)
5299                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5300         }
5301         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5302
5303                 rxq = dev->data->rx_queues[i];
5304
5305                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5306                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5307                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5308
5309                 /* Wait until RX Enable ready */
5310                 poll_ms = 10;
5311                 do {
5312                         rte_delay_ms(1);
5313                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5314                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5315                 if (!poll_ms)
5316                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
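                /*
                 * Publish the initialized descriptors: the rte_wmb() below
                 * orders the descriptor writes before the tail update, and
                 * setting RDT to nb_rx_desc - 1 hands all but one descriptor
                 * over to the hardware.
                 */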
5317                 rte_wmb();
5318                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5319
5320         }
5321 }
5322
5323 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
5324 int __attribute__((weak))
5325 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5326 {
5327         return -1;
5328 }
5329
5330 uint16_t __attribute__((weak))
5331 ixgbe_recv_pkts_vec(
5332         void __rte_unused *rx_queue,
5333         struct rte_mbuf __rte_unused **rx_pkts,
5334         uint16_t __rte_unused nb_pkts)
5335 {
5336         return 0;
5337 }
5338
5339 uint16_t __attribute__((weak))
5340 ixgbe_recv_scattered_pkts_vec(
5341         void __rte_unused *rx_queue,
5342         struct rte_mbuf __rte_unused **rx_pkts,
5343         uint16_t __rte_unused nb_pkts)
5344 {
5345         return 0;
5346 }
5347
5348 int __attribute__((weak))
5349 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5350 {
5351         return -1;
5352 }
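
/*
 * These weak stubs report "no vector support" (-1) or receive nothing (0),
 * so the scalar Rx paths get selected.  When CONFIG_RTE_IXGBE_INC_VECTOR=y,
 * the strong definitions in the vector implementations (e.g.
 * ixgbe_rxtx_vec_sse.c) override them at link time.
 */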