deb_dpdk.git: drivers/net/ixgbe/ixgbe_rxtx.c (New upstream version 16.11.4)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73
74 #include "ixgbe_logs.h"
75 #include "base/ixgbe_api.h"
76 #include "base/ixgbe_vf.h"
77 #include "ixgbe_ethdev.h"
78 #include "base/ixgbe_dcb.h"
79 #include "base/ixgbe_common.h"
80 #include "ixgbe_rxtx.h"
81
82 /* Bit mask of the ol_flags bits that require a TX context to be set up */
83 #define IXGBE_TX_OFFLOAD_MASK (                  \
84                 PKT_TX_VLAN_PKT |                \
85                 PKT_TX_IP_CKSUM |                \
86                 PKT_TX_L4_MASK |                 \
87                 PKT_TX_TCP_SEG |                 \
88                 PKT_TX_OUTER_IP_CKSUM)
89
90 #if 1
91 #define RTE_PMD_USE_PREFETCH
92 #endif
93
94 #ifdef RTE_PMD_USE_PREFETCH
95 /*
96  * Prefetch a cache line into all cache levels.
97  */
98 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
99 #else
100 #define rte_ixgbe_prefetch(p)   do {} while (0)
101 #endif
102
103 /*********************************************************************
104  *
105  *  TX functions
106  *
107  **********************************************************************/
108
109 /*
110  * Check for descriptors with their DD bit set and free mbufs.
111  * Return the total number of buffers freed.
112  */
113 static inline int __attribute__((always_inline))
114 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
115 {
116         struct ixgbe_tx_entry *txep;
117         uint32_t status;
118         int i, nb_free = 0;
119         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
120
121         /* check DD bit on threshold descriptor */
122         status = txq->tx_ring[txq->tx_next_dd].wb.status;
123         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
124                 return 0;
125
126         /*
127          * first buffer to free from S/W ring is at index
128          * tx_next_dd - (tx_rs_thresh-1)
129          */
130         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
131
132         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
133                 /* free buffers one at a time */
134                 m = __rte_pktmbuf_prefree_seg(txep->mbuf);
135                 txep->mbuf = NULL;
136
137                 if (unlikely(m == NULL))
138                         continue;
139
140                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
141                     (nb_free > 0 && m->pool != free[0]->pool)) {
142                         rte_mempool_put_bulk(free[0]->pool,
143                                              (void **)free, nb_free);
144                         nb_free = 0;
145                 }
146
147                 free[nb_free++] = m;
148         }
149
150         if (nb_free > 0)
151                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
152
153         /* buffers were freed, update counters */
154         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
155         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
156         if (txq->tx_next_dd >= txq->nb_tx_desc)
157                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
158
159         return txq->tx_rs_thresh;
160 }
161
162 /* Populate 4 descriptors with data from 4 mbufs */
163 static inline void
164 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
165 {
166         uint64_t buf_dma_addr;
167         uint32_t pkt_len;
168         int i;
169
170         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
171                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
172                 pkt_len = (*pkts)->data_len;
173
174                 /* write data to descriptor */
175                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
176
177                 txdp->read.cmd_type_len =
178                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
179
180                 txdp->read.olinfo_status =
181                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
182
183                 rte_prefetch0(&(*pkts)->pool);
184         }
185 }
186
187 /* Populate 1 descriptor with data from 1 mbuf */
188 static inline void
189 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
190 {
191         uint64_t buf_dma_addr;
192         uint32_t pkt_len;
193
194         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
195         pkt_len = (*pkts)->data_len;
196
197         /* write data to descriptor */
198         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
199         txdp->read.cmd_type_len =
200                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
201         txdp->read.olinfo_status =
202                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
203         rte_prefetch0(&(*pkts)->pool);
204 }
205
206 /*
207  * Fill H/W descriptor ring with mbuf data.
208  * Copy mbuf pointers to the S/W ring.
209  */
210 static inline void
211 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
212                       uint16_t nb_pkts)
213 {
214         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
215         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
216         const int N_PER_LOOP = 4;
217         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
218         int mainpart, leftover;
219         int i, j;
220
221         /*
222          * Process most of the packets in chunks of N pkts.  Any
223          * leftover packets will get processed one at a time.
224          */
225         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
226         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
227         for (i = 0; i < mainpart; i += N_PER_LOOP) {
228                 /* Copy N mbuf pointers to the S/W ring */
229                 for (j = 0; j < N_PER_LOOP; ++j) {
230                         (txep + i + j)->mbuf = *(pkts + i + j);
231                 }
232                 tx4(txdp + i, pkts + i);
233         }
234
235         if (unlikely(leftover > 0)) {
236                 for (i = 0; i < leftover; ++i) {
237                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
238                         tx1(txdp + mainpart + i, pkts + mainpart + i);
239                 }
240         }
241 }
242
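/*
 * Fast-path transmit used by ixgbe_xmit_pkts_simple(): it assumes every mbuf
 * is a single segment and that no offload requiring a context descriptor is
 * requested, so data descriptors can be filled back-to-back.
 */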
243 static inline uint16_t
244 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
245              uint16_t nb_pkts)
246 {
247         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
248         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
249         uint16_t n = 0;
250
251         /*
252          * Begin scanning the H/W ring for done descriptors when the
253          * number of available descriptors drops below tx_free_thresh.  For
254          * each done descriptor, free the associated buffer.
255          */
256         if (txq->nb_tx_free < txq->tx_free_thresh)
257                 ixgbe_tx_free_bufs(txq);
258
259         /* Only use descriptors that are available */
260         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
261         if (unlikely(nb_pkts == 0))
262                 return 0;
263
264         /* Use exactly nb_pkts descriptors */
265         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
266
267         /*
268          * At this point, we know there are enough descriptors in the
269          * ring to transmit all the packets.  This assumes that each
270          * mbuf contains a single segment, and that no new offloads
271          * are expected, which would require a new context descriptor.
272          */
273
274         /*
275          * See if we're going to wrap-around. If so, handle the top
276          * of the descriptor ring first, then do the bottom.  If not,
277          * the processing looks just like the "bottom" part anyway...
278          */
279         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
280                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
281                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
282
283                 /*
284                  * We know that the last descriptor in the ring will need to
285                  * have its RS bit set because tx_rs_thresh has to be
286                  * a divisor of the ring size
287                  */
288                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
289                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
290                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
291
292                 txq->tx_tail = 0;
293         }
294
295         /* Fill H/W descriptor ring with mbuf data */
296         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
297         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
298
299         /*
300          * Determine if RS bit should be set
301          * This is what we actually want:
302          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
303          * but instead of subtracting 1 and doing >=, we can just do
304          * greater than without subtracting.
305          */
306         if (txq->tx_tail > txq->tx_next_rs) {
307                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
308                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
309                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
310                                                 txq->tx_rs_thresh);
311                 if (txq->tx_next_rs >= txq->nb_tx_desc)
312                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
313         }
314
315         /*
316          * Check for wrap-around. This would only happen if we used
317          * up to the last descriptor in the ring, no more, no less.
318          */
319         if (txq->tx_tail >= txq->nb_tx_desc)
320                 txq->tx_tail = 0;
321
322         /* update tail pointer */
323         rte_wmb();
324         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
325
326         return nb_pkts;
327 }
328
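/*
 * Simple transmit entry point: bursts larger than RTE_PMD_IXGBE_TX_MAX_BURST
 * are split into chunks of at most that size and handed to tx_xmit_pkts().
 */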
329 uint16_t
330 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
331                        uint16_t nb_pkts)
332 {
333         uint16_t nb_tx;
334
335         /* Transmit the whole burst directly if it fits within TX_MAX_BURST pkts */
336         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
337                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
338
339         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
340         nb_tx = 0;
341         while (nb_pkts) {
342                 uint16_t ret, n;
343
344                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
345                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
346                 nb_tx = (uint16_t)(nb_tx + ret);
347                 nb_pkts = (uint16_t)(nb_pkts - ret);
348                 if (ret < n)
349                         break;
350         }
351
352         return nb_tx;
353 }
354
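/*
 * Populate a TX context descriptor from ol_flags/tx_offload (VLAN tag,
 * L3/L4 checksum, TSO, outer-IP checksum) and remember the values in
 * txq->ctx_cache[] so later packets with identical offloads can reuse it.
 */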
355 static inline void
356 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
357                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
358                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
359 {
360         uint32_t type_tucmd_mlhl;
361         uint32_t mss_l4len_idx = 0;
362         uint32_t ctx_idx;
363         uint32_t vlan_macip_lens;
364         union ixgbe_tx_offload tx_offload_mask;
365         uint32_t seqnum_seed = 0;
366
367         ctx_idx = txq->ctx_curr;
368         tx_offload_mask.data[0] = 0;
369         tx_offload_mask.data[1] = 0;
370         type_tucmd_mlhl = 0;
371
372         /* Specify which HW CTX to upload. */
373         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
374
375         if (ol_flags & PKT_TX_VLAN_PKT) {
376                 tx_offload_mask.vlan_tci |= ~0;
377         }
378
379         /* check if TCP segmentation is required for this packet */
380         if (ol_flags & PKT_TX_TCP_SEG) {
381                 /* implies IP cksum in IPv4 */
382                 if (ol_flags & PKT_TX_IP_CKSUM)
383                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
384                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
385                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
386                 else
387                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
388                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
389                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
390
391                 tx_offload_mask.l2_len |= ~0;
392                 tx_offload_mask.l3_len |= ~0;
393                 tx_offload_mask.l4_len |= ~0;
394                 tx_offload_mask.tso_segsz |= ~0;
395                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
396                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
397         } else { /* no TSO, check if hardware checksum is needed */
398                 if (ol_flags & PKT_TX_IP_CKSUM) {
399                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
400                         tx_offload_mask.l2_len |= ~0;
401                         tx_offload_mask.l3_len |= ~0;
402                 }
403
404                 switch (ol_flags & PKT_TX_L4_MASK) {
405                 case PKT_TX_UDP_CKSUM:
406                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
407                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
408                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
409                         tx_offload_mask.l2_len |= ~0;
410                         tx_offload_mask.l3_len |= ~0;
411                         break;
412                 case PKT_TX_TCP_CKSUM:
413                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
414                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
415                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
416                         tx_offload_mask.l2_len |= ~0;
417                         tx_offload_mask.l3_len |= ~0;
418                         break;
419                 case PKT_TX_SCTP_CKSUM:
420                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
421                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
422                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
423                         tx_offload_mask.l2_len |= ~0;
424                         tx_offload_mask.l3_len |= ~0;
425                         break;
426                 default:
427                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
428                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
429                         break;
430                 }
431         }
432
433         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
434                 tx_offload_mask.outer_l2_len |= ~0;
435                 tx_offload_mask.outer_l3_len |= ~0;
436                 tx_offload_mask.l2_len |= ~0;
437                 seqnum_seed |= tx_offload.outer_l3_len
438                                << IXGBE_ADVTXD_OUTER_IPLEN;
439                 seqnum_seed |= tx_offload.l2_len
440                                << IXGBE_ADVTXD_TUNNEL_LEN;
441         }
442
443         txq->ctx_cache[ctx_idx].flags = ol_flags;
444         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
445                 tx_offload_mask.data[0] & tx_offload.data[0];
446         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
447                 tx_offload_mask.data[1] & tx_offload.data[1];
448         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
449
450         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
451         vlan_macip_lens = tx_offload.l3_len;
452         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
453                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
454                                     IXGBE_ADVTXD_MACLEN_SHIFT);
455         else
456                 vlan_macip_lens |= (tx_offload.l2_len <<
457                                     IXGBE_ADVTXD_MACLEN_SHIFT);
458         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
459         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
460         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
461         ctx_txd->seqnum_seed     = seqnum_seed;
462 }
463
464 /*
465  * Check which hardware context can be used. Use the existing match
466  * or create a new context descriptor.
467  */
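/*
 * Returns the index of the matching cached context (txq->ctx_curr), or
 * IXGBE_CTX_NUM when no cached entry matches and a new context descriptor
 * must be built.
 */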
468 static inline uint32_t
469 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
470                    union ixgbe_tx_offload tx_offload)
471 {
472         /* If it matches the currently used context */
473         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
474                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
475                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
476                      & tx_offload.data[0])) &&
477                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
478                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
479                      & tx_offload.data[1]))))
480                 return txq->ctx_curr;
481
482         /* Otherwise, check whether it matches the other cached context */
483         txq->ctx_curr ^= 1;
484         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
485                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
486                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
487                      & tx_offload.data[0])) &&
488                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
489                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
490                      & tx_offload.data[1]))))
491                 return txq->ctx_curr;
492
493         /* No match: a new context descriptor must be built */
494         return IXGBE_CTX_NUM;
495 }
496
497 static inline uint32_t
498 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
499 {
500         uint32_t tmp = 0;
501
502         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
503                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
504         if (ol_flags & PKT_TX_IP_CKSUM)
505                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
506         if (ol_flags & PKT_TX_TCP_SEG)
507                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
508         return tmp;
509 }
510
511 static inline uint32_t
512 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
513 {
514         uint32_t cmdtype = 0;
515
516         if (ol_flags & PKT_TX_VLAN_PKT)
517                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
518         if (ol_flags & PKT_TX_TCP_SEG)
519                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
520         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
521                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
522         return cmdtype;
523 }
524
525 /* Default RS bit threshold values */
526 #ifndef DEFAULT_TX_RS_THRESH
527 #define DEFAULT_TX_RS_THRESH   32
528 #endif
529 #ifndef DEFAULT_TX_FREE_THRESH
530 #define DEFAULT_TX_FREE_THRESH 32
531 #endif
532
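/*
 * tx_rs_thresh controls how often the RS (Report Status) bit is requested:
 * it is set once at least tx_rs_thresh descriptors have been used.
 * tx_free_thresh controls when transmitted descriptors are reclaimed:
 * cleanup starts once fewer than tx_free_thresh descriptors remain free.
 */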
533 /* Reset transmit descriptors after they have been used */
534 static inline int
535 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
536 {
537         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
538         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
539         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
540         uint16_t nb_tx_desc = txq->nb_tx_desc;
541         uint16_t desc_to_clean_to;
542         uint16_t nb_tx_to_clean;
543         uint32_t status;
544
545         /* Determine the last descriptor needing to be cleaned */
546         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
547         if (desc_to_clean_to >= nb_tx_desc)
548                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
549
550         /* Check to make sure the last descriptor to clean is done */
551         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
552         status = txr[desc_to_clean_to].wb.status;
553         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
554                 PMD_TX_FREE_LOG(DEBUG,
555                                 "TX descriptor %4u is not done "
556                                 "(port=%d queue=%d)",
557                                 desc_to_clean_to,
558                                 txq->port_id, txq->queue_id);
559                 /* Failed to clean any descriptors, better luck next time */
560                 return -(1);
561         }
562
563         /* Figure out how many descriptors will be cleaned */
564         if (last_desc_cleaned > desc_to_clean_to)
565                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
566                                                         desc_to_clean_to);
567         else
568                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
569                                                 last_desc_cleaned);
570
571         PMD_TX_FREE_LOG(DEBUG,
572                         "Cleaning %4u TX descriptors: %4u to %4u "
573                         "(port=%d queue=%d)",
574                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
575                         txq->port_id, txq->queue_id);
576
577         /*
578          * The last descriptor to clean is done, so that means all the
579          * descriptors from the last descriptor that was cleaned
580          * up to the last descriptor with the RS bit set
581          * are done. Only reset the threshold descriptor.
582          */
583         txr[desc_to_clean_to].wb.status = 0;
584
585         /* Update the txq to reflect the last descriptor that was cleaned */
586         txq->last_desc_cleaned = desc_to_clean_to;
587         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
588
589         /* No Error */
590         return 0;
591 }
592
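/*
 * Full-featured transmit path: handles multi-segment mbufs and offloads
 * (VLAN insertion, checksum, TSO) that may require building or reusing a
 * context descriptor for each packet.
 */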
593 uint16_t
594 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
595                 uint16_t nb_pkts)
596 {
597         struct ixgbe_tx_queue *txq;
598         struct ixgbe_tx_entry *sw_ring;
599         struct ixgbe_tx_entry *txe, *txn;
600         volatile union ixgbe_adv_tx_desc *txr;
601         volatile union ixgbe_adv_tx_desc *txd, *txp;
602         struct rte_mbuf     *tx_pkt;
603         struct rte_mbuf     *m_seg;
604         uint64_t buf_dma_addr;
605         uint32_t olinfo_status;
606         uint32_t cmd_type_len;
607         uint32_t pkt_len;
608         uint16_t slen;
609         uint64_t ol_flags;
610         uint16_t tx_id;
611         uint16_t tx_last;
612         uint16_t nb_tx;
613         uint16_t nb_used;
614         uint64_t tx_ol_req;
615         uint32_t ctx = 0;
616         uint32_t new_ctx;
617         union ixgbe_tx_offload tx_offload;
618
619         tx_offload.data[0] = 0;
620         tx_offload.data[1] = 0;
621         txq = tx_queue;
622         sw_ring = txq->sw_ring;
623         txr     = txq->tx_ring;
624         tx_id   = txq->tx_tail;
625         txe = &sw_ring[tx_id];
626         txp = NULL;
627
628         /* Determine if the descriptor ring needs to be cleaned. */
629         if (txq->nb_tx_free < txq->tx_free_thresh)
630                 ixgbe_xmit_cleanup(txq);
631
632         rte_prefetch0(&txe->mbuf->pool);
633
634         /* TX loop */
635         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
636                 new_ctx = 0;
637                 tx_pkt = *tx_pkts++;
638                 pkt_len = tx_pkt->pkt_len;
639
640                 /*
641                  * Determine how many (if any) context descriptors
642                  * are needed for offload functionality.
643                  */
644                 ol_flags = tx_pkt->ol_flags;
645
646                 /* If hardware offload required */
647                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
648                 if (tx_ol_req) {
649                         tx_offload.l2_len = tx_pkt->l2_len;
650                         tx_offload.l3_len = tx_pkt->l3_len;
651                         tx_offload.l4_len = tx_pkt->l4_len;
652                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
653                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
654                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
655                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
656
657                         /* Decide whether a new context must be built or an existing one can be reused. */
658                         ctx = what_advctx_update(txq, tx_ol_req,
659                                 tx_offload);
660                         /* Only allocate a context descriptor if required */
661                         new_ctx = (ctx == IXGBE_CTX_NUM);
662                         ctx = txq->ctx_curr;
663                 }
664
665                 /*
666                  * Keep track of how many descriptors are used in this loop.
667                  * This will always be the number of segments plus the number of
668                  * context descriptors required to transmit the packet.
669                  */
670                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
671
672                 if (txp != NULL &&
673                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
674                         /* set RS on the previous packet in the burst */
675                         txp->read.cmd_type_len |=
676                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
677
678                 /*
679                  * The number of descriptors that must be allocated for a
680                  * packet is the number of segments of that packet, plus 1
681                  * Context Descriptor for the hardware offload, if any.
682                  * Determine the last TX descriptor to allocate in the TX ring
683                  * for the packet, starting from the current position (tx_id)
684                  * in the ring.
685                  */
686                 tx_last = (uint16_t) (tx_id + nb_used - 1);
687
688                 /* Circular ring */
689                 if (tx_last >= txq->nb_tx_desc)
690                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
691
692                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
693                            " tx_first=%u tx_last=%u",
694                            (unsigned) txq->port_id,
695                            (unsigned) txq->queue_id,
696                            (unsigned) pkt_len,
697                            (unsigned) tx_id,
698                            (unsigned) tx_last);
699
700                 /*
701                  * Make sure there are enough TX descriptors available to
702                  * transmit the entire packet.
703                  * nb_used better be less than or equal to txq->tx_rs_thresh
704                  */
705                 if (nb_used > txq->nb_tx_free) {
706                         PMD_TX_FREE_LOG(DEBUG,
707                                         "Not enough free TX descriptors "
708                                         "nb_used=%4u nb_free=%4u "
709                                         "(port=%d queue=%d)",
710                                         nb_used, txq->nb_tx_free,
711                                         txq->port_id, txq->queue_id);
712
713                         if (ixgbe_xmit_cleanup(txq) != 0) {
714                                 /* Could not clean any descriptors */
715                                 if (nb_tx == 0)
716                                         return 0;
717                                 goto end_of_tx;
718                         }
719
720                         /* nb_used better be <= txq->tx_rs_thresh */
721                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
722                                 PMD_TX_FREE_LOG(DEBUG,
723                                         "The number of descriptors needed to "
724                                         "transmit the packet exceeds the "
725                                         "RS bit threshold. This will impact "
726                                         "performance. "
727                                         "nb_used=%4u nb_free=%4u "
728                                         "tx_rs_thresh=%4u. "
729                                         "(port=%d queue=%d)",
730                                         nb_used, txq->nb_tx_free,
731                                         txq->tx_rs_thresh,
732                                         txq->port_id, txq->queue_id);
733                                 /*
734                                  * Loop here until there are enough TX
735                                  * descriptors or until the ring cannot be
736                                  * cleaned.
737                                  */
738                                 while (nb_used > txq->nb_tx_free) {
739                                         if (ixgbe_xmit_cleanup(txq) != 0) {
740                                                 /*
741                                                  * Could not clean any
742                                                  * descriptors
743                                                  */
744                                                 if (nb_tx == 0)
745                                                         return 0;
746                                                 goto end_of_tx;
747                                         }
748                                 }
749                         }
750                 }
751
752                 /*
753                  * By now there are enough free TX descriptors to transmit
754                  * the packet.
755                  */
756
757                 /*
758                  * Set common flags of all TX Data Descriptors.
759                  *
760                  * The following bits must be set in all Data Descriptors:
761                  *   - IXGBE_ADVTXD_DTYP_DATA
762                  *   - IXGBE_ADVTXD_DCMD_DEXT
763                  *
764                  * The following bits must be set in the first Data Descriptor
765                  * and are ignored in the other ones:
766                  *   - IXGBE_ADVTXD_DCMD_IFCS
767                  *   - IXGBE_ADVTXD_MAC_1588
768                  *   - IXGBE_ADVTXD_DCMD_VLE
769                  *
770                  * The following bits must only be set in the last Data
771                  * Descriptor:
772                  *   - IXGBE_TXD_CMD_EOP
773                  *
774                  * The following bits can be set in any Data Descriptor, but
775                  * are only set in the last Data Descriptor:
776                  *   - IXGBE_TXD_CMD_RS
777                  */
778                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
779                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
780
781 #ifdef RTE_LIBRTE_IEEE1588
782                 if (ol_flags & PKT_TX_IEEE1588_TMST)
783                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
784 #endif
785
786                 olinfo_status = 0;
787                 if (tx_ol_req) {
788
789                         if (ol_flags & PKT_TX_TCP_SEG) {
790                                 /* when TSO is on, the paylen in the descriptor is
791                                  * not the packet length but the TCP payload length */
792                                 pkt_len -= (tx_offload.l2_len +
793                                         tx_offload.l3_len + tx_offload.l4_len);
794                         }
795
796                         /*
797                          * Setup the TX Advanced Context Descriptor if required
798                          */
799                         if (new_ctx) {
800                                 volatile struct ixgbe_adv_tx_context_desc *
801                                     ctx_txd;
802
803                                 ctx_txd = (volatile struct
804                                     ixgbe_adv_tx_context_desc *)
805                                     &txr[tx_id];
806
807                                 txn = &sw_ring[txe->next_id];
808                                 rte_prefetch0(&txn->mbuf->pool);
809
810                                 if (txe->mbuf != NULL) {
811                                         rte_pktmbuf_free_seg(txe->mbuf);
812                                         txe->mbuf = NULL;
813                                 }
814
815                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
816                                         tx_offload);
817
818                                 txe->last_id = tx_last;
819                                 tx_id = txe->next_id;
820                                 txe = txn;
821                         }
822
823                         /*
824                          * Set up the TX Advanced Data Descriptor.
825                          * This path is taken whether a new context
826                          * descriptor was built or an existing one is reused.
827                          */
828                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
829                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
830                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
831                 }
832
833                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
834
835                 m_seg = tx_pkt;
836                 do {
837                         txd = &txr[tx_id];
838                         txn = &sw_ring[txe->next_id];
839                         rte_prefetch0(&txn->mbuf->pool);
840
841                         if (txe->mbuf != NULL)
842                                 rte_pktmbuf_free_seg(txe->mbuf);
843                         txe->mbuf = m_seg;
844
845                         /*
846                          * Set up Transmit Data Descriptor.
847                          */
848                         slen = m_seg->data_len;
849                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
850                         txd->read.buffer_addr =
851                                 rte_cpu_to_le_64(buf_dma_addr);
852                         txd->read.cmd_type_len =
853                                 rte_cpu_to_le_32(cmd_type_len | slen);
854                         txd->read.olinfo_status =
855                                 rte_cpu_to_le_32(olinfo_status);
856                         txe->last_id = tx_last;
857                         tx_id = txe->next_id;
858                         txe = txn;
859                         m_seg = m_seg->next;
860                 } while (m_seg != NULL);
861
862                 /*
863                  * The last packet data descriptor needs End Of Packet (EOP)
864                  */
865                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
866                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
867                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
868
869                 /* Set RS bit only on threshold packets' last descriptor */
870                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
871                         PMD_TX_FREE_LOG(DEBUG,
872                                         "Setting RS bit on TXD id="
873                                         "%4u (port=%d queue=%d)",
874                                         tx_last, txq->port_id, txq->queue_id);
875
876                         cmd_type_len |= IXGBE_TXD_CMD_RS;
877
878                         /* Update txq RS bit counters */
879                         txq->nb_tx_used = 0;
880                         txp = NULL;
881                 } else
882                         txp = txd;
883
884                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
885         }
886
887 end_of_tx:
888         /* set RS on last packet in the burst */
889         if (txp != NULL)
890                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
891
892         rte_wmb();
893
894         /*
895          * Set the Transmit Descriptor Tail (TDT)
896          */
897         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
898                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
899                    (unsigned) tx_id, (unsigned) nb_tx);
900         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
901         txq->tx_tail = tx_id;
902
903         return nb_tx;
904 }
905
906 /*********************************************************************
907  *
908  *  RX functions
909  *
910  **********************************************************************/
911
912 #define IXGBE_PACKET_TYPE_ETHER                         0X00
913 #define IXGBE_PACKET_TYPE_IPV4                          0X01
914 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
915 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
916 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
917 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
918 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
919 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
920 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
921 #define IXGBE_PACKET_TYPE_IPV6                          0X04
922 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
923 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
924 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
925 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
926 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
927 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
928 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
929 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
930 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
931 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
932 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
933 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
934 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
935 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
936 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
937 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
938 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
939 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
940 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
941 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
942 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
943 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
944 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
945
946 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
947 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
948 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
949 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
950 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
951 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
952 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
953 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
954 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
955 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
956 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
957 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
958 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
959 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
960 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
961 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
962 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
963 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
964 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
965 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
966 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
967 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
968 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
969
970 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
971 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
972 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
973 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
974 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
975 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
976 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
977 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
978 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
979 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
980 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
981 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
982 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
983 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
984 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
985 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
986 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
987 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
988 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
989 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
990 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
991 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
992 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
993
994 #define IXGBE_PACKET_TYPE_MAX               0X80
995 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
996 #define IXGBE_PACKET_TYPE_SHIFT             0X04
997
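/*
 * Translate the packet-type bits reported in the RX descriptor (pkt_info)
 * into RTE_PTYPE_* values via the lookup tables below; the
 * IXGBE_PACKET_TYPE_* macros above name the table indices.
 */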
998 /* @note: update ixgbe_dev_supported_ptypes_get() if anything changes here. */
999 static inline uint32_t
1000 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1001 {
1002         /**
1003          * Use two different tables for normal and tunneled packets
1004          * to save space.
1005          */
1006         static const uint32_t
1007                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1008                 [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1009                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1010                         RTE_PTYPE_L3_IPV4,
1011                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1012                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1013                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1014                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1015                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1016                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1017                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1018                         RTE_PTYPE_L3_IPV4_EXT,
1019                 [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1020                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1021                 [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1022                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1023                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1024                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1025                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1026                         RTE_PTYPE_L3_IPV6,
1027                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1028                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1029                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1030                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1031                 [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1032                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1033                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1034                         RTE_PTYPE_L3_IPV6_EXT,
1035                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1036                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1037                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1038                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1039                 [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1040                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1041                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1042                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1043                         RTE_PTYPE_INNER_L3_IPV6,
1044                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1045                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1046                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1047                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1048                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1049                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1050                 [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1051                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1052                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1053                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1054                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1055                         RTE_PTYPE_INNER_L3_IPV6,
1056                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1057                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1058                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1059                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1060                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1061                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1062                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1063                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1064                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1065                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1066                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1067                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1068                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1069                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1070                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1071                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1072                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1073                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1074                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1075                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1076                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1077                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1078                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1079                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1080                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1081                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1082                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1083                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1084                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1085                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1086                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1087                         RTE_PTYPE_L2_ETHER |
1088                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1089                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1090         };
1091
1092         static const uint32_t
1093                 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1094                 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1095                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1096                         RTE_PTYPE_INNER_L2_ETHER,
1097                 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1098                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1099                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1100                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1101                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1102                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1103                 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1104                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1105                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1106                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1107                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1108                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1109                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1110                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1111                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1112                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1113                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1114                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1115                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1116                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1117                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1118                         RTE_PTYPE_INNER_L4_TCP,
1119                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1120                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1121                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1122                         RTE_PTYPE_INNER_L4_TCP,
1123                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1124                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1125                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1126                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1127                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1128                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1129                         RTE_PTYPE_INNER_L4_TCP,
1130                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1131                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1132                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1133                         RTE_PTYPE_INNER_L3_IPV4,
1134                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1135                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1136                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1137                         RTE_PTYPE_INNER_L4_UDP,
1138                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1139                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1140                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1141                         RTE_PTYPE_INNER_L4_UDP,
1142                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1143                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1144                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1145                         RTE_PTYPE_INNER_L4_SCTP,
1146                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1147                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1148                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1149                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1150                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1151                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1152                         RTE_PTYPE_INNER_L4_UDP,
1153                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1154                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1155                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1156                         RTE_PTYPE_INNER_L4_SCTP,
1157                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1158                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1159                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1160                         RTE_PTYPE_INNER_L3_IPV4,
1161                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1162                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1163                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1164                         RTE_PTYPE_INNER_L4_SCTP,
1165                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1166                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1167                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1168                         RTE_PTYPE_INNER_L4_SCTP,
1169                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1170                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1171                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1172                         RTE_PTYPE_INNER_L4_TCP,
1173                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1174                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1175                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1176                         RTE_PTYPE_INNER_L4_UDP,
1177
1178                 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1179                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1180                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1181                 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1182                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1183                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1184                         RTE_PTYPE_INNER_L3_IPV4,
1185                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1186                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1187                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1188                         RTE_PTYPE_INNER_L3_IPV4_EXT,
1189                 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1190                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1191                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1192                         RTE_PTYPE_INNER_L3_IPV6,
1193                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1194                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1195                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1196                         RTE_PTYPE_INNER_L3_IPV4,
1197                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1198                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1199                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1200                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1201                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1202                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1203                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1204                         RTE_PTYPE_INNER_L3_IPV4,
1205                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1206                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1207                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1208                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1209                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1210                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1211                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1212                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1213                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1214                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1215                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1216                         RTE_PTYPE_INNER_L3_IPV4,
1217                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1218                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1219                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1220                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1221                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1222                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1223                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1224                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1225                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1226                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1227                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1228                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1229                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1230                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1231                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1232                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1233                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1234                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1235                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1236                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1237                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1238                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1239                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1240                         RTE_PTYPE_INNER_L3_IPV4,
1241                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1242                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1243                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1244                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1245                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1246                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1247                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1248                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1249                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1250                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1251                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1252                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1253                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1254                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1255                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1256                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1257                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1258                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1259                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1260                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1261                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1262                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1263                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1264                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1265                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1266                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1267                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1268                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1269         };
1270
1271         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1272                 return RTE_PTYPE_UNKNOWN;
1273
1274         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1275
1276         /* For tunnel packet */
1277         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1278                 /* Remove the tunnel bit to save table space. */
1279                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1280                 return ptype_table_tn[pkt_info];
1281         }
1282
1283         /**
1284          * For x550, if the packet is not a tunnel packet,
1285          * the tunnel type bit should be 0.
1286          * Reuse the 82599 mask.
1287          */
1288         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1289
1290         return ptype_table[pkt_info];
1291 }
1292
1293 static inline uint64_t
1294 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1295 {
1296         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1297                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1298                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1299                 PKT_RX_RSS_HASH, 0, 0, 0,
1300                 0, 0, 0,  PKT_RX_FDIR,
1301         };
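        /*
         * The low 4 bits of pkt_info carry the hardware RSS-type code and
         * directly index ip_rss_types_map above: codes that the hardware
         * hashes map to PKT_RX_RSS_HASH, the final entry flags a
         * flow-director reported hash (PKT_RX_FDIR), and everything else
         * yields no flag.
         */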
1302 #ifdef RTE_LIBRTE_IEEE1588
1303         static uint64_t ip_pkt_etqf_map[8] = {
1304                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1305                 0, 0, 0, 0,
1306         };
1307
1308         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1309                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1310                                 ip_rss_types_map[pkt_info & 0XF];
1311         else
1312                 return ip_rss_types_map[pkt_info & 0XF];
1313 #else
1314         return ip_rss_types_map[pkt_info & 0XF];
1315 #endif
1316 }
1317
1318 static inline uint64_t
1319 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1320 {
1321         uint64_t pkt_flags;
1322
1323         /*
1324          * Check only whether a VLAN tag is present.
1325          * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1326          * that is indicated by the rte_eth_rxmode.hw_ip_checksum flag.
1327          */
1328         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1329
1330 #ifdef RTE_LIBRTE_IEEE1588
1331         if (rx_status & IXGBE_RXD_STAT_TMST)
1332                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1333 #endif
1334         return pkt_flags;
1335 }
1336
1337 static inline uint64_t
1338 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1339 {
1340         uint64_t pkt_flags;
1341
1342         /*
1343          * Bit 31: IPE, IPv4 checksum error
1344          * Bit 30: L4I, L4 integrity error
1345          */
1346         static uint64_t error_to_pkt_flags_map[4] = {
1347                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1348                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1349                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1350                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1351         };
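        /*
         * The two checksum error bits form a 2-bit index into the map above
         * once shifted down below: bit 0 is the L4 error and bit 1 the IPv4
         * error. For example, an index of 2 (IP error set, L4 error clear)
         * selects PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD.
         */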
1352         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1353                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1354
1355         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1356             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1357                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1358         }
1359
1360         return pkt_flags;
1361 }
1362
1363 /*
1364  * LOOK_AHEAD defines how many desc statuses to check beyond the
1365  * current descriptor.
1366  * It must be a compile-time constant (#define) for optimal performance.
1367  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1368  * function only works with LOOK_AHEAD=8.
1369  */
1370 #define LOOK_AHEAD 8
1371 #if (LOOK_AHEAD != 8)
1372 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1373 #endif
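/*
 * ixgbe_rx_scan_hw_ring() walks the descriptor ring in groups of LOOK_AHEAD
 * entries: it reads LOOK_AHEAD status words, counts how many consecutive
 * descriptors have the DD bit set, converts those completions into mbufs in
 * rx_stage[], and stops at the first incomplete group. With the usual
 * RTE_PMD_IXGBE_RX_MAX_BURST of 32 (see ixgbe_rxtx.h for the exact value)
 * that means at most four groups of eight descriptors per call.
 */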
1374 static inline int
1375 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1376 {
1377         volatile union ixgbe_adv_rx_desc *rxdp;
1378         struct ixgbe_rx_entry *rxep;
1379         struct rte_mbuf *mb;
1380         uint16_t pkt_len;
1381         uint64_t pkt_flags;
1382         int nb_dd;
1383         uint32_t s[LOOK_AHEAD];
1384         uint32_t pkt_info[LOOK_AHEAD];
1385         int i, j, nb_rx = 0;
1386         uint32_t status;
1387         uint64_t vlan_flags = rxq->vlan_flags;
1388
1389         /* get references to current descriptor and S/W ring entry */
1390         rxdp = &rxq->rx_ring[rxq->rx_tail];
1391         rxep = &rxq->sw_ring[rxq->rx_tail];
1392
1393         status = rxdp->wb.upper.status_error;
1394         /* check to make sure there is at least 1 packet to receive */
1395         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1396                 return 0;
1397
1398         /*
1399          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1400          * reference packets that are ready to be received.
1401          */
1402         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1403              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1404                 /* Read desc statuses (rte_smp_rmb() below orders them vs. later data reads) */
1405                 for (j = 0; j < LOOK_AHEAD; j++)
1406                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1407
1408                 rte_smp_rmb();
1409
1410                 /* Compute how many status bits were set */
1411                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1412                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1413                         ;
1414
1415                 for (j = 0; j < nb_dd; j++)
1416                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1417                                                        lo_dword.data);
1418
1419                 nb_rx += nb_dd;
1420
1421                 /* Translate descriptor info to mbuf format */
1422                 for (j = 0; j < nb_dd; ++j) {
1423                         mb = rxep[j].mbuf;
1424                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1425                                   rxq->crc_len;
1426                         mb->data_len = pkt_len;
1427                         mb->pkt_len = pkt_len;
1428                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1429
1430                         /* convert descriptor fields to rte mbuf flags */
1431                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1432                                 vlan_flags);
1433                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1434                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1435                                         ((uint16_t)pkt_info[j]);
1436                         mb->ol_flags = pkt_flags;
1437                         mb->packet_type =
1438                                 ixgbe_rxd_pkt_info_to_pkt_type
1439                                         (pkt_info[j], rxq->pkt_type_mask);
1440
1441                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1442                                 mb->hash.rss = rte_le_to_cpu_32(
1443                                     rxdp[j].wb.lower.hi_dword.rss);
1444                         else if (pkt_flags & PKT_RX_FDIR) {
1445                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1446                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1447                                     IXGBE_ATR_HASH_MASK;
1448                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1449                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1450                         }
1451                 }
1452
1453                 /* Move mbuf pointers from the S/W ring to the stage */
1454                 for (j = 0; j < LOOK_AHEAD; ++j) {
1455                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1456                 }
1457
1458                 /* stop if all requested packets could not be received */
1459                 if (nb_dd != LOOK_AHEAD)
1460                         break;
1461         }
1462
1463         /* clear software ring entries so we can cleanup correctly */
1464         for (i = 0; i < nb_rx; ++i) {
1465                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1466         }
1467
1468
1469         return nb_rx;
1470 }
1471
1472 static inline int
1473 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1474 {
1475         volatile union ixgbe_adv_rx_desc *rxdp;
1476         struct ixgbe_rx_entry *rxep;
1477         struct rte_mbuf *mb;
1478         uint16_t alloc_idx;
1479         __le64 dma_addr;
1480         int diag, i;
1481
1482         /* allocate buffers in bulk directly into the S/W ring */
1483         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1484         rxep = &rxq->sw_ring[alloc_idx];
1485         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1486                                     rxq->rx_free_thresh);
1487         if (unlikely(diag != 0))
1488                 return -ENOMEM;
1489
1490         rxdp = &rxq->rx_ring[alloc_idx];
1491         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1492                 /* populate the static rte mbuf fields */
1493                 mb = rxep[i].mbuf;
1494                 if (reset_mbuf) {
1495                         mb->next = NULL;
1496                         mb->nb_segs = 1;
1497                         mb->port = rxq->port_id;
1498                 }
1499
1500                 rte_mbuf_refcnt_set(mb, 1);
1501                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1502
1503                 /* populate the descriptors */
1504                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1505                 rxdp[i].read.hdr_addr = 0;
1506                 rxdp[i].read.pkt_addr = dma_addr;
1507         }
1508
1509         /* update state of internal queue structure */
1510         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1511         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1512                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
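        /*
         * Worked example (assuming the queue reset code elsewhere in this
         * file starts rx_free_trigger at rx_free_thresh - 1): with
         * nb_rx_desc = 128 and rx_free_thresh = 32 the trigger advances
         * 31 -> 63 -> 95 -> 127 and then wraps back to 31.
         */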
1513
1514         /* no errors */
1515         return 0;
1516 }
1517
1518 static inline uint16_t
1519 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1520                          uint16_t nb_pkts)
1521 {
1522         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1523         int i;
1524
1525         /* how many packets are ready to return? */
1526         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1527
1528         /* copy mbuf pointers to the application's packet list */
1529         for (i = 0; i < nb_pkts; ++i)
1530                 rx_pkts[i] = stage[i];
1531
1532         /* update internal queue state */
1533         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1534         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1535
1536         return nb_pkts;
1537 }
1538
1539 static inline uint16_t
1540 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1541              uint16_t nb_pkts)
1542 {
1543         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1544         uint16_t nb_rx = 0;
1545
1546         /* Any previously recv'd pkts will be returned from the Rx stage */
1547         if (rxq->rx_nb_avail)
1548                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1549
1550         /* Scan the H/W ring for packets to receive */
1551         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1552
1553         /* update internal queue state */
1554         rxq->rx_next_avail = 0;
1555         rxq->rx_nb_avail = nb_rx;
1556         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1557
1558         /* if required, allocate new buffers to replenish descriptors */
1559         if (rxq->rx_tail > rxq->rx_free_trigger) {
1560                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1561
1562                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1563                         int i, j;
1564
1565                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1566                                    "queue_id=%u", (unsigned) rxq->port_id,
1567                                    (unsigned) rxq->queue_id);
1568
1569                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1570                                 rxq->rx_free_thresh;
1571
1572                         /*
1573                          * Need to rewind any previous receives if we cannot
1574                          * allocate new buffers to replenish the old ones.
1575                          */
1576                         rxq->rx_nb_avail = 0;
1577                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1578                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1579                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1580
1581                         return 0;
1582                 }
1583
1584                 /* update tail pointer */
1585                 rte_wmb();
1586                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
1587         }
1588
1589         if (rxq->rx_tail >= rxq->nb_rx_desc)
1590                 rxq->rx_tail = 0;
1591
1592         /* received any packets this loop? */
1593         if (rxq->rx_nb_avail)
1594                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1595
1596         return 0;
1597 }
1598
1599 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1600 uint16_t
1601 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1602                            uint16_t nb_pkts)
1603 {
1604         uint16_t nb_rx;
1605
1606         if (unlikely(nb_pkts == 0))
1607                 return 0;
1608
1609         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1610                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1611
1612         /* request is relatively large, chunk it up */
1613         nb_rx = 0;
1614         while (nb_pkts) {
1615                 uint16_t ret, n;
1616
1617                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1618                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1619                 nb_rx = (uint16_t)(nb_rx + ret);
1620                 nb_pkts = (uint16_t)(nb_pkts - ret);
1621                 if (ret < n)
1622                         break;
1623         }
1624
1625         return nb_rx;
1626 }
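/*
 * Worked example of the chunking above: a request for 100 packets is served
 * as bursts of 32, 32, 32 and 4 (assuming RTE_PMD_IXGBE_RX_MAX_BURST keeps
 * its usual value of 32 in ixgbe_rxtx.h), and the loop stops early as soon
 * as a burst returns fewer packets than it asked for.
 */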
1627
1628 uint16_t
1629 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1630                 uint16_t nb_pkts)
1631 {
1632         struct ixgbe_rx_queue *rxq;
1633         volatile union ixgbe_adv_rx_desc *rx_ring;
1634         volatile union ixgbe_adv_rx_desc *rxdp;
1635         struct ixgbe_rx_entry *sw_ring;
1636         struct ixgbe_rx_entry *rxe;
1637         struct rte_mbuf *rxm;
1638         struct rte_mbuf *nmb;
1639         union ixgbe_adv_rx_desc rxd;
1640         uint64_t dma_addr;
1641         uint32_t staterr;
1642         uint32_t pkt_info;
1643         uint16_t pkt_len;
1644         uint16_t rx_id;
1645         uint16_t nb_rx;
1646         uint16_t nb_hold;
1647         uint64_t pkt_flags;
1648         uint64_t vlan_flags;
1649
1650         nb_rx = 0;
1651         nb_hold = 0;
1652         rxq = rx_queue;
1653         rx_id = rxq->rx_tail;
1654         rx_ring = rxq->rx_ring;
1655         sw_ring = rxq->sw_ring;
1656         vlan_flags = rxq->vlan_flags;
1657         while (nb_rx < nb_pkts) {
1658                 /*
1659                  * The order of operations here is important as the DD status
1660                  * bit must not be read after any other descriptor fields.
1661                  * rx_ring and rxdp are pointing to volatile data so the order
1662                  * of accesses cannot be reordered by the compiler. If they were
1663                  * not volatile, they could be reordered which could lead to
1664                  * using invalid descriptor fields when read from rxd.
1665                  */
1666                 rxdp = &rx_ring[rx_id];
1667                 staterr = rxdp->wb.upper.status_error;
1668                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1669                         break;
1670                 rxd = *rxdp;
1671
1672                 /*
1673                  * End of packet.
1674                  *
1675                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1676                  * is likely to be invalid and to be dropped by the various
1677                  * validation checks performed by the network stack.
1678                  *
1679                  * Allocate a new mbuf to replenish the RX ring descriptor.
1680                  * If the allocation fails:
1681                  *    - arrange for that RX descriptor to be the first one
1682                  *      being parsed the next time the receive function is
1683                  *      invoked [on the same queue].
1684                  *
1685                  *    - Stop parsing the RX ring and return immediately.
1686                  *
1687                  * This policy does not drop the packet received in the RX
1688                  * descriptor for which the allocation of a new mbuf failed.
1689                  * Thus, it allows that packet to be retrieved later, once
1690                  * mbufs have been freed in the meantime.
1691                  * As a side effect, holding RX descriptors instead of
1692                  * systematically giving them back to the NIC may lead to
1693                  * RX ring exhaustion situations.
1694                  * However, the NIC can gracefully prevent such situations
1695                  * from happening by sending specific "back-pressure" flow
1696                  * control frames to its peer(s).
1697                  */
1698                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1699                            "ext_err_stat=0x%08x pkt_len=%u",
1700                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1701                            (unsigned) rx_id, (unsigned) staterr,
1702                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1703
1704                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1705                 if (nmb == NULL) {
1706                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1707                                    "queue_id=%u", (unsigned) rxq->port_id,
1708                                    (unsigned) rxq->queue_id);
1709                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1710                         break;
1711                 }
1712
1713                 nb_hold++;
1714                 rxe = &sw_ring[rx_id];
1715                 rx_id++;
1716                 if (rx_id == rxq->nb_rx_desc)
1717                         rx_id = 0;
1718
1719                 /* Prefetch next mbuf while processing current one. */
1720                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1721
1722                 /*
1723                  * When next RX descriptor is on a cache-line boundary,
1724                  * prefetch the next 4 RX descriptors and the next 8 pointers
1725                  * to mbufs.
1726                  */
1727                 if ((rx_id & 0x3) == 0) {
1728                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1729                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1730                 }
1731
1732                 rxm = rxe->mbuf;
1733                 rxe->mbuf = nmb;
1734                 dma_addr =
1735                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1736                 rxdp->read.hdr_addr = 0;
1737                 rxdp->read.pkt_addr = dma_addr;
1738
1739                 /*
1740                  * Initialize the returned mbuf.
1741                  * 1) setup generic mbuf fields:
1742                  *    - number of segments,
1743                  *    - next segment,
1744                  *    - packet length,
1745                  *    - RX port identifier.
1746                  * 2) integrate hardware offload data, if any:
1747                  *    - RSS flag & hash,
1748                  *    - IP checksum flag,
1749                  *    - VLAN TCI, if any,
1750                  *    - error flags.
1751                  */
1752                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1753                                       rxq->crc_len);
1754                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1755                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1756                 rxm->nb_segs = 1;
1757                 rxm->next = NULL;
1758                 rxm->pkt_len = pkt_len;
1759                 rxm->data_len = pkt_len;
1760                 rxm->port = rxq->port_id;
1761
1762                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1763                 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
1764                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1765
1766                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1767                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1768                 pkt_flags = pkt_flags |
1769                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1770                 rxm->ol_flags = pkt_flags;
1771                 rxm->packet_type =
1772                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1773                                                        rxq->pkt_type_mask);
1774
1775                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1776                         rxm->hash.rss = rte_le_to_cpu_32(
1777                                                 rxd.wb.lower.hi_dword.rss);
1778                 else if (pkt_flags & PKT_RX_FDIR) {
1779                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1780                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1781                                         IXGBE_ATR_HASH_MASK;
1782                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1783                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1784                 }
1785                 /*
1786                  * Store the mbuf address into the next entry of the array
1787                  * of returned packets.
1788                  */
1789                 rx_pkts[nb_rx++] = rxm;
1790         }
1791         rxq->rx_tail = rx_id;
1792
1793         /*
1794          * If the number of free RX descriptors is greater than the RX free
1795          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1796          * register.
1797          * Update the RDT with the value of the last processed RX descriptor
1798          * minus 1, to guarantee that the RDT register is never equal to the
1799          * RDH register, which creates a "full" ring situation from the
1800          * hardware point of view...
1801          */
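        /*
         * Numeric illustration: with rx_free_thresh = 32, the RDT register is
         * rewritten only after more than 32 descriptors have been handed back
         * to the application since the last write, and the value written is
         * the index just before the next descriptor to be processed.
         */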
1802         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1803         if (nb_hold > rxq->rx_free_thresh) {
1804                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1805                            "nb_hold=%u nb_rx=%u",
1806                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1807                            (unsigned) rx_id, (unsigned) nb_hold,
1808                            (unsigned) nb_rx);
1809                 rx_id = (uint16_t) ((rx_id == 0) ?
1810                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1811                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1812                 nb_hold = 0;
1813         }
1814         rxq->nb_rx_hold = nb_hold;
1815         return nb_rx;
1816 }
1817
1818 /**
1819  * Detect an RSC descriptor.
1820  */
1821 static inline uint32_t
1822 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1823 {
1824         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1825                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1826 }
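/*
 * A non-zero RSC count in the write-back descriptor means the completion
 * belongs to an RSC (hardware LRO) aggregation; ixgbe_recv_pkts_lro() below
 * uses this to decide whether the next segment lives at the NEXTP index
 * encoded in the status word or simply in the following descriptor.
 */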
1827
1828 /**
1829  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1830  *
1831  * Fill the following info in the HEAD buffer of the Rx cluster:
1832  *    - RX port identifier
1833  *    - hardware offload data, if any:
1834  *      - RSS flag & hash
1835  *      - IP checksum flag
1836  *      - VLAN TCI, if any
1837  *      - error flags
1838  * @head HEAD of the packet cluster
1839  * @desc HW descriptor to get data from
1840  * @rxq Pointer to the Rx queue
1841  */
1842 static inline void
1843 ixgbe_fill_cluster_head_buf(
1844         struct rte_mbuf *head,
1845         union ixgbe_adv_rx_desc *desc,
1846         struct ixgbe_rx_queue *rxq,
1847         uint32_t staterr)
1848 {
1849         uint32_t pkt_info;
1850         uint64_t pkt_flags;
1851
1852         head->port = rxq->port_id;
1853
1854         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1855          * set in the pkt_flags field.
1856          */
1857         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1858         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1859         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1860         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1861         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1862         head->ol_flags = pkt_flags;
1863         head->packet_type =
1864                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1865
1866         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1867                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1868         else if (pkt_flags & PKT_RX_FDIR) {
1869                 head->hash.fdir.hash =
1870                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1871                                                           & IXGBE_ATR_HASH_MASK;
1872                 head->hash.fdir.id =
1873                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1874         }
1875 }
1876
1877 /**
1878  * ixgbe_recv_pkts_lro - receive handler for the LRO/RSC case.
1879  *
1880  * @rx_queue Rx queue handle
1881  * @rx_pkts table of received packets
1882  * @nb_pkts size of rx_pkts table
1883  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1884  *
1885  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1886  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1887  *
1888  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1889  * 1) When non-EOP RSC completion arrives:
1890  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1891  *       segment's data length.
1892  *    b) Set the "next" pointer of the current segment to point to the segment
1893  *       at the NEXTP index.
1894  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1895  *       in the sw_rsc_ring.
1896  * 2) When EOP arrives we just update the cluster's total length and offload
1897  *    flags and deliver the cluster up to the upper layers. In our case - put it
1898  *    in the rx_pkts table.
1899  *
1900  * Returns the number of received packets/clusters (according to the "bulk
1901  * receive" interface).
1902  */
1903 static inline uint16_t
1904 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1905                     bool bulk_alloc)
1906 {
1907         struct ixgbe_rx_queue *rxq = rx_queue;
1908         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1909         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1910         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1911         uint16_t rx_id = rxq->rx_tail;
1912         uint16_t nb_rx = 0;
1913         uint16_t nb_hold = rxq->nb_rx_hold;
1914         uint16_t prev_id = rxq->rx_tail;
1915
1916         while (nb_rx < nb_pkts) {
1917                 bool eop;
1918                 struct ixgbe_rx_entry *rxe;
1919                 struct ixgbe_scattered_rx_entry *sc_entry;
1920                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1921                 struct ixgbe_rx_entry *next_rxe = NULL;
1922                 struct rte_mbuf *first_seg;
1923                 struct rte_mbuf *rxm;
1924                 struct rte_mbuf *nmb;
1925                 union ixgbe_adv_rx_desc rxd;
1926                 uint16_t data_len;
1927                 uint16_t next_id;
1928                 volatile union ixgbe_adv_rx_desc *rxdp;
1929                 uint32_t staterr;
1930
1931 next_desc:
1932                 /*
1933                  * The code in this whole file uses the volatile pointer to
1934                  * ensure the read ordering of the status and the rest of the
1935                  * descriptor fields (on the compiler level only!!!). This is so
1936                  * UGLY - why not to just use the compiler barrier instead? DPDK
1937                  * UGLY - why not just use the compiler barrier instead? DPDK
1938                  *
1939                  * But most importantly this is just wrong because this doesn't
1940                  * ensure memory ordering in a general case at all. For
1941                  * instance, DPDK is supposed to work on Power CPUs where
1942                  * compiler barrier may just not be enough!
1943                  *
1944                  * I tried to write only this function properly to have a
1945                  * starting point (as a part of an LRO/RSC series) but the
1946                  * compiler cursed at me when I tried to cast away the
1947                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
1948                  * keeping it the way it is for now.
1949                  *
1950                  * The code in this file is broken in so many other places and
1951                  * will just not work on a big-endian CPU anyway; therefore, the
1952                  * lines below will have to be revisited together with the rest
1953                  * of the ixgbe PMD.
1954                  *
1955                  * TODO:
1956                  *    - Get rid of "volatile" crap and let the compiler do its
1957                  *      job.
1958                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
1959                  *      memory ordering below.
1960                  */
1961                 rxdp = &rx_ring[rx_id];
1962                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
1963
1964                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
1965                         break;
1966
1967                 rxd = *rxdp;
1968
1969                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1970                                   "staterr=0x%x data_len=%u",
1971                            rxq->port_id, rxq->queue_id, rx_id, staterr,
1972                            rte_le_to_cpu_16(rxd.wb.upper.length));
1973
1974                 if (!bulk_alloc) {
1975                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1976                         if (nmb == NULL) {
1977                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1978                                                   "port_id=%u queue_id=%u",
1979                                            rxq->port_id, rxq->queue_id);
1980
1981                                 rte_eth_devices[rxq->port_id].data->
1982                                                         rx_mbuf_alloc_failed++;
1983                                 break;
1984                         }
1985                 } else if (nb_hold > rxq->rx_free_thresh) {
1986                         uint16_t next_rdt = rxq->rx_free_trigger;
1987
1988                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
1989                                 rte_wmb();
1990                                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
1991                                                     next_rdt);
1992                                 nb_hold -= rxq->rx_free_thresh;
1993                         } else {
1994                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1995                                                   "port_id=%u queue_id=%u",
1996                                            rxq->port_id, rxq->queue_id);
1997
1998                                 rte_eth_devices[rxq->port_id].data->
1999                                                         rx_mbuf_alloc_failed++;
2000                                 break;
2001                         }
2002                 }
2003
2004                 nb_hold++;
2005                 rxe = &sw_ring[rx_id];
2006                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2007
2008                 next_id = rx_id + 1;
2009                 if (next_id == rxq->nb_rx_desc)
2010                         next_id = 0;
2011
2012                 /* Prefetch next mbuf while processing current one. */
2013                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2014
2015                 /*
2016                  * When next RX descriptor is on a cache-line boundary,
2017                  * prefetch the next 4 RX descriptors and the next 4 pointers
2018                  * to mbufs.
2019                  */
2020                 if ((next_id & 0x3) == 0) {
2021                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2022                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2023                 }
2024
2025                 rxm = rxe->mbuf;
2026
2027                 if (!bulk_alloc) {
2028                         __le64 dma =
2029                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2030                         /*
2031                          * Update RX descriptor with the physical address of the
2032                          * new data buffer of the new allocated mbuf.
2033                          */
2034                         rxe->mbuf = nmb;
2035
2036                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2037                         rxdp->read.hdr_addr = 0;
2038                         rxdp->read.pkt_addr = dma;
2039                 } else
2040                         rxe->mbuf = NULL;
2041
2042                 /*
2043                  * Set data length & data buffer address of mbuf.
2044                  */
2045                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2046                 rxm->data_len = data_len;
2047
2048                 if (!eop) {
2049                         uint16_t nextp_id;
2050                         /*
2051                          * Get next descriptor index:
2052                          *  - For RSC it's in the NEXTP field.
2053                          *  - For a scattered packet - it's just a following
2054                          *    descriptor.
2055                          */
2056                         if (ixgbe_rsc_count(&rxd))
2057                                 nextp_id =
2058                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2059                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2060                         else
2061                                 nextp_id = next_id;
2062
2063                         next_sc_entry = &sw_sc_ring[nextp_id];
2064                         next_rxe = &sw_ring[nextp_id];
2065                         rte_ixgbe_prefetch(next_rxe);
2066                 }
2067
2068                 sc_entry = &sw_sc_ring[rx_id];
2069                 first_seg = sc_entry->fbuf;
2070                 sc_entry->fbuf = NULL;
2071
2072                 /*
2073                  * If this is the first buffer of the received packet,
2074                  * set the pointer to the first mbuf of the packet and
2075                  * initialize its context.
2076                  * Otherwise, update the total length and the number of segments
2077                  * of the current scattered packet, and update the pointer to
2078                  * the last mbuf of the current packet.
2079                  */
2080                 if (first_seg == NULL) {
2081                         first_seg = rxm;
2082                         first_seg->pkt_len = data_len;
2083                         first_seg->nb_segs = 1;
2084                 } else {
2085                         first_seg->pkt_len += data_len;
2086                         first_seg->nb_segs++;
2087                 }
2088
2089                 prev_id = rx_id;
2090                 rx_id = next_id;
2091
2092                 /*
2093                  * If this is not the last buffer of the received packet, update
2094                  * the pointer to the first mbuf at the NEXTP entry in the
2095                  * sw_sc_ring and continue to parse the RX ring.
2096                  */
2097                 if (!eop && next_rxe) {
2098                         rxm->next = next_rxe->mbuf;
2099                         next_sc_entry->fbuf = first_seg;
2100                         goto next_desc;
2101                 }
2102
2103                 /*
2104                  * This is the last buffer of the received packet - return
2105                  * the current cluster to the user.
2106                  */
2107                 rxm->next = NULL;
2108
2109                 /* Initialize the first mbuf of the returned packet */
2110                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2111
2112                 /*
2113                  * Deal with the case when HW CRC strip is disabled.
2114                  * That can't happen when LRO is enabled, but still could
2115                  * happen for scattered RX mode.
2116                  */
2117                 first_seg->pkt_len -= rxq->crc_len;
2118                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2119                         struct rte_mbuf *lp;
2120
2121                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2122                                 ;
2123
2124                         first_seg->nb_segs--;
2125                         lp->data_len -= rxq->crc_len - rxm->data_len;
2126                         lp->next = NULL;
2127                         rte_pktmbuf_free_seg(rxm);
2128                 } else
2129                         rxm->data_len -= rxq->crc_len;
2130
2131                 /* Prefetch data of first segment, if configured to do so. */
2132                 rte_packet_prefetch((char *)first_seg->buf_addr +
2133                         first_seg->data_off);
2134
2135                 /*
2136                  * Store the mbuf address into the next entry of the array
2137                  * of returned packets.
2138                  */
2139                 rx_pkts[nb_rx++] = first_seg;
2140         }
2141
2142         /*
2143          * Record index of the next RX descriptor to probe.
2144          */
2145         rxq->rx_tail = rx_id;
2146
2147         /*
2148          * If the number of free RX descriptors is greater than the RX free
2149          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2150          * register.
2151          * Update the RDT with the value of the last processed RX descriptor
2152          * minus 1, to guarantee that the RDT register is never equal to the
2153          * RDH register, which creates a "full" ring situation from the
2154          * hardware point of view...
2155          */
2156         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2157                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2158                            "nb_hold=%u nb_rx=%u",
2159                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2160
2161                 rte_wmb();
2162                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
2163                 nb_hold = 0;
2164         }
2165
2166         rxq->nb_rx_hold = nb_hold;
2167         return nb_rx;
2168 }
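/*
 * Example walk for a three-segment RSC cluster handled above: the first two
 * completions arrive without EOP, so each one adds its data_len to the head
 * mbuf, links the current segment to the mbuf sitting at the NEXTP index and
 * stores the head pointer in sw_sc_ring[NEXTP]. The third completion carries
 * EOP, so the head is finalized by ixgbe_fill_cluster_head_buf() and placed
 * in rx_pkts[].
 */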
2169
2170 uint16_t
2171 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2172                                  uint16_t nb_pkts)
2173 {
2174         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2175 }
2176
2177 uint16_t
2178 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2179                                uint16_t nb_pkts)
2180 {
2181         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2182 }
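/*
 * Illustrative usage sketch (not part of the driver): applications do not
 * call these handlers directly, they go through the generic burst API, which
 * dispatches to whichever handler was installed for the port, e.g.:
 *
 *     struct rte_mbuf *pkts[32];
 *     uint16_t nb = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
 *
 * where port_id and queue_id stand in for the application's own identifiers.
 */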
2183
2184 /*********************************************************************
2185  *
2186  *  Queue management functions
2187  *
2188  **********************************************************************/
2189
2190 static void __attribute__((cold))
2191 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2192 {
2193         unsigned i;
2194
2195         if (txq->sw_ring != NULL) {
2196                 for (i = 0; i < txq->nb_tx_desc; i++) {
2197                         if (txq->sw_ring[i].mbuf != NULL) {
2198                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2199                                 txq->sw_ring[i].mbuf = NULL;
2200                         }
2201                 }
2202         }
2203 }
2204
2205 static void __attribute__((cold))
2206 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2207 {
2208         if (txq != NULL &&
2209             txq->sw_ring != NULL)
2210                 rte_free(txq->sw_ring);
2211 }
2212
2213 static void __attribute__((cold))
2214 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2215 {
2216         if (txq != NULL && txq->ops != NULL) {
2217                 txq->ops->release_mbufs(txq);
2218                 txq->ops->free_swring(txq);
2219                 rte_free(txq);
2220         }
2221 }
2222
2223 void __attribute__((cold))
2224 ixgbe_dev_tx_queue_release(void *txq)
2225 {
2226         ixgbe_tx_queue_release(txq);
2227 }
2228
2229 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2230 static void __attribute__((cold))
2231 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2232 {
2233         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2234         struct ixgbe_tx_entry *txe = txq->sw_ring;
2235         uint16_t prev, i;
2236
2237         /* Zero out HW ring memory */
2238         for (i = 0; i < txq->nb_tx_desc; i++) {
2239                 txq->tx_ring[i] = zeroed_desc;
2240         }
2241
2242         /* Initialize SW ring entries */
2243         prev = (uint16_t) (txq->nb_tx_desc - 1);
2244         for (i = 0; i < txq->nb_tx_desc; i++) {
2245                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2246
2247                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2248                 txe[i].mbuf = NULL;
2249                 txe[i].last_id = i;
2250                 txe[prev].next_id = i;
2251                 prev = i;
2252         }
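        /*
         * After the loop above, txe[].next_id forms a circular list over the
         * whole ring (entry i points to i + 1 and the last entry points back
         * to 0), every descriptor reports DD so it appears already completed,
         * and last_id is simply each entry's own index.
         */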
2253
2254         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2255         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2256
2257         txq->tx_tail = 0;
2258         txq->nb_tx_used = 0;
2259         /*
2260          * Always allow 1 descriptor to be un-allocated to avoid
2261          * a H/W race condition
2262          */
2263         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2264         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2265         txq->ctx_curr = 0;
2266         memset((void *)&txq->ctx_cache, 0,
2267                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2268 }
2269
2270 static const struct ixgbe_txq_ops def_txq_ops = {
2271         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2272         .free_swring = ixgbe_tx_free_swring,
2273         .reset = ixgbe_reset_tx_queue,
2274 };
2275
2276 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2277  * the queue parameters. Used in tx_queue_setup by primary process and then
2278  * in dev_init by secondary process when attaching to an existing ethdev.
2279  */
2280 void __attribute__((cold))
2281 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2282 {
2283         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2284         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2285                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2286                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2287 #ifdef RTE_IXGBE_INC_VECTOR
2288                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2289                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2290                                         ixgbe_txq_vec_setup(txq) == 0)) {
2291                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2292                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2293                 } else
2294 #endif
2295                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2296         } else {
2297                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2298                 PMD_INIT_LOG(DEBUG,
2299                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2300                                 (unsigned long)txq->txq_flags,
2301                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2302                 PMD_INIT_LOG(DEBUG,
2303                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2304                                 (unsigned long)txq->tx_rs_thresh,
2305                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2306                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2307         }
2308 }
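/*
 * Illustrative usage sketch (not part of the driver): once one of the
 * transmit handlers above is installed in dev->tx_pkt_burst, an application
 * sends packets through the generic burst API, e.g.:
 *
 *     uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);
 *
 * The simple/vector/full-featured choice is made per queue from txq_flags
 * and tx_rs_thresh, as the logic above shows.
 */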
2309
2310 int __attribute__((cold))
2311 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2312                          uint16_t queue_idx,
2313                          uint16_t nb_desc,
2314                          unsigned int socket_id,
2315                          const struct rte_eth_txconf *tx_conf)
2316 {
2317         const struct rte_memzone *tz;
2318         struct ixgbe_tx_queue *txq;
2319         struct ixgbe_hw     *hw;
2320         uint16_t tx_rs_thresh, tx_free_thresh;
2321
2322         PMD_INIT_FUNC_TRACE();
2323         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2324
2325         /*
2326          * Validate number of transmit descriptors.
2327          * It must not exceed hardware maximum, and must be multiple
2328          * It must not exceed the hardware maximum and must be a multiple
2329          * of IXGBE_TXD_ALIGN.
2330         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2331                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2332                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2333                 return -EINVAL;
2334         }
2335
2336         /*
2337          * The following two parameters control the setting of the RS bit on
2338          * transmit descriptors.
2339          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2340          * descriptors have been used.
2341          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2342          * descriptors are used or if the number of descriptors required
2343          * to transmit a packet is greater than the number of free TX
2344          * descriptors.
2345          * The following constraints must be satisfied:
2346          *  tx_rs_thresh must be greater than 0.
2347          *  tx_rs_thresh must be less than the size of the ring minus 2.
2348          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2349          *  tx_rs_thresh must be a divisor of the ring size.
2350          *  tx_free_thresh must be greater than 0.
2351          *  tx_free_thresh must be less than the size of the ring minus 3.
2352          * One descriptor in the TX ring is used as a sentinel to avoid a
2353          * H/W race condition, hence the maximum threshold constraints.
2354          * When set to zero use default values.
2355          */
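        /*
         * Worked example: with nb_desc = 512 and both thresholds left at 0,
         * so that the defaults from ixgbe_rxtx.h apply (commonly 32 each),
         * every check below passes: 32 < 510, 32 < 509, 32 <= 32 and
         * 32 divides 512 evenly.
         */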
2356         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2357                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2358         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2359                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2360         if (tx_rs_thresh >= (nb_desc - 2)) {
2361                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2362                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2363                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2364                         (int)dev->data->port_id, (int)queue_idx);
2365                 return -(EINVAL);
2366         }
2367         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2368                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2369                         "(tx_rs_thresh=%u port=%d queue=%d)",
2370                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2371                         (int)dev->data->port_id, (int)queue_idx);
2372                 return -(EINVAL);
2373         }
2374         if (tx_free_thresh >= (nb_desc - 3)) {
2375                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2376                              "number of TX descriptors minus 3. "
2377                              "(tx_free_thresh=%u "
2378                              "port=%d queue=%d)",
2379                              (unsigned int)tx_free_thresh,
2380                              (int)dev->data->port_id, (int)queue_idx);
2381                 return -(EINVAL);
2382         }
2383         if (tx_rs_thresh > tx_free_thresh) {
2384                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2385                              "tx_free_thresh. (tx_free_thresh=%u "
2386                              "tx_rs_thresh=%u port=%d queue=%d)",
2387                              (unsigned int)tx_free_thresh,
2388                              (unsigned int)tx_rs_thresh,
2389                              (int)dev->data->port_id,
2390                              (int)queue_idx);
2391                 return -(EINVAL);
2392         }
2393         if ((nb_desc % tx_rs_thresh) != 0) {
2394                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2395                              "number of TX descriptors. (tx_rs_thresh=%u "
2396                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2397                              (int)dev->data->port_id, (int)queue_idx);
2398                 return -(EINVAL);
2399         }
2400
2401         /*
2402          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2403          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2404          * by the NIC and all descriptors are written back after the NIC
2405          * accumulates WTHRESH descriptors.
2406          */
2407         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2408                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2409                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2410                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2411                              (int)dev->data->port_id, (int)queue_idx);
2412                 return -(EINVAL);
2413         }
2414
2415         /* Free memory prior to re-allocation if needed... */
2416         if (dev->data->tx_queues[queue_idx] != NULL) {
2417                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2418                 dev->data->tx_queues[queue_idx] = NULL;
2419         }
2420
2421         /* First allocate the tx queue data structure */
2422         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2423                                  RTE_CACHE_LINE_SIZE, socket_id);
2424         if (txq == NULL)
2425                 return -ENOMEM;
2426
2427         /*
2428          * Allocate TX ring hardware descriptors. A memzone large enough to
2429          * handle the maximum ring size is allocated in order to allow for
2430          * resizing in later calls to the queue setup function.
2431          */
2432         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2433                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2434                         IXGBE_ALIGN, socket_id);
2435         if (tz == NULL) {
2436                 ixgbe_tx_queue_release(txq);
2437                 return -ENOMEM;
2438         }
2439
2440         txq->nb_tx_desc = nb_desc;
2441         txq->tx_rs_thresh = tx_rs_thresh;
2442         txq->tx_free_thresh = tx_free_thresh;
2443         txq->pthresh = tx_conf->tx_thresh.pthresh;
2444         txq->hthresh = tx_conf->tx_thresh.hthresh;
2445         txq->wthresh = tx_conf->tx_thresh.wthresh;
2446         txq->queue_id = queue_idx;
2447         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2448                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2449         txq->port_id = dev->data->port_id;
2450         txq->txq_flags = tx_conf->txq_flags;
2451         txq->ops = &def_txq_ops;
2452         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2453
2454         /*
2455          * Use the VFTDT tail register when the device is a virtual function
2456          */
2457         if (hw->mac.type == ixgbe_mac_82599_vf ||
2458             hw->mac.type == ixgbe_mac_X540_vf ||
2459             hw->mac.type == ixgbe_mac_X550_vf ||
2460             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2461             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2462                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2463         else
2464                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2465
2466         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2467         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2468
2469         /* Allocate software ring */
2470         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2471                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2472                                 RTE_CACHE_LINE_SIZE, socket_id);
2473         if (txq->sw_ring == NULL) {
2474                 ixgbe_tx_queue_release(txq);
2475                 return -ENOMEM;
2476         }
2477         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2478                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2479
2480         /* set up vector or scalar TX function as appropriate */
2481         ixgbe_set_tx_function(dev, txq);
2482
2483         txq->ops->reset(txq);
2484
2485         dev->data->tx_queues[queue_idx] = txq;
2486
2487
2488         return 0;
2489 }
2490
2491 /**
2492  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2493  *
2494  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2495  * in the sw_rsc_ring is not set to NULL but rather points to the next
2496  * mbuf of this RSC aggregation (that has not been completed yet and still
2497  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2498  * just free the first "nb_segs" segments of the cluster explicitly by calling
2499  * rte_pktmbuf_free_seg() on each of them.
2500  *
2501  * @m scattered cluster head
2502  */
2503 static void __attribute__((cold))
2504 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2505 {
2506         uint8_t i, nb_segs = m->nb_segs;
2507         struct rte_mbuf *next_seg;
2508
2509         for (i = 0; i < nb_segs; i++) {
2510                 next_seg = m->next;
2511                 rte_pktmbuf_free_seg(m);
2512                 m = next_seg;
2513         }
2514 }
2515
2516 static void __attribute__((cold))
2517 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2518 {
2519         unsigned i;
2520
2521 #ifdef RTE_IXGBE_INC_VECTOR
2522         /* SSE Vector driver has a different way of releasing mbufs. */
2523         if (rxq->rx_using_sse) {
2524                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2525                 return;
2526         }
2527 #endif
2528
2529         if (rxq->sw_ring != NULL) {
2530                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2531                         if (rxq->sw_ring[i].mbuf != NULL) {
2532                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2533                                 rxq->sw_ring[i].mbuf = NULL;
2534                         }
2535                 }
2536                 if (rxq->rx_nb_avail) {
2537                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2538                                 struct rte_mbuf *mb;
2539
2540                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2541                                 rte_pktmbuf_free_seg(mb);
2542                         }
2543                         rxq->rx_nb_avail = 0;
2544                 }
2545         }
2546
2547         if (rxq->sw_sc_ring)
2548                 for (i = 0; i < rxq->nb_rx_desc; i++)
2549                         if (rxq->sw_sc_ring[i].fbuf) {
2550                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2551                                 rxq->sw_sc_ring[i].fbuf = NULL;
2552                         }
2553 }
2554
2555 static void __attribute__((cold))
2556 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2557 {
2558         if (rxq != NULL) {
2559                 ixgbe_rx_queue_release_mbufs(rxq);
2560                 rte_free(rxq->sw_ring);
2561                 rte_free(rxq->sw_sc_ring);
2562                 rte_free(rxq);
2563         }
2564 }
2565
2566 void __attribute__((cold))
2567 ixgbe_dev_rx_queue_release(void *rxq)
2568 {
2569         ixgbe_rx_queue_release(rxq);
2570 }
2571
2572 /*
2573  * Check if Rx Burst Bulk Alloc function can be used.
2574  * Return
2575  *        0: the preconditions are satisfied and the bulk allocation function
2576  *           can be used.
2577  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2578  *           function must be used.
2579  */
2580 static inline int __attribute__((cold))
2581 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2582 {
2583         int ret = 0;
2584
2585         /*
2586          * Make sure the following pre-conditions are satisfied:
2587          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2588          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2589          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2590          *   rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST)
2591          * Scattered packets are not supported.  This should be checked
2592          * outside of this function.
2593          */
2594         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2595                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2596                              "rxq->rx_free_thresh=%d, "
2597                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2598                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2599                 ret = -EINVAL;
2600         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2601                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2602                              "rxq->rx_free_thresh=%d, "
2603                              "rxq->nb_rx_desc=%d",
2604                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2605                 ret = -EINVAL;
2606         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2607                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2608                              "rxq->nb_rx_desc=%d, "
2609                              "rxq->rx_free_thresh=%d",
2610                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2611                 ret = -EINVAL;
2612         } else if (!(rxq->nb_rx_desc <
2613                (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
2614                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2615                              "rxq->nb_rx_desc=%d, "
2616                              "IXGBE_MAX_RING_DESC=%d, "
2617                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2618                              rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
2619                              RTE_PMD_IXGBE_RX_MAX_BURST);
2620                 ret = -EINVAL;
2621         }
2622
2623         return ret;
2624 }
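/*
 * Editor's sketch (not part of the driver): an Rx queue configuration that
 * meets the bulk allocation preconditions checked above, assuming
 * RTE_PMD_IXGBE_RX_MAX_BURST is 32, "port_id" is a configured port and
 * "mb_pool" is an already created mempool. 32 divides 512, and
 * 512 < IXGBE_MAX_RING_DESC - 32.
 *
 *   struct rte_eth_rxconf rxconf = { .rx_free_thresh = 32 };
 *
 *   rte_eth_rx_queue_setup(port_id, 0, 512,
 *                          rte_eth_dev_socket_id(port_id), &rxconf, mb_pool);
 */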
2625
2626 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2627 static void __attribute__((cold))
2628 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2629 {
2630         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2631         unsigned i;
2632         uint16_t len = rxq->nb_rx_desc;
2633
2634         /*
2635          * By default, the Rx queue setup function allocates enough memory for
2636          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2637          * extra memory at the end of the descriptor ring to be zero'd out. A
2638          * pre-condition for using the Rx burst bulk alloc function is that the
2639          * number of descriptors is less than or equal to
2640          * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2641          * constraints here to see if we need to zero out memory after the end
2642          * of the H/W descriptor ring.
2643          */
2644         if (adapter->rx_bulk_alloc_allowed)
2645                 /* zero out extra memory */
2646                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2647
2648         /*
2649          * Zero out HW ring memory. Zero out extra memory at the end of
2650          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2651          * reads extra memory as zeros.
2652          */
2653         for (i = 0; i < len; i++) {
2654                 rxq->rx_ring[i] = zeroed_desc;
2655         }
2656
2657         /*
2658          * Initialize the extra software ring entries. Space for these extra
2659          * entries is always allocated.
2660          */
2661         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2662         for (i = rxq->nb_rx_desc; i < len; ++i) {
2663                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2664         }
2665
2666         rxq->rx_nb_avail = 0;
2667         rxq->rx_next_avail = 0;
2668         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2669         rxq->rx_tail = 0;
2670         rxq->nb_rx_hold = 0;
2671         rxq->pkt_first_seg = NULL;
2672         rxq->pkt_last_seg = NULL;
2673
2674 #ifdef RTE_IXGBE_INC_VECTOR
2675         rxq->rxrearm_start = 0;
2676         rxq->rxrearm_nb = 0;
2677 #endif
2678 }
2679
2680 int __attribute__((cold))
2681 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2682                          uint16_t queue_idx,
2683                          uint16_t nb_desc,
2684                          unsigned int socket_id,
2685                          const struct rte_eth_rxconf *rx_conf,
2686                          struct rte_mempool *mp)
2687 {
2688         const struct rte_memzone *rz;
2689         struct ixgbe_rx_queue *rxq;
2690         struct ixgbe_hw     *hw;
2691         uint16_t len;
2692         struct ixgbe_adapter *adapter =
2693                 (struct ixgbe_adapter *)dev->data->dev_private;
2694
2695         PMD_INIT_FUNC_TRACE();
2696         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2697
2698         /*
2699          * Validate number of receive descriptors.
2700          * It must not exceed hardware maximum, and must be multiple
2701          * of IXGBE_ALIGN.
2702          */
2703         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2704                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2705                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2706                 return -EINVAL;
2707         }
2708
2709         /* Free memory prior to re-allocation if needed... */
2710         if (dev->data->rx_queues[queue_idx] != NULL) {
2711                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2712                 dev->data->rx_queues[queue_idx] = NULL;
2713         }
2714
2715         /* First allocate the rx queue data structure */
2716         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2717                                  RTE_CACHE_LINE_SIZE, socket_id);
2718         if (rxq == NULL)
2719                 return -ENOMEM;
2720         rxq->mb_pool = mp;
2721         rxq->nb_rx_desc = nb_desc;
2722         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2723         rxq->queue_id = queue_idx;
2724         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2725                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2726         rxq->port_id = dev->data->port_id;
2727         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2728                                                         0 : ETHER_CRC_LEN);
2729         rxq->drop_en = rx_conf->rx_drop_en;
2730         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2731
2732         /*
2733          * The packet type in RX descriptor is different for different NICs.
2734          * Some bits are used for x550 but reserved for other NICs.
2735          * So set different masks for different NICs.
2736          */
2737         if (hw->mac.type == ixgbe_mac_X550 ||
2738             hw->mac.type == ixgbe_mac_X550EM_x ||
2739             hw->mac.type == ixgbe_mac_X550EM_a ||
2740             hw->mac.type == ixgbe_mac_X550_vf ||
2741             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2742             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2743                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2744         else
2745                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2746
2747         /*
2748          * Allocate RX ring hardware descriptors. A memzone large enough to
2749          * handle the maximum ring size is allocated in order to allow for
2750          * resizing in later calls to the queue setup function.
2751          */
2752         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2753                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2754         if (rz == NULL) {
2755                 ixgbe_rx_queue_release(rxq);
2756                 return -ENOMEM;
2757         }
2758
2759         /*
2760          * Zero init all the descriptors in the ring.
2761          */
2762         memset(rz->addr, 0, RX_RING_SZ);
2763
2764         /*
2765          * Use the VFRDT/VFRDH registers when the device is a virtual function
2766          */
2767         if (hw->mac.type == ixgbe_mac_82599_vf ||
2768             hw->mac.type == ixgbe_mac_X540_vf ||
2769             hw->mac.type == ixgbe_mac_X550_vf ||
2770             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2771             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2772                 rxq->rdt_reg_addr =
2773                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2774                 rxq->rdh_reg_addr =
2775                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2776         } else {
2777                 rxq->rdt_reg_addr =
2778                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2779                 rxq->rdh_reg_addr =
2780                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2781         }
2782
2783         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2784         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2785
2786         /*
2787          * Certain constraints must be met in order to use the bulk buffer
2788          * allocation Rx burst function. If any of the Rx queues doesn't meet them,
2789          * the feature should be disabled for the whole port.
2790          */
2791         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2792                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2793                                     "preconditions - canceling the feature for "
2794                                     "the whole port[%d]",
2795                              rxq->queue_id, rxq->port_id);
2796                 adapter->rx_bulk_alloc_allowed = false;
2797         }
2798
2799         /*
2800          * Allocate software ring. Allow for space at the end of the
2801          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2802          * function does not access an invalid memory region.
2803          */
2804         len = nb_desc;
2805         if (adapter->rx_bulk_alloc_allowed)
2806                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2807
2808         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2809                                           sizeof(struct ixgbe_rx_entry) * len,
2810                                           RTE_CACHE_LINE_SIZE, socket_id);
2811         if (!rxq->sw_ring) {
2812                 ixgbe_rx_queue_release(rxq);
2813                 return -ENOMEM;
2814         }
2815
2816         /*
2817          * Always allocate even if it's not going to be needed in order to
2818          * simplify the code.
2819          *
2820          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2821          * be requested in ixgbe_dev_rx_init(), which is called later from
2822          * dev_start() flow.
2823          */
2824         rxq->sw_sc_ring =
2825                 rte_zmalloc_socket("rxq->sw_sc_ring",
2826                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2827                                    RTE_CACHE_LINE_SIZE, socket_id);
2828         if (!rxq->sw_sc_ring) {
2829                 ixgbe_rx_queue_release(rxq);
2830                 return -ENOMEM;
2831         }
2832
2833         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2834                             "dma_addr=0x%"PRIx64,
2835                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2836                      rxq->rx_ring_phys_addr);
2837
2838         if (!rte_is_power_of_2(nb_desc)) {
2839                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2840                                     "preconditions - canceling the feature for "
2841                                     "the whole port[%d]",
2842                              rxq->queue_id, rxq->port_id);
2843                 adapter->rx_vec_allowed = false;
2844         } else
2845                 ixgbe_rxq_vec_setup(rxq);
2846
2847         dev->data->rx_queues[queue_idx] = rxq;
2848
2849         ixgbe_reset_rx_queue(adapter, rxq);
2850
2851         return 0;
2852 }
2853
2854 uint32_t
2855 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2856 {
2857 #define IXGBE_RXQ_SCAN_INTERVAL 4
2858         volatile union ixgbe_adv_rx_desc *rxdp;
2859         struct ixgbe_rx_queue *rxq;
2860         uint32_t desc = 0;
2861
2862         if (rx_queue_id >= dev->data->nb_rx_queues) {
2863                 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2864                 return 0;
2865         }
2866
2867         rxq = dev->data->rx_queues[rx_queue_id];
2868         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2869
2870         while ((desc < rxq->nb_rx_desc) &&
2871                 (rxdp->wb.upper.status_error &
2872                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2873                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2874                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2875                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2876                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2877                                 desc - rxq->nb_rx_desc]);
2878         }
2879
2880         return desc;
2881 }
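/*
 * Usage sketch (editor's example): an application can query this counter
 * through the generic ethdev wrapper; "port_id" is assumed to be a configured
 * ixgbe port. The count advances in steps of IXGBE_RXQ_SCAN_INTERVAL.
 *
 *   int used = rte_eth_rx_queue_count(port_id, 0);
 */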
2882
2883 int
2884 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2885 {
2886         volatile union ixgbe_adv_rx_desc *rxdp;
2887         struct ixgbe_rx_queue *rxq = rx_queue;
2888         uint32_t desc;
2889
2890         if (unlikely(offset >= rxq->nb_rx_desc))
2891                 return 0;
2892         desc = rxq->rx_tail + offset;
2893         if (desc >= rxq->nb_rx_desc)
2894                 desc -= rxq->nb_rx_desc;
2895
2896         rxdp = &rxq->rx_ring[desc];
2897         return !!(rxdp->wb.upper.status_error &
2898                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2899 }
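/*
 * Usage sketch (editor's example): the generic wrapper for this callback is
 * rte_eth_rx_descriptor_done(), e.g. to test whether the descriptor 16
 * entries past the current tail has been written back:
 *
 *   int done = rte_eth_rx_descriptor_done(port_id, 0, 16);
 */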
2900
2901 void __attribute__((cold))
2902 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2903 {
2904         unsigned i;
2905         struct ixgbe_adapter *adapter =
2906                 (struct ixgbe_adapter *)dev->data->dev_private;
2907
2908         PMD_INIT_FUNC_TRACE();
2909
2910         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2911                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2912
2913                 if (txq != NULL) {
2914                         txq->ops->release_mbufs(txq);
2915                         txq->ops->reset(txq);
2916                 }
2917         }
2918
2919         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2920                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2921
2922                 if (rxq != NULL) {
2923                         ixgbe_rx_queue_release_mbufs(rxq);
2924                         ixgbe_reset_rx_queue(adapter, rxq);
2925                 }
2926         }
2927 }
2928
2929 void
2930 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2931 {
2932         unsigned i;
2933
2934         PMD_INIT_FUNC_TRACE();
2935
2936         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2937                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2938                 dev->data->rx_queues[i] = NULL;
2939         }
2940         dev->data->nb_rx_queues = 0;
2941
2942         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2943                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2944                 dev->data->tx_queues[i] = NULL;
2945         }
2946         dev->data->nb_tx_queues = 0;
2947 }
2948
2949 /*********************************************************************
2950  *
2951  *  Device RX/TX init functions
2952  *
2953  **********************************************************************/
2954
2955 /**
2956  * Receive Side Scaling (RSS)
2957  * See section 7.1.2.8 in the following document:
2958  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2959  *
2960  * Principles:
2961  * The source and destination IP addresses of the IP header and the source
2962  * and destination ports of TCP/UDP headers, if any, of received packets are
2963  * hashed against a configurable random key to compute a 32-bit RSS hash result.
2964  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2965  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
2966  * RSS output index, which is used as the index of the RX queue in which to
2967  * store the received packets.
2968  * The following output is supplied in the RX write-back descriptor:
2969  *     - 32-bit result of the Microsoft RSS hash function,
2970  *     - 4-bit RSS type field.
2971  */
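/*
 * Worked example (editor's note, not from the datasheet): for a hash result of
 * 0x1234ABCD the RETA index is 0x1234ABCD & 0x7F = 0x4D (77), so the packet is
 * steered to whichever RX queue is stored in RETA entry 77.
 */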
2972
2973 /*
2974  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2975  * Used as the default key.
2976  */
2977 static uint8_t rss_intel_key[40] = {
2978         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2979         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2980         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2981         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2982         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2983 };
2984
2985 static void
2986 ixgbe_rss_disable(struct rte_eth_dev *dev)
2987 {
2988         struct ixgbe_hw *hw;
2989         uint32_t mrqc;
2990         uint32_t mrqc_reg;
2991
2992         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2993         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2994         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2995         mrqc &= ~IXGBE_MRQC_RSSEN;
2996         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2997 }
2998
2999 static void
3000 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3001 {
3002         uint8_t  *hash_key;
3003         uint32_t mrqc;
3004         uint32_t rss_key;
3005         uint64_t rss_hf;
3006         uint16_t i;
3007         uint32_t mrqc_reg;
3008         uint32_t rssrk_reg;
3009
3010         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3011         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3012
3013         hash_key = rss_conf->rss_key;
3014         if (hash_key != NULL) {
3015                 /* Fill in RSS hash key */
3016                 for (i = 0; i < 10; i++) {
3017                         rss_key  = hash_key[(i * 4)];
3018                         rss_key |= hash_key[(i * 4) + 1] << 8;
3019                         rss_key |= hash_key[(i * 4) + 2] << 16;
3020                         rss_key |= hash_key[(i * 4) + 3] << 24;
3021                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3022                 }
3023         }
3024
3025         /* Set configured hashing protocols in MRQC register */
3026         rss_hf = rss_conf->rss_hf;
3027         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3028         if (rss_hf & ETH_RSS_IPV4)
3029                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3030         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3031                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3032         if (rss_hf & ETH_RSS_IPV6)
3033                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3034         if (rss_hf & ETH_RSS_IPV6_EX)
3035                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3036         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3037                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3038         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3039                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3040         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3041                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3042         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3043                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3044         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3045                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3046         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3047 }
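/*
 * Worked example (editor's note): the loop above packs the key little-endian,
 * four bytes per RSSRK register. With the default rss_intel_key defined above,
 * the first four bytes 0x6D, 0x5A, 0x56, 0xDA become RSSRK[0] = 0xDA565A6D.
 */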
3048
3049 int
3050 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3051                           struct rte_eth_rss_conf *rss_conf)
3052 {
3053         struct ixgbe_hw *hw;
3054         uint32_t mrqc;
3055         uint64_t rss_hf;
3056         uint32_t mrqc_reg;
3057
3058         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3059
3060         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3061                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3062                         "NIC.");
3063                 return -ENOTSUP;
3064         }
3065         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3066
3067         /*
3068          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3069          *     "RSS enabling cannot be done dynamically while it must be
3070          *      preceded by a software reset"
3071          * Before changing anything, first check that the update RSS operation
3072          * does not attempt to disable RSS, if RSS was enabled at
3073          * initialization time, or does not attempt to enable RSS, if RSS was
3074          * disabled at initialization time.
3075          */
3076         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3077         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3078         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3079                 if (rss_hf != 0) /* Enable RSS */
3080                         return -(EINVAL);
3081                 return 0; /* Nothing to do */
3082         }
3083         /* RSS enabled */
3084         if (rss_hf == 0) /* Disable RSS */
3085                 return -(EINVAL);
3086         ixgbe_hw_rss_hash_set(hw, rss_conf);
3087         return 0;
3088 }
3089
3090 int
3091 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3092                             struct rte_eth_rss_conf *rss_conf)
3093 {
3094         struct ixgbe_hw *hw;
3095         uint8_t *hash_key;
3096         uint32_t mrqc;
3097         uint32_t rss_key;
3098         uint64_t rss_hf;
3099         uint16_t i;
3100         uint32_t mrqc_reg;
3101         uint32_t rssrk_reg;
3102
3103         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3104         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3105         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3106         hash_key = rss_conf->rss_key;
3107         if (hash_key != NULL) {
3108                 /* Return RSS hash key */
3109                 for (i = 0; i < 10; i++) {
3110                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3111                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3112                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3113                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3114                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3115                 }
3116         }
3117
3118         /* Get RSS functions configured in MRQC register */
3119         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3120         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3121                 rss_conf->rss_hf = 0;
3122                 return 0;
3123         }
3124         rss_hf = 0;
3125         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3126                 rss_hf |= ETH_RSS_IPV4;
3127         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3128                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3129         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3130                 rss_hf |= ETH_RSS_IPV6;
3131         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3132                 rss_hf |= ETH_RSS_IPV6_EX;
3133         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3134                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3135         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3136                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3137         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3138                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3139         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3140                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3141         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3142                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3143         rss_conf->rss_hf = rss_hf;
3144         return 0;
3145 }
3146
3147 static void
3148 ixgbe_rss_configure(struct rte_eth_dev *dev)
3149 {
3150         struct rte_eth_rss_conf rss_conf;
3151         struct ixgbe_hw *hw;
3152         uint32_t reta;
3153         uint16_t i;
3154         uint16_t j;
3155         uint16_t sp_reta_size;
3156         uint32_t reta_reg;
3157
3158         PMD_INIT_FUNC_TRACE();
3159         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3160
3161         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3162
3163         /*
3164          * Fill in redirection table
3165          * The byte-swap is needed because NIC registers are in
3166          * little-endian order.
3167          */
3168         reta = 0;
3169         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3170                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3171
3172                 if (j == dev->data->nb_rx_queues)
3173                         j = 0;
3174                 reta = (reta << 8) | j;
3175                 if ((i & 3) == 3)
3176                         IXGBE_WRITE_REG(hw, reta_reg,
3177                                         rte_bswap32(reta));
3178         }
3179
3180         /*
3181          * Configure the RSS key and the RSS protocols used to compute
3182          * the RSS hash of input packets.
3183          */
3184         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3185         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3186                 ixgbe_rss_disable(dev);
3187                 return;
3188         }
3189         if (rss_conf.rss_key == NULL)
3190                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3191         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3192 }
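/*
 * Application-side sketch (editor's example, not part of the driver): an RSS
 * configuration that leads to the code above programming the hash key and the
 * RETA. A NULL rss_key selects the default rss_intel_key; the offload flags,
 * "port_id" and the queue counts are illustrative assumptions.
 *
 *   struct rte_eth_conf port_conf = {
 *           .rxmode = { .mq_mode = ETH_MQ_RX_RSS },
 *           .rx_adv_conf = {
 *                   .rss_conf = {
 *                           .rss_key = NULL,
 *                           .rss_hf  = ETH_RSS_IP | ETH_RSS_TCP | ETH_RSS_UDP,
 *                   },
 *           },
 *   };
 *   rte_eth_dev_configure(port_id, nb_rx_queues, nb_tx_queues, &port_conf);
 */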
3193
3194 #define NUM_VFTA_REGISTERS 128
3195 #define NIC_RX_BUFFER_SIZE 0x200
3196 #define X550_RX_BUFFER_SIZE 0x180
3197
3198 static void
3199 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3200 {
3201         struct rte_eth_vmdq_dcb_conf *cfg;
3202         struct ixgbe_hw *hw;
3203         enum rte_eth_nb_pools num_pools;
3204         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3205         uint16_t pbsize;
3206         uint8_t nb_tcs; /* number of traffic classes */
3207         int i;
3208
3209         PMD_INIT_FUNC_TRACE();
3210         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3211         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3212         num_pools = cfg->nb_queue_pools;
3213         /* Check we have a valid number of pools */
3214         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3215                 ixgbe_rss_disable(dev);
3216                 return;
3217         }
3218         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3219         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3220
3221         /*
3222          * RXPBSIZE
3223          * split rx buffer up into sections, each for 1 traffic class
3224          */
3225         switch (hw->mac.type) {
3226         case ixgbe_mac_X550:
3227         case ixgbe_mac_X550EM_x:
3228         case ixgbe_mac_X550EM_a:
3229                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3230                 break;
3231         default:
3232                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3233                 break;
3234         }
3235         for (i = 0; i < nb_tcs; i++) {
3236                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3237
3238                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3239                 /* clear 10 bits. */
3240                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3241                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3242         }
3243         /* zero alloc all unused TCs */
3244         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3245                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3246
3247                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3248                 /* clear 10 bits. */
3249                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3250         }
3251
3252         /* MRQC: enable vmdq and dcb */
3253         mrqc = (num_pools == ETH_16_POOLS) ?
3254                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3255         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3256
3257         /* PFVTCTL: turn on virtualisation and set the default pool */
3258         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3259         if (cfg->enable_default_pool) {
3260                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3261         } else {
3262                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3263         }
3264
3265         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3266
3267         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3268         queue_mapping = 0;
3269         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3270                 /*
3271                  * mapping is done with 3 bits per priority,
3272                  * so shift by i*3 each time
3273                  */
3274                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3275
3276         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
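        /*
         * Worked example (editor's note): with dcb_tc[] = {0, 0, 0, 0, 1, 1,
         * 1, 1}, priorities 4..7 each contribute (1 << (i * 3)), so
         * queue_mapping = 0x1000 + 0x8000 + 0x40000 + 0x200000 = 0x249000.
         */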
3277
3278         /* RTRPCS: DCB related */
3279         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3280
3281         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3282         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3283         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3284         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3285
3286         /* VFTA - enable all vlan filters */
3287         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3288                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3289         }
3290
3291         /* VFRE: pool enabling for receive - 16 or 32 */
3292         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3293                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3294
3295         /*
3296          * MPSAR - allow pools to read specific mac addresses
3297          * In this case, all pools should be able to read from mac addr 0
3298          */
3299         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3300         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3301
3302         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3303         for (i = 0; i < cfg->nb_pool_maps; i++) {
3304                 /* set vlan id in VF register and set the valid bit */
3305                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3306                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3307                 /*
3308                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3309                  * pools, we only need to use the first half of the register
3310                  * i.e. bits 0-31
3311                  */
3312                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3313         }
3314 }
3315
3316 /**
3317  * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
3318  * @dev: pointer to eth_dev structure
3319  * @dcb_config: pointer to ixgbe_dcb_config structure
3320  */
3321 static void
3322 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3323                        struct ixgbe_dcb_config *dcb_config)
3324 {
3325         uint32_t reg;
3326         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3327
3328         PMD_INIT_FUNC_TRACE();
3329         if (hw->mac.type != ixgbe_mac_82598EB) {
3330                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3331                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3332                 reg |= IXGBE_RTTDCS_ARBDIS;
3333                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3334
3335                 /* Enable DCB for Tx with 8 TCs */
3336                 if (dcb_config->num_tcs.pg_tcs == 8) {
3337                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3338                 } else {
3339                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3340                 }
3341                 if (dcb_config->vt_mode)
3342                         reg |= IXGBE_MTQC_VT_ENA;
3343                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3344
3345                 /* Enable the Tx desc arbiter */
3346                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3347                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3348                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3349
3350                 /* Enable Security TX Buffer IFG for DCB */
3351                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3352                 reg |= IXGBE_SECTX_DCB;
3353                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3354         }
3355 }
3356
3357 /**
3358  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3359  * @dev: pointer to rte_eth_dev structure
3360  * @dcb_config: pointer to ixgbe_dcb_config structure
3361  */
3362 static void
3363 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3364                         struct ixgbe_dcb_config *dcb_config)
3365 {
3366         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3367                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3368         struct ixgbe_hw *hw =
3369                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3370
3371         PMD_INIT_FUNC_TRACE();
3372         if (hw->mac.type != ixgbe_mac_82598EB)
3373                 /* PF VF Transmit Enable */
3374                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3375                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3376
3377         /* Configure general DCB TX parameters */
3378         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3379 }
3380
3381 static void
3382 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3383                         struct ixgbe_dcb_config *dcb_config)
3384 {
3385         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3386                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3387         struct ixgbe_dcb_tc_config *tc;
3388         uint8_t i, j;
3389
3390         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3391         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3392                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3393                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3394         } else {
3395                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3396                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3397         }
3398
3399         /* Initialize User Priority to Traffic Class mapping */
3400         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3401                 tc = &dcb_config->tc_config[j];
3402                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3403         }
3404
3405         /* User Priority to Traffic Class mapping */
3406         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3407                 j = vmdq_rx_conf->dcb_tc[i];
3408                 tc = &dcb_config->tc_config[j];
3409                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3410                                                 (uint8_t)(1 << i);
3411         }
3412 }
3413
3414 static void
3415 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3416                         struct ixgbe_dcb_config *dcb_config)
3417 {
3418         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3419                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3420         struct ixgbe_dcb_tc_config *tc;
3421         uint8_t i, j;
3422
3423         /* convert rte_eth_conf.tx_adv_conf to struct ixgbe_dcb_config */
3424         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3425                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3426                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3427         } else {
3428                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3429                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3430         }
3431
3432         /* Initialize User Priority to Traffic Class mapping */
3433         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3434                 tc = &dcb_config->tc_config[j];
3435                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3436         }
3437
3438         /* User Priority to Traffic Class mapping */
3439         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3440                 j = vmdq_tx_conf->dcb_tc[i];
3441                 tc = &dcb_config->tc_config[j];
3442                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3443                                                 (uint8_t)(1 << i);
3444         }
3445 }
3446
3447 static void
3448 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3449                 struct ixgbe_dcb_config *dcb_config)
3450 {
3451         struct rte_eth_dcb_rx_conf *rx_conf =
3452                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3453         struct ixgbe_dcb_tc_config *tc;
3454         uint8_t i, j;
3455
3456         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3457         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3458
3459         /* Initialize User Priority to Traffic Class mapping */
3460         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3461                 tc = &dcb_config->tc_config[j];
3462                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3463         }
3464
3465         /* User Priority to Traffic Class mapping */
3466         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3467                 j = rx_conf->dcb_tc[i];
3468                 tc = &dcb_config->tc_config[j];
3469                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3470                                                 (uint8_t)(1 << i);
3471         }
3472 }
3473
3474 static void
3475 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3476                 struct ixgbe_dcb_config *dcb_config)
3477 {
3478         struct rte_eth_dcb_tx_conf *tx_conf =
3479                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3480         struct ixgbe_dcb_tc_config *tc;
3481         uint8_t i, j;
3482
3483         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3484         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3485
3486         /* Initialize User Priority to Traffic Class mapping */
3487         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3488                 tc = &dcb_config->tc_config[j];
3489                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3490         }
3491
3492         /* User Priority to Traffic Class mapping */
3493         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3494                 j = tx_conf->dcb_tc[i];
3495                 tc = &dcb_config->tc_config[j];
3496                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3497                                                 (uint8_t)(1 << i);
3498         }
3499 }
3500
3501 /**
3502  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3503  * @dev: pointer to eth_dev structure
3504  * @dcb_config: pointer to ixgbe_dcb_config structure
3505  */
3506 static void
3507 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3508                        struct ixgbe_dcb_config *dcb_config)
3509 {
3510         uint32_t reg;
3511         uint32_t vlanctrl;
3512         uint8_t i;
3513         uint32_t q;
3514         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3515
3516         PMD_INIT_FUNC_TRACE();
3517         /*
3518          * Disable the arbiter before changing parameters
3519          * (always enable recycle mode; WSP)
3520          */
3521         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3522         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3523
3524         if (hw->mac.type != ixgbe_mac_82598EB) {
3525                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3526                 if (dcb_config->num_tcs.pg_tcs == 4) {
3527                         if (dcb_config->vt_mode)
3528                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3529                                         IXGBE_MRQC_VMDQRT4TCEN;
3530                         else {
3531                                 /* whether the mode is DCB or DCB_RSS, just
3532                                  * set MRQE to RTRSSxTCEN. RSS is controlled
3533                                  * by the RSS_FIELD bits
3534                                  */
3535                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3536                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3537                                         IXGBE_MRQC_RTRSS4TCEN;
3538                         }
3539                 }
3540                 if (dcb_config->num_tcs.pg_tcs == 8) {
3541                         if (dcb_config->vt_mode)
3542                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3543                                         IXGBE_MRQC_VMDQRT8TCEN;
3544                         else {
3545                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3546                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3547                                         IXGBE_MRQC_RTRSS8TCEN;
3548                         }
3549                 }
3550
3551                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3552
3553                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3554                         /* Disable drop for all queues in VMDQ mode */
3555                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3556                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3557                                                 (IXGBE_QDE_WRITE |
3558                                                  (q << IXGBE_QDE_IDX_SHIFT)));
3559                 } else {
3560                         /* Enable drop for all queues in SRIOV mode */
3561                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3562                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3563                                                 (IXGBE_QDE_WRITE |
3564                                                  (q << IXGBE_QDE_IDX_SHIFT) |
3565                                                  IXGBE_QDE_ENABLE));
3566                 }
3567         }
3568
3569         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3570         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3571         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3572         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3573
3574         /* VFTA - enable all vlan filters */
3575         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3576                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3577         }
3578
3579         /*
3580          * Configure Rx packet plane (recycle mode; WSP) and
3581          * enable arbiter
3582          */
3583         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3584         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3585 }
3586
3587 static void
3588 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3589                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3590 {
3591         switch (hw->mac.type) {
3592         case ixgbe_mac_82598EB:
3593                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3594                 break;
3595         case ixgbe_mac_82599EB:
3596         case ixgbe_mac_X540:
3597         case ixgbe_mac_X550:
3598         case ixgbe_mac_X550EM_x:
3599         case ixgbe_mac_X550EM_a:
3600                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3601                                                   tsa, map);
3602                 break;
3603         default:
3604                 break;
3605         }
3606 }
3607
3608 static void
3609 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3610                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3611 {
3612         switch (hw->mac.type) {
3613         case ixgbe_mac_82598EB:
3614                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3615                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3616                 break;
3617         case ixgbe_mac_82599EB:
3618         case ixgbe_mac_X540:
3619         case ixgbe_mac_X550:
3620         case ixgbe_mac_X550EM_x:
3621         case ixgbe_mac_X550EM_a:
3622                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3623                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3624                 break;
3625         default:
3626                 break;
3627         }
3628 }
3629
3630 #define DCB_RX_CONFIG  1
3631 #define DCB_TX_CONFIG  1
3632 #define DCB_TX_PB      1024
3633 /**
3634  * ixgbe_dcb_hw_configure - Enable DCB and configure
3635  * general DCB in VT mode and non-VT mode parameters
3636  * @dev: pointer to rte_eth_dev structure
3637  * @dcb_config: pointer to ixgbe_dcb_config structure
3638  */
3639 static int
3640 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3641                         struct ixgbe_dcb_config *dcb_config)
3642 {
3643         int     ret = 0;
3644         uint8_t i, pfc_en, nb_tcs;
3645         uint16_t pbsize, rx_buffer_size;
3646         uint8_t config_dcb_rx = 0;
3647         uint8_t config_dcb_tx = 0;
3648         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3649         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3650         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3651         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3652         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3653         struct ixgbe_dcb_tc_config *tc;
3654         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3655         struct ixgbe_hw *hw =
3656                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3657
3658         switch (dev->data->dev_conf.rxmode.mq_mode) {
3659         case ETH_MQ_RX_VMDQ_DCB:
3660                 dcb_config->vt_mode = true;
3661                 if (hw->mac.type != ixgbe_mac_82598EB) {
3662                         config_dcb_rx = DCB_RX_CONFIG;
3663                         /*
3664                          * get DCB and VT RX configuration parameters
3665                          * from rte_eth_conf
3666                          */
3667                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3668                         /* Configure general VMDQ and DCB RX parameters */
3669                         ixgbe_vmdq_dcb_configure(dev);
3670                 }
3671                 break;
3672         case ETH_MQ_RX_DCB:
3673         case ETH_MQ_RX_DCB_RSS:
3674                 dcb_config->vt_mode = false;
3675                 config_dcb_rx = DCB_RX_CONFIG;
3676                 /* Get DCB RX configuration parameters from rte_eth_conf */
3677                 ixgbe_dcb_rx_config(dev, dcb_config);
3678                 /* Configure general DCB RX parameters */
3679                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
3680                 break;
3681         default:
3682                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3683                 break;
3684         }
3685         switch (dev->data->dev_conf.txmode.mq_mode) {
3686         case ETH_MQ_TX_VMDQ_DCB:
3687                 dcb_config->vt_mode = true;
3688                 config_dcb_tx = DCB_TX_CONFIG;
3689                 /* Get DCB and VT TX configuration parameters
3690                  * from rte_eth_conf.
3691                  */
3692                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3693                 /* Configure general VMDQ and DCB TX parameters */
3694                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3695                 break;
3696
3697         case ETH_MQ_TX_DCB:
3698                 dcb_config->vt_mode = false;
3699                 config_dcb_tx = DCB_TX_CONFIG;
3700                 /* Get DCB TX configuration parameters from rte_eth_conf */
3701                 ixgbe_dcb_tx_config(dev, dcb_config);
3702                 /* Configure general DCB TX parameters */
3703                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3704                 break;
3705         default:
3706                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3707                 break;
3708         }
3709
3710         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3711         /* Unpack map */
3712         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3713         if (nb_tcs == ETH_4_TCS) {
3714                 /* Avoid un-configured priority mapping to TC0 */
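                /*
                 * Illustrative note: the mask below collects the TCs that are
                 * not referenced by user priorities 0-3; the second loop then
                 * re-assigns priorities 4-7 (which would otherwise stay on the
                 * default TC0) to those unused TC indexes in ascending order.
                 */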
3715                 uint8_t j = 4;
3716                 uint8_t mask = 0xFF;
3717
3718                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3719                         mask = (uint8_t)(mask & (~(1 << map[i])));
3720                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3721                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3722                                 map[j++] = i;
3723                         mask >>= 1;
3724                 }
3725                 /* Re-configure 4 TCs BW */
3726                 for (i = 0; i < nb_tcs; i++) {
3727                         tc = &dcb_config->tc_config[i];
3728                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3729                                                 (uint8_t)(100 / nb_tcs);
3730                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3731                                                 (uint8_t)(100 / nb_tcs);
3732                 }
3733                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3734                         tc = &dcb_config->tc_config[i];
3735                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3736                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3737                 }
3738         } else {
3739                 /* Re-configure 8 TCs BW */
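                /*
                 * Illustrative note: with 8 TCs, 100 / 8 = 12, so the (i & 1)
                 * term below alternates 12% and 13% shares, which sum to 100%.
                 */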
3740                 for (i = 0; i < nb_tcs; i++) {
3741                         tc = &dcb_config->tc_config[i];
3742                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3743                                 (uint8_t)(100 / nb_tcs + (i & 1));
3744                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3745                                 (uint8_t)(100 / nb_tcs + (i & 1));
3746                 }
3747         }
3748
3749         switch (hw->mac.type) {
3750         case ixgbe_mac_X550:
3751         case ixgbe_mac_X550EM_x:
3752         case ixgbe_mac_X550EM_a:
3753                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3754                 break;
3755         default:
3756                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3757                 break;
3758         }
3759
3760         if (config_dcb_rx) {
3761                 /* Set RX buffer size */
3762                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3763                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3764
3765                 for (i = 0; i < nb_tcs; i++) {
3766                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3767                 }
3768                 /* zero alloc all unused TCs */
3769                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3770                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3771                 }
3772         }
3773         if (config_dcb_tx) {
3774                 /* Only an equally distributed Tx packet buffer
3775                  * strategy is supported.
3776                  */
3777                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3778                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3779
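                /*
                 * Illustrative note: DCB_TX_PB (1024) converts the per-TC
                 * packet buffer size from bytes to KB; the threshold written
                 * to TXPBTHRESH is that size in KB minus the maximum packet
                 * size (IXGBE_TXPKT_SIZE_MAX, assumed to be in KB units).
                 */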
3780                 for (i = 0; i < nb_tcs; i++) {
3781                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3782                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3783                 }
3784                 /* Clear unused TCs, if any, to zero buffer size*/
3785                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3786                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3787                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3788                 }
3789         }
3790
3791         /* Calculate traffic class credits */
3792         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3793                                 IXGBE_DCB_TX_CONFIG);
3794         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3795                                 IXGBE_DCB_RX_CONFIG);
3796
3797         if (config_dcb_rx) {
3798                 /* Unpack CEE standard containers */
3799                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3800                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3801                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3802                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3803                 /* Configure PG(ETS) RX */
3804                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3805         }
3806
3807         if (config_dcb_tx) {
3808                 /* Unpack CEE standard containers */
3809                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3810                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3811                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3812                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3813                 /* Configure PG(ETS) TX */
3814                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3815         }
3816
3817         /* Configure queue statistics registers */
3818         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3819
3820         /* Check if the PFC is supported */
3821         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3822                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3823                 for (i = 0; i < nb_tcs; i++) {
3824                         /*
3825                          * high_water defaults to 3/4 of the per-TC buffer and
3826                          * low_water to 1/4, e.g. 48 KB and 16 KB with 8 TCs.
3827                          */
3828                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3829                         hw->fc.low_water[i] = pbsize / 4;
3830                         /* Enable pfc for this TC */
3831                         tc = &dcb_config->tc_config[i];
3832                         tc->pfc = ixgbe_dcb_pfc_enabled;
3833                 }
3834                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3835                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3836                         pfc_en &= 0x0F;
3837                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3838         }
3839
3840         return ret;
3841 }
3842
3843 /**
3844  * ixgbe_configure_dcb - Configure DCB hardware
3845  * @dev: pointer to rte_eth_dev
3846  */
3847 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3848 {
3849         struct ixgbe_dcb_config *dcb_cfg =
3850                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3851         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3852
3853         PMD_INIT_FUNC_TRACE();
3854
3855         /* Check that the mq_mode supports DCB */
3856         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3857             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3858             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3859                 return;
3860
3861         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
3862                 return;
3863
3864         /* Configure DCB hardware */
3865         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3866 }
3867
3868 /*
3869  * VMDq is only supported on 10 GbE NICs.
3870  */
3871 static void
3872 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3873 {
3874         struct rte_eth_vmdq_rx_conf *cfg;
3875         struct ixgbe_hw *hw;
3876         enum rte_eth_nb_pools num_pools;
3877         uint32_t mrqc, vt_ctl, vlanctrl;
3878         uint32_t vmolr = 0;
3879         int i;
3880
3881         PMD_INIT_FUNC_TRACE();
3882         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3883         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3884         num_pools = cfg->nb_queue_pools;
3885
3886         ixgbe_rss_disable(dev);
3887
3888         /* MRQC: enable vmdq */
3889         mrqc = IXGBE_MRQC_VMDQEN;
3890         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3891
3892         /* PFVTCTL: turn on virtualisation and set the default pool */
3893         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3894         if (cfg->enable_default_pool)
3895                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3896         else
3897                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3898
3899         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3900
3901         for (i = 0; i < (int)num_pools; i++) {
3902                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3903                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3904         }
3905
3906         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3907         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3908         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3909         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3910
3911         /* VFTA - enable all vlan filters */
3912         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3913                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3914
3915         /* VFRE: enable receive for all pools; two registers cover up to 64 pools */
3916         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3917         if (num_pools == ETH_64_POOLS)
3918                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3919
3920         /*
3921          * MPSAR - allow pools to read specific mac addresses
3922          * In this case, all pools should be able to read from mac addr 0
3923          */
3924         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3925         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3926
3927         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3928         for (i = 0; i < cfg->nb_pool_maps; i++) {
3929                 /* set vlan id in VF register and set the valid bit */
3930                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3931                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3932                 /*
3933                  * Put the allowed pools into the VLVFB registers. Each VLAN
3934                  * filter has two 32-bit pool-enable registers; write the
3935                  * half that actually contains the configured pool bits.
3936                  */
3937                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3938                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
3939                                         (cfg->pool_map[i].pools & UINT32_MAX));
3940                 else
3941                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
3942                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
3943
3944         }
3945
3946         /* PFDMA Tx General Switch Control: enable VMDq loopback if requested */
3947         if (cfg->enable_loop_back) {
3948                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3949                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3950                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3951         }
3952
3953         IXGBE_WRITE_FLUSH(hw);
3954 }
3955
3956 /*
3957  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3958  * @hw: pointer to hardware structure
3959  */
3960 static void
3961 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3962 {
3963         uint32_t reg;
3964         uint32_t q;
3965
3966         PMD_INIT_FUNC_TRACE();
3967         /* PF/VF Transmit Enable */
3968         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3969         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3970
3971         /* Disable the Tx desc arbiter so that MTQC can be changed */
3972         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3973         reg |= IXGBE_RTTDCS_ARBDIS;
3974         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3975
3976         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3977         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3978
3979         /* Disable drop for all queues */
3980         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3981                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3982                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3983
3984         /* Enable the Tx desc arbiter */
3985         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3986         reg &= ~IXGBE_RTTDCS_ARBDIS;
3987         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3988
3989         IXGBE_WRITE_FLUSH(hw);
3990 }
3991
3992 static int __attribute__((cold))
3993 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
3994 {
3995         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
3996         uint64_t dma_addr;
3997         unsigned int i;
3998
3999         /* Initialize software ring entries */
4000         for (i = 0; i < rxq->nb_rx_desc; i++) {
4001                 volatile union ixgbe_adv_rx_desc *rxd;
4002                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4003
4004                 if (mbuf == NULL) {
4005                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4006                                      (unsigned) rxq->queue_id);
4007                         return -ENOMEM;
4008                 }
4009
4010                 rte_mbuf_refcnt_set(mbuf, 1);
4011                 mbuf->next = NULL;
4012                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4013                 mbuf->nb_segs = 1;
4014                 mbuf->port = rxq->port_id;
4015
4016                 dma_addr =
4017                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
4018                 rxd = &rxq->rx_ring[i];
4019                 rxd->read.hdr_addr = 0;
4020                 rxd->read.pkt_addr = dma_addr;
4021                 rxe[i].mbuf = mbuf;
4022         }
4023
4024         return 0;
4025 }
4026
4027 static int
4028 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4029 {
4030         struct ixgbe_hw *hw;
4031         uint32_t mrqc;
4032
4033         ixgbe_rss_configure(dev);
4034
4035         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4036
4037         /* MRQC: enable VF RSS */
4038         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4039         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4040         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4041         case ETH_64_POOLS:
4042                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4043                 break;
4044
4045         case ETH_32_POOLS:
4046                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4047                 break;
4048
4049         default:
4050                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4051                 return -EINVAL;
4052         }
4053
4054         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4055
4056         return 0;
4057 }
4058
4059 static int
4060 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4061 {
4062         struct ixgbe_hw *hw =
4063                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4064
4065         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4066         case ETH_64_POOLS:
4067                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4068                         IXGBE_MRQC_VMDQEN);
4069                 break;
4070
4071         case ETH_32_POOLS:
4072                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4073                         IXGBE_MRQC_VMDQRT4TCEN);
4074                 break;
4075
4076         case ETH_16_POOLS:
4077                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4078                         IXGBE_MRQC_VMDQRT8TCEN);
4079                 break;
4080         default:
4081                 PMD_INIT_LOG(ERR,
4082                         "invalid pool number in IOV mode");
4083                 break;
4084         }
4085         return 0;
4086 }
4087
4088 static int
4089 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4090 {
4091         struct ixgbe_hw *hw =
4092                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4093
4094         if (hw->mac.type == ixgbe_mac_82598EB)
4095                 return 0;
4096
4097         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4098                 /*
4099                  * SRIOV inactive scheme
4100                  * any DCB/RSS w/o VMDq multi-queue setting
4101                  */
4102                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4103                 case ETH_MQ_RX_RSS:
4104                 case ETH_MQ_RX_DCB_RSS:
4105                 case ETH_MQ_RX_VMDQ_RSS:
4106                         ixgbe_rss_configure(dev);
4107                         break;
4108
4109                 case ETH_MQ_RX_VMDQ_DCB:
4110                         ixgbe_vmdq_dcb_configure(dev);
4111                         break;
4112
4113                 case ETH_MQ_RX_VMDQ_ONLY:
4114                         ixgbe_vmdq_rx_hw_configure(dev);
4115                         break;
4116
4117                 case ETH_MQ_RX_NONE:
4118                 default:
4119                         /* if mq_mode is none, disable rss mode.*/
4120                         /* If mq_mode is none, disable RSS. */
4121                         break;
4122                 }
4123         } else {
4124                 /* SRIOV active scheme
4125                  * Support RSS together with SRIOV.
4126                  */
4127                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4128                 case ETH_MQ_RX_RSS:
4129                 case ETH_MQ_RX_VMDQ_RSS:
4130                         ixgbe_config_vf_rss(dev);
4131                         break;
4132                 case ETH_MQ_RX_VMDQ_DCB:
4133                 case ETH_MQ_RX_DCB:
4134                 /* In SRIOV, the configuration is the same as VMDq case */
4135                         ixgbe_vmdq_dcb_configure(dev);
4136                         break;
4137                 /* DCB/RSS together with SRIOV is not supported */
4138                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4139                 case ETH_MQ_RX_DCB_RSS:
4140                         PMD_INIT_LOG(ERR,
4141                                 "DCB/RSS is not supported with VMDq & SRIOV");
4142                         return -1;
4143                 default:
4144                         ixgbe_config_vf_default(dev);
4145                         break;
4146                 }
4147         }
4148
4149         return 0;
4150 }
4151
4152 static int
4153 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4154 {
4155         struct ixgbe_hw *hw =
4156                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4157         uint32_t mtqc;
4158         uint32_t rttdcs;
4159
4160         if (hw->mac.type == ixgbe_mac_82598EB)
4161                 return 0;
4162
4163         /* disable arbiter before setting MTQC */
4164         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4165         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4166         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4167
4168         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4169                 /*
4170                  * SRIOV inactive scheme
4171                  * any DCB w/o VMDq multi-queue setting
4172                  */
4173                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4174                         ixgbe_vmdq_tx_hw_configure(hw);
4175                 else {
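                        /*
                         * Default: MTQC_64Q_1PB selects 64 Tx queues over a
                         * single packet buffer, i.e. no VT and no DCB.
                         */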
4176                         mtqc = IXGBE_MTQC_64Q_1PB;
4177                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4178                 }
4179         } else {
4180                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4181
4182                 /*
4183                  * SRIOV active scheme
4184                  * FIXME if support DCB together with VMDq & SRIOV
4185                  */
4186                 case ETH_64_POOLS:
4187                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4188                         break;
4189                 case ETH_32_POOLS:
4190                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4191                         break;
4192                 case ETH_16_POOLS:
4193                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4194                                 IXGBE_MTQC_8TC_8TQ;
4195                         break;
4196                 default:
4197                         mtqc = IXGBE_MTQC_64Q_1PB;
4198                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4199                 }
4200                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4201         }
4202
4203         /* re-enable arbiter */
4204         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4205         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4206
4207         return 0;
4208 }
4209
4210 /**
4211  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4212  *
4213  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4214  * spec rev. 3.0 chapter 8.2.3.8.13.
4215  *
4216  * @pool Memory pool of the Rx queue
4217  */
4218 static inline uint32_t
4219 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4220 {
4221         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4222
4223         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4224         uint16_t maxdesc =
4225                 IPV4_MAX_PKT_LEN /
4226                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4227
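        /*
         * Illustrative example (assuming the common 2 KB data room left after
         * the headroom): maxdesc = 65535 / 2048 = 31, which is capped below to
         * the largest supported setting, IXGBE_RSCCTL_MAXDESC_16.
         */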
4228         if (maxdesc >= 16)
4229                 return IXGBE_RSCCTL_MAXDESC_16;
4230         else if (maxdesc >= 8)
4231                 return IXGBE_RSCCTL_MAXDESC_8;
4232         else if (maxdesc >= 4)
4233                 return IXGBE_RSCCTL_MAXDESC_4;
4234         else
4235                 return IXGBE_RSCCTL_MAXDESC_1;
4236 }
4237
4238 /**
4239  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4240  * interrupt
4241  *
4242  * (Taken from FreeBSD tree)
4243  * (yes this is all very magic and confusing :)
4244  *
4245  * @dev port handle
4246  * @entry the register array entry
4247  * @vector the MSIX vector for this queue
4248  * @type RX/TX/MISC
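 *
 * Illustrative example: on 82599, RX queue entry 5 with vector 3 maps to
 * IVAR(5 >> 1) = IVAR(2) at byte offset 16 * (5 & 1) + 8 * 0 = 16, so bits
 * 23:16 receive (3 | IXGBE_IVAR_ALLOC_VAL).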
4249  */
4250 static void
4251 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4252 {
4253         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4254         u32 ivar, index;
4255
4256         vector |= IXGBE_IVAR_ALLOC_VAL;
4257
4258         switch (hw->mac.type) {
4259
4260         case ixgbe_mac_82598EB:
4261                 if (type == -1)
4262                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4263                 else
4264                         entry += (type * 64);
4265                 index = (entry >> 2) & 0x1F;
4266                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4267                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4268                 ivar |= (vector << (8 * (entry & 0x3)));
4269                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4270                 break;
4271
4272         case ixgbe_mac_82599EB:
4273         case ixgbe_mac_X540:
4274                 if (type == -1) { /* MISC IVAR */
4275                         index = (entry & 1) * 8;
4276                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4277                         ivar &= ~(0xFF << index);
4278                         ivar |= (vector << index);
4279                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4280                 } else {        /* RX/TX IVARS */
4281                         index = (16 * (entry & 1)) + (8 * type);
4282                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4283                         ivar &= ~(0xFF << index);
4284                         ivar |= (vector << index);
4285                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4286                 }
4287
4288                 break;
4289
4290         default:
4291                 break;
4292         }
4293 }
4294
4295 void __attribute__((cold))
4296 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4297 {
4298         uint16_t i, rx_using_sse;
4299         struct ixgbe_adapter *adapter =
4300                 (struct ixgbe_adapter *)dev->data->dev_private;
4301
4302         /*
4303          * In order to allow Vector Rx there are a few configuration
4304          * conditions to be met and Rx Bulk Allocation should be allowed.
4305          */
4306         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4307             !adapter->rx_bulk_alloc_allowed) {
4308                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4309                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4310                                     "not enabled",
4311                              dev->data->port_id);
4312
4313                 adapter->rx_vec_allowed = false;
4314         }
4315
4316         /*
4317          * Initialize the appropriate LRO callback.
4318          *
4319          * If all queues satisfy the bulk allocation preconditions
4320          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4321          * Otherwise use a single allocation version.
4322          */
4323         if (dev->data->lro) {
4324                 if (adapter->rx_bulk_alloc_allowed) {
4325                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4326                                            "allocation version");
4327                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4328                 } else {
4329                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4330                                            "allocation version");
4331                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4332                 }
4333         } else if (dev->data->scattered_rx) {
4334                 /*
4335                  * Set the non-LRO scattered callback: there are Vector and
4336                  * single allocation versions.
4337                  */
4338                 if (adapter->rx_vec_allowed) {
4339                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4340                                             "callback (port=%d).",
4341                                      dev->data->port_id);
4342
4343                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4344                 } else if (adapter->rx_bulk_alloc_allowed) {
4345                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4346                                            "allocation callback (port=%d).",
4347                                      dev->data->port_id);
4348                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4349                 } else {
4350                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4351                                             "single allocation) "
4352                                             "Scattered Rx callback "
4353                                             "(port=%d).",
4354                                      dev->data->port_id);
4355
4356                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4357                 }
4358         /*
4359          * Below we set "simple" callbacks according to port/queues parameters.
4360          * If parameters allow we are going to choose between the following
4361          * callbacks:
4362          *    - Vector
4363          *    - Bulk Allocation
4364          *    - Single buffer allocation (the simplest one)
4365          */
4366         } else if (adapter->rx_vec_allowed) {
4367                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4368                                     "burst size is no less than %d (port=%d).",
4369                              RTE_IXGBE_DESCS_PER_LOOP,
4370                              dev->data->port_id);
4371
4372                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4373         } else if (adapter->rx_bulk_alloc_allowed) {
4374                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4375                                     "satisfied. Rx Burst Bulk Alloc function "
4376                                     "will be used on port=%d.",
4377                              dev->data->port_id);
4378
4379                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4380         } else {
4381                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4382                                     "satisfied, or Scattered Rx is requested "
4383                                     "(port=%d).",
4384                              dev->data->port_id);
4385
4386                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4387         }
4388
4389         /* Propagate information about RX function choice through all queues. */
4390
4391         rx_using_sse =
4392                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4393                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4394
4395         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4396                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4397
4398                 rxq->rx_using_sse = rx_using_sse;
4399         }
4400 }
4401
4402 /**
4403  * ixgbe_set_rsc - configure RSC related port HW registers
4404  *
4405  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4406  * of 82599 Spec (x540 configuration is virtually the same).
4407  *
4408  * @dev port handle
4409  *
4410  * Returns 0 in case of success or a non-zero error code
4411  */
4412 static int
4413 ixgbe_set_rsc(struct rte_eth_dev *dev)
4414 {
4415         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4416         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4417         struct rte_eth_dev_info dev_info = { 0 };
4418         bool rsc_capable = false;
4419         uint16_t i;
4420         uint32_t rdrxctl;
4421
4422         /* Sanity check */
4423         dev->dev_ops->dev_infos_get(dev, &dev_info);
4424         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4425                 rsc_capable = true;
4426
4427         if (!rsc_capable && rx_conf->enable_lro) {
4428                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4429                                    "support it");
4430                 return -EINVAL;
4431         }
4432
4433         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4434
4435         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4436                 /*
4437                  * According to chapter of 4.6.7.2.1 of the Spec Rev.
4438                  * 3.0 RSC configuration requires HW CRC stripping being
4439                  * enabled. If user requested both HW CRC stripping off
4440                  * and RSC on - return an error.
4441                  */
4442                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4443                                     "is disabled");
4444                 return -EINVAL;
4445         }
4446
4447         /* RFCTL configuration  */
4448         if (rsc_capable) {
4449                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4450
4451                 if (rx_conf->enable_lro)
4452                         /*
4453                          * Since NFS packets coalescing is not supported - clear
4454                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4455                          * enabled.
4456                          */
4457                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4458                                    IXGBE_RFCTL_NFSR_DIS);
4459                 else
4460                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4461
4462                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4463         }
4464
4465         /* If LRO hasn't been requested - we are done here. */
4466         if (!rx_conf->enable_lro)
4467                 return 0;
4468
4469         /* Set RDRXCTL.RSCACKC bit */
4470         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4471         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4472         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4473
4474         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4475         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4476                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4477                 uint32_t srrctl =
4478                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4479                 uint32_t rscctl =
4480                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4481                 uint32_t psrtype =
4482                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4483                 uint32_t eitr =
4484                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4485
4486                 /*
4487                  * ixgbe PMD doesn't support header-split at the moment.
4488                  *
4489                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4490                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4491                  * should be configured even if header split is not
4492                  * enabled. We will configure it to 128 bytes following the
4493                  * recommendation in the spec.
4494                  */
4495                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4496                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4497                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4498
4499                 /*
4500                  * TODO: Consider setting the Receive Descriptor Minimum
4501                  * Threshold Size for an RSC case. This is not an obviously
4502                  * beneficial option, but one worth considering...
4503                  */
4504
4505                 rscctl |= IXGBE_RSCCTL_RSCEN;
4506                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4507                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4508
4509                 /*
4510                  * RSC: Set ITR interval corresponding to 2K ints/s.
4511                  *
4512                  * Full-sized RSC aggregations for a 10Gb/s link will
4513                  * arrive at about 20K aggregation/s rate.
4514                  * arrive at about a 20K aggregations/s rate.
4515                  * A 2K ints/s rate will make only 10% of the
4516                  * aggregations to be closed due to the interrupt timer
4517                  * expiration for a streaming at wire-speed case.
4518                  *
4519                  * For a sparse streaming case this setting will yield
4520                  * at most 500us latency for a single RSC aggregation.
4521                  */
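                /*
                 * IXGBE_EITR_INTERVAL_US(500) programs a 500 us interval,
                 * i.e. roughly the 2K interrupts/s mentioned above.
                 */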
4522                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4523                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4524
4525                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4526                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4527                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4528                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4529
4530                 /*
4531                  * RSC requires the mapping of the queue to the
4532                  * interrupt vector.
4533                  */
4534                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4535         }
4536
4537         dev->data->lro = 1;
4538
4539         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4540
4541         return 0;
4542 }
4543
4544 /*
4545  * Initializes Receive Unit.
4546  */
4547 int __attribute__((cold))
4548 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4549 {
4550         struct ixgbe_hw     *hw;
4551         struct ixgbe_rx_queue *rxq;
4552         uint64_t bus_addr;
4553         uint32_t rxctrl;
4554         uint32_t fctrl;
4555         uint32_t hlreg0;
4556         uint32_t maxfrs;
4557         uint32_t srrctl;
4558         uint32_t rdrxctl;
4559         uint32_t rxcsum;
4560         uint16_t buf_size;
4561         uint16_t i;
4562         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4563         int rc;
4564
4565         PMD_INIT_FUNC_TRACE();
4566         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4567
4568         /*
4569          * Make sure receives are disabled while setting
4570          * up the RX context (registers, descriptor rings, etc.).
4571          */
4572         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4573         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4574
4575         /* Enable receipt of broadcast frames */
4576         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4577         fctrl |= IXGBE_FCTRL_BAM;
4578         fctrl |= IXGBE_FCTRL_DPF;
4579         fctrl |= IXGBE_FCTRL_PMCF;
4580         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4581
4582         /*
4583          * Configure CRC stripping, if any.
4584          */
4585         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4586         if (rx_conf->hw_strip_crc)
4587                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4588         else
4589                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4590
4591         /*
4592          * Configure jumbo frame support, if any.
4593          */
4594         if (rx_conf->jumbo_frame == 1) {
4595                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
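                /*
                 * The maximum frame size (MFS) lives in the upper 16 bits of
                 * MAXFRS; keep the reserved lower half intact.
                 */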
4596                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4597                 maxfrs &= 0x0000FFFF;
4598                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4599                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4600         } else
4601                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4602
4603         /*
4604          * If loopback mode is configured for 82599, set LPBK bit.
4605          */
4606         if (hw->mac.type == ixgbe_mac_82599EB &&
4607                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4608                 hlreg0 |= IXGBE_HLREG0_LPBK;
4609         else
4610                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4611
4612         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4613
4614         /* Setup RX queues */
4615         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4616                 rxq = dev->data->rx_queues[i];
4617
4618                 /*
4619                  * Reset crc_len in case it was changed after queue setup by a
4620                  * call to configure.
4621                  */
4622                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4623
4624                 /* Setup the Base and Length of the Rx Descriptor Rings */
4625                 bus_addr = rxq->rx_ring_phys_addr;
4626                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4627                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4628                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4629                                 (uint32_t)(bus_addr >> 32));
4630                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4631                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4632                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4633                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4634
4635                 /* Configure the SRRCTL register */
4636 #ifdef RTE_HEADER_SPLIT_ENABLE
4637                 /*
4638                  * Configure Header Split
4639                  */
4640                 if (rx_conf->header_split) {
4641                         if (hw->mac.type == ixgbe_mac_82599EB) {
4642                                 /* Must setup the PSRTYPE register */
4643                                 uint32_t psrtype;
4644
4645                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4646                                         IXGBE_PSRTYPE_UDPHDR   |
4647                                         IXGBE_PSRTYPE_IPV4HDR  |
4648                                         IXGBE_PSRTYPE_IPV6HDR;
4649                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4650                         }
4651                         srrctl = ((rx_conf->split_hdr_size <<
4652                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4653                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4654                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4655                 } else
4656 #endif
4657                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4658
4659                 /* Set DROP_EN to drop packets when no descriptors are available */
4660                 if (rxq->drop_en)
4661                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4662
4663                 /*
4664                  * Configure the RX buffer size in the BSIZEPACKET field of
4665                  * the SRRCTL register of the queue.
4666                  * The value is in 1 KB resolution. Valid values can be from
4667                  * 1 KB to 16 KB.
4668                  */
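                /*
                 * Illustrative example (assuming a 2 KB data room after the
                 * headroom): 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT (10) = 2,
                 * i.e. a 2 KB BSIZEPACKET setting.
                 */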
4669                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4670                         RTE_PKTMBUF_HEADROOM);
4671                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4672                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4673
4674                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4675
4676                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4677                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4678
4679                 /* Account for dual VLAN (QinQ) tag length when sizing the buffer */
4680                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4681                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4682                         dev->data->scattered_rx = 1;
4683         }
4684
4685         if (rx_conf->enable_scatter)
4686                 dev->data->scattered_rx = 1;
4687
4688         /*
4689          * Device configured with multiple RX queues.
4690          */
4691         ixgbe_dev_mq_rx_configure(dev);
4692
4693         /*
4694          * Setup the Checksum Register.
4695          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4696          * Enable IP/L4 checksum computation by hardware if requested to do so.
4697          */
4698         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4699         rxcsum |= IXGBE_RXCSUM_PCSD;
4700         if (rx_conf->hw_ip_checksum)
4701                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4702         else
4703                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4704
4705         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4706
4707         if (hw->mac.type == ixgbe_mac_82599EB ||
4708             hw->mac.type == ixgbe_mac_X540) {
4709                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4710                 if (rx_conf->hw_strip_crc)
4711                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4712                 else
4713                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4714                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4715                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4716         }
4717
4718         rc = ixgbe_set_rsc(dev);
4719         if (rc)
4720                 return rc;
4721
4722         ixgbe_set_rx_function(dev);
4723
4724         return 0;
4725 }
4726
4727 /*
4728  * Initializes Transmit Unit.
4729  */
4730 void __attribute__((cold))
4731 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4732 {
4733         struct ixgbe_hw     *hw;
4734         struct ixgbe_tx_queue *txq;
4735         uint64_t bus_addr;
4736         uint32_t hlreg0;
4737         uint32_t txctrl;
4738         uint16_t i;
4739
4740         PMD_INIT_FUNC_TRACE();
4741         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4742
4743         /* Enable TX CRC (checksum offload requirement) and hw padding
4744          * (TSO requirement)
4745          */
4746         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4747         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4748         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4749
4750         /* Setup the Base and Length of the Tx Descriptor Rings */
4751         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4752                 txq = dev->data->tx_queues[i];
4753
4754                 bus_addr = txq->tx_ring_phys_addr;
4755                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4756                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4757                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4758                                 (uint32_t)(bus_addr >> 32));
4759                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4760                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4761                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4762                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4763                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4764
4765                 /*
4766                  * Disable Tx Head Writeback RO bit, since this hoses
4767                  * bookkeeping if things aren't delivered in order.
4768                  */
4769                 switch (hw->mac.type) {
4770                 case ixgbe_mac_82598EB:
4771                         txctrl = IXGBE_READ_REG(hw,
4772                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4773                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4774                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4775                                         txctrl);
4776                         break;
4777
4778                 case ixgbe_mac_82599EB:
4779                 case ixgbe_mac_X540:
4780                 case ixgbe_mac_X550:
4781                 case ixgbe_mac_X550EM_x:
4782                 case ixgbe_mac_X550EM_a:
4783                 default:
4784                         txctrl = IXGBE_READ_REG(hw,
4785                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4786                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4787                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4788                                         txctrl);
4789                         break;
4790                 }
4791         }
4792
4793         /* Device configured with multiple TX queues. */
4794         ixgbe_dev_mq_tx_configure(dev);
4795 }
4796
4797 /*
4798  * Set up link for 82599 loopback mode Tx->Rx.
4799  */
4800 static inline void __attribute__((cold))
4801 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4802 {
4803         PMD_INIT_FUNC_TRACE();
4804
4805         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4806                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4807                                 IXGBE_SUCCESS) {
4808                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4809                         /* ignore error */
4810                         return;
4811                 }
4812         }
4813
4814         /* Restart link */
4815         IXGBE_WRITE_REG(hw,
4816                         IXGBE_AUTOC,
4817                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4818         ixgbe_reset_pipeline_82599(hw);
4819
4820         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4821         msec_delay(50);
4822 }
4823
4824
4825 /*
4826  * Start Transmit and Receive Units.
4827  */
4828 int __attribute__((cold))
4829 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4830 {
4831         struct ixgbe_hw     *hw;
4832         struct ixgbe_tx_queue *txq;
4833         struct ixgbe_rx_queue *rxq;
4834         uint32_t txdctl;
4835         uint32_t dmatxctl;
4836         uint32_t rxctrl;
4837         uint16_t i;
4838         int ret = 0;
4839
4840         PMD_INIT_FUNC_TRACE();
4841         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4842
4843         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4844                 txq = dev->data->tx_queues[i];
4845                 /* Setup Transmit Threshold Registers */
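                /*
                 * TXDCTL layout: PTHRESH in bits 6:0, HTHRESH in bits 14:8 and
                 * WTHRESH in bits 22:16 (7 bits each), hence the 0x7F masks
                 * and the 8/16-bit shifts below.
                 */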
4846                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4847                 txdctl |= txq->pthresh & 0x7F;
4848                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4849                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4850                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4851         }
4852
4853         if (hw->mac.type != ixgbe_mac_82598EB) {
4854                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4855                 dmatxctl |= IXGBE_DMATXCTL_TE;
4856                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4857         }
4858
4859         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4860                 txq = dev->data->tx_queues[i];
4861                 if (!txq->tx_deferred_start) {
4862                         ret = ixgbe_dev_tx_queue_start(dev, i);
4863                         if (ret < 0)
4864                                 return ret;
4865                 }
4866         }
4867
4868         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4869                 rxq = dev->data->rx_queues[i];
4870                 if (!rxq->rx_deferred_start) {
4871                         ret = ixgbe_dev_rx_queue_start(dev, i);
4872                         if (ret < 0)
4873                                 return ret;
4874                 }
4875         }
4876
4877         /* Enable Receive engine */
4878         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4879         if (hw->mac.type == ixgbe_mac_82598EB)
4880                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4881         rxctrl |= IXGBE_RXCTRL_RXEN;
4882         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4883
4884         /* If loopback mode is enabled for 82599, set up the link accordingly */
4885         if (hw->mac.type == ixgbe_mac_82599EB &&
4886                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4887                 ixgbe_setup_loopback_link_82599(hw);
4888
4889         return 0;
4890 }
4891
4892 /*
4893  * Start Receive Units for specified queue.
4894  */
4895 int __attribute__((cold))
4896 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4897 {
4898         struct ixgbe_hw     *hw;
4899         struct ixgbe_rx_queue *rxq;
4900         uint32_t rxdctl;
4901         int poll_ms;
4902
4903         PMD_INIT_FUNC_TRACE();
4904         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4905
4906         if (rx_queue_id < dev->data->nb_rx_queues) {
4907                 rxq = dev->data->rx_queues[rx_queue_id];
4908
4909                 /* Allocate buffers for descriptor rings */
4910                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4911                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4912                                      rx_queue_id);
4913                         return -1;
4914                 }
4915                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4916                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4917                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4918
4919                 /* Wait until RX Enable ready */
4920                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4921                 do {
4922                         rte_delay_ms(1);
4923                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4924                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4925                 if (!poll_ms)
4926                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4927                                      rx_queue_id);
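                /*
                 * Make the descriptor ring writes from the mbuf allocation
                 * above globally visible before bumping the tail pointer.
                 */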
4928                 rte_wmb();
4929                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4930                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4931                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4932         } else
4933                 return -1;
4934
4935         return 0;
4936 }
4937
4938 /*
4939  * Stop Receive Units for specified queue.
4940  */
4941 int __attribute__((cold))
4942 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4943 {
4944         struct ixgbe_hw     *hw;
4945         struct ixgbe_adapter *adapter =
4946                 (struct ixgbe_adapter *)dev->data->dev_private;
4947         struct ixgbe_rx_queue *rxq;
4948         uint32_t rxdctl;
4949         int poll_ms;
4950
4951         PMD_INIT_FUNC_TRACE();
4952         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4953
4954         if (rx_queue_id < dev->data->nb_rx_queues) {
4955                 rxq = dev->data->rx_queues[rx_queue_id];
4956
4957                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4958                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4959                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4960
4961                 /* Wait until RX Enable bit clear */
4962                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4963                 do {
4964                         rte_delay_ms(1);
4965                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4966                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4967                 if (!poll_ms)
4968                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4969                                      rx_queue_id);
4970
4971                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4972
4973                 ixgbe_rx_queue_release_mbufs(rxq);
4974                 ixgbe_reset_rx_queue(adapter, rxq);
4975                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4976         } else
4977                 return -1;
4978
4979         return 0;
4980 }
4981
4982
4983 /*
4984  * Start Transmit Units for specified queue.
4985  */
4986 int __attribute__((cold))
4987 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4988 {
4989         struct ixgbe_hw     *hw;
4990         struct ixgbe_tx_queue *txq;
4991         uint32_t txdctl;
4992         int poll_ms;
4993
4994         PMD_INIT_FUNC_TRACE();
4995         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4996
4997         if (tx_queue_id < dev->data->nb_tx_queues) {
4998                 txq = dev->data->tx_queues[tx_queue_id];
4999                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5000                 txdctl |= IXGBE_TXDCTL_ENABLE;
5001                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5002
5003                 /* Wait until TX Enable ready */
5004                 if (hw->mac.type == ixgbe_mac_82599EB) {
5005                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5006                         do {
5007                                 rte_delay_ms(1);
5008                                 txdctl = IXGBE_READ_REG(hw,
5009                                         IXGBE_TXDCTL(txq->reg_idx));
5010                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5011                         if (!poll_ms)
5012                                 PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5013                                              tx_queue_id);
5014                 }
5015                 rte_wmb();
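                     /* Zero both head and tail so the transmit ring starts out empty. */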
5016                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5017                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5018                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5019         } else
5020                 return -1;
5021
5022         return 0;
5023 }
5024
5025 /*
5026  * Stop Transmit Units for specified queue.
5027  */
5028 int __attribute__((cold))
5029 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5030 {
5031         struct ixgbe_hw     *hw;
5032         struct ixgbe_tx_queue *txq;
5033         uint32_t txdctl;
5034         uint32_t txtdh, txtdt;
5035         int poll_ms;
5036
5037         PMD_INIT_FUNC_TRACE();
5038         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5039
5040         if (tx_queue_id >= dev->data->nb_tx_queues)
5041                 return -1;
5042
5043         txq = dev->data->tx_queues[tx_queue_id];
5044
5045         /* Wait until TX queue is empty */
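             /*
              * The queue is drained once the hardware head (TDH) catches up
              * with the software tail (TDT), i.e. every queued descriptor has
              * been processed by the device.
              */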
5046         if (hw->mac.type == ixgbe_mac_82599EB) {
5047                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5048                 do {
5049                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5050                         txtdh = IXGBE_READ_REG(hw,
5051                                                IXGBE_TDH(txq->reg_idx));
5052                         txtdt = IXGBE_READ_REG(hw,
5053                                                IXGBE_TDT(txq->reg_idx));
5054                 } while (--poll_ms && (txtdh != txtdt));
5055                 if (!poll_ms)
5056                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty when stopping.",
5057                                      tx_queue_id);
5058         }
5059
5060         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5061         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5062         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5063
5064         /* Wait until the TX Enable bit clears */
5065         if (hw->mac.type == ixgbe_mac_82599EB) {
5066                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5067                 do {
5068                         rte_delay_ms(1);
5069                         txdctl = IXGBE_READ_REG(hw,
5070                                                 IXGBE_TXDCTL(txq->reg_idx));
5071                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5072                 if (!poll_ms)
5073                                 PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5074                                              tx_queue_id);
5075         }
5076
5077         if (txq->ops != NULL) {
5078                 txq->ops->release_mbufs(txq);
5079                 txq->ops->reset(txq);
5080         }
5081         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5082
5083         return 0;
5084 }
5085
5086 void
5087 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5088         struct rte_eth_rxq_info *qinfo)
5089 {
5090         struct ixgbe_rx_queue *rxq;
5091
5092         rxq = dev->data->rx_queues[queue_id];
5093
5094         qinfo->mp = rxq->mb_pool;
5095         qinfo->scattered_rx = dev->data->scattered_rx;
5096         qinfo->nb_desc = rxq->nb_rx_desc;
5097
5098         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5099         qinfo->conf.rx_drop_en = rxq->drop_en;
5100         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5101 }
5102
5103 void
5104 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5105         struct rte_eth_txq_info *qinfo)
5106 {
5107         struct ixgbe_tx_queue *txq;
5108
5109         txq = dev->data->tx_queues[queue_id];
5110
5111         qinfo->nb_desc = txq->nb_tx_desc;
5112
5113         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5114         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5115         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5116
5117         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5118         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5119         qinfo->conf.txq_flags = txq->txq_flags;
5120         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5121 }
5122
5123 /*
5124  * [VF] Initializes Receive Unit.
5125  */
5126 int __attribute__((cold))
5127 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5128 {
5129         struct ixgbe_hw     *hw;
5130         struct ixgbe_rx_queue *rxq;
5131         uint64_t bus_addr;
5132         uint32_t srrctl, psrtype = 0;
5133         uint16_t buf_size;
5134         uint16_t i;
5135         int ret;
5136
5137         PMD_INIT_FUNC_TRACE();
5138         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5139
5140         if (!rte_is_power_of_2(dev->data->nb_rx_queues)) {
5141                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5142                         "it must be a power of 2");
5143                 return -1;
5144         }
5145
5146         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5147                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5148                         "it must be less than or equal to %d",
5149                         hw->mac.max_rx_queues);
5150                 return -1;
5151         }
5152
5153         /*
5154          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5155          * disables VF packet reception if the PF MTU is > 1500.
5156          * This is done to deal with an 82599 limitation that forces
5157          * the PF and all VFs to share the same MTU.
5158          * The PF driver re-enables VF packet reception only once the
5159          * VF driver issues an IXGBE_VF_SET_LPE request.
5160          * In the meantime, the VF device cannot be used, even if the VF driver
5161          * and the Guest VM network stack are ready to accept packets with a
5162          * size up to the PF MTU.
5163          * As a work-around to this PF behaviour, force the call to
5164          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5165          * VF packet reception works in all cases.
5166          */
5167         ixgbevf_rlpml_set_vf(hw,
5168                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
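             /*
              * The call above issues the IXGBE_VF_SET_LPE mailbox request with
              * the configured maximum Rx packet length.
              */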
5169
5170         /* Setup RX queues */
5171         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5172                 rxq = dev->data->rx_queues[i];
5173
5174                 /* Allocate buffers for descriptor rings */
5175                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5176                 if (ret)
5177                         return ret;
5178
5179                 /* Setup the Base and Length of the Rx Descriptor Rings */
5180                 bus_addr = rxq->rx_ring_phys_addr;
5181
5182                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5183                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5184                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5185                                 (uint32_t)(bus_addr >> 32));
5186                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5187                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5188                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5189                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5190
5191
5192                 /* Configure the SRRCTL register */
5193 #ifdef RTE_HEADER_SPLIT_ENABLE
5194                 /*
5195                  * Configure Header Split
5196                  */
5197                 if (dev->data->dev_conf.rxmode.header_split) {
5198                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5199                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5200                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5201                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5202                 } else
5203 #endif
5204                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5205
5206                 /* Set if packets are dropped when no descriptors available */
5207                 if (rxq->drop_en)
5208                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5209
5210                 /*
5211                  * Configure the RX buffer size in the BSIZEPACKET field of
5212                  * the SRRCTL register of the queue.
5213                  * The value is in 1 KB resolution. Valid values can be from
5214                  * 1 KB to 16 KB.
5215                  */
5216                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5217                         RTE_PKTMBUF_HEADROOM);
5218                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5219                            IXGBE_SRRCTL_BSIZEPKT_MASK);
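                     /*
                      * For example, with the common 2176-byte mbuf data room and
                      * the default 128-byte headroom, buf_size is 2048 and
                      * 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT (10) gives 2, i.e. the
                      * hardware is programmed for 2 KB receive buffers.
                      */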
5220
5221                 /*
5222                  * VF modification to write virtual function SRRCTL register
5223                  */
5224                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5225
5226                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5227                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5228
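                     /*
                      * buf_size is re-derived from SRRCTL because BSIZEPACKET is
                      * programmed in 1 KB units; the rounded-down value is what
                      * the hardware actually uses, so the scattered Rx decision
                      * below is based on it.
                      */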
5229                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5230                     /* Add twice the VLAN tag length to allow for double (QinQ) tagged frames */
5231                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5232                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5233                         if (!dev->data->scattered_rx)
5234                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5235                         dev->data->scattered_rx = 1;
5236                 }
5237         }
5238
5239 #ifdef RTE_HEADER_SPLIT_ENABLE
5240         if (dev->data->dev_conf.rxmode.header_split)
5241                 /* Must setup the PSRTYPE register */
5242                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5243                         IXGBE_PSRTYPE_UDPHDR   |
5244                         IXGBE_PSRTYPE_IPV4HDR  |
5245                         IXGBE_PSRTYPE_IPV6HDR;
5246 #endif
5247
5248         /* Set the RQPL field for VF RSS according to the number of Rx queues */
5249         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5250                 IXGBE_PSRTYPE_RQPL_SHIFT;
5251         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5252
5253         ixgbe_set_rx_function(dev);
5254
5255         return 0;
5256 }
5257
5258 /*
5259  * [VF] Initializes Transmit Unit.
5260  */
5261 void __attribute__((cold))
5262 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5263 {
5264         struct ixgbe_hw     *hw;
5265         struct ixgbe_tx_queue *txq;
5266         uint64_t bus_addr;
5267         uint32_t txctrl;
5268         uint16_t i;
5269
5270         PMD_INIT_FUNC_TRACE();
5271         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5272
5273         /* Setup the Base and Length of the Tx Descriptor Rings */
5274         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5275                 txq = dev->data->tx_queues[i];
5276                 bus_addr = txq->tx_ring_phys_addr;
5277                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5278                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5279                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5280                                 (uint32_t)(bus_addr >> 32));
5281                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5282                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5283                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5284                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5285                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5286
5287                 /*
5288                  * Disable Tx Head Writeback RO bit, since this hoses
5289                  * bookkeeping if things aren't delivered in order.
5290                  */
5291                 txctrl = IXGBE_READ_REG(hw,
5292                                 IXGBE_VFDCA_TXCTRL(i));
5293                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5294                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5295                                 txctrl);
5296         }
5297 }
5298
5299 /*
5300  * [VF] Start Transmit and Receive Units.
5301  */
5302 void __attribute__((cold))
5303 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5304 {
5305         struct ixgbe_hw     *hw;
5306         struct ixgbe_tx_queue *txq;
5307         struct ixgbe_rx_queue *rxq;
5308         uint32_t txdctl;
5309         uint32_t rxdctl;
5310         uint16_t i;
5311         int poll_ms;
5312
5313         PMD_INIT_FUNC_TRACE();
5314         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5315
5316         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5317                 txq = dev->data->tx_queues[i];
5318                 /* Setup Transmit Threshold Registers */
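                     /*
                      * TXDCTL packs the prefetch (pthresh), host (hthresh) and
                      * write-back (wthresh) thresholds into bits 6:0, 14:8 and
                      * 22:16, hence the 0x7F masks and shifts below.
                      */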
5319                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5320                 txdctl |= txq->pthresh & 0x7F;
5321                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5322                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5323                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5324         }
5325
5326         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5327
5328                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5329                 txdctl |= IXGBE_TXDCTL_ENABLE;
5330                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5331
5332                 poll_ms = 10;
5333                 /* Wait until TX Enable ready */
5334                 do {
5335                         rte_delay_ms(1);
5336                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5337                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5338                 if (!poll_ms)
5339                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5340         }
5341         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5342
5343                 rxq = dev->data->rx_queues[i];
5344
5345                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5346                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5347                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5348
5349                 /* Wait until RX Enable ready */
5350                 poll_ms = 10;
5351                 do {
5352                         rte_delay_ms(1);
5353                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5354                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5355                 if (!poll_ms)
5356                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5357                 rte_wmb();
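                     /*
                      * Only the tail needs updating here: VFRDH was already
                      * zeroed in ixgbevf_dev_rx_init(), so bumping VFRDT to the
                      * last descriptor hands the whole ring to the hardware.
                      */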
5358                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5359
5360         }
5361 }
5362
5363 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
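     /*
      * The condition-check stub returns -1 so the vector Rx path is treated
      * as unusable and the scalar receive routines are used instead.
      */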
5364 int __attribute__((weak))
5365 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5366 {
5367         return -1;
5368 }
5369
5370 uint16_t __attribute__((weak))
5371 ixgbe_recv_pkts_vec(
5372         void __rte_unused *rx_queue,
5373         struct rte_mbuf __rte_unused **rx_pkts,
5374         uint16_t __rte_unused nb_pkts)
5375 {
5376         return 0;
5377 }
5378
5379 uint16_t __attribute__((weak))
5380 ixgbe_recv_scattered_pkts_vec(
5381         void __rte_unused *rx_queue,
5382         struct rte_mbuf __rte_unused **rx_pkts,
5383         uint16_t __rte_unused nb_pkts)
5384 {
5385         return 0;
5386 }
5387
5388 int __attribute__((weak))
5389 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5390 {
5391         return -1;
5392 }