1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_ring.h>
62 #include <rte_mempool.h>
63 #include <rte_malloc.h>
64 #include <rte_mbuf.h>
65 #include <rte_ether.h>
66 #include <rte_ethdev.h>
67 #include <rte_prefetch.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73 #include <rte_ip.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 /* Bit mask indicating which offload bits are required for building a TX context */
84 #define IXGBE_TX_OFFLOAD_MASK (                  \
85                 PKT_TX_VLAN_PKT |                \
86                 PKT_TX_IP_CKSUM |                \
87                 PKT_TX_L4_MASK |                 \
88                 PKT_TX_TCP_SEG |                 \
89                 PKT_TX_OUTER_IP_CKSUM)
90
91 #if 1
92 #define RTE_PMD_USE_PREFETCH
93 #endif
94
95 #ifdef RTE_PMD_USE_PREFETCH
96 /*
97  * Prefetch a cache line into all cache levels.
98  */
99 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
100 #else
101 #define rte_ixgbe_prefetch(p)   do {} while (0)
102 #endif
103
104 /*********************************************************************
105  *
106  *  TX functions
107  *
108  **********************************************************************/
109
110 /*
111  * Check for descriptors with their DD bit set and free mbufs.
112  * Return the total number of buffers freed.
113  */
114 static inline int __attribute__((always_inline))
115 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
116 {
117         struct ixgbe_tx_entry *txep;
118         uint32_t status;
119         int i, nb_free = 0;
120         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
121
122         /* check DD bit on threshold descriptor */
123         status = txq->tx_ring[txq->tx_next_dd].wb.status;
124         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
125                 return 0;
126
127         /*
128          * first buffer to free from S/W ring is at index
129          * tx_next_dd - (tx_rs_thresh-1)
130          */
131         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
132
133         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
134                 /* free buffers one at a time */
135                 m = __rte_pktmbuf_prefree_seg(txep->mbuf);
136                 txep->mbuf = NULL;
137
138                 if (unlikely(m == NULL))
139                         continue;
140
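                /*
                 * Flush the locally cached mbufs back to their mempool when
                 * the cache is full or the next mbuf comes from a different
                 * pool.
                 */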
141                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
142                     (nb_free > 0 && m->pool != free[0]->pool)) {
143                         rte_mempool_put_bulk(free[0]->pool,
144                                              (void **)free, nb_free);
145                         nb_free = 0;
146                 }
147
148                 free[nb_free++] = m;
149         }
150
151         if (nb_free > 0)
152                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
153
154         /* buffers were freed, update counters */
155         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
156         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
157         if (txq->tx_next_dd >= txq->nb_tx_desc)
158                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
159
160         return txq->tx_rs_thresh;
161 }
162
163 /* Populate 4 descriptors with data from 4 mbufs */
164 static inline void
165 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
166 {
167         uint64_t buf_dma_addr;
168         uint32_t pkt_len;
169         int i;
170
171         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
172                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
173                 pkt_len = (*pkts)->data_len;
174
175                 /* write data to descriptor */
176                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
177
178                 txdp->read.cmd_type_len =
179                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
180
181                 txdp->read.olinfo_status =
182                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
183
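                /*
                 * Prefetch the mbuf's pool pointer; it is dereferenced later
                 * when the transmitted mbuf is freed.
                 */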
184                 rte_prefetch0(&(*pkts)->pool);
185         }
186 }
187
188 /* Populate 1 descriptor with data from 1 mbuf */
189 static inline void
190 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
191 {
192         uint64_t buf_dma_addr;
193         uint32_t pkt_len;
194
195         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
196         pkt_len = (*pkts)->data_len;
197
198         /* write data to descriptor */
199         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
200         txdp->read.cmd_type_len =
201                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
202         txdp->read.olinfo_status =
203                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
204         rte_prefetch0(&(*pkts)->pool);
205 }
206
207 /*
208  * Fill H/W descriptor ring with mbuf data.
209  * Copy mbuf pointers to the S/W ring.
210  */
211 static inline void
212 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
213                       uint16_t nb_pkts)
214 {
215         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
216         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
217         const int N_PER_LOOP = 4;
218         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
219         int mainpart, leftover;
220         int i, j;
221
222         /*
223          * Process most of the packets in chunks of N pkts.  Any
224          * leftover packets will get processed one at a time.
225          */
226         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
227         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
228         for (i = 0; i < mainpart; i += N_PER_LOOP) {
229                 /* Copy N mbuf pointers to the S/W ring */
230                 for (j = 0; j < N_PER_LOOP; ++j) {
231                         (txep + i + j)->mbuf = *(pkts + i + j);
232                 }
233                 tx4(txdp + i, pkts + i);
234         }
235
236         if (unlikely(leftover > 0)) {
237                 for (i = 0; i < leftover; ++i) {
238                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
239                         tx1(txdp + mainpart + i, pkts + mainpart + i);
240                 }
241         }
242 }
243
244 static inline uint16_t
245 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
246              uint16_t nb_pkts)
247 {
248         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
249         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
250         uint16_t n = 0;
251
252         /*
253          * Begin scanning the H/W ring for done descriptors when the
254          * number of available descriptors drops below tx_free_thresh.  For
255          * each done descriptor, free the associated buffer.
256          */
257         if (txq->nb_tx_free < txq->tx_free_thresh)
258                 ixgbe_tx_free_bufs(txq);
259
260         /* Only use descriptors that are available */
261         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
262         if (unlikely(nb_pkts == 0))
263                 return 0;
264
265         /* Use exactly nb_pkts descriptors */
266         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
267
268         /*
269          * At this point, we know there are enough descriptors in the
270          * ring to transmit all the packets.  This assumes that each
271          * mbuf contains a single segment, and that no new offloads
272          * are expected, which would require a new context descriptor.
273          */
274
275         /*
276          * See if we're going to wrap-around. If so, handle the top
277          * of the descriptor ring first, then do the bottom.  If not,
278          * the processing looks just like the "bottom" part anyway...
279          */
280         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
281                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
282                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
283
284                 /*
285                  * We know that the last descriptor in the ring will need to
286                  * have its RS bit set because tx_rs_thresh has to be
287                  * a divisor of the ring size
288                  */
289                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
290                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
291                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
292
293                 txq->tx_tail = 0;
294         }
295
296         /* Fill H/W descriptor ring with mbuf data */
297         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
298         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
299
300         /*
301          * Determine if RS bit should be set
302          * This is what we actually want:
303          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
304          * but instead of subtracting 1 and doing >=, we can just do
305          * greater than without subtracting.
306          */
307         if (txq->tx_tail > txq->tx_next_rs) {
308                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
309                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
310                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
311                                                 txq->tx_rs_thresh);
312                 if (txq->tx_next_rs >= txq->nb_tx_desc)
313                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
314         }
315
316         /*
317          * Check for wrap-around. This would only happen if we used
318          * up to the last descriptor in the ring, no more, no less.
319          */
320         if (txq->tx_tail >= txq->nb_tx_desc)
321                 txq->tx_tail = 0;
322
323         /* update tail pointer */
324         rte_wmb();
325         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
326
327         return nb_pkts;
328 }
329
330 uint16_t
331 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
332                        uint16_t nb_pkts)
333 {
334         uint16_t nb_tx;
335
336         /* Transmit the whole burst directly if it fits within TX_MAX_BURST packets */
337         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
338                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
339
340         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
341         nb_tx = 0;
342         while (nb_pkts) {
343                 uint16_t ret, n;
344
345                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
346                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
347                 nb_tx = (uint16_t)(nb_tx + ret);
348                 nb_pkts = (uint16_t)(nb_pkts - ret);
349                 if (ret < n)
350                         break;
351         }
352
353         return nb_tx;
354 }
355
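/*
 * Populate an advanced context descriptor with the offload parameters
 * (VLAN tag, L3/L4 checksum, TSO, outer IP/tunnel lengths) carried in
 * tx_offload, and cache them in the queue so that subsequent packets
 * with the same offload requirements can reuse this context.
 */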
356 static inline void
357 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
358                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
359                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
360 {
361         uint32_t type_tucmd_mlhl;
362         uint32_t mss_l4len_idx = 0;
363         uint32_t ctx_idx;
364         uint32_t vlan_macip_lens;
365         union ixgbe_tx_offload tx_offload_mask;
366         uint32_t seqnum_seed = 0;
367
368         ctx_idx = txq->ctx_curr;
369         tx_offload_mask.data[0] = 0;
370         tx_offload_mask.data[1] = 0;
371         type_tucmd_mlhl = 0;
372
373         /* Specify which HW CTX to upload. */
374         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
375
376         if (ol_flags & PKT_TX_VLAN_PKT) {
377                 tx_offload_mask.vlan_tci |= ~0;
378         }
379
380         /* check whether TCP segmentation is required for this packet */
381         if (ol_flags & PKT_TX_TCP_SEG) {
382                 /* implies IP cksum in IPv4 */
383                 if (ol_flags & PKT_TX_IP_CKSUM)
384                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
385                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
386                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
387                 else
388                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
389                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
390                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
391
392                 tx_offload_mask.l2_len |= ~0;
393                 tx_offload_mask.l3_len |= ~0;
394                 tx_offload_mask.l4_len |= ~0;
395                 tx_offload_mask.tso_segsz |= ~0;
396                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
397                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
398         } else { /* no TSO, check if hardware checksum is needed */
399                 if (ol_flags & PKT_TX_IP_CKSUM) {
400                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
401                         tx_offload_mask.l2_len |= ~0;
402                         tx_offload_mask.l3_len |= ~0;
403                 }
404
405                 switch (ol_flags & PKT_TX_L4_MASK) {
406                 case PKT_TX_UDP_CKSUM:
407                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
408                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
409                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
410                         tx_offload_mask.l2_len |= ~0;
411                         tx_offload_mask.l3_len |= ~0;
412                         break;
413                 case PKT_TX_TCP_CKSUM:
414                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
415                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
416                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
417                         tx_offload_mask.l2_len |= ~0;
418                         tx_offload_mask.l3_len |= ~0;
419                         break;
420                 case PKT_TX_SCTP_CKSUM:
421                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
422                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
423                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
424                         tx_offload_mask.l2_len |= ~0;
425                         tx_offload_mask.l3_len |= ~0;
426                         break;
427                 default:
428                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
429                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
430                         break;
431                 }
432         }
433
434         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
435                 tx_offload_mask.outer_l2_len |= ~0;
436                 tx_offload_mask.outer_l3_len |= ~0;
437                 tx_offload_mask.l2_len |= ~0;
438                 seqnum_seed |= tx_offload.outer_l3_len
439                                << IXGBE_ADVTXD_OUTER_IPLEN;
440                 seqnum_seed |= tx_offload.l2_len
441                                << IXGBE_ADVTXD_TUNNEL_LEN;
442         }
443
444         txq->ctx_cache[ctx_idx].flags = ol_flags;
445         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
446                 tx_offload_mask.data[0] & tx_offload.data[0];
447         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
448                 tx_offload_mask.data[1] & tx_offload.data[1];
449         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
450
451         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
452         vlan_macip_lens = tx_offload.l3_len;
453         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
454                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
455                                     IXGBE_ADVTXD_MACLEN_SHIFT);
456         else
457                 vlan_macip_lens |= (tx_offload.l2_len <<
458                                     IXGBE_ADVTXD_MACLEN_SHIFT);
459         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
460         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
461         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
462         ctx_txd->seqnum_seed     = seqnum_seed;
463 }
464
465 /*
466  * Check which hardware context can be used. Use the existing match
467  * or create a new context descriptor.
468  */
469 static inline uint32_t
470 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
471                    union ixgbe_tx_offload tx_offload)
472 {
473         /* Check for a match with the currently used context */
474         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
475                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
476                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
477                      & tx_offload.data[0])) &&
478                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
479                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
480                      & tx_offload.data[1]))))
481                 return txq->ctx_curr;
482
483         /* Otherwise, check whether the other cached context matches */
484         txq->ctx_curr ^= 1;
485         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
486                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
487                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
488                      & tx_offload.data[0])) &&
489                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
490                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
491                      & tx_offload.data[1]))))
492                 return txq->ctx_curr;
493
494         /* No match: a new context descriptor has to be built */
495         return IXGBE_CTX_NUM;
496 }
497
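/*
 * Translate mbuf ol_flags into the checksum-insertion (POPTS) bits of the
 * data descriptor's olinfo_status field.
 */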
498 static inline uint32_t
499 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
500 {
501         uint32_t tmp = 0;
502
503         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
504                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
505         if (ol_flags & PKT_TX_IP_CKSUM)
506                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
507         if (ol_flags & PKT_TX_TCP_SEG)
508                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
509         return tmp;
510 }
511
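/*
 * Translate mbuf ol_flags into the command bits of the data descriptor's
 * cmd_type_len field (VLAN insertion, TSO, outer IP checksum).
 */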
512 static inline uint32_t
513 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
514 {
515         uint32_t cmdtype = 0;
516
517         if (ol_flags & PKT_TX_VLAN_PKT)
518                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
519         if (ol_flags & PKT_TX_TCP_SEG)
520                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
521         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
522                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
523         return cmdtype;
524 }
525
526 /* Default RS bit threshold values */
527 #ifndef DEFAULT_TX_RS_THRESH
528 #define DEFAULT_TX_RS_THRESH   32
529 #endif
530 #ifndef DEFAULT_TX_FREE_THRESH
531 #define DEFAULT_TX_FREE_THRESH 32
532 #endif
533
534 /* Reset transmit descriptors after they have been used */
535 static inline int
536 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
537 {
538         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
539         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
540         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
541         uint16_t nb_tx_desc = txq->nb_tx_desc;
542         uint16_t desc_to_clean_to;
543         uint16_t nb_tx_to_clean;
544         uint32_t status;
545
546         /* Determine the last descriptor needing to be cleaned */
547         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
548         if (desc_to_clean_to >= nb_tx_desc)
549                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
550
551         /* Check to make sure the last descriptor to clean is done */
552         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
553         status = txr[desc_to_clean_to].wb.status;
554         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
555                 PMD_TX_FREE_LOG(DEBUG,
556                                 "TX descriptor %4u is not done "
557                                 "(port=%d queue=%d)",
558                                 desc_to_clean_to,
559                                 txq->port_id, txq->queue_id);
560                 /* Failed to clean any descriptors, better luck next time */
561                 return -(1);
562         }
563
564         /* Figure out how many descriptors will be cleaned */
565         if (last_desc_cleaned > desc_to_clean_to)
566                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
567                                                         desc_to_clean_to);
568         else
569                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
570                                                 last_desc_cleaned);
571
572         PMD_TX_FREE_LOG(DEBUG,
573                         "Cleaning %4u TX descriptors: %4u to %4u "
574                         "(port=%d queue=%d)",
575                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
576                         txq->port_id, txq->queue_id);
577
578         /*
579          * The last descriptor to clean is done, so that means all the
580          * descriptors from the last descriptor that was cleaned
581          * up to the last descriptor with the RS bit set
582          * are done. Only reset the threshold descriptor.
583          */
584         txr[desc_to_clean_to].wb.status = 0;
585
586         /* Update the txq to reflect the last descriptor that was cleaned */
587         txq->last_desc_cleaned = desc_to_clean_to;
588         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
589
590         /* No Error */
591         return 0;
592 }
593
594 uint16_t
595 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
596                 uint16_t nb_pkts)
597 {
598         struct ixgbe_tx_queue *txq;
599         struct ixgbe_tx_entry *sw_ring;
600         struct ixgbe_tx_entry *txe, *txn;
601         volatile union ixgbe_adv_tx_desc *txr;
602         volatile union ixgbe_adv_tx_desc *txd, *txp;
603         struct rte_mbuf     *tx_pkt;
604         struct rte_mbuf     *m_seg;
605         uint64_t buf_dma_addr;
606         uint32_t olinfo_status;
607         uint32_t cmd_type_len;
608         uint32_t pkt_len;
609         uint16_t slen;
610         uint64_t ol_flags;
611         uint16_t tx_id;
612         uint16_t tx_last;
613         uint16_t nb_tx;
614         uint16_t nb_used;
615         uint64_t tx_ol_req;
616         uint32_t ctx = 0;
617         uint32_t new_ctx;
618         union ixgbe_tx_offload tx_offload;
619
620         tx_offload.data[0] = 0;
621         tx_offload.data[1] = 0;
622         txq = tx_queue;
623         sw_ring = txq->sw_ring;
624         txr     = txq->tx_ring;
625         tx_id   = txq->tx_tail;
626         txe = &sw_ring[tx_id];
627         txp = NULL;
628
629         /* Determine if the descriptor ring needs to be cleaned. */
630         if (txq->nb_tx_free < txq->tx_free_thresh)
631                 ixgbe_xmit_cleanup(txq);
632
633         rte_prefetch0(&txe->mbuf->pool);
634
635         /* TX loop */
636         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
637                 new_ctx = 0;
638                 tx_pkt = *tx_pkts++;
639                 pkt_len = tx_pkt->pkt_len;
640
641                 /*
642                  * Determine how many (if any) context descriptors
643                  * are needed for offload functionality.
644                  */
645                 ol_flags = tx_pkt->ol_flags;
646
647                 /* If hardware offload required */
648                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
649                 if (tx_ol_req) {
650                         tx_offload.l2_len = tx_pkt->l2_len;
651                         tx_offload.l3_len = tx_pkt->l3_len;
652                         tx_offload.l4_len = tx_pkt->l4_len;
653                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
654                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
655                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
656                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
657
658                         /* Decide whether a new context must be built or an existing one can be reused. */
659                         ctx = what_advctx_update(txq, tx_ol_req,
660                                 tx_offload);
661                         /* Only allocate a context descriptor if required */
662                         new_ctx = (ctx == IXGBE_CTX_NUM);
663                         ctx = txq->ctx_curr;
664                 }
665
666                 /*
667                  * Keep track of how many descriptors are used in this loop.
668                  * This will always be the number of segments plus the number
669                  * of context descriptors required to transmit the packet.
670                  */
671                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
672
673                 if (txp != NULL &&
674                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
675                         /* set RS on the previous packet in the burst */
676                         txp->read.cmd_type_len |=
677                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
678
679                 /*
680                  * The number of descriptors that must be allocated for a
681                  * packet is the number of segments of that packet, plus 1
682                  * Context Descriptor for the hardware offload, if any.
683                  * Determine the last TX descriptor to allocate in the TX ring
684                  * for the packet, starting from the current position (tx_id)
685                  * in the ring.
686                  */
687                 tx_last = (uint16_t) (tx_id + nb_used - 1);
688
689                 /* Circular ring */
690                 if (tx_last >= txq->nb_tx_desc)
691                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
692
693                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
694                            " tx_first=%u tx_last=%u",
695                            (unsigned) txq->port_id,
696                            (unsigned) txq->queue_id,
697                            (unsigned) pkt_len,
698                            (unsigned) tx_id,
699                            (unsigned) tx_last);
700
701                 /*
702                  * Make sure there are enough TX descriptors available to
703                  * transmit the entire packet.
704                  * nb_used better be less than or equal to txq->tx_rs_thresh
705                  */
706                 if (nb_used > txq->nb_tx_free) {
707                         PMD_TX_FREE_LOG(DEBUG,
708                                         "Not enough free TX descriptors "
709                                         "nb_used=%4u nb_free=%4u "
710                                         "(port=%d queue=%d)",
711                                         nb_used, txq->nb_tx_free,
712                                         txq->port_id, txq->queue_id);
713
714                         if (ixgbe_xmit_cleanup(txq) != 0) {
715                                 /* Could not clean any descriptors */
716                                 if (nb_tx == 0)
717                                         return 0;
718                                 goto end_of_tx;
719                         }
720
721                         /* nb_used better be <= txq->tx_rs_thresh */
722                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
723                                 PMD_TX_FREE_LOG(DEBUG,
724                                         "The number of descriptors needed to "
725                                         "transmit the packet exceeds the "
726                                         "RS bit threshold. This will impact "
727                                         "performance. "
728                                         "nb_used=%4u nb_free=%4u "
729                                         "tx_rs_thresh=%4u. "
730                                         "(port=%d queue=%d)",
731                                         nb_used, txq->nb_tx_free,
732                                         txq->tx_rs_thresh,
733                                         txq->port_id, txq->queue_id);
734                                 /*
735                                  * Loop here until there are enough TX
736                                  * descriptors or until the ring cannot be
737                                  * cleaned.
738                                  */
739                                 while (nb_used > txq->nb_tx_free) {
740                                         if (ixgbe_xmit_cleanup(txq) != 0) {
741                                                 /*
742                                                  * Could not clean any
743                                                  * descriptors
744                                                  */
745                                                 if (nb_tx == 0)
746                                                         return 0;
747                                                 goto end_of_tx;
748                                         }
749                                 }
750                         }
751                 }
752
753                 /*
754                  * By now there are enough free TX descriptors to transmit
755                  * the packet.
756                  */
757
758                 /*
759                  * Set common flags of all TX Data Descriptors.
760                  *
761                  * The following bits must be set in all Data Descriptors:
762                  *   - IXGBE_ADVTXD_DTYP_DATA
763                  *   - IXGBE_ADVTXD_DCMD_DEXT
764                  *
765                  * The following bits must be set in the first Data Descriptor
766                  * and are ignored in the other ones:
767                  *   - IXGBE_ADVTXD_DCMD_IFCS
768                  *   - IXGBE_ADVTXD_MAC_1588
769                  *   - IXGBE_ADVTXD_DCMD_VLE
770                  *
771                  * The following bits must only be set in the last Data
772                  * Descriptor:
773                  *   - IXGBE_TXD_CMD_EOP
774                  *
775                  * The following bits can be set in any Data Descriptor, but
776                  * are only set in the last Data Descriptor:
777                  *   - IXGBE_TXD_CMD_RS
778                  */
779                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
780                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
781
782 #ifdef RTE_LIBRTE_IEEE1588
783                 if (ol_flags & PKT_TX_IEEE1588_TMST)
784                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
785 #endif
786
787                 olinfo_status = 0;
788                 if (tx_ol_req) {
789
790                         if (ol_flags & PKT_TX_TCP_SEG) {
791                                 /* when TSO is on, the paylen in the descriptor is
792                                  * not the packet len but the TCP payload len */
793                                 pkt_len -= (tx_offload.l2_len +
794                                         tx_offload.l3_len + tx_offload.l4_len);
795                         }
796
797                         /*
798                          * Setup the TX Advanced Context Descriptor if required
799                          */
800                         if (new_ctx) {
801                                 volatile struct ixgbe_adv_tx_context_desc *
802                                     ctx_txd;
803
804                                 ctx_txd = (volatile struct
805                                     ixgbe_adv_tx_context_desc *)
806                                     &txr[tx_id];
807
808                                 txn = &sw_ring[txe->next_id];
809                                 rte_prefetch0(&txn->mbuf->pool);
810
811                                 if (txe->mbuf != NULL) {
812                                         rte_pktmbuf_free_seg(txe->mbuf);
813                                         txe->mbuf = NULL;
814                                 }
815
816                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
817                                         tx_offload);
818
819                                 txe->last_id = tx_last;
820                                 tx_id = txe->next_id;
821                                 txe = txn;
822                         }
823
824                         /*
825                          * Set up the TX Advanced Data Descriptor.
826                          * This path is taken whether a new context descriptor
827                          * was built or an existing one is reused.
828                          */
829                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
830                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
831                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
832                 }
833
834                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
835
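                /*
                 * Walk the segment chain and set up one data descriptor
                 * per mbuf segment.
                 */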
836                 m_seg = tx_pkt;
837                 do {
838                         txd = &txr[tx_id];
839                         txn = &sw_ring[txe->next_id];
840                         rte_prefetch0(&txn->mbuf->pool);
841
842                         if (txe->mbuf != NULL)
843                                 rte_pktmbuf_free_seg(txe->mbuf);
844                         txe->mbuf = m_seg;
845
846                         /*
847                          * Set up Transmit Data Descriptor.
848                          */
849                         slen = m_seg->data_len;
850                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
851                         txd->read.buffer_addr =
852                                 rte_cpu_to_le_64(buf_dma_addr);
853                         txd->read.cmd_type_len =
854                                 rte_cpu_to_le_32(cmd_type_len | slen);
855                         txd->read.olinfo_status =
856                                 rte_cpu_to_le_32(olinfo_status);
857                         txe->last_id = tx_last;
858                         tx_id = txe->next_id;
859                         txe = txn;
860                         m_seg = m_seg->next;
861                 } while (m_seg != NULL);
862
863                 /*
864                  * The last packet data descriptor needs End Of Packet (EOP)
865                  */
866                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
867                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
868                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
869
870                 /* Set RS bit only on threshold packets' last descriptor */
871                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
872                         PMD_TX_FREE_LOG(DEBUG,
873                                         "Setting RS bit on TXD id="
874                                         "%4u (port=%d queue=%d)",
875                                         tx_last, txq->port_id, txq->queue_id);
876
877                         cmd_type_len |= IXGBE_TXD_CMD_RS;
878
879                         /* Update txq RS bit counters */
880                         txq->nb_tx_used = 0;
881                         txp = NULL;
882                 } else
883                         txp = txd;
884
885                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
886         }
887
888 end_of_tx:
889         /* set RS on last packet in the burst */
890         if (txp != NULL)
891                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
892
893         rte_wmb();
894
895         /*
896          * Set the Transmit Descriptor Tail (TDT)
897          */
898         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
899                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
900                    (unsigned) tx_id, (unsigned) nb_tx);
901         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
902         txq->tx_tail = tx_id;
903
904         return nb_tx;
905 }
906
907 /*********************************************************************
908  *
909  *  RX functions
910  *
911  **********************************************************************/
912
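/*
 * Packet type values derived from the RX descriptor's pkt_info field;
 * they are used as indexes into the ptype translation tables below.
 */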
913 #define IXGBE_PACKET_TYPE_ETHER                         0X00
914 #define IXGBE_PACKET_TYPE_IPV4                          0X01
915 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
916 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
917 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
918 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
919 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
920 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
921 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
922 #define IXGBE_PACKET_TYPE_IPV6                          0X04
923 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
924 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
925 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
926 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
927 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
928 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
929 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
930 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
931 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
932 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
933 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
934 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
935 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
936 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
937 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
938 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
939 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
940 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
941 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
942 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
943 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
944 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
945 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
946
947 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
948 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
949 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
950 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
951 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
952 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
953 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
954 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
955 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
956 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
957 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
958 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
959 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
960 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
961 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
962 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
963 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
964 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
965 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
966 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
967 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
968 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
969 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
970
971 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
972 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
973 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
974 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
975 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
976 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
977 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
978 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
979 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
980 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
981 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
982 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
983 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
984 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
985 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
986 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
987 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
988 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
989 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
990 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
991 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
992 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
993 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
994
995 #define IXGBE_PACKET_TYPE_MAX               0X80
996 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
997 #define IXGBE_PACKET_TYPE_SHIFT             0X04
998
999 /* @note: fix ixgbe_dev_supported_ptypes_get() if anything changes here. */
1000 static inline uint32_t
1001 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1002 {
1003         /**
1004          * Use two different tables, one for normal packets and one for
1005          * tunnel packets, to save space.
1006          */
1007         static const uint32_t
1008                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1009                 [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1010                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1011                         RTE_PTYPE_L3_IPV4,
1012                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1013                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1014                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1015                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1016                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1017                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1018                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1019                         RTE_PTYPE_L3_IPV4_EXT,
1020                 [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1021                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1022                 [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1023                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1024                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1025                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1026                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1027                         RTE_PTYPE_L3_IPV6,
1028                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1029                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1030                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1031                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1032                 [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1033                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1034                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1035                         RTE_PTYPE_L3_IPV6_EXT,
1036                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1037                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1038                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1039                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1040                 [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1041                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1042                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1043                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1044                         RTE_PTYPE_INNER_L3_IPV6,
1045                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1046                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1047                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1048                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1049                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1050                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1051                 [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1052                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1053                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1054                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1055                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1056                         RTE_PTYPE_INNER_L3_IPV6,
1057                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1058                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1059                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1060                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1061                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1062                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1063                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1064                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1065                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1066                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1067                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1068                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1069                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1070                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1071                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1072                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1073                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1074                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1075                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1076                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1077                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1078                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1079                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1080                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1081                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1082                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1083                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1084                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1085                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1086                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1087                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1088                         RTE_PTYPE_L2_ETHER |
1089                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1090                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1091         };
1092
1093         static const uint32_t
1094                 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1095                 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1096                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1097                         RTE_PTYPE_INNER_L2_ETHER,
1098                 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1099                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1100                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1101                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1102                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1103                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1104                 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1105                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1106                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1107                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1108                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1109                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1110                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1111                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1112                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1113                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1114                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1115                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1116                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1117                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1118                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1119                         RTE_PTYPE_INNER_L4_TCP,
1120                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1121                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1122                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1123                         RTE_PTYPE_INNER_L4_TCP,
1124                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1125                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1126                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1127                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1128                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1129                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1130                         RTE_PTYPE_INNER_L4_TCP,
1131                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1132                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1133                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1134                         RTE_PTYPE_INNER_L3_IPV4,
1135                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1136                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1137                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1138                         RTE_PTYPE_INNER_L4_UDP,
1139                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1140                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1141                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1142                         RTE_PTYPE_INNER_L4_UDP,
1143                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1144                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1145                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1146                         RTE_PTYPE_INNER_L4_SCTP,
1147                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1148                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1149                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1150                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1151                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1152                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1153                         RTE_PTYPE_INNER_L4_UDP,
1154                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1155                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1156                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1157                         RTE_PTYPE_INNER_L4_SCTP,
1158                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1159                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1160                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1161                         RTE_PTYPE_INNER_L3_IPV4,
1162                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1163                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1164                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1165                         RTE_PTYPE_INNER_L4_SCTP,
1166                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1167                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1168                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1169                         RTE_PTYPE_INNER_L4_SCTP,
1170                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1171                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1172                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1173                         RTE_PTYPE_INNER_L4_TCP,
1174                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1175                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1176                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1177                         RTE_PTYPE_INNER_L4_UDP,
1178
1179                 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1180                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1181                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1182                 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1183                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1184                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1185                         RTE_PTYPE_INNER_L3_IPV4,
1186                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1187                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1188                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1189                         RTE_PTYPE_INNER_L3_IPV4_EXT,
1190                 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1191                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1192                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1193                         RTE_PTYPE_INNER_L3_IPV6,
1194                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1195                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1196                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1197                         RTE_PTYPE_INNER_L3_IPV4,
1198                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1199                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1200                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1201                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1202                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1203                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1204                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1205                         RTE_PTYPE_INNER_L3_IPV4,
1206                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1207                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1208                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1209                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1210                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1211                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1212                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1213                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1214                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1215                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1216                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1217                         RTE_PTYPE_INNER_L3_IPV4,
1218                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1219                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1220                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1221                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1222                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1223                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1224                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1225                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1226                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1227                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1228                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1229                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1230                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1231                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1232                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1233                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1234                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1235                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1236                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1237                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1238                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1239                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1240                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1241                         RTE_PTYPE_INNER_L3_IPV4,
1242                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1243                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1244                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1245                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1246                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1247                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1248                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1249                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1250                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1251                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1252                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1253                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1254                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1255                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1256                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1257                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1258                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1259                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1260                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1261                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1262                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1263                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1264                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1265                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1266                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1267                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1268                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1269                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1270         };
1271
1272         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1273                 return RTE_PTYPE_UNKNOWN;
1274
1275         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1276
1277         /* For tunnel packet */
1278         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1279                 /* Remove the tunnel bit to save space. */
1280                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1281                 return ptype_table_tn[pkt_info];
1282         }
1283
1284         /**
1285          * For x550, if it's not a tunnel packet,
1286          * the tunnel type bits should be 0.
1287          * Reuse 82599's mask.
1288          */
1289         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1290
1291         return ptype_table[pkt_info];
1292 }
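
/*
 * Editor's note: illustrative sketch only, not part of the upstream driver.
 * It shows how a caller could tell whether the resolved packet type came
 * from the tunnel table above: tunnelled types carry one of the
 * RTE_PTYPE_TUNNEL_* bits (GRE or VXLAN for this device). The helper name
 * is hypothetical.
 */
static inline int __attribute__((unused))
ixgbe_example_ptype_is_tunnel(uint32_t pkt_info, uint16_t ptype_mask)
{
        return (ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, ptype_mask) &
                RTE_PTYPE_TUNNEL_MASK) != 0;
}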
1293
1294 static inline uint64_t
1295 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1296 {
1297         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1298                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1299                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1300                 PKT_RX_RSS_HASH, 0, 0, 0,
1301                 0, 0, 0,  PKT_RX_FDIR,
1302         };
1303 #ifdef RTE_LIBRTE_IEEE1588
1304         static uint64_t ip_pkt_etqf_map[8] = {
1305                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1306                 0, 0, 0, 0,
1307         };
1308
1309         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1310                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0x07] |
1311                                 ip_rss_types_map[pkt_info & 0xF];
1312         else
1313                 return ip_rss_types_map[pkt_info & 0xF];
1314 #else
1315         return ip_rss_types_map[pkt_info & 0xF];
1316 #endif
1317 }
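
/*
 * Editor's note: illustrative sketch only, not part of the upstream driver.
 * The low 4 bits of pkt_info index ip_rss_types_map above, so any value
 * that maps to PKT_RX_RSS_HASH means the descriptor's RSS field is valid.
 * The helper name is hypothetical.
 */
static inline int __attribute__((unused))
ixgbe_example_rss_hash_valid(uint16_t pkt_info)
{
        return (ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info) &
                PKT_RX_RSS_HASH) != 0;
}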
1318
1319 static inline uint64_t
1320 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1321 {
1322         uint64_t pkt_flags;
1323
1324         /*
1325          * Check only whether a VLAN is present.
1326          * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1327          * that can be found from the rte_eth_rxmode.hw_ip_checksum flag.
1328          */
1329         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1330
1331 #ifdef RTE_LIBRTE_IEEE1588
1332         if (rx_status & IXGBE_RXD_STAT_TMST)
1333                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1334 #endif
1335         return pkt_flags;
1336 }
1337
1338 static inline uint64_t
1339 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1340 {
1341         uint64_t pkt_flags;
1342
1343         /*
1344          * Bit 31: IPE, IPv4 checksum error
1345          * Bit 30: L4I, L4 integrity error
1346          */
1347         static uint64_t error_to_pkt_flags_map[4] = {
1348                 0,  PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
1349                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1350         };
1351         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1352                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1353
1354         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1355             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1356                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1357         }
1358
1359         return pkt_flags;
1360 }
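
/*
 * Editor's note: illustrative sketch only, not part of the upstream driver.
 * The IPE/L4I bits form a 2-bit index into error_to_pkt_flags_map above;
 * e.g. index 1 (only bit 30 set) yields PKT_RX_L4_CKSUM_BAD. The helper
 * name is hypothetical.
 */
static inline int __attribute__((unused))
ixgbe_example_l4_cksum_ok(uint32_t rx_status)
{
        return (rx_desc_error_to_pkt_flags(rx_status) &
                PKT_RX_L4_CKSUM_BAD) == 0;
}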
1361
1362 /*
1363  * LOOK_AHEAD defines how many desc statuses to check beyond the
1364  * current descriptor.
1365  * It must be a compile-time constant (#define) for optimal performance.
1366  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1367  * function only works with LOOK_AHEAD=8.
1368  */
1369 #define LOOK_AHEAD 8
1370 #if (LOOK_AHEAD != 8)
1371 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1372 #endif
1373 static inline int
1374 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1375 {
1376         volatile union ixgbe_adv_rx_desc *rxdp;
1377         struct ixgbe_rx_entry *rxep;
1378         struct rte_mbuf *mb;
1379         uint16_t pkt_len;
1380         uint64_t pkt_flags;
1381         int nb_dd;
1382         uint32_t s[LOOK_AHEAD];
1383         uint32_t pkt_info[LOOK_AHEAD];
1384         int i, j, nb_rx = 0;
1385         uint32_t status;
1386         uint64_t vlan_flags = rxq->vlan_flags;
1387
1388         /* get references to current descriptor and S/W ring entry */
1389         rxdp = &rxq->rx_ring[rxq->rx_tail];
1390         rxep = &rxq->sw_ring[rxq->rx_tail];
1391
1392         status = rxdp->wb.upper.status_error;
1393         /* check to make sure there is at least 1 packet to receive */
1394         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1395                 return 0;
1396
1397         /*
1398          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1399          * reference packets that are ready to be received.
1400          */
1401         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1402              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1403                 /* Read desc statuses backwards to avoid race condition */
1404                 for (j = LOOK_AHEAD-1; j >= 0; --j)
1405                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1406
1407                 for (j = LOOK_AHEAD - 1; j >= 0; --j)
1408                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1409                                                        lo_dword.data);
1410
1411                 /* Compute how many descriptors in this group have the DD bit set */
1412                 nb_dd = 0;
1413                 for (j = 0; j < LOOK_AHEAD; ++j)
1414                         nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;
1415
1416                 nb_rx += nb_dd;
1417
1418                 /* Translate descriptor info to mbuf format */
1419                 for (j = 0; j < nb_dd; ++j) {
1420                         mb = rxep[j].mbuf;
1421                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1422                                   rxq->crc_len;
1423                         mb->data_len = pkt_len;
1424                         mb->pkt_len = pkt_len;
1425                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1426
1427                         /* convert descriptor fields to rte mbuf flags */
1428                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1429                                 vlan_flags);
1430                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1431                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1432                                         ((uint16_t)pkt_info[j]);
1433                         mb->ol_flags = pkt_flags;
1434                         mb->packet_type =
1435                                 ixgbe_rxd_pkt_info_to_pkt_type
1436                                         (pkt_info[j], rxq->pkt_type_mask);
1437
1438                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1439                                 mb->hash.rss = rte_le_to_cpu_32(
1440                                     rxdp[j].wb.lower.hi_dword.rss);
1441                         else if (pkt_flags & PKT_RX_FDIR) {
1442                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1443                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1444                                     IXGBE_ATR_HASH_MASK;
1445                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1446                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1447                         }
1448                 }
1449
1450                 /* Move mbuf pointers from the S/W ring to the stage */
1451                 for (j = 0; j < LOOK_AHEAD; ++j) {
1452                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1453                 }
1454
1455                 /* stop scanning if not all LOOK_AHEAD descriptors were done */
1456                 if (nb_dd != LOOK_AHEAD)
1457                         break;
1458         }
1459
1460         /* clear software ring entries so we can cleanup correctly */
1461         for (i = 0; i < nb_rx; ++i) {
1462                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1463         }
1464
1465
1466         return nb_rx;
1467 }
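
/*
 * Editor's note: illustrative sketch only, not part of the upstream driver.
 * Because IXGBE_RXDADV_STAT_DD is bit 0, masking each status word with it
 * and summing the results counts the descriptors of a LOOK_AHEAD group
 * whose DD (descriptor done) bit is set. The helper name is hypothetical.
 */
static inline int __attribute__((unused))
ixgbe_example_count_dd(const uint32_t *status, int n)
{
        int j, nb_dd = 0;

        for (j = 0; j < n; ++j)
                nb_dd += status[j] & IXGBE_RXDADV_STAT_DD;
        return nb_dd;
}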
1468
1469 static inline int
1470 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1471 {
1472         volatile union ixgbe_adv_rx_desc *rxdp;
1473         struct ixgbe_rx_entry *rxep;
1474         struct rte_mbuf *mb;
1475         uint16_t alloc_idx;
1476         __le64 dma_addr;
1477         int diag, i;
1478
1479         /* allocate buffers in bulk directly into the S/W ring */
1480         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1481         rxep = &rxq->sw_ring[alloc_idx];
1482         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1483                                     rxq->rx_free_thresh);
1484         if (unlikely(diag != 0))
1485                 return -ENOMEM;
1486
1487         rxdp = &rxq->rx_ring[alloc_idx];
1488         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1489                 /* populate the static rte mbuf fields */
1490                 mb = rxep[i].mbuf;
1491                 if (reset_mbuf) {
1492                         mb->next = NULL;
1493                         mb->nb_segs = 1;
1494                         mb->port = rxq->port_id;
1495                 }
1496
1497                 rte_mbuf_refcnt_set(mb, 1);
1498                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1499
1500                 /* populate the descriptors */
1501                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1502                 rxdp[i].read.hdr_addr = 0;
1503                 rxdp[i].read.pkt_addr = dma_addr;
1504         }
1505
1506         /* update state of internal queue structure */
1507         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1508         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1509                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1510
1511         /* no errors */
1512         return 0;
1513 }
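
/*
 * Editor's note: illustrative sketch only, not part of the upstream driver.
 * It restates the rx_free_trigger arithmetic used above: with, say,
 * nb_desc = 128 and rx_free_thresh = 32 the trigger advances
 * 31 -> 63 -> 95 -> 127 and then wraps back to 31. The helper name is
 * hypothetical.
 */
static inline uint16_t __attribute__((unused))
ixgbe_example_next_free_trigger(uint16_t trigger, uint16_t thresh,
                                uint16_t nb_desc)
{
        trigger = (uint16_t)(trigger + thresh);
        if (trigger >= nb_desc)
                trigger = (uint16_t)(thresh - 1);
        return trigger;
}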
1514
1515 static inline uint16_t
1516 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1517                          uint16_t nb_pkts)
1518 {
1519         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1520         int i;
1521
1522         /* how many packets are ready to return? */
1523         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1524
1525         /* copy mbuf pointers to the application's packet list */
1526         for (i = 0; i < nb_pkts; ++i)
1527                 rx_pkts[i] = stage[i];
1528
1529         /* update internal queue state */
1530         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1531         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1532
1533         return nb_pkts;
1534 }
1535
1536 static inline uint16_t
1537 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1538              uint16_t nb_pkts)
1539 {
1540         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1541         uint16_t nb_rx = 0;
1542
1543         /* Any previously recv'd pkts will be returned from the Rx stage */
1544         if (rxq->rx_nb_avail)
1545                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1546
1547         /* Scan the H/W ring for packets to receive */
1548         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1549
1550         /* update internal queue state */
1551         rxq->rx_next_avail = 0;
1552         rxq->rx_nb_avail = nb_rx;
1553         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1554
1555         /* if required, allocate new buffers to replenish descriptors */
1556         if (rxq->rx_tail > rxq->rx_free_trigger) {
1557                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1558
1559                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1560                         int i, j;
1561
1562                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1563                                    "queue_id=%u", (unsigned) rxq->port_id,
1564                                    (unsigned) rxq->queue_id);
1565
1566                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1567                                 rxq->rx_free_thresh;
1568
1569                         /*
1570                          * Need to rewind any previous receives if we cannot
1571                          * allocate new buffers to replenish the old ones.
1572                          */
1573                         rxq->rx_nb_avail = 0;
1574                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1575                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1576                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1577
1578                         return 0;
1579                 }
1580
1581                 /* update tail pointer */
1582                 rte_wmb();
1583                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
1584         }
1585
1586         if (rxq->rx_tail >= rxq->nb_rx_desc)
1587                 rxq->rx_tail = 0;
1588
1589         /* received any packets this loop? */
1590         if (rxq->rx_nb_avail)
1591                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1592
1593         return 0;
1594 }
1595
1596 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1597 uint16_t
1598 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1599                            uint16_t nb_pkts)
1600 {
1601         uint16_t nb_rx;
1602
1603         if (unlikely(nb_pkts == 0))
1604                 return 0;
1605
1606         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1607                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1608
1609         /* request is relatively large, chunk it up */
1610         nb_rx = 0;
1611         while (nb_pkts) {
1612                 uint16_t ret, n;
1613
1614                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1615                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1616                 nb_rx = (uint16_t)(nb_rx + ret);
1617                 nb_pkts = (uint16_t)(nb_pkts - ret);
1618                 if (ret < n)
1619                         break;
1620         }
1621
1622         return nb_rx;
1623 }
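
/*
 * Editor's note: illustrative usage sketch only, not part of the upstream
 * driver. An application never calls the handlers in this file directly;
 * it polls through rte_eth_rx_burst(), which dispatches to whichever
 * receive function was selected for the queue. The function name and the
 * immediate freeing of the mbufs are hypothetical.
 */
static inline void __attribute__((unused))
ixgbe_example_rx_poll(uint8_t port_id, uint16_t queue_id)
{
        struct rte_mbuf *pkts[RTE_PMD_IXGBE_RX_MAX_BURST];
        uint16_t i, nb;

        nb = rte_eth_rx_burst(port_id, queue_id, pkts,
                              RTE_PMD_IXGBE_RX_MAX_BURST);
        for (i = 0; i < nb; i++)
                rte_pktmbuf_free(pkts[i]);
}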
1624
1625 uint16_t
1626 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1627                 uint16_t nb_pkts)
1628 {
1629         struct ixgbe_rx_queue *rxq;
1630         volatile union ixgbe_adv_rx_desc *rx_ring;
1631         volatile union ixgbe_adv_rx_desc *rxdp;
1632         struct ixgbe_rx_entry *sw_ring;
1633         struct ixgbe_rx_entry *rxe;
1634         struct rte_mbuf *rxm;
1635         struct rte_mbuf *nmb;
1636         union ixgbe_adv_rx_desc rxd;
1637         uint64_t dma_addr;
1638         uint32_t staterr;
1639         uint32_t pkt_info;
1640         uint16_t pkt_len;
1641         uint16_t rx_id;
1642         uint16_t nb_rx;
1643         uint16_t nb_hold;
1644         uint64_t pkt_flags;
1645         uint64_t vlan_flags;
1646
1647         nb_rx = 0;
1648         nb_hold = 0;
1649         rxq = rx_queue;
1650         rx_id = rxq->rx_tail;
1651         rx_ring = rxq->rx_ring;
1652         sw_ring = rxq->sw_ring;
1653         vlan_flags = rxq->vlan_flags;
1654         while (nb_rx < nb_pkts) {
1655                 /*
1656                  * The order of operations here is important as the DD status
1657                  * bit must not be read after any other descriptor fields.
1658                  * rx_ring and rxdp are pointing to volatile data so the order
1659                  * of accesses cannot be reordered by the compiler. If they were
1660                  * not volatile, they could be reordered which could lead to
1661                  * using invalid descriptor fields when read from rxd.
1662                  */
1663                 rxdp = &rx_ring[rx_id];
1664                 staterr = rxdp->wb.upper.status_error;
1665                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1666                         break;
1667                 rxd = *rxdp;
1668
1669                 /*
1670                  * End of packet.
1671                  *
1672                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1673                  * is likely to be invalid and to be dropped by the various
1674                  * validation checks performed by the network stack.
1675                  *
1676                  * Allocate a new mbuf to replenish the RX ring descriptor.
1677                  * If the allocation fails:
1678                  *    - arrange for that RX descriptor to be the first one
1679                  *      being parsed the next time the receive function is
1680                  *      invoked [on the same queue].
1681                  *
1682                  *    - Stop parsing the RX ring and return immediately.
1683                  *
1684                  * This policy does not drop the packet received in the RX
1685                  * descriptor for which the allocation of a new mbuf failed.
1686                  * Thus, it allows that packet to be retrieved later if
1687                  * mbufs have been freed in the meantime.
1688                  * As a side effect, holding RX descriptors instead of
1689                  * systematically giving them back to the NIC may lead to
1690                  * RX ring exhaustion situations.
1691                  * However, the NIC can gracefully prevent such situations
1692                  * from happening by sending specific "back-pressure" flow
1693                  * control frames to its peer(s).
1694                  */
1695                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1696                            "ext_err_stat=0x%08x pkt_len=%u",
1697                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1698                            (unsigned) rx_id, (unsigned) staterr,
1699                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1700
1701                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1702                 if (nmb == NULL) {
1703                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1704                                    "queue_id=%u", (unsigned) rxq->port_id,
1705                                    (unsigned) rxq->queue_id);
1706                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1707                         break;
1708                 }
1709
1710                 nb_hold++;
1711                 rxe = &sw_ring[rx_id];
1712                 rx_id++;
1713                 if (rx_id == rxq->nb_rx_desc)
1714                         rx_id = 0;
1715
1716                 /* Prefetch next mbuf while processing current one. */
1717                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1718
1719                 /*
1720                  * When next RX descriptor is on a cache-line boundary,
1721                  * prefetch the next 4 RX descriptors and the next 8 pointers
1722                  * to mbufs.
1723                  */
1724                 if ((rx_id & 0x3) == 0) {
1725                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1726                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1727                 }
1728
1729                 rxm = rxe->mbuf;
1730                 rxe->mbuf = nmb;
1731                 dma_addr =
1732                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1733                 rxdp->read.hdr_addr = 0;
1734                 rxdp->read.pkt_addr = dma_addr;
1735
1736                 /*
1737                  * Initialize the returned mbuf.
1738                  * 1) setup generic mbuf fields:
1739                  *    - number of segments,
1740                  *    - next segment,
1741                  *    - packet length,
1742                  *    - RX port identifier.
1743                  * 2) integrate hardware offload data, if any:
1744                  *    - RSS flag & hash,
1745                  *    - IP checksum flag,
1746                  *    - VLAN TCI, if any,
1747                  *    - error flags.
1748                  */
1749                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1750                                       rxq->crc_len);
1751                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1752                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1753                 rxm->nb_segs = 1;
1754                 rxm->next = NULL;
1755                 rxm->pkt_len = pkt_len;
1756                 rxm->data_len = pkt_len;
1757                 rxm->port = rxq->port_id;
1758
1759                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1760                 /* Only valid if PKT_RX_VLAN_PKT is set in pkt_flags */
1761                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1762
1763                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1764                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1765                 pkt_flags = pkt_flags |
1766                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1767                 rxm->ol_flags = pkt_flags;
1768                 rxm->packet_type =
1769                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1770                                                        rxq->pkt_type_mask);
1771
1772                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1773                         rxm->hash.rss = rte_le_to_cpu_32(
1774                                                 rxd.wb.lower.hi_dword.rss);
1775                 else if (pkt_flags & PKT_RX_FDIR) {
1776                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1777                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1778                                         IXGBE_ATR_HASH_MASK;
1779                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1780                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1781                 }
1782                 /*
1783                  * Store the mbuf address into the next entry of the array
1784                  * of returned packets.
1785                  */
1786                 rx_pkts[nb_rx++] = rxm;
1787         }
1788         rxq->rx_tail = rx_id;
1789
1790         /*
1791          * If the number of free RX descriptors is greater than the RX free
1792          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1793          * register.
1794          * Update the RDT with the value of the last processed RX descriptor
1795          * minus 1, to guarantee that the RDT register is never equal to the
1796          * RDH register, which creates a "full" ring situation from the
1797          * hardware point of view...
1798          */
1799         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1800         if (nb_hold > rxq->rx_free_thresh) {
1801                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1802                            "nb_hold=%u nb_rx=%u",
1803                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1804                            (unsigned) rx_id, (unsigned) nb_hold,
1805                            (unsigned) nb_rx);
1806                 rx_id = (uint16_t) ((rx_id == 0) ?
1807                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1808                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1809                 nb_hold = 0;
1810         }
1811         rxq->nb_rx_hold = nb_hold;
1812         return nb_rx;
1813 }
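
/*
 * Editor's note: illustrative sketch only, not part of the upstream driver.
 * It restates the RDT computation above: the tail is written with the index
 * of the last processed descriptor minus one, wrapping index 0 to the end
 * of the ring, so RDT can never catch up with RDH. The helper name is
 * hypothetical.
 */
static inline uint16_t __attribute__((unused))
ixgbe_example_rdt_value(uint16_t rx_id, uint16_t nb_rx_desc)
{
        return (uint16_t)((rx_id == 0) ? (nb_rx_desc - 1) : (rx_id - 1));
}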
1814
1815 /**
1816  * Return the RSC count of a descriptor (non-zero for an RSC descriptor).
1817  */
1818 static inline uint32_t
1819 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1820 {
1821         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1822                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1823 }
1824
1825 /**
1826  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1827  *
1828  * Fill the following info in the HEAD buffer of the Rx cluster:
1829  *    - RX port identifier
1830  *    - hardware offload data, if any:
1831  *      - RSS flag & hash
1832  *      - IP checksum flag
1833  *      - VLAN TCI, if any
1834  *      - error flags
1835  * @head HEAD of the packet cluster
1836  * @desc HW descriptor to get data from
1837  * @rxq Pointer to the Rx queue
1838  */
1839 static inline void
1840 ixgbe_fill_cluster_head_buf(
1841         struct rte_mbuf *head,
1842         union ixgbe_adv_rx_desc *desc,
1843         struct ixgbe_rx_queue *rxq,
1844         uint32_t staterr)
1845 {
1846         uint32_t pkt_info;
1847         uint64_t pkt_flags;
1848
1849         head->port = rxq->port_id;
1850
1851         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1852          * set in the pkt_flags field.
1853          */
1854         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1855         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1856         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1857         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1858         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1859         head->ol_flags = pkt_flags;
1860         head->packet_type =
1861                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1862
1863         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1864                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1865         else if (pkt_flags & PKT_RX_FDIR) {
1866                 head->hash.fdir.hash =
1867                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1868                                                           & IXGBE_ATR_HASH_MASK;
1869                 head->hash.fdir.id =
1870                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1871         }
1872 }
1873
1874 /**
1875  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1876  *
1877  * @rx_queue Rx queue handle
1878  * @rx_pkts table of received packets
1879  * @nb_pkts size of rx_pkts table
1880  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1881  *
1882  * Handles the Rx HW ring completions when the RSC feature is configured. Uses an
1883  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1884  *
1885  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1886  * 1) When non-EOP RSC completion arrives:
1887  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1888  *       segment's data length.
1889  *    b) Set the "next" pointer of the current segment to point to the segment
1890  *       at the NEXTP index.
1891  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1892  *       in the sw_rsc_ring.
1893  * 2) When EOP arrives we just update the cluster's total length and offload
1894  *    flags and deliver the cluster up to the upper layers. In our case - put it
1895  *    in the rx_pkts table.
1896  *
1897  * Returns the number of received packets/clusters (according to the "bulk
1898  * receive" interface).
1899  */
1900 static inline uint16_t
1901 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1902                     bool bulk_alloc)
1903 {
1904         struct ixgbe_rx_queue *rxq = rx_queue;
1905         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1906         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1907         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1908         uint16_t rx_id = rxq->rx_tail;
1909         uint16_t nb_rx = 0;
1910         uint16_t nb_hold = rxq->nb_rx_hold;
1911         uint16_t prev_id = rxq->rx_tail;
1912
1913         while (nb_rx < nb_pkts) {
1914                 bool eop;
1915                 struct ixgbe_rx_entry *rxe;
1916                 struct ixgbe_scattered_rx_entry *sc_entry;
1917                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1918                 struct ixgbe_rx_entry *next_rxe = NULL;
1919                 struct rte_mbuf *first_seg;
1920                 struct rte_mbuf *rxm;
1921                 struct rte_mbuf *nmb;
1922                 union ixgbe_adv_rx_desc rxd;
1923                 uint16_t data_len;
1924                 uint16_t next_id;
1925                 volatile union ixgbe_adv_rx_desc *rxdp;
1926                 uint32_t staterr;
1927
1928 next_desc:
1929                 /*
1930                  * The code in this whole file uses the volatile pointer to
1931                  * ensure the read ordering of the status and the rest of the
1932                  * descriptor fields (on the compiler level only!!!). This is so
1933          * UGLY - why not just use the compiler barrier instead? DPDK
1934                  * even has the rte_compiler_barrier() for that.
1935                  *
1936                  * But most importantly this is just wrong because this doesn't
1937                  * ensure memory ordering in a general case at all. For
1938                  * instance, DPDK is supposed to work on Power CPUs where
1939          * a compiler barrier may just not be enough!
1940                  *
1941                  * I tried to write only this function properly to have a
1942                  * starting point (as a part of an LRO/RSC series) but the
1943                  * compiler cursed at me when I tried to cast away the
1944                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
1945                  * keeping it the way it is for now.
1946                  *
1947                  * The code in this file is broken in so many other places and
1948          * will just not work on a big endian CPU anyway; therefore the
1949                  * lines below will have to be revisited together with the rest
1950                  * of the ixgbe PMD.
1951                  *
1952                  * TODO:
1953                  *    - Get rid of "volatile" crap and let the compiler do its
1954                  *      job.
1955                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
1956                  *      memory ordering below.
1957                  */
1958                 rxdp = &rx_ring[rx_id];
1959                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
1960
1961                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
1962                         break;
1963
1964                 rxd = *rxdp;
1965
1966                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1967                                   "staterr=0x%x data_len=%u",
1968                            rxq->port_id, rxq->queue_id, rx_id, staterr,
1969                            rte_le_to_cpu_16(rxd.wb.upper.length));
1970
1971                 if (!bulk_alloc) {
1972                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1973                         if (nmb == NULL) {
1974                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1975                                                   "port_id=%u queue_id=%u",
1976                                            rxq->port_id, rxq->queue_id);
1977
1978                                 rte_eth_devices[rxq->port_id].data->
1979                                                         rx_mbuf_alloc_failed++;
1980                                 break;
1981                         }
1982                 } else if (nb_hold > rxq->rx_free_thresh) {
1983                         uint16_t next_rdt = rxq->rx_free_trigger;
1984
1985                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
1986                                 rte_wmb();
1987                                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
1988                                                     next_rdt);
1989                                 nb_hold -= rxq->rx_free_thresh;
1990                         } else {
1991                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1992                                                   "port_id=%u queue_id=%u",
1993                                            rxq->port_id, rxq->queue_id);
1994
1995                                 rte_eth_devices[rxq->port_id].data->
1996                                                         rx_mbuf_alloc_failed++;
1997                                 break;
1998                         }
1999                 }
2000
2001                 nb_hold++;
2002                 rxe = &sw_ring[rx_id];
2003                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2004
2005                 next_id = rx_id + 1;
2006                 if (next_id == rxq->nb_rx_desc)
2007                         next_id = 0;
2008
2009                 /* Prefetch next mbuf while processing current one. */
2010                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2011
2012                 /*
2013                  * When next RX descriptor is on a cache-line boundary,
2014                  * prefetch the next 4 RX descriptors and the next 8 pointers
2015                  * to mbufs.
2016                  */
2017                 if ((next_id & 0x3) == 0) {
2018                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2019                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2020                 }
2021
2022                 rxm = rxe->mbuf;
2023
2024                 if (!bulk_alloc) {
2025                         __le64 dma =
2026                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2027                         /*
2028                          * Update RX descriptor with the physical address of the
2029                          * new data buffer of the newly allocated mbuf.
2030                          */
2031                         rxe->mbuf = nmb;
2032
2033                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2034                         rxdp->read.hdr_addr = 0;
2035                         rxdp->read.pkt_addr = dma;
2036                 } else
2037                         rxe->mbuf = NULL;
2038
2039                 /*
2040                  * Set data length & data buffer address of mbuf.
2041                  */
2042                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2043                 rxm->data_len = data_len;
2044
2045                 if (!eop) {
2046                         uint16_t nextp_id;
2047                         /*
2048                          * Get next descriptor index:
2049                          *  - For RSC it's in the NEXTP field.
2050                          *  - For a scattered packet - it's just a following
2051                          *    descriptor.
2052                          */
2053                         if (ixgbe_rsc_count(&rxd))
2054                                 nextp_id =
2055                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2056                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2057                         else
2058                                 nextp_id = next_id;
2059
2060                         next_sc_entry = &sw_sc_ring[nextp_id];
2061                         next_rxe = &sw_ring[nextp_id];
2062                         rte_ixgbe_prefetch(next_rxe);
2063                 }
2064
2065                 sc_entry = &sw_sc_ring[rx_id];
2066                 first_seg = sc_entry->fbuf;
2067                 sc_entry->fbuf = NULL;
2068
2069                 /*
2070                  * If this is the first buffer of the received packet,
2071                  * set the pointer to the first mbuf of the packet and
2072                  * initialize its context.
2073                  * Otherwise, update the total length and the number of segments
2074                  * of the current scattered packet, and update the pointer to
2075                  * the last mbuf of the current packet.
2076                  */
2077                 if (first_seg == NULL) {
2078                         first_seg = rxm;
2079                         first_seg->pkt_len = data_len;
2080                         first_seg->nb_segs = 1;
2081                 } else {
2082                         first_seg->pkt_len += data_len;
2083                         first_seg->nb_segs++;
2084                 }
2085
2086                 prev_id = rx_id;
2087                 rx_id = next_id;
2088
2089                 /*
2090                  * If this is not the last buffer of the received packet, update
2091                  * the pointer to the first mbuf at the NEXTP entry in the
2092                  * sw_sc_ring and continue to parse the RX ring.
2093                  */
2094                 if (!eop && next_rxe) {
2095                         rxm->next = next_rxe->mbuf;
2096                         next_sc_entry->fbuf = first_seg;
2097                         goto next_desc;
2098                 }
2099
2100                 /*
2101                  * This is the last buffer of the received packet - return
2102                  * the current cluster to the user.
2103                  */
2104                 rxm->next = NULL;
2105
2106                 /* Initialize the first mbuf of the returned packet */
2107                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2108
2109                 /*
2110                  * Deal with the case when HW CRC stripping is disabled.
2111                  * That can't happen when LRO is enabled, but still could
2112                  * happen for scattered RX mode.
2113                  */
2114                 first_seg->pkt_len -= rxq->crc_len;
2115                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2116                         struct rte_mbuf *lp;
2117
2118                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2119                                 ;
2120
2121                         first_seg->nb_segs--;
2122                         lp->data_len -= rxq->crc_len - rxm->data_len;
2123                         lp->next = NULL;
2124                         rte_pktmbuf_free_seg(rxm);
2125                 } else
2126                         rxm->data_len -= rxq->crc_len;
2127
2128                 /* Prefetch data of first segment, if configured to do so. */
2129                 rte_packet_prefetch((char *)first_seg->buf_addr +
2130                         first_seg->data_off);
2131
2132                 /*
2133                  * Store the mbuf address into the next entry of the array
2134                  * of returned packets.
2135                  */
2136                 rx_pkts[nb_rx++] = first_seg;
2137         }
2138
2139         /*
2140          * Record index of the next RX descriptor to probe.
2141          */
2142         rxq->rx_tail = rx_id;
2143
2144         /*
2145          * If the number of free RX descriptors is greater than the RX free
2146          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2147          * register.
2148          * Update the RDT with the value of the last processed RX descriptor
2149          * minus 1, to guarantee that the RDT register is never equal to the
2150          * RDH register, which creates a "full" ring situation from the
2151          * hardware point of view...
2152          */
2153         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2154                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2155                            "nb_hold=%u nb_rx=%u",
2156                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2157
2158                 rte_wmb();
2159                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
2160                 nb_hold = 0;
2161         }
2162
2163         rxq->nb_rx_hold = nb_hold;
2164         return nb_rx;
2165 }
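
/*
 * Editor's note: illustrative sketch only, not part of the upstream driver.
 * The HEAD mbuf of an RSC/scattered cluster accumulates pkt_len and nb_segs
 * as non-EOP completions arrive; walking the segment chain yields the same
 * total length. The helper name is hypothetical.
 */
static inline uint32_t __attribute__((unused))
ixgbe_example_cluster_len(const struct rte_mbuf *head)
{
        uint32_t len = 0;

        while (head != NULL) {
                len += head->data_len;
                head = head->next;
        }
        return len;
}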
2166
2167 uint16_t
2168 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2169                                  uint16_t nb_pkts)
2170 {
2171         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2172 }
2173
2174 uint16_t
2175 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2176                                uint16_t nb_pkts)
2177 {
2178         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2179 }
2180
2181 /*********************************************************************
2182  *
2183  *  Queue management functions
2184  *
2185  **********************************************************************/
2186
2187 static void __attribute__((cold))
2188 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2189 {
2190         unsigned i;
2191
2192         if (txq->sw_ring != NULL) {
2193                 for (i = 0; i < txq->nb_tx_desc; i++) {
2194                         if (txq->sw_ring[i].mbuf != NULL) {
2195                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2196                                 txq->sw_ring[i].mbuf = NULL;
2197                         }
2198                 }
2199         }
2200 }
2201
2202 static void __attribute__((cold))
2203 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2204 {
2205         if (txq != NULL &&
2206             txq->sw_ring != NULL)
2207                 rte_free(txq->sw_ring);
2208 }
2209
2210 static void __attribute__((cold))
2211 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2212 {
2213         if (txq != NULL && txq->ops != NULL) {
2214                 txq->ops->release_mbufs(txq);
2215                 txq->ops->free_swring(txq);
2216                 rte_free(txq);
2217         }
2218 }
2219
2220 void __attribute__((cold))
2221 ixgbe_dev_tx_queue_release(void *txq)
2222 {
2223         ixgbe_tx_queue_release(txq);
2224 }
2225
2226 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2227 static void __attribute__((cold))
2228 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2229 {
2230         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2231         struct ixgbe_tx_entry *txe = txq->sw_ring;
2232         uint16_t prev, i;
2233
2234         /* Zero out HW ring memory */
2235         for (i = 0; i < txq->nb_tx_desc; i++) {
2236                 txq->tx_ring[i] = zeroed_desc;
2237         }
2238
2239         /* Initialize SW ring entries */
2240         prev = (uint16_t) (txq->nb_tx_desc - 1);
2241         for (i = 0; i < txq->nb_tx_desc; i++) {
2242                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2243
2244                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2245                 txe[i].mbuf = NULL;
2246                 txe[i].last_id = i;
2247                 txe[prev].next_id = i;
2248                 prev = i;
2249         }
2250
2251         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2252         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2253
2254         txq->tx_tail = 0;
2255         txq->nb_tx_used = 0;
2256         /*
2257          * Always allow 1 descriptor to be un-allocated to avoid
2258          * a H/W race condition
2259          */
2260         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2261         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2262         txq->ctx_curr = 0;
2263         memset((void *)&txq->ctx_cache, 0,
2264                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2265 }
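
/*
 * Editor's note: illustrative sketch only, not part of the upstream driver.
 * After ixgbe_reset_tx_queue() the S/W entries are linked into a ring:
 * entry i points to entry i + 1 and the last entry points back to entry 0.
 * The helper name is hypothetical.
 */
static inline uint16_t __attribute__((unused))
ixgbe_example_next_sw_entry(const struct ixgbe_tx_queue *txq, uint16_t idx)
{
        return txq->sw_ring[idx].next_id;
}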
2266
2267 static const struct ixgbe_txq_ops def_txq_ops = {
2268         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2269         .free_swring = ixgbe_tx_free_swring,
2270         .reset = ixgbe_reset_tx_queue,
2271 };
2272
2273 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2274  * the queue parameters. Used in tx_queue_setup by the primary process and then
2275  * in dev_init by a secondary process when attaching to an existing ethdev.
2276  */
2277 void __attribute__((cold))
2278 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2279 {
2280         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2281         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2282                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2283                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2284 #ifdef RTE_IXGBE_INC_VECTOR
2285                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2286                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2287                                         ixgbe_txq_vec_setup(txq) == 0)) {
2288                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2289                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2290                 } else
2291 #endif
2292                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2293         } else {
2294                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2295                 PMD_INIT_LOG(DEBUG,
2296                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2297                                 (unsigned long)txq->txq_flags,
2298                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2299                 PMD_INIT_LOG(DEBUG,
2300                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2301                                 (unsigned long)txq->tx_rs_thresh,
2302                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2303                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2304         }
2305 }
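
/*
 * Editor's note: illustrative sketch only, not part of the upstream driver.
 * It restates the condition used above to pick the simple/vector Tx paths:
 * no offloads or multi-segment packets may be requested and tx_rs_thresh
 * must allow bursts of RTE_PMD_IXGBE_TX_MAX_BURST descriptors. The helper
 * name is hypothetical.
 */
static inline int __attribute__((unused))
ixgbe_example_uses_simple_tx(const struct ixgbe_tx_queue *txq)
{
        return ((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) &&
                (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST);
}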
2306
2307 int __attribute__((cold))
2308 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2309                          uint16_t queue_idx,
2310                          uint16_t nb_desc,
2311                          unsigned int socket_id,
2312                          const struct rte_eth_txconf *tx_conf)
2313 {
2314         const struct rte_memzone *tz;
2315         struct ixgbe_tx_queue *txq;
2316         struct ixgbe_hw     *hw;
2317         uint16_t tx_rs_thresh, tx_free_thresh;
2318
2319         PMD_INIT_FUNC_TRACE();
2320         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2321
2322         /*
2323          * Validate number of transmit descriptors.
2324          * It must not exceed the hardware maximum and must be a multiple
2325          * of IXGBE_TXD_ALIGN.
2326          */
2327         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2328                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2329                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2330                 return -EINVAL;
2331         }
2332
2333         /*
2334          * The following two parameters control the setting of the RS bit on
2335          * transmit descriptors.
2336          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2337          * descriptors have been used.
2338          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2339          * descriptors are used or if the number of descriptors required
2340          * to transmit a packet is greater than the number of free TX
2341          * descriptors.
2342          * The following constraints must be satisfied:
2343          *  tx_rs_thresh must be greater than 0.
2344          *  tx_rs_thresh must be less than the size of the ring minus 2.
2345          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2346          *  tx_rs_thresh must be a divisor of the ring size.
2347          *  tx_free_thresh must be greater than 0.
2348          *  tx_free_thresh must be less than the size of the ring minus 3.
2349          * One descriptor in the TX ring is used as a sentinel to avoid a
2350          * H/W race condition, hence the maximum threshold constraints.
2351          * When set to zero use default values.
2352          */
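        /*
         * Editor's worked example (illustration only): with nb_desc = 512,
         * tx_rs_thresh = 32 and tx_free_thresh = 32, all of the above
         * constraints hold: 0 < 32 <= 32 < 509 and 512 % 32 == 0.
         */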
2353         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2354                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2355         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2356                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2357         if (tx_rs_thresh >= (nb_desc - 2)) {
2358                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2359                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2360                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2361                         (int)dev->data->port_id, (int)queue_idx);
2362                 return -(EINVAL);
2363         }
2364         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2365                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2366                         "(tx_rs_thresh=%u port=%d queue=%d)",
2367                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2368                         (int)dev->data->port_id, (int)queue_idx);
2369                 return -(EINVAL);
2370         }
2371         if (tx_free_thresh >= (nb_desc - 3)) {
2372                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2373                              "number of "
2374                              "TX descriptors minus 3. (tx_free_thresh=%u "
2375                              "port=%d queue=%d)",
2376                              (unsigned int)tx_free_thresh,
2377                              (int)dev->data->port_id, (int)queue_idx);
2378                 return -(EINVAL);
2379         }
2380         if (tx_rs_thresh > tx_free_thresh) {
2381                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2382                              "tx_free_thresh. (tx_free_thresh=%u "
2383                              "tx_rs_thresh=%u port=%d queue=%d)",
2384                              (unsigned int)tx_free_thresh,
2385                              (unsigned int)tx_rs_thresh,
2386                              (int)dev->data->port_id,
2387                              (int)queue_idx);
2388                 return -(EINVAL);
2389         }
2390         if ((nb_desc % tx_rs_thresh) != 0) {
2391                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2392                              "number of TX descriptors. (tx_rs_thresh=%u "
2393                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2394                              (int)dev->data->port_id, (int)queue_idx);
2395                 return -(EINVAL);
2396         }
2397
2398         /*
2399          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2400          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2401          * by the NIC and all descriptors are written back after the NIC
2402          * accumulates WTHRESH descriptors.
2403          */
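        /*
         * E.g. a configuration that keeps the default tx_rs_thresh (> 1) must
         * also pass tx_conf->tx_thresh.wthresh == 0, otherwise the check below
         * rejects the queue setup (illustrative note, not a recommendation).
         */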
2404         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2405                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2406                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2407                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2408                              (int)dev->data->port_id, (int)queue_idx);
2409                 return -(EINVAL);
2410         }
2411
2412         /* Free memory prior to re-allocation if needed... */
2413         if (dev->data->tx_queues[queue_idx] != NULL) {
2414                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2415                 dev->data->tx_queues[queue_idx] = NULL;
2416         }
2417
2418         /* First allocate the tx queue data structure */
2419         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2420                                  RTE_CACHE_LINE_SIZE, socket_id);
2421         if (txq == NULL)
2422                 return -ENOMEM;
2423
2424         /*
2425          * Allocate TX ring hardware descriptors. A memzone large enough to
2426          * handle the maximum ring size is allocated in order to allow for
2427          * resizing in later calls to the queue setup function.
2428          */
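        /*
         * Sizing sketch (assuming IXGBE_MAX_RING_DESC is 4096 and an advanced
         * TX descriptor is 16 bytes): the reservation below is 4096 * 16 =
         * 64 KB per TX queue, independent of the nb_desc actually requested.
         */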
2429         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2430                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2431                         IXGBE_ALIGN, socket_id);
2432         if (tz == NULL) {
2433                 ixgbe_tx_queue_release(txq);
2434                 return -ENOMEM;
2435         }
2436
2437         txq->nb_tx_desc = nb_desc;
2438         txq->tx_rs_thresh = tx_rs_thresh;
2439         txq->tx_free_thresh = tx_free_thresh;
2440         txq->pthresh = tx_conf->tx_thresh.pthresh;
2441         txq->hthresh = tx_conf->tx_thresh.hthresh;
2442         txq->wthresh = tx_conf->tx_thresh.wthresh;
2443         txq->queue_id = queue_idx;
2444         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2445                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2446         txq->port_id = dev->data->port_id;
2447         txq->txq_flags = tx_conf->txq_flags;
2448         txq->ops = &def_txq_ops;
2449         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2450
2451         /*
2452          * Use the VF tail register (VFTDT) when running on a VF, TDT otherwise
2453          */
2454         if (hw->mac.type == ixgbe_mac_82599_vf ||
2455             hw->mac.type == ixgbe_mac_X540_vf ||
2456             hw->mac.type == ixgbe_mac_X550_vf ||
2457             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2458             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2459                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2460         else
2461                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2462
2463         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2464         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2465
2466         /* Allocate software ring */
2467         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2468                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2469                                 RTE_CACHE_LINE_SIZE, socket_id);
2470         if (txq->sw_ring == NULL) {
2471                 ixgbe_tx_queue_release(txq);
2472                 return -ENOMEM;
2473         }
2474         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2475                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2476
2477         /* set up vector or scalar TX function as appropriate */
2478         ixgbe_set_tx_function(dev, txq);
2479
2480         txq->ops->reset(txq);
2481
2482         dev->data->tx_queues[queue_idx] = txq;
2483
2484
2485         return 0;
2486 }
2487
2488 /**
2489  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2490  *
2491  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2492  * in the sw_rsc_ring is not set to NULL but rather points to the next
2493  * mbuf of this RSC aggregation (that has not been completed yet and still
2494  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2495  * just free the first "nb_segs" segments of the cluster explicitly by
2496  * calling rte_pktmbuf_free_seg().
2497  *
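 * For illustration (example only): if the head of a not-yet-completed RSC
 * cluster has nb_segs == 3, only those three segments are freed here; the
 * mbuf that the last "next" pointer refers to is still owned by the HW ring
 * and is released by the regular sw_ring cleanup instead.
 *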
2498  * @m scattered cluster head
2499  */
2500 static void __attribute__((cold))
2501 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2502 {
2503         uint8_t i, nb_segs = m->nb_segs;
2504         struct rte_mbuf *next_seg;
2505
2506         for (i = 0; i < nb_segs; i++) {
2507                 next_seg = m->next;
2508                 rte_pktmbuf_free_seg(m);
2509                 m = next_seg;
2510         }
2511 }
2512
2513 static void __attribute__((cold))
2514 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2515 {
2516         unsigned i;
2517
2518 #ifdef RTE_IXGBE_INC_VECTOR
2519         /* SSE Vector driver has a different way of releasing mbufs. */
2520         if (rxq->rx_using_sse) {
2521                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2522                 return;
2523         }
2524 #endif
2525
2526         if (rxq->sw_ring != NULL) {
2527                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2528                         if (rxq->sw_ring[i].mbuf != NULL) {
2529                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2530                                 rxq->sw_ring[i].mbuf = NULL;
2531                         }
2532                 }
2533                 if (rxq->rx_nb_avail) {
2534                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2535                                 struct rte_mbuf *mb;
2536
2537                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2538                                 rte_pktmbuf_free_seg(mb);
2539                         }
2540                         rxq->rx_nb_avail = 0;
2541                 }
2542         }
2543
2544         if (rxq->sw_sc_ring)
2545                 for (i = 0; i < rxq->nb_rx_desc; i++)
2546                         if (rxq->sw_sc_ring[i].fbuf) {
2547                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2548                                 rxq->sw_sc_ring[i].fbuf = NULL;
2549                         }
2550 }
2551
2552 static void __attribute__((cold))
2553 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2554 {
2555         if (rxq != NULL) {
2556                 ixgbe_rx_queue_release_mbufs(rxq);
2557                 rte_free(rxq->sw_ring);
2558                 rte_free(rxq->sw_sc_ring);
2559                 rte_free(rxq);
2560         }
2561 }
2562
2563 void __attribute__((cold))
2564 ixgbe_dev_rx_queue_release(void *rxq)
2565 {
2566         ixgbe_rx_queue_release(rxq);
2567 }
2568
2569 /*
2570  * Check if Rx Burst Bulk Alloc function can be used.
2571  * Return
2572  *        0: the preconditions are satisfied and the bulk allocation function
2573  *           can be used.
2574  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2575  *           function must be used.
2576  */
2577 static inline int __attribute__((cold))
2578 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2579 {
2580         int ret = 0;
2581
2582         /*
2583          * Make sure the following pre-conditions are satisfied:
2584          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2585          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2586          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2587          *   rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST)
2588          * Scattered packets are not supported.  This should be checked
2589          * outside of this function.
2590          */
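        /*
         * Example that satisfies all of the above (assuming
         * RTE_PMD_IXGBE_RX_MAX_BURST is 32 and IXGBE_MAX_RING_DESC is 4096):
         * nb_rx_desc = 512 and rx_free_thresh = 64, i.e. 64 >= 32, 64 < 512,
         * 512 % 64 == 0 and 512 < 4096 - 32.
         */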
2591         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2592                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2593                              "rxq->rx_free_thresh=%d, "
2594                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2595                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2596                 ret = -EINVAL;
2597         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2598                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2599                              "rxq->rx_free_thresh=%d, "
2600                              "rxq->nb_rx_desc=%d",
2601                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2602                 ret = -EINVAL;
2603         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2604                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2605                              "rxq->nb_rx_desc=%d, "
2606                              "rxq->rx_free_thresh=%d",
2607                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2608                 ret = -EINVAL;
2609         } else if (!(rxq->nb_rx_desc <
2610                (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
2611                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2612                              "rxq->nb_rx_desc=%d, "
2613                              "IXGBE_MAX_RING_DESC=%d, "
2614                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2615                              rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
2616                              RTE_PMD_IXGBE_RX_MAX_BURST);
2617                 ret = -EINVAL;
2618         }
2619
2620         return ret;
2621 }
2622
2623 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2624 static void __attribute__((cold))
2625 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2626 {
2627         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2628         unsigned i;
2629         uint16_t len = rxq->nb_rx_desc;
2630
2631         /*
2632          * By default, the Rx queue setup function allocates enough memory for
2633          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2634          * extra memory at the end of the descriptor ring to be zero'd out. A
2635          * pre-condition for using the Rx burst bulk alloc function is that the
2636          * number of descriptors is less than or equal to
2637          * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2638          * constraints here to see if we need to zero out memory after the end
2639          * of the H/W descriptor ring.
2640          */
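        /*
         * Illustration (assuming RTE_PMD_IXGBE_RX_MAX_BURST is 32): a
         * 512-descriptor queue with bulk alloc enabled zeroes 512 + 32 = 544
         * descriptor slots, so the look-ahead never reads stale data.
         */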
2641         if (adapter->rx_bulk_alloc_allowed)
2642                 /* zero out extra memory */
2643                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2644
2645         /*
2646          * Zero out HW ring memory. Zero out extra memory at the end of
2647          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2648          * reads extra memory as zeros.
2649          */
2650         for (i = 0; i < len; i++) {
2651                 rxq->rx_ring[i] = zeroed_desc;
2652         }
2653
2654         /*
2655          * Initialize the extra software ring entries. Space for these
2656          * extra entries is always allocated.
2657          */
2658         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2659         for (i = rxq->nb_rx_desc; i < len; ++i) {
2660                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2661         }
2662
2663         rxq->rx_nb_avail = 0;
2664         rxq->rx_next_avail = 0;
2665         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2666         rxq->rx_tail = 0;
2667         rxq->nb_rx_hold = 0;
2668         rxq->pkt_first_seg = NULL;
2669         rxq->pkt_last_seg = NULL;
2670
2671 #ifdef RTE_IXGBE_INC_VECTOR
2672         rxq->rxrearm_start = 0;
2673         rxq->rxrearm_nb = 0;
2674 #endif
2675 }
2676
2677 int __attribute__((cold))
2678 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2679                          uint16_t queue_idx,
2680                          uint16_t nb_desc,
2681                          unsigned int socket_id,
2682                          const struct rte_eth_rxconf *rx_conf,
2683                          struct rte_mempool *mp)
2684 {
2685         const struct rte_memzone *rz;
2686         struct ixgbe_rx_queue *rxq;
2687         struct ixgbe_hw     *hw;
2688         uint16_t len;
2689         struct ixgbe_adapter *adapter =
2690                 (struct ixgbe_adapter *)dev->data->dev_private;
2691
2692         PMD_INIT_FUNC_TRACE();
2693         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2694
2695         /*
2696          * Validate number of receive descriptors.
2697          * It must not exceed hardware maximum, and must be multiple
2698          * of IXGBE_ALIGN.
2699          */
2700         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2701                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2702                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2703                 return -EINVAL;
2704         }
2705
2706         /* Free memory prior to re-allocation if needed... */
2707         if (dev->data->rx_queues[queue_idx] != NULL) {
2708                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2709                 dev->data->rx_queues[queue_idx] = NULL;
2710         }
2711
2712         /* First allocate the rx queue data structure */
2713         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2714                                  RTE_CACHE_LINE_SIZE, socket_id);
2715         if (rxq == NULL)
2716                 return -ENOMEM;
2717         rxq->mb_pool = mp;
2718         rxq->nb_rx_desc = nb_desc;
2719         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2720         rxq->queue_id = queue_idx;
2721         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2722                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2723         rxq->port_id = dev->data->port_id;
2724         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2725                                                         0 : ETHER_CRC_LEN);
2726         rxq->drop_en = rx_conf->rx_drop_en;
2727         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2728
2729         /*
2730          * The packet type field in the RX descriptor differs between NICs.
2731          * Some bits are used on X550 but reserved on other NICs,
2732          * so set a different mask for each NIC family.
2733          */
2734         if (hw->mac.type == ixgbe_mac_X550 ||
2735             hw->mac.type == ixgbe_mac_X550EM_x ||
2736             hw->mac.type == ixgbe_mac_X550EM_a ||
2737             hw->mac.type == ixgbe_mac_X550_vf ||
2738             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2739             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2740                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2741         else
2742                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2743
2744         /*
2745          * Allocate RX ring hardware descriptors. A memzone large enough to
2746          * handle the maximum ring size is allocated in order to allow for
2747          * resizing in later calls to the queue setup function.
2748          */
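        /*
         * As on the TX side, RX_RING_SZ covers IXGBE_MAX_RING_DESC entries
         * (a sketch assuming 4096 descriptors of 16 bytes gives 64 KB per
         * queue), so later setup calls with a larger nb_desc can reuse it.
         */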
2749         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2750                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2751         if (rz == NULL) {
2752                 ixgbe_rx_queue_release(rxq);
2753                 return -ENOMEM;
2754         }
2755
2756         /*
2757          * Zero init all the descriptors in the ring.
2758          */
2759         memset(rz->addr, 0, RX_RING_SZ);
2760
2761         /*
2762          * Use the VF head/tail registers (VFRDT/VFRDH) when running on a VF
2763          */
2764         if (hw->mac.type == ixgbe_mac_82599_vf ||
2765             hw->mac.type == ixgbe_mac_X540_vf ||
2766             hw->mac.type == ixgbe_mac_X550_vf ||
2767             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2768             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2769                 rxq->rdt_reg_addr =
2770                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2771                 rxq->rdh_reg_addr =
2772                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2773         } else {
2774                 rxq->rdt_reg_addr =
2775                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2776                 rxq->rdh_reg_addr =
2777                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2778         }
2779
2780         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2781         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2782
2783         /*
2784          * Certain constraints must be met in order to use the bulk buffer
2785          * allocation Rx burst function. If any Rx queue fails to meet them,
2786          * the feature must be disabled for the whole port.
2787          */
2788         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2789                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2790                                     "preconditions - canceling the feature for "
2791                                     "the whole port[%d]",
2792                              rxq->queue_id, rxq->port_id);
2793                 adapter->rx_bulk_alloc_allowed = false;
2794         }
2795
2796         /*
2797          * Allocate software ring. Allow for space at the end of the
2798          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2799          * function does not access an invalid memory region.
2800          */
2801         len = nb_desc;
2802         if (adapter->rx_bulk_alloc_allowed)
2803                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2804
2805         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2806                                           sizeof(struct ixgbe_rx_entry) * len,
2807                                           RTE_CACHE_LINE_SIZE, socket_id);
2808         if (!rxq->sw_ring) {
2809                 ixgbe_rx_queue_release(rxq);
2810                 return -ENOMEM;
2811         }
2812
2813         /*
2814          * Always allocate even if it's not going to be needed in order to
2815          * simplify the code.
2816          *
2817          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2818          * be requested in ixgbe_dev_rx_init(), which is called later from
2819          * dev_start() flow.
2820          */
2821         rxq->sw_sc_ring =
2822                 rte_zmalloc_socket("rxq->sw_sc_ring",
2823                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2824                                    RTE_CACHE_LINE_SIZE, socket_id);
2825         if (!rxq->sw_sc_ring) {
2826                 ixgbe_rx_queue_release(rxq);
2827                 return -ENOMEM;
2828         }
2829
2830         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2831                             "dma_addr=0x%"PRIx64,
2832                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2833                      rxq->rx_ring_phys_addr);
2834
2835         if (!rte_is_power_of_2(nb_desc)) {
2836                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2837                                     "preconditions - canceling the feature for "
2838                                     "the whole port[%d]",
2839                              rxq->queue_id, rxq->port_id);
2840                 adapter->rx_vec_allowed = false;
2841         } else
2842                 ixgbe_rxq_vec_setup(rxq);
2843
2844         dev->data->rx_queues[queue_idx] = rxq;
2845
2846         ixgbe_reset_rx_queue(adapter, rxq);
2847
2848         return 0;
2849 }
2850
2851 uint32_t
2852 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2853 {
2854 #define IXGBE_RXQ_SCAN_INTERVAL 4
2855         volatile union ixgbe_adv_rx_desc *rxdp;
2856         struct ixgbe_rx_queue *rxq;
2857         uint32_t desc = 0;
2858
2859         if (rx_queue_id >= dev->data->nb_rx_queues) {
2860                 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2861                 return 0;
2862         }
2863
2864         rxq = dev->data->rx_queues[rx_queue_id];
2865         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2866
2867         while ((desc < rxq->nb_rx_desc) &&
2868                 (rxdp->wb.upper.status_error &
2869                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2870                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2871                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2872                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2873                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2874                                 desc - rxq->nb_rx_desc]);
2875         }
2876
2877         return desc;
2878 }
2879
2880 int
2881 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2882 {
2883         volatile union ixgbe_adv_rx_desc *rxdp;
2884         struct ixgbe_rx_queue *rxq = rx_queue;
2885         uint32_t desc;
2886
2887         if (unlikely(offset >= rxq->nb_rx_desc))
2888                 return 0;
2889         desc = rxq->rx_tail + offset;
2890         if (desc >= rxq->nb_rx_desc)
2891                 desc -= rxq->nb_rx_desc;
2892
2893         rxdp = &rxq->rx_ring[desc];
2894         return !!(rxdp->wb.upper.status_error &
2895                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2896 }
2897
2898 void __attribute__((cold))
2899 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2900 {
2901         unsigned i;
2902         struct ixgbe_adapter *adapter =
2903                 (struct ixgbe_adapter *)dev->data->dev_private;
2904
2905         PMD_INIT_FUNC_TRACE();
2906
2907         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2908                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2909
2910                 if (txq != NULL) {
2911                         txq->ops->release_mbufs(txq);
2912                         txq->ops->reset(txq);
2913                 }
2914         }
2915
2916         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2917                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2918
2919                 if (rxq != NULL) {
2920                         ixgbe_rx_queue_release_mbufs(rxq);
2921                         ixgbe_reset_rx_queue(adapter, rxq);
2922                 }
2923         }
2924 }
2925
2926 void
2927 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2928 {
2929         unsigned i;
2930
2931         PMD_INIT_FUNC_TRACE();
2932
2933         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2934                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2935                 dev->data->rx_queues[i] = NULL;
2936         }
2937         dev->data->nb_rx_queues = 0;
2938
2939         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2940                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2941                 dev->data->tx_queues[i] = NULL;
2942         }
2943         dev->data->nb_tx_queues = 0;
2944 }
2945
2946 /*********************************************************************
2947  *
2948  *  Device RX/TX init functions
2949  *
2950  **********************************************************************/
2951
2952 /**
2953  * Receive Side Scaling (RSS)
2954  * See section 7.1.2.8 in the following document:
2955  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2956  *
2957  * Principles:
2958  * The source and destination IP addresses of the IP header and the source
2959  * and destination ports of TCP/UDP headers, if any, of received packets are
2960  * hashed against a configurable random key to compute a 32-bit RSS hash result.
2961  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2962  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
2963  * RSS output index, which is used as the RX queue index in which to store
2964  * the received packets.
2965  * The following output is supplied in the RX write-back descriptor:
2966  *     - 32-bit result of the Microsoft RSS hash function,
2967  *     - 4-bit RSS type field.
2968  */
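/*
 * Worked example (illustrative only): for a computed hash of 0xDEADBEEF the
 * RETA index is 0xDEADBEEF & 0x7F = 0x6F (111), so the queue programmed in
 * RETA entry 111 receives the packet and 0xDEADBEEF is reported in the
 * write-back descriptor (and ends up in mbuf->hash.rss).
 */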
2969
2970 /*
2971  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2972  * Used as the default key.
2973  */
2974 static uint8_t rss_intel_key[40] = {
2975         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2976         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2977         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2978         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2979         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2980 };
2981
2982 static void
2983 ixgbe_rss_disable(struct rte_eth_dev *dev)
2984 {
2985         struct ixgbe_hw *hw;
2986         uint32_t mrqc;
2987         uint32_t mrqc_reg;
2988
2989         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2990         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2991         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2992         mrqc &= ~IXGBE_MRQC_RSSEN;
2993         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2994 }
2995
2996 static void
2997 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
2998 {
2999         uint8_t  *hash_key;
3000         uint32_t mrqc;
3001         uint32_t rss_key;
3002         uint64_t rss_hf;
3003         uint16_t i;
3004         uint32_t mrqc_reg;
3005         uint32_t rssrk_reg;
3006
3007         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3008         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3009
3010         hash_key = rss_conf->rss_key;
3011         if (hash_key != NULL) {
3012                 /* Fill in RSS hash key */
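                /*
                 * Each RSSRK register takes four key bytes in little-endian
                 * order; e.g. with the default rss_intel_key defined above,
                 * the first word written would be 0xDA565A6D (bytes 6D 5A 56 DA).
                 */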
3013                 for (i = 0; i < 10; i++) {
3014                         rss_key  = hash_key[(i * 4)];
3015                         rss_key |= hash_key[(i * 4) + 1] << 8;
3016                         rss_key |= hash_key[(i * 4) + 2] << 16;
3017                         rss_key |= hash_key[(i * 4) + 3] << 24;
3018                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3019                 }
3020         }
3021
3022         /* Set configured hashing protocols in MRQC register */
3023         rss_hf = rss_conf->rss_hf;
3024         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3025         if (rss_hf & ETH_RSS_IPV4)
3026                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3027         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3028                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3029         if (rss_hf & ETH_RSS_IPV6)
3030                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3031         if (rss_hf & ETH_RSS_IPV6_EX)
3032                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3033         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3034                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3035         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3036                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3037         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3038                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3039         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3040                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3041         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3042                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3043         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3044 }
3045
3046 int
3047 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3048                           struct rte_eth_rss_conf *rss_conf)
3049 {
3050         struct ixgbe_hw *hw;
3051         uint32_t mrqc;
3052         uint64_t rss_hf;
3053         uint32_t mrqc_reg;
3054
3055         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3056
3057         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3058                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3059                         "NIC.");
3060                 return -ENOTSUP;
3061         }
3062         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3063
3064         /*
3065          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3066          *     "RSS enabling cannot be done dynamically while it must be
3067          *      preceded by a software reset"
3068          * Before changing anything, first check that the update RSS operation
3069          * does not attempt to disable RSS, if RSS was enabled at
3070          * initialization time, or does not attempt to enable RSS, if RSS was
3071          * disabled at initialization time.
3072          */
3073         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3074         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3075         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3076                 if (rss_hf != 0) /* Enable RSS */
3077                         return -(EINVAL);
3078                 return 0; /* Nothing to do */
3079         }
3080         /* RSS enabled */
3081         if (rss_hf == 0) /* Disable RSS */
3082                 return -(EINVAL);
3083         ixgbe_hw_rss_hash_set(hw, rss_conf);
3084         return 0;
3085 }
3086
3087 int
3088 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3089                             struct rte_eth_rss_conf *rss_conf)
3090 {
3091         struct ixgbe_hw *hw;
3092         uint8_t *hash_key;
3093         uint32_t mrqc;
3094         uint32_t rss_key;
3095         uint64_t rss_hf;
3096         uint16_t i;
3097         uint32_t mrqc_reg;
3098         uint32_t rssrk_reg;
3099
3100         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3101         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3102         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3103         hash_key = rss_conf->rss_key;
3104         if (hash_key != NULL) {
3105                 /* Return RSS hash key */
3106                 for (i = 0; i < 10; i++) {
3107                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3108                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3109                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3110                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3111                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3112                 }
3113         }
3114
3115         /* Get RSS functions configured in MRQC register */
3116         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3117         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3118                 rss_conf->rss_hf = 0;
3119                 return 0;
3120         }
3121         rss_hf = 0;
3122         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3123                 rss_hf |= ETH_RSS_IPV4;
3124         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3125                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3126         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3127                 rss_hf |= ETH_RSS_IPV6;
3128         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3129                 rss_hf |= ETH_RSS_IPV6_EX;
3130         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3131                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3132         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3133                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3134         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3135                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3136         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3137                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3138         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3139                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3140         rss_conf->rss_hf = rss_hf;
3141         return 0;
3142 }
3143
3144 static void
3145 ixgbe_rss_configure(struct rte_eth_dev *dev)
3146 {
3147         struct rte_eth_rss_conf rss_conf;
3148         struct ixgbe_hw *hw;
3149         uint32_t reta;
3150         uint16_t i;
3151         uint16_t j;
3152         uint16_t sp_reta_size;
3153         uint32_t reta_reg;
3154
3155         PMD_INIT_FUNC_TRACE();
3156         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3157
3158         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3159
3160         /*
3161          * Fill in redirection table
3162          * The byte-swap is needed because NIC registers are in
3163          * little-endian order.
3164          */
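        /*
         * For instance, with 4 RX queues the loop below cycles j through
         * 0,1,2,3,... and writes one 32-bit register per four entries; the
         * first register accumulates 0x00010203 and is stored byte-swapped
         * as 0x03020100, so entry 0 ends up in the least significant byte
         * (illustrative walk-through).
         */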
3165         reta = 0;
3166         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3167                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3168
3169                 if (j == dev->data->nb_rx_queues)
3170                         j = 0;
3171                 reta = (reta << 8) | j;
3172                 if ((i & 3) == 3)
3173                         IXGBE_WRITE_REG(hw, reta_reg,
3174                                         rte_bswap32(reta));
3175         }
3176
3177         /*
3178          * Configure the RSS key and the RSS protocols used to compute
3179          * the RSS hash of input packets.
3180          */
3181         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3182         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3183                 ixgbe_rss_disable(dev);
3184                 return;
3185         }
3186         if (rss_conf.rss_key == NULL)
3187                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3188         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3189 }
3190
3191 #define NUM_VFTA_REGISTERS 128
3192 #define NIC_RX_BUFFER_SIZE 0x200
3193 #define X550_RX_BUFFER_SIZE 0x180
3194
3195 static void
3196 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3197 {
3198         struct rte_eth_vmdq_dcb_conf *cfg;
3199         struct ixgbe_hw *hw;
3200         enum rte_eth_nb_pools num_pools;
3201         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3202         uint16_t pbsize;
3203         uint8_t nb_tcs; /* number of traffic classes */
3204         int i;
3205
3206         PMD_INIT_FUNC_TRACE();
3207         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3208         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3209         num_pools = cfg->nb_queue_pools;
3210         /* Check we have a valid number of pools */
3211         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3212                 ixgbe_rss_disable(dev);
3213                 return;
3214         }
3215         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3216         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3217
3218         /*
3219          * RXPBSIZE
3220          * split rx buffer up into sections, each for 1 traffic class
3221          */
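        /*
         * E.g. on an 82599-class port (NIC_RX_BUFFER_SIZE = 0x200 KB) with 16
         * pools and therefore 8 TCs, each TC is given pbsize = 0x200 / 8 =
         * 0x40 KB of the RX packet buffer (example values only).
         */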
3222         switch (hw->mac.type) {
3223         case ixgbe_mac_X550:
3224         case ixgbe_mac_X550EM_x:
3225         case ixgbe_mac_X550EM_a:
3226                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3227                 break;
3228         default:
3229                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3230                 break;
3231         }
3232         for (i = 0; i < nb_tcs; i++) {
3233                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3234
3235                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3236                 /* clear 10 bits. */
3237                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3238                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3239         }
3240         /* zero alloc all unused TCs */
3241         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3242                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3243
3244                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3245                 /* clear 10 bits. */
3246                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3247         }
3248
3249         /* MRQC: enable vmdq and dcb */
3250         mrqc = (num_pools == ETH_16_POOLS) ?
3251                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3252         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3253
3254         /* PFVTCTL: turn on virtualisation and set the default pool */
3255         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3256         if (cfg->enable_default_pool) {
3257                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3258         } else {
3259                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3260         }
3261
3262         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3263
3264         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
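        /*
         * Example (assumed mapping, for illustration): dcb_tc = {0,0,1,1,2,2,3,3}
         * packs to queue_mapping = 0x6D2240, i.e. 3 bits per user priority.
         */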
3265         queue_mapping = 0;
3266         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3267                 /*
3268                  * mapping is done with 3 bits per priority,
3269                  * so shift by i*3 each time
3270                  */
3271                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3272
3273         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3274
3275         /* RTRPCS: DCB related */
3276         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3277
3278         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3279         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3280         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3281         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3282
3283         /* VFTA - enable all vlan filters */
3284         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3285                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3286         }
3287
3288         /* VFRE: pool enabling for receive - 16 or 32 */
3289         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3290                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3291
3292         /*
3293          * MPSAR - allow pools to read specific mac addresses
3294          * In this case, all pools should be able to read from mac addr 0
3295          */
3296         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3297         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3298
3299         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3300         for (i = 0; i < cfg->nb_pool_maps; i++) {
3301                 /* set vlan id in VF register and set the valid bit */
3302                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3303                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3304                 /*
3305                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3306                  * pools, we only need to use the first half of the register
3307                  * i.e. bits 0-31
3308                  */
3309                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3310         }
3311 }
3312
3313 /**
3314  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3315  * @hw: pointer to hardware structure
3316  * @dcb_config: pointer to ixgbe_dcb_config structure
3317  */
3318 static void
3319 ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw,
3320                        struct ixgbe_dcb_config *dcb_config)
3321 {
3322         uint32_t reg;
3323         uint32_t q;
3324
3325         PMD_INIT_FUNC_TRACE();
3326         if (hw->mac.type != ixgbe_mac_82598EB) {
3327                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3328                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3329                 reg |= IXGBE_RTTDCS_ARBDIS;
3330                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3331
3332                 /* Enable DCB for Tx with 8 TCs */
3333                 if (dcb_config->num_tcs.pg_tcs == 8) {
3334                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3335                 } else {
3336                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3337                 }
3338                 if (dcb_config->vt_mode)
3339                         reg |= IXGBE_MTQC_VT_ENA;
3340                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3341
3342                 /* Disable drop for all queues */
3343                 for (q = 0; q < 128; q++)
3344                         IXGBE_WRITE_REG(hw, IXGBE_QDE,
3345                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3346
3347                 /* Enable the Tx desc arbiter */
3348                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3349                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3350                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3351
3352                 /* Enable Security TX Buffer IFG for DCB */
3353                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3354                 reg |= IXGBE_SECTX_DCB;
3355                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3356         }
3357 }
3358
3359 /**
3360  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3361  * @dev: pointer to rte_eth_dev structure
3362  * @dcb_config: pointer to ixgbe_dcb_config structure
3363  */
3364 static void
3365 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3366                         struct ixgbe_dcb_config *dcb_config)
3367 {
3368         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3369                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3370         struct ixgbe_hw *hw =
3371                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3372
3373         PMD_INIT_FUNC_TRACE();
3374         if (hw->mac.type != ixgbe_mac_82598EB)
3375                 /*PF VF Transmit Enable*/
3376                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3377                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3378
3379         /*Configure general DCB TX parameters*/
3380         ixgbe_dcb_tx_hw_config(hw, dcb_config);
3381 }
3382
3383 static void
3384 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3385                         struct ixgbe_dcb_config *dcb_config)
3386 {
3387         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3388                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3389         struct ixgbe_dcb_tc_config *tc;
3390         uint8_t i, j;
3391
3392         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3393         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3394                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3395                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3396         } else {
3397                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3398                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3399         }
3400         /* User Priority to Traffic Class mapping */
3401         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3402                 j = vmdq_rx_conf->dcb_tc[i];
3403                 tc = &dcb_config->tc_config[j];
3404                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3405                                                 (uint8_t)(1 << j);
3406         }
3407 }
3408
3409 static void
3410 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3411                         struct ixgbe_dcb_config *dcb_config)
3412 {
3413         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3414                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3415         struct ixgbe_dcb_tc_config *tc;
3416         uint8_t i, j;
3417
3418         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3419         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3420                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3421                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3422         } else {
3423                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3424                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3425         }
3426
3427         /* User Priority to Traffic Class mapping */
3428         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3429                 j = vmdq_tx_conf->dcb_tc[i];
3430                 tc = &dcb_config->tc_config[j];
3431                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3432                                                 (uint8_t)(1 << j);
3433         }
3434 }
3435
3436 static void
3437 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3438                 struct ixgbe_dcb_config *dcb_config)
3439 {
3440         struct rte_eth_dcb_rx_conf *rx_conf =
3441                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3442         struct ixgbe_dcb_tc_config *tc;
3443         uint8_t i, j;
3444
3445         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3446         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3447
3448         /* User Priority to Traffic Class mapping */
3449         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3450                 j = rx_conf->dcb_tc[i];
3451                 tc = &dcb_config->tc_config[j];
3452                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3453                                                 (uint8_t)(1 << j);
3454         }
3455 }
3456
3457 static void
3458 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3459                 struct ixgbe_dcb_config *dcb_config)
3460 {
3461         struct rte_eth_dcb_tx_conf *tx_conf =
3462                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3463         struct ixgbe_dcb_tc_config *tc;
3464         uint8_t i, j;
3465
3466         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3467         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3468
3469         /* User Priority to Traffic Class mapping */
3470         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3471                 j = tx_conf->dcb_tc[i];
3472                 tc = &dcb_config->tc_config[j];
3473                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3474                                                 (uint8_t)(1 << j);
3475         }
3476 }
3477
3478 /**
3479  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3480  * @hw: pointer to hardware structure
3481  * @dcb_config: pointer to ixgbe_dcb_config structure
3482  */
3483 static void
3484 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3485                struct ixgbe_dcb_config *dcb_config)
3486 {
3487         uint32_t reg;
3488         uint32_t vlanctrl;
3489         uint8_t i;
3490
3491         PMD_INIT_FUNC_TRACE();
3492         /*
3493          * Disable the arbiter before changing parameters
3494          * (always enable recycle mode; WSP)
3495          */
3496         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3497         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3498
3499         if (hw->mac.type != ixgbe_mac_82598EB) {
3500                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3501                 if (dcb_config->num_tcs.pg_tcs == 4) {
3502                         if (dcb_config->vt_mode)
3503                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3504                                         IXGBE_MRQC_VMDQRT4TCEN;
3505                         else {
3506                                 /* whether the mode is DCB or DCB_RSS, just
3507                                  * set the MRQE field to RSS+TC mode; RSS
3508                                  * itself is controlled by the RSS_FIELD bits
3509                                  */
3510                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3511                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3512                                         IXGBE_MRQC_RTRSS4TCEN;
3513                         }
3514                 }
3515                 if (dcb_config->num_tcs.pg_tcs == 8) {
3516                         if (dcb_config->vt_mode)
3517                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3518                                         IXGBE_MRQC_VMDQRT8TCEN;
3519                         else {
3520                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3521                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3522                                         IXGBE_MRQC_RTRSS8TCEN;
3523                         }
3524                 }
3525
3526                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3527         }
3528
3529         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3530         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3531         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3532         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3533
3534         /* VFTA - enable all vlan filters */
3535         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3536                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3537         }
3538
3539         /*
3540          * Configure Rx packet plane (recycle mode; WSP) and
3541          * enable arbiter
3542          */
3543         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3544         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3545 }
3546
3547 static void
3548 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3549                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3550 {
3551         switch (hw->mac.type) {
3552         case ixgbe_mac_82598EB:
3553                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3554                 break;
3555         case ixgbe_mac_82599EB:
3556         case ixgbe_mac_X540:
3557         case ixgbe_mac_X550:
3558         case ixgbe_mac_X550EM_x:
3559         case ixgbe_mac_X550EM_a:
3560                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3561                                                   tsa, map);
3562                 break;
3563         default:
3564                 break;
3565         }
3566 }
3567
3568 static void
3569 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3570                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3571 {
3572         switch (hw->mac.type) {
3573         case ixgbe_mac_82598EB:
3574                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3575                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3576                 break;
3577         case ixgbe_mac_82599EB:
3578         case ixgbe_mac_X540:
3579         case ixgbe_mac_X550:
3580         case ixgbe_mac_X550EM_x:
3581         case ixgbe_mac_X550EM_a:
3582                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3583                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3584                 break;
3585         default:
3586                 break;
3587         }
3588 }
3589
3590 #define DCB_RX_CONFIG  1
3591 #define DCB_TX_CONFIG  1
3592 #define DCB_TX_PB      1024
3593 /**
3594  * ixgbe_dcb_hw_configure - Enable DCB and configure
3595  * general DCB in VT mode and non-VT mode parameters
3596  * @dev: pointer to rte_eth_dev structure
3597  * @dcb_config: pointer to ixgbe_dcb_config structure
3598  */
3599 static int
3600 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3601                         struct ixgbe_dcb_config *dcb_config)
3602 {
3603         int     ret = 0;
3604         uint8_t i, pfc_en, nb_tcs;
3605         uint16_t pbsize, rx_buffer_size;
3606         uint8_t config_dcb_rx = 0;
3607         uint8_t config_dcb_tx = 0;
3608         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3609         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3610         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3611         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3612         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3613         struct ixgbe_dcb_tc_config *tc;
3614         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3615         struct ixgbe_hw *hw =
3616                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3617
3618         switch (dev->data->dev_conf.rxmode.mq_mode) {
3619         case ETH_MQ_RX_VMDQ_DCB:
3620                 dcb_config->vt_mode = true;
3621                 if (hw->mac.type != ixgbe_mac_82598EB) {
3622                         config_dcb_rx = DCB_RX_CONFIG;
3623                         /*
3624                          *get dcb and VT rx configuration parameters
3625                          *from rte_eth_conf
3626                          */
3627                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3628                         /*Configure general VMDQ and DCB RX parameters*/
3629                         ixgbe_vmdq_dcb_configure(dev);
3630                 }
3631                 break;
3632         case ETH_MQ_RX_DCB:
3633         case ETH_MQ_RX_DCB_RSS:
3634                 dcb_config->vt_mode = false;
3635                 config_dcb_rx = DCB_RX_CONFIG;
3636                 /* Get DCB RX configuration parameters from rte_eth_conf */
3637                 ixgbe_dcb_rx_config(dev, dcb_config);
3638                 /*Configure general DCB RX parameters*/
3639                 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3640                 break;
3641         default:
3642                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3643                 break;
3644         }
3645         switch (dev->data->dev_conf.txmode.mq_mode) {
3646         case ETH_MQ_TX_VMDQ_DCB:
3647                 dcb_config->vt_mode = true;
3648                 config_dcb_tx = DCB_TX_CONFIG;
3649                 /* get DCB and VT TX configuration parameters
3650                  * from rte_eth_conf
3651                  */
3652                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3653                 /*Configure general VMDQ and DCB TX parameters*/
3654                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3655                 break;
3656
3657         case ETH_MQ_TX_DCB:
3658                 dcb_config->vt_mode = false;
3659                 config_dcb_tx = DCB_TX_CONFIG;
3660                 /*get DCB TX configuration parameters from rte_eth_conf*/
3661                 ixgbe_dcb_tx_config(dev, dcb_config);
3662                 /*Configure general DCB TX parameters*/
3663                 ixgbe_dcb_tx_hw_config(hw, dcb_config);
3664                 break;
3665         default:
3666                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3667                 break;
3668         }
3669
3670         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3671         /* Unpack map */
3672         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3673         if (nb_tcs == ETH_4_TCS) {
3674                 /* Avoid un-configured priority mapping to TC0 */
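                /*
                 * Sketch: if priorities 0-3 already map to TCs 0-3, the mask
                 * below ends up as 0xF0 and priorities 4-7 are re-pointed to
                 * TCs 4-7 instead of all falling back to TC0.
                 */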
3675                 uint8_t j = 4;
3676                 uint8_t mask = 0xFF;
3677
3678                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3679                         mask = (uint8_t)(mask & (~(1 << map[i])));
3680                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3681                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3682                                 map[j++] = i;
3683                         mask >>= 1;
3684                 }
3685                 /* Re-configure 4 TCs BW */
3686                 for (i = 0; i < nb_tcs; i++) {
3687                         tc = &dcb_config->tc_config[i];
3688                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3689                                                 (uint8_t)(100 / nb_tcs);
3690                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3691                                                 (uint8_t)(100 / nb_tcs);
3692                 }
3693                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3694                         tc = &dcb_config->tc_config[i];
3695                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3696                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3697                 }
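                /*
                 * Note: with 4 TCs the loops above give each enabled traffic
                 * class an equal 100 / 4 = 25% bandwidth group share in both
                 * the Tx and Rx directions, while the remaining traffic
                 * classes are left at 0%.
                 */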
3698         }
3699
3700         switch (hw->mac.type) {
3701         case ixgbe_mac_X550:
3702         case ixgbe_mac_X550EM_x:
3703         case ixgbe_mac_X550EM_a:
3704                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3705                 break;
3706         default:
3707                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3708                 break;
3709         }
3710
3711         if (config_dcb_rx) {
3712                 /* Set RX buffer size */
3713                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3714                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3715
3716                 for (i = 0; i < nb_tcs; i++) {
3717                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3718                 }
3719                 /* Zero the buffer size of all unused TCs */
3720                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3721                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3722                 }
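                /*
                 * Example of the Rx packet buffer split above (assuming the
                 * default 512 KB packet buffer, NIC_RX_BUFFER_SIZE): with
                 * 4 TCs each enabled TC receives a 128 KB RXPBSIZE
                 * allocation and the remaining entries are zeroed.
                 */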
3723         }
3724         if (config_dcb_tx) {
3725                 /* Only an equally distributed Tx packet buffer
3726                  * strategy is supported.
3727                  */
3728                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3729                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3730
3731                 for (i = 0; i < nb_tcs; i++) {
3732                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3733                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3734                 }
3735                 /* Clear unused TCs, if any, to zero buffer size */
3736                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3737                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3738                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3739                 }
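                /*
                 * The Tx packet buffer is likewise split evenly across the
                 * enabled TCs; each TXPBTHRESH is derived from the per-TC
                 * size minus headroom for one maximum-size packet
                 * (IXGBE_TXPKT_SIZE_MAX), assuming DCB_TX_PB converts the
                 * byte count into the register's KB units.
                 */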
3740         }
3741
3742         /* Calculate traffic class credits */
3743         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3744                                 IXGBE_DCB_TX_CONFIG);
3745         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3746                                 IXGBE_DCB_RX_CONFIG);
3747
3748         if (config_dcb_rx) {
3749                 /* Unpack CEE standard containers */
3750                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3751                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3752                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3753                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3754                 /* Configure PG(ETS) RX */
3755                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3756         }
3757
3758         if (config_dcb_tx) {
3759                 /* Unpack CEE standard containers */
3760                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3761                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3762                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3763                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3764                 /* Configure PG(ETS) TX */
3765                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3766         }
3767
3768         /* Configure queue statistics registers */
3769         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3770
3771         /* Check if the PFC is supported */
3772         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3773                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3774                 for (i = 0; i < nb_tcs; i++) {
3775                         /*
3776                          * If the TC count is 8, the default high_water is 48
3777                          * and the default low_water is 16.
3778                          */
3779                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3780                         hw->fc.low_water[i] = pbsize / 4;
3781                         /* Enable pfc for this TC */
3782                         tc = &dcb_config->tc_config[i];
3783                         tc->pfc = ixgbe_dcb_pfc_enabled;
3784                 }
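                /*
                 * Worked example for the watermarks above: with a 512 KB Rx
                 * packet buffer and 4 TCs, pbsize = 128, giving
                 * high_water = 96 and low_water = 32 per TC; with 8 TCs the
                 * same formula yields the 48/16 defaults noted above.
                 */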
3785                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3786                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3787                         pfc_en &= 0x0F;
3788                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3789         }
3790
3791         return ret;
3792 }
3793
3794 /**
3795  * ixgbe_configure_dcb - Configure DCB hardware
3796  * @dev: pointer to rte_eth_dev
3797  */
3798 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3799 {
3800         struct ixgbe_dcb_config *dcb_cfg =
3801                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3802         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3803
3804         PMD_INIT_FUNC_TRACE();
3805
3806         /* Check that the multi-queue mode is supported for DCB */
3807         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3808             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3809             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3810                 return;
3811
3812         if (dev->data->nb_rx_queues != ETH_DCB_NUM_QUEUES)
3813                 return;
3814
3815         /* Configure DCB hardware */
3816         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3817 }
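
/*
 * Illustrative only (not part of the driver): a minimal sketch of the
 * rte_eth_conf settings an application might use so that the DCB path
 * above actually programs the hardware. Field names come from the generic
 * ethdev API; the concrete values are an assumption.
 *
 *      struct rte_eth_conf conf;
 *
 *      memset(&conf, 0, sizeof(conf));
 *      conf.rxmode.mq_mode = ETH_MQ_RX_DCB;
 *      conf.txmode.mq_mode = ETH_MQ_TX_DCB;
 *      conf.rx_adv_conf.dcb_rx_conf.nb_tcs = ETH_4_TCS;
 *      conf.tx_adv_conf.dcb_tx_conf.nb_tcs = ETH_4_TCS;
 *
 * The number of Rx queues passed to rte_eth_dev_configure() must also
 * equal ETH_DCB_NUM_QUEUES, per the check in ixgbe_configure_dcb().
 */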
3818
3819 /*
3820  * VMDq is only supported on 10 GbE NICs.
3821  */
3822 static void
3823 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3824 {
3825         struct rte_eth_vmdq_rx_conf *cfg;
3826         struct ixgbe_hw *hw;
3827         enum rte_eth_nb_pools num_pools;
3828         uint32_t mrqc, vt_ctl, vlanctrl;
3829         uint32_t vmolr = 0;
3830         int i;
3831
3832         PMD_INIT_FUNC_TRACE();
3833         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3834         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3835         num_pools = cfg->nb_queue_pools;
3836
3837         ixgbe_rss_disable(dev);
3838
3839         /* MRQC: enable vmdq */
3840         mrqc = IXGBE_MRQC_VMDQEN;
3841         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3842
3843         /* PFVTCTL: turn on virtualisation and set the default pool */
3844         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3845         if (cfg->enable_default_pool)
3846                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3847         else
3848                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3849
3850         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3851
3852         for (i = 0; i < (int)num_pools; i++) {
3853                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3854                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3855         }
3856
3857         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3858         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3859         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3860         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3861
3862         /* VFTA - enable all vlan filters */
3863         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3864                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3865
3866         /* VFRE: enable receive for all pools (up to 64) */
3867         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3868         if (num_pools == ETH_64_POOLS)
3869                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3870
3871         /*
3872          * MPSAR - allow pools to read specific mac addresses
3873          * In this case, all pools should be able to read from mac addr 0
3874          */
3875         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3876         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3877
3878         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3879         for (i = 0; i < cfg->nb_pool_maps; i++) {
3880                 /* set vlan id in VF register and set the valid bit */
3881                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3882                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3883                 /*
3884                  * Put the allowed pools in VFB reg. As we only have 16 or 64
3885                  * pools, we only need to use the first half of the register
3886                  * i.e. bits 0-31
3887                  */
3888                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3889                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
3890                                         (cfg->pool_map[i].pools & UINT32_MAX));
3891                 else
3892                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
3893                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
3894
3895         }
3896
3897         /* PFDMA Tx General Switch Control: enable VMDQ loopback */
3898         if (cfg->enable_loop_back) {
3899                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3900                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3901                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3902         }
3903
3904         IXGBE_WRITE_FLUSH(hw);
3905 }
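
/*
 * Illustrative only: the pool layout programmed above comes from
 * dev_conf.rx_adv_conf.vmdq_rx_conf. A minimal sketch of what an
 * application might provide (field names are those used by the code
 * above; the values are an assumption):
 *
 *      struct rte_eth_vmdq_rx_conf *cfg =
 *              &port_conf.rx_adv_conf.vmdq_rx_conf;
 *
 *      cfg->nb_queue_pools = ETH_64_POOLS;
 *      cfg->enable_default_pool = 0;
 *      cfg->nb_pool_maps = 1;
 *      cfg->pool_map[0].vlan_id = 100;
 *      cfg->pool_map[0].pools = 1ULL << 5;    (map VLAN 100 to pool 5)
 */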
3906
3907 /*
3908  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3909  * @hw: pointer to hardware structure
3910  */
3911 static void
3912 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3913 {
3914         uint32_t reg;
3915         uint32_t q;
3916
3917         PMD_INIT_FUNC_TRACE();
3918         /* PF/VF Transmit Enable */
3919         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3920         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3921
3922         /* Disable the Tx desc arbiter so that MTQC can be changed */
3923         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3924         reg |= IXGBE_RTTDCS_ARBDIS;
3925         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3926
3927         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3928         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3929
3930         /* Disable drop for all queues */
3931         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3932                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3933                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3934
3935         /* Enable the Tx desc arbiter */
3936         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3937         reg &= ~IXGBE_RTTDCS_ARBDIS;
3938         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3939
3940         IXGBE_WRITE_FLUSH(hw);
3941 }
3942
3943 static int __attribute__((cold))
3944 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
3945 {
3946         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
3947         uint64_t dma_addr;
3948         unsigned int i;
3949
3950         /* Initialize software ring entries */
3951         for (i = 0; i < rxq->nb_rx_desc; i++) {
3952                 volatile union ixgbe_adv_rx_desc *rxd;
3953                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
3954
3955                 if (mbuf == NULL) {
3956                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
3957                                      (unsigned) rxq->queue_id);
3958                         return -ENOMEM;
3959                 }
3960
3961                 rte_mbuf_refcnt_set(mbuf, 1);
3962                 mbuf->next = NULL;
3963                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
3964                 mbuf->nb_segs = 1;
3965                 mbuf->port = rxq->port_id;
3966
3967                 dma_addr =
3968                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
3969                 rxd = &rxq->rx_ring[i];
3970                 rxd->read.hdr_addr = 0;
3971                 rxd->read.pkt_addr = dma_addr;
3972                 rxe[i].mbuf = mbuf;
3973         }
3974
3975         return 0;
3976 }
3977
3978 static int
3979 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
3980 {
3981         struct ixgbe_hw *hw;
3982         uint32_t mrqc;
3983
3984         ixgbe_rss_configure(dev);
3985
3986         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3987
3988         /* MRQC: enable VF RSS */
3989         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
3990         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
3991         switch (RTE_ETH_DEV_SRIOV(dev).active) {
3992         case ETH_64_POOLS:
3993                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
3994                 break;
3995
3996         case ETH_32_POOLS:
3997                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
3998                 break;
3999
4000         default:
4001                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4002                 return -EINVAL;
4003         }
4004
4005         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4006
4007         return 0;
4008 }
4009
4010 static int
4011 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4012 {
4013         struct ixgbe_hw *hw =
4014                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4015
4016         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4017         case ETH_64_POOLS:
4018                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4019                         IXGBE_MRQC_VMDQEN);
4020                 break;
4021
4022         case ETH_32_POOLS:
4023                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4024                         IXGBE_MRQC_VMDQRT4TCEN);
4025                 break;
4026
4027         case ETH_16_POOLS:
4028                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4029                         IXGBE_MRQC_VMDQRT8TCEN);
4030                 break;
4031         default:
4032                 PMD_INIT_LOG(ERR,
4033                         "invalid pool number in IOV mode");
4034                 break;
4035         }
4036         return 0;
4037 }
4038
4039 static int
4040 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4041 {
4042         struct ixgbe_hw *hw =
4043                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4044
4045         if (hw->mac.type == ixgbe_mac_82598EB)
4046                 return 0;
4047
4048         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4049                 /*
4050                  * SRIOV inactive scheme
4051                  * any DCB/RSS w/o VMDq multi-queue setting
4052                  */
4053                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4054                 case ETH_MQ_RX_RSS:
4055                 case ETH_MQ_RX_DCB_RSS:
4056                 case ETH_MQ_RX_VMDQ_RSS:
4057                         ixgbe_rss_configure(dev);
4058                         break;
4059
4060                 case ETH_MQ_RX_VMDQ_DCB:
4061                         ixgbe_vmdq_dcb_configure(dev);
4062                         break;
4063
4064                 case ETH_MQ_RX_VMDQ_ONLY:
4065                         ixgbe_vmdq_rx_hw_configure(dev);
4066                         break;
4067
4068                 case ETH_MQ_RX_NONE:
4069                 default:
4070                         /* If mq_mode is none, disable RSS. */
4071                         ixgbe_rss_disable(dev);
4072                         break;
4073                 }
4074         } else {
4075                 /*
4076                  * SRIOV active scheme
4077                  * Support RSS together with VMDq & SRIOV
4078                  */
4079                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4080                 case ETH_MQ_RX_RSS:
4081                 case ETH_MQ_RX_VMDQ_RSS:
4082                         ixgbe_config_vf_rss(dev);
4083                         break;
4084
4085                 /* FIXME if support DCB/RSS together with VMDq & SRIOV */
4086                 case ETH_MQ_RX_VMDQ_DCB:
4087                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4088                         PMD_INIT_LOG(ERR,
4089                                 "DCB with VMDq & SRIOV is not supported");
4090                         return -1;
4091                 default:
4092                         ixgbe_config_vf_default(dev);
4093                         break;
4094                 }
4095         }
4096
4097         return 0;
4098 }
4099
4100 static int
4101 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4102 {
4103         struct ixgbe_hw *hw =
4104                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4105         uint32_t mtqc;
4106         uint32_t rttdcs;
4107
4108         if (hw->mac.type == ixgbe_mac_82598EB)
4109                 return 0;
4110
4111         /* disable arbiter before setting MTQC */
4112         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4113         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4114         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4115
4116         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4117                 /*
4118                  * SRIOV inactive scheme
4119                  * any DCB w/o VMDq multi-queue setting
4120                  */
4121                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4122                         ixgbe_vmdq_tx_hw_configure(hw);
4123                 else {
4124                         mtqc = IXGBE_MTQC_64Q_1PB;
4125                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4126                 }
4127         } else {
4128                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4129
4130                 /*
4131                  * SRIOV active scheme
4132                  * FIXME if support DCB together with VMDq & SRIOV
4133                  */
4134                 case ETH_64_POOLS:
4135                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4136                         break;
4137                 case ETH_32_POOLS:
4138                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4139                         break;
4140                 case ETH_16_POOLS:
4141                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4142                                 IXGBE_MTQC_8TC_8TQ;
4143                         break;
4144                 default:
4145                         mtqc = IXGBE_MTQC_64Q_1PB;
4146                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4147                 }
4148                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4149         }
4150
4151         /* re-enable arbiter */
4152         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4153         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4154
4155         return 0;
4156 }
4157
4158 /**
4159  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4160  *
4161  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4162  * spec rev. 3.0 chapter 8.2.3.8.13.
4163  *
4164  * @pool Memory pool of the Rx queue
4165  */
4166 static inline uint32_t
4167 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4168 {
4169         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4170
4171         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4172         uint16_t maxdesc =
4173                 IPV4_MAX_PKT_LEN /
4174                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4175
4176         if (maxdesc >= 16)
4177                 return IXGBE_RSCCTL_MAXDESC_16;
4178         else if (maxdesc >= 8)
4179                 return IXGBE_RSCCTL_MAXDESC_8;
4180         else if (maxdesc >= 4)
4181                 return IXGBE_RSCCTL_MAXDESC_4;
4182         else
4183                 return IXGBE_RSCCTL_MAXDESC_1;
4184 }
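
/*
 * Example of the computation above (values are an assumption): with a
 * typical 2 KB mbuf data room and a 128-byte headroom, each buffer holds
 * roughly 1920 bytes of packet data, so maxdesc = 65535 / 1920 = 34 and
 * the function returns IXGBE_RSCCTL_MAXDESC_16, the largest allowed value.
 */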
4185
4186 /**
4187  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4188  * interrupt
4189  *
4190  * (Taken from FreeBSD tree)
4191  * (yes this is all very magic and confusing :)
4192  *
4193  * @dev port handle
4194  * @entry the register array entry
4195  * @vector the MSIX vector for this queue
4196  * @type RX/TX/MISC
4197  */
4198 static void
4199 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4200 {
4201         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4202         u32 ivar, index;
4203
4204         vector |= IXGBE_IVAR_ALLOC_VAL;
4205
4206         switch (hw->mac.type) {
4207
4208         case ixgbe_mac_82598EB:
4209                 if (type == -1)
4210                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4211                 else
4212                         entry += (type * 64);
4213                 index = (entry >> 2) & 0x1F;
4214                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4215                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4216                 ivar |= (vector << (8 * (entry & 0x3)));
4217                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4218                 break;
4219
4220         case ixgbe_mac_82599EB:
4221         case ixgbe_mac_X540:
4222                 if (type == -1) { /* MISC IVAR */
4223                         index = (entry & 1) * 8;
4224                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4225                         ivar &= ~(0xFF << index);
4226                         ivar |= (vector << index);
4227                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4228                 } else {        /* RX/TX IVARS */
4229                         index = (16 * (entry & 1)) + (8 * type);
4230                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4231                         ivar &= ~(0xFF << index);
4232                         ivar |= (vector << index);
4233                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4234                 }
4235
4236                 break;
4237
4238         default:
4239                 break;
4240         }
4241 }
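
/*
 * Layout note for the IVAR programming above: each 32-bit IVAR register
 * holds four 8-bit vector entries. On 82599/X540 a register covers a pair
 * of queues, with the Rx entry in byte 0 (even queue) or byte 2 (odd
 * queue) and the matching Tx entry in byte 1 or byte 3, which is what the
 * (16 * (entry & 1)) + (8 * type) index expression selects.
 */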
4242
4243 void __attribute__((cold))
4244 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4245 {
4246         uint16_t i, rx_using_sse;
4247         struct ixgbe_adapter *adapter =
4248                 (struct ixgbe_adapter *)dev->data->dev_private;
4249
4250         /*
4251          * Vector Rx can only be used when a few configuration
4252          * conditions are met and Rx Bulk Allocation is allowed.
4253          */
4254         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4255             !adapter->rx_bulk_alloc_allowed) {
4256                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4257                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4258                                     "not enabled",
4259                              dev->data->port_id);
4260
4261                 adapter->rx_vec_allowed = false;
4262         }
4263
4264         /*
4265          * Initialize the appropriate LRO callback.
4266          *
4267          * If all queues satisfy the bulk allocation preconditions
4268          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4269          * Otherwise use a single allocation version.
4270          */
4271         if (dev->data->lro) {
4272                 if (adapter->rx_bulk_alloc_allowed) {
4273                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4274                                            "allocation version");
4275                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4276                 } else {
4277                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4278                                            "allocation version");
4279                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4280                 }
4281         } else if (dev->data->scattered_rx) {
4282                 /*
4283                  * Set the non-LRO scattered callback: there are Vector and
4284                  * single allocation versions.
4285                  */
4286                 if (adapter->rx_vec_allowed) {
4287                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4288                                             "callback (port=%d).",
4289                                      dev->data->port_id);
4290
4291                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4292                 } else if (adapter->rx_bulk_alloc_allowed) {
4293                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4294                                            "allocation callback (port=%d).",
4295                                      dev->data->port_id);
4296                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4297                 } else {
4298                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4299                                             "single allocation) "
4300                                             "Scattered Rx callback "
4301                                             "(port=%d).",
4302                                      dev->data->port_id);
4303
4304                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4305                 }
4306         /*
4307          * Below we set "simple" callbacks according to port/queues parameters.
4308          * If parameters allow we are going to choose between the following
4309          * callbacks:
4310          *    - Vector
4311          *    - Bulk Allocation
4312          *    - Single buffer allocation (the simplest one)
4313          */
4314         } else if (adapter->rx_vec_allowed) {
4315                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4316                                     "burst size no less than %d (port=%d).",
4317                              RTE_IXGBE_DESCS_PER_LOOP,
4318                              dev->data->port_id);
4319
4320                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4321         } else if (adapter->rx_bulk_alloc_allowed) {
4322                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4323                                     "satisfied. Rx Burst Bulk Alloc function "
4324                                     "will be used on port=%d.",
4325                              dev->data->port_id);
4326
4327                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4328         } else {
4329                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4330                                     "satisfied, or Scattered Rx is requested "
4331                                     "(port=%d).",
4332                              dev->data->port_id);
4333
4334                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4335         }
4336
4337         /* Propagate information about RX function choice through all queues. */
4338
4339         rx_using_sse =
4340                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4341                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4342
4343         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4344                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4345
4346                 rxq->rx_using_sse = rx_using_sse;
4347         }
4348 }
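
/*
 * Summary of the selection order above: an LRO-enabled port always gets
 * one of the LRO callbacks; otherwise scattered Rx chooses between the
 * vector, bulk-allocation and single-allocation scattered variants; and a
 * plain port falls back from vector Rx to bulk allocation to the basic
 * ixgbe_recv_pkts() routine.
 */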
4349
4350 /**
4351  * ixgbe_set_rsc - configure RSC related port HW registers
4352  *
4353  * Configures the port's RSC related registers according to chapter 4.6.7.2
4354  * of the 82599 Spec (x540 configuration is virtually the same).
4355  *
4356  * @dev port handle
4357  *
4358  * Returns 0 in case of success or a non-zero error code
4359  */
4360 static int
4361 ixgbe_set_rsc(struct rte_eth_dev *dev)
4362 {
4363         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4364         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4365         struct rte_eth_dev_info dev_info = { 0 };
4366         bool rsc_capable = false;
4367         uint16_t i;
4368         uint32_t rdrxctl;
4369
4370         /* Sanity check */
4371         dev->dev_ops->dev_infos_get(dev, &dev_info);
4372         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4373                 rsc_capable = true;
4374
4375         if (!rsc_capable && rx_conf->enable_lro) {
4376                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4377                                    "support it");
4378                 return -EINVAL;
4379         }
4380
4381         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4382
4383         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4384                 /*
4385                  * According to chapter 4.6.7.2.1 of the Spec Rev. 3.0,
4386                  * RSC requires HW CRC stripping to be enabled. If the
4387                  * user requested HW CRC stripping off and RSC on,
4388                  * return an error.
4389                  */
4390                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4391                                     "is disabled");
4392                 return -EINVAL;
4393         }
4394
4395         /* RFCTL configuration  */
4396         if (rsc_capable) {
4397                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4398
4399                 if (rx_conf->enable_lro)
4400                         /*
4401                          * Since NFS packet coalescing is not supported, clear
4402                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4403                          * enabled.
4404                          */
4405                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4406                                    IXGBE_RFCTL_NFSR_DIS);
4407                 else
4408                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4409
4410                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4411         }
4412
4413         /* If LRO hasn't been requested - we are done here. */
4414         if (!rx_conf->enable_lro)
4415                 return 0;
4416
4417         /* Set RDRXCTL.RSCACKC bit */
4418         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4419         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4420         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4421
4422         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4423         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4424                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4425                 uint32_t srrctl =
4426                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4427                 uint32_t rscctl =
4428                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4429                 uint32_t psrtype =
4430                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4431                 uint32_t eitr =
4432                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4433
4434                 /*
4435                  * ixgbe PMD doesn't support header-split at the moment.
4436                  *
4437                  * Following chapter 4.6.7.2.1 of the 82599/x540 Spec,
4438                  * if RSC is enabled the SRRCTL[n].BSIZEHEADER field
4439                  * should be configured even if header split is not
4440                  * enabled. We configure it to 128 bytes, following the
4441                  * recommendation in the spec.
4442                  */
4443                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4444                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4445                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4446
4447                 /*
4448                  * TODO: Consider setting the Receive Descriptor Minimum
4449                  * Threshold Size for the RSC case. This is not an obviously
4450                  * beneficial option, but one worth considering...
4451                  */
4452
4453                 rscctl |= IXGBE_RSCCTL_RSCEN;
4454                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4455                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4456
4457                 /*
4458                  * RSC: Set ITR interval corresponding to 2K ints/s.
4459                  *
4460                  * Full-sized RSC aggregations for a 10Gb/s link will
4461                  * arrive at about 20K aggregation/s rate.
4462                  *
4463                  * A 2K ints/s rate will cause only 10% of the
4464                  * aggregations to be closed due to interrupt timer
4465                  * expiration when streaming at wire speed.
4466                  *
4467                  * For a sparse streaming case this setting will yield
4468                  * at most 500us latency for a single RSC aggregation.
4469                  */
4470                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4471                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4472
4473                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4474                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4475                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4476                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4477
4478                 /*
4479                  * RSC requires the mapping of the queue to the
4480                  * interrupt vector.
4481                  */
4482                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4483         }
4484
4485         dev->data->lro = 1;
4486
4487         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4488
4489         return 0;
4490 }
4491
4492 /*
4493  * Initializes Receive Unit.
4494  */
4495 int __attribute__((cold))
4496 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4497 {
4498         struct ixgbe_hw     *hw;
4499         struct ixgbe_rx_queue *rxq;
4500         uint64_t bus_addr;
4501         uint32_t rxctrl;
4502         uint32_t fctrl;
4503         uint32_t hlreg0;
4504         uint32_t maxfrs;
4505         uint32_t srrctl;
4506         uint32_t rdrxctl;
4507         uint32_t rxcsum;
4508         uint16_t buf_size;
4509         uint16_t i;
4510         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4511         int rc;
4512
4513         PMD_INIT_FUNC_TRACE();
4514         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4515
4516         /*
4517          * Make sure receives are disabled while setting
4518          * up the RX context (registers, descriptor rings, etc.).
4519          */
4520         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4521         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4522
4523         /* Enable receipt of broadcast frames */
4524         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4525         fctrl |= IXGBE_FCTRL_BAM;
4526         fctrl |= IXGBE_FCTRL_DPF;
4527         fctrl |= IXGBE_FCTRL_PMCF;
4528         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4529
4530         /*
4531          * Configure CRC stripping, if any.
4532          */
4533         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4534         if (rx_conf->hw_strip_crc)
4535                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4536         else
4537                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4538
4539         /*
4540          * Configure jumbo frame support, if any.
4541          */
4542         if (rx_conf->jumbo_frame == 1) {
4543                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4544                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4545                 maxfrs &= 0x0000FFFF;
4546                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4547                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4548         } else
4549                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4550
4551         /*
4552          * If loopback mode is configured for 82599, set LPBK bit.
4553          */
4554         if (hw->mac.type == ixgbe_mac_82599EB &&
4555                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4556                 hlreg0 |= IXGBE_HLREG0_LPBK;
4557         else
4558                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4559
4560         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4561
4562         /* Setup RX queues */
4563         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4564                 rxq = dev->data->rx_queues[i];
4565
4566                 /*
4567                  * Reset crc_len in case it was changed after queue setup by a
4568                  * call to configure.
4569                  */
4570                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4571
4572                 /* Setup the Base and Length of the Rx Descriptor Rings */
4573                 bus_addr = rxq->rx_ring_phys_addr;
4574                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4575                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4576                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4577                                 (uint32_t)(bus_addr >> 32));
4578                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4579                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4580                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4581                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4582
4583                 /* Configure the SRRCTL register */
4584 #ifdef RTE_HEADER_SPLIT_ENABLE
4585                 /*
4586                  * Configure Header Split
4587                  */
4588                 if (rx_conf->header_split) {
4589                         if (hw->mac.type == ixgbe_mac_82599EB) {
4590                                 /* Must setup the PSRTYPE register */
4591                                 uint32_t psrtype;
4592
4593                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4594                                         IXGBE_PSRTYPE_UDPHDR   |
4595                                         IXGBE_PSRTYPE_IPV4HDR  |
4596                                         IXGBE_PSRTYPE_IPV6HDR;
4597                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4598                         }
4599                         srrctl = ((rx_conf->split_hdr_size <<
4600                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4601                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4602                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4603                 } else
4604 #endif
4605                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4606
4607                 /* Set if packets are dropped when no descriptors available */
4608                 if (rxq->drop_en)
4609                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4610
4611                 /*
4612                  * Configure the RX buffer size in the BSIZEPACKET field of
4613                  * the SRRCTL register of the queue.
4614                  * The value is in 1 KB resolution. Valid values can be from
4615                  * 1 KB to 16 KB.
4616                  */
4617                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4618                         RTE_PKTMBUF_HEADROOM);
4619                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4620                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4621
4622                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4623
4624                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4625                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4626
4627                 /* Add dual VLAN tag length to support double VLAN (QinQ) frames */
4628                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4629                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4630                         dev->data->scattered_rx = 1;
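                /*
                 * Example of the scattered Rx decision above: with 2 KB mbuf
                 * data buffers BSIZEPKT is 2 KB, so a standard 1518-byte
                 * max_rx_pkt_len plus 8 bytes for two VLAN tags still fits
                 * in a single buffer, while a 9000-byte jumbo setting makes
                 * the port fall back to scattered Rx.
                 */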
4631         }
4632
4633         if (rx_conf->enable_scatter)
4634                 dev->data->scattered_rx = 1;
4635
4636         /*
4637          * Device configured with multiple RX queues.
4638          */
4639         ixgbe_dev_mq_rx_configure(dev);
4640
4641         /*
4642          * Setup the Checksum Register.
4643          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4644          * Enable IP/L4 checksum computation by hardware if requested to do so.
4645          */
4646         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4647         rxcsum |= IXGBE_RXCSUM_PCSD;
4648         if (rx_conf->hw_ip_checksum)
4649                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4650         else
4651                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4652
4653         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4654
4655         if (hw->mac.type == ixgbe_mac_82599EB ||
4656             hw->mac.type == ixgbe_mac_X540) {
4657                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4658                 if (rx_conf->hw_strip_crc)
4659                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4660                 else
4661                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4662                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4663                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4664         }
4665
4666         rc = ixgbe_set_rsc(dev);
4667         if (rc)
4668                 return rc;
4669
4670         ixgbe_set_rx_function(dev);
4671
4672         return 0;
4673 }
4674
4675 /*
4676  * Initializes Transmit Unit.
4677  */
4678 void __attribute__((cold))
4679 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4680 {
4681         struct ixgbe_hw     *hw;
4682         struct ixgbe_tx_queue *txq;
4683         uint64_t bus_addr;
4684         uint32_t hlreg0;
4685         uint32_t txctrl;
4686         uint16_t i;
4687
4688         PMD_INIT_FUNC_TRACE();
4689         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4690
4691         /* Enable TX CRC (checksum offload requirement) and hw padding
4692          * (TSO requirement)
4693          */
4694         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4695         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4696         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4697
4698         /* Setup the Base and Length of the Tx Descriptor Rings */
4699         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4700                 txq = dev->data->tx_queues[i];
4701
4702                 bus_addr = txq->tx_ring_phys_addr;
4703                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4704                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4705                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4706                                 (uint32_t)(bus_addr >> 32));
4707                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4708                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4709                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4710                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4711                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4712
4713                 /*
4714                  * Disable the Tx Head Writeback RO bit, since it breaks
4715                  * bookkeeping if descriptors are not completed in order.
4716                  */
4717                 switch (hw->mac.type) {
4718                 case ixgbe_mac_82598EB:
4719                         txctrl = IXGBE_READ_REG(hw,
4720                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4721                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4722                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4723                                         txctrl);
4724                         break;
4725
4726                 case ixgbe_mac_82599EB:
4727                 case ixgbe_mac_X540:
4728                 case ixgbe_mac_X550:
4729                 case ixgbe_mac_X550EM_x:
4730                 case ixgbe_mac_X550EM_a:
4731                 default:
4732                         txctrl = IXGBE_READ_REG(hw,
4733                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4734                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4735                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4736                                         txctrl);
4737                         break;
4738                 }
4739         }
4740
4741         /* Device configured with multiple TX queues. */
4742         ixgbe_dev_mq_tx_configure(dev);
4743 }
4744
4745 /*
4746  * Set up link for 82599 loopback mode Tx->Rx.
4747  */
4748 static inline void __attribute__((cold))
4749 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4750 {
4751         PMD_INIT_FUNC_TRACE();
4752
4753         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4754                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4755                                 IXGBE_SUCCESS) {
4756                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4757                         /* ignore error */
4758                         return;
4759                 }
4760         }
4761
4762         /* Restart link */
4763         IXGBE_WRITE_REG(hw,
4764                         IXGBE_AUTOC,
4765                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4766         ixgbe_reset_pipeline_82599(hw);
4767
4768         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4769         msec_delay(50);
4770 }
4771
4772
4773 /*
4774  * Start Transmit and Receive Units.
4775  */
4776 int __attribute__((cold))
4777 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4778 {
4779         struct ixgbe_hw     *hw;
4780         struct ixgbe_tx_queue *txq;
4781         struct ixgbe_rx_queue *rxq;
4782         uint32_t txdctl;
4783         uint32_t dmatxctl;
4784         uint32_t rxctrl;
4785         uint16_t i;
4786         int ret = 0;
4787
4788         PMD_INIT_FUNC_TRACE();
4789         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4790
4791         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4792                 txq = dev->data->tx_queues[i];
4793                 /* Setup Transmit Threshold Registers */
4794                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4795                 txdctl |= txq->pthresh & 0x7F;
4796                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4797                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4798                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4799         }
4800
4801         if (hw->mac.type != ixgbe_mac_82598EB) {
4802                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4803                 dmatxctl |= IXGBE_DMATXCTL_TE;
4804                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4805         }
4806
4807         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4808                 txq = dev->data->tx_queues[i];
4809                 if (!txq->tx_deferred_start) {
4810                         ret = ixgbe_dev_tx_queue_start(dev, i);
4811                         if (ret < 0)
4812                                 return ret;
4813                 }
4814         }
4815
4816         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4817                 rxq = dev->data->rx_queues[i];
4818                 if (!rxq->rx_deferred_start) {
4819                         ret = ixgbe_dev_rx_queue_start(dev, i);
4820                         if (ret < 0)
4821                                 return ret;
4822                 }
4823         }
4824
4825         /* Enable Receive engine */
4826         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4827         if (hw->mac.type == ixgbe_mac_82598EB)
4828                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4829         rxctrl |= IXGBE_RXCTRL_RXEN;
4830         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4831
4832         /* If loopback mode is enabled for 82599, set up the link accordingly */
4833         if (hw->mac.type == ixgbe_mac_82599EB &&
4834                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4835                 ixgbe_setup_loopback_link_82599(hw);
4836
4837         return 0;
4838 }
4839
4840 /*
4841  * Start Receive Units for specified queue.
4842  */
4843 int __attribute__((cold))
4844 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4845 {
4846         struct ixgbe_hw     *hw;
4847         struct ixgbe_rx_queue *rxq;
4848         uint32_t rxdctl;
4849         int poll_ms;
4850
4851         PMD_INIT_FUNC_TRACE();
4852         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4853
4854         if (rx_queue_id < dev->data->nb_rx_queues) {
4855                 rxq = dev->data->rx_queues[rx_queue_id];
4856
4857                 /* Allocate buffers for descriptor rings */
4858                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4859                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4860                                      rx_queue_id);
4861                         return -1;
4862                 }
4863                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4864                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4865                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4866
4867                 /* Wait until RX Enable ready */
4868                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4869                 do {
4870                         rte_delay_ms(1);
4871                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4872                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4873                 if (!poll_ms)
4874                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4875                                      rx_queue_id);
4876                 rte_wmb();
4877                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4878                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4879                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4880         } else
4881                 return -1;
4882
4883         return 0;
4884 }
4885
4886 /*
4887  * Stop Receive Units for specified queue.
4888  */
4889 int __attribute__((cold))
4890 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4891 {
4892         struct ixgbe_hw     *hw;
4893         struct ixgbe_adapter *adapter =
4894                 (struct ixgbe_adapter *)dev->data->dev_private;
4895         struct ixgbe_rx_queue *rxq;
4896         uint32_t rxdctl;
4897         int poll_ms;
4898
4899         PMD_INIT_FUNC_TRACE();
4900         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4901
4902         if (rx_queue_id < dev->data->nb_rx_queues) {
4903                 rxq = dev->data->rx_queues[rx_queue_id];
4904
4905                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4906                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4907                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4908
4909                 /* Wait until RX Enable bit clear */
4910                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4911                 do {
4912                         rte_delay_ms(1);
4913                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4914                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4915                 if (!poll_ms)
4916                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4917                                      rx_queue_id);
4918
4919                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4920
4921                 ixgbe_rx_queue_release_mbufs(rxq);
4922                 ixgbe_reset_rx_queue(adapter, rxq);
4923                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4924         } else
4925                 return -1;
4926
4927         return 0;
4928 }
4929
4930
4931 /*
4932  * Start Transmit Units for specified queue.
4933  */
4934 int __attribute__((cold))
4935 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4936 {
4937         struct ixgbe_hw     *hw;
4938         struct ixgbe_tx_queue *txq;
4939         uint32_t txdctl;
4940         int poll_ms;
4941
4942         PMD_INIT_FUNC_TRACE();
4943         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4944
4945         if (tx_queue_id < dev->data->nb_tx_queues) {
4946                 txq = dev->data->tx_queues[tx_queue_id];
4947                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4948                 txdctl |= IXGBE_TXDCTL_ENABLE;
4949                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4950
4951                 /* Wait until TX Enable ready */
4952                 if (hw->mac.type == ixgbe_mac_82599EB) {
4953                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4954                         do {
4955                                 rte_delay_ms(1);
4956                                 txdctl = IXGBE_READ_REG(hw,
4957                                         IXGBE_TXDCTL(txq->reg_idx));
4958                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4959                         if (!poll_ms)
4960                                 PMD_INIT_LOG(ERR, "Could not enable "
4961                                              "Tx Queue %d", tx_queue_id);
4962                 }
4963                 rte_wmb();
4964                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4965                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4966                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4967         } else
4968                 return -1;
4969
4970         return 0;
4971 }
4972
4973 /*
4974  * Stop Transmit Units for specified queue.
4975  */
4976 int __attribute__((cold))
4977 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4978 {
4979         struct ixgbe_hw     *hw;
4980         struct ixgbe_tx_queue *txq;
4981         uint32_t txdctl;
4982         uint32_t txtdh, txtdt;
4983         int poll_ms;
4984
4985         PMD_INIT_FUNC_TRACE();
4986         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4987
4988         if (tx_queue_id >= dev->data->nb_tx_queues)
4989                 return -1;
4990
4991         txq = dev->data->tx_queues[tx_queue_id];
4992
4993         /* Wait until TX queue is empty */
4994         if (hw->mac.type == ixgbe_mac_82599EB) {
4995                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4996                 do {
4997                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
4998                         txtdh = IXGBE_READ_REG(hw,
4999                                                IXGBE_TDH(txq->reg_idx));
5000                         txtdt = IXGBE_READ_REG(hw,
5001                                                IXGBE_TDT(txq->reg_idx));
5002                 } while (--poll_ms && (txtdh != txtdt));
5003                 if (!poll_ms)
5004                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
5005                                      "when stopping.", tx_queue_id);
5006         }
5007
5008         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5009         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5010         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5011
5012         /* Wait until TX Enable bit clear */
5013         if (hw->mac.type == ixgbe_mac_82599EB) {
5014                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5015                 do {
5016                         rte_delay_ms(1);
5017                         txdctl = IXGBE_READ_REG(hw,
5018                                                 IXGBE_TXDCTL(txq->reg_idx));
5019                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5020                 if (!poll_ms)
5021                         PMD_INIT_LOG(ERR, "Could not disable "
5022                                      "Tx Queue %d", tx_queue_id);
5023         }
5024
5025         if (txq->ops != NULL) {
5026                 txq->ops->release_mbufs(txq);
5027                 txq->ops->reset(txq);
5028         }
5029         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5030
5031         return 0;
5032 }
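/*
 * Application-level usage sketch (not part of this driver): per-queue Tx
 * start/stop is normally driven through the generic ethdev API, which
 * dispatches to the handlers above. Queues configured with tx_deferred_start
 * are not started by rte_eth_dev_start() and rely on this path. port_id and
 * queue_id below are placeholders.
 *
 *     if (rte_eth_dev_tx_queue_stop(port_id, queue_id) != 0)
 *             printf("failed to stop Tx queue %u\n", queue_id);
 *     ... reconfigure or drain ...
 *     if (rte_eth_dev_tx_queue_start(port_id, queue_id) != 0)
 *             printf("failed to start Tx queue %u\n", queue_id);
 */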
5033
5034 void
5035 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5036         struct rte_eth_rxq_info *qinfo)
5037 {
5038         struct ixgbe_rx_queue *rxq;
5039
5040         rxq = dev->data->rx_queues[queue_id];
5041
5042         qinfo->mp = rxq->mb_pool;
5043         qinfo->scattered_rx = dev->data->scattered_rx;
5044         qinfo->nb_desc = rxq->nb_rx_desc;
5045
5046         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5047         qinfo->conf.rx_drop_en = rxq->drop_en;
5048         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5049 }
5050
5051 void
5052 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5053         struct rte_eth_txq_info *qinfo)
5054 {
5055         struct ixgbe_tx_queue *txq;
5056
5057         txq = dev->data->tx_queues[queue_id];
5058
5059         qinfo->nb_desc = txq->nb_tx_desc;
5060
5061         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5062         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5063         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5064
5065         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5066         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5067         qinfo->conf.txq_flags = txq->txq_flags;
5068         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5069 }
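/*
 * Usage sketch (application side, not part of this driver): the two callbacks
 * above back the generic rte_eth_rx_queue_info_get() and
 * rte_eth_tx_queue_info_get() API. port_id is a placeholder.
 *
 *     struct rte_eth_rxq_info rx_info;
 *     struct rte_eth_txq_info tx_info;
 *
 *     if (rte_eth_rx_queue_info_get(port_id, 0, &rx_info) == 0)
 *             printf("rxq0: %u descriptors, rx_free_thresh %u\n",
 *                    rx_info.nb_desc, rx_info.conf.rx_free_thresh);
 *     if (rte_eth_tx_queue_info_get(port_id, 0, &tx_info) == 0)
 *             printf("txq0: %u descriptors, tx_rs_thresh %u\n",
 *                    tx_info.nb_desc, tx_info.conf.tx_rs_thresh);
 */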
5070
5071 /*
5072  * [VF] Initializes Receive Unit.
5073  */
5074 int __attribute__((cold))
5075 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5076 {
5077         struct ixgbe_hw     *hw;
5078         struct ixgbe_rx_queue *rxq;
5079         uint64_t bus_addr;
5080         uint32_t srrctl, psrtype = 0;
5081         uint16_t buf_size;
5082         uint16_t i;
5083         int ret;
5084
5085         PMD_INIT_FUNC_TRACE();
5086         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5087
5088         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5089                 PMD_INIT_LOG(ERR, "Invalid number of Rx queues, "
5090                         "it must be a power of 2");
5091                 return -1;
5092         }
5093
5094         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5095                 PMD_INIT_LOG(ERR, "Invalid number of Rx queues, "
5096                         "it must be less than or equal to %d",
5097                         hw->mac.max_rx_queues);
5098                 return -1;
5099         }
5100
5101         /*
5102          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5103          * disables VF packet reception if the PF MTU is > 1500.
5104          * This is done to deal with an 82599 limitation that forces
5105          * the PF and all VFs to share the same MTU.
5106          * The PF driver re-enables VF packet reception only when the VF
5107          * driver issues an IXGBE_VF_SET_LPE request.
5108          * In the meantime, the VF device cannot be used, even if the VF driver
5109          * and the Guest VM network stack are ready to accept packets with a
5110          * size up to the PF MTU.
5111          * As a work-around to this PF behaviour, force a call to
5112          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5113          * VF packet reception works in all cases.
5114          */
5115         ixgbevf_rlpml_set_vf(hw,
5116                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5117
5118         /* Setup RX queues */
5119         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5120                 rxq = dev->data->rx_queues[i];
5121
5122                 /* Allocate buffers for descriptor rings */
5123                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5124                 if (ret)
5125                         return ret;
5126
5127                 /* Setup the Base and Length of the Rx Descriptor Rings */
5128                 bus_addr = rxq->rx_ring_phys_addr;
5129
5130                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5131                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5132                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5133                                 (uint32_t)(bus_addr >> 32));
5134                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5135                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5136                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5137                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5138
5139
5140                 /* Configure the SRRCTL register */
5141 #ifdef RTE_HEADER_SPLIT_ENABLE
5142                 /*
5143                  * Configure Header Split
5144                  */
5145                 if (dev->data->dev_conf.rxmode.header_split) {
5146                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5147                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5148                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5149                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5150                 } else
5151 #endif
5152                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5153
5154                 /* Set if packets are to be dropped when no descriptors are available */
5155                 if (rxq->drop_en)
5156                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5157
5158                 /*
5159                  * Configure the RX buffer size in the BSIZEPACKET field of
5160                  * the SRRCTL register of the queue.
5161                  * The value is in 1 KB resolution. Valid values can be from
5162                  * 1 KB to 16 KB.
5163                  */
5164                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5165                         RTE_PKTMBUF_HEADROOM);
5166                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5167                            IXGBE_SRRCTL_BSIZEPKT_MASK);
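                /*
                 * Worked example (assuming the common default mbuf pool with a
                 * 2048-byte data room plus 128 bytes of headroom): buf_size =
                 * 2176 - 128 = 2048, and 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT
                 * (1 KB units) = 2, so a 2 KB BSIZEPACKET value is programmed.
                 */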
5168
5169                 /*
5170                  * VF modification to write virtual function SRRCTL register
5171                  */
5172                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5173
5174                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5175                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5176
5177                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5178                     /* allow room for two VLAN tags (double VLAN) */
5179                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5180                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5181                         if (!dev->data->scattered_rx)
5182                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5183                         dev->data->scattered_rx = 1;
5184                 }
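                /*
                 * Example of the check above with illustrative values: for
                 * buf_size = 2048 and the default max_rx_pkt_len of 1518,
                 * 1518 + 2 * 4 = 1526 <= 2048 and single-buffer Rx is kept;
                 * for a 9000-byte jumbo max_rx_pkt_len, 9008 > 2048 and
                 * scattered Rx is forced.
                 */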
5185         }
5186
5187 #ifdef RTE_HEADER_SPLIT_ENABLE
5188         if (dev->data->dev_conf.rxmode.header_split)
5189                 /* Must setup the PSRTYPE register */
5190                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5191                         IXGBE_PSRTYPE_UDPHDR   |
5192                         IXGBE_PSRTYPE_IPV4HDR  |
5193                         IXGBE_PSRTYPE_IPV6HDR;
5194 #endif
5195
5196         /* Set RQPL for VF RSS according to the number of Rx queues */
5197         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5198                 IXGBE_PSRTYPE_RQPL_SHIFT;
5199         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
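        /*
         * For the power-of-2 Rx queue counts a VF uses with RSS (1, 2 or 4),
         * nb_rx_queues >> 1 evaluates to 0, 1 or 2 respectively, and that
         * value is what ends up in the RQPL field of VFPSRTYPE.
         */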
5200
5201         ixgbe_set_rx_function(dev);
5202
5203         return 0;
5204 }
5205
5206 /*
5207  * [VF] Initializes Transmit Unit.
5208  */
5209 void __attribute__((cold))
5210 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5211 {
5212         struct ixgbe_hw     *hw;
5213         struct ixgbe_tx_queue *txq;
5214         uint64_t bus_addr;
5215         uint32_t txctrl;
5216         uint16_t i;
5217
5218         PMD_INIT_FUNC_TRACE();
5219         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5220
5221         /* Setup the Base and Length of the Tx Descriptor Rings */
5222         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5223                 txq = dev->data->tx_queues[i];
5224                 bus_addr = txq->tx_ring_phys_addr;
5225                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5226                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5227                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5228                                 (uint32_t)(bus_addr >> 32));
5229                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5230                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5231                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5232                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5233                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5234
5235                 /*
5236                  * Disable the Tx head write-back relaxed ordering (RO) bit;
5237                  * out-of-order write-backs would corrupt descriptor bookkeeping.
5238                  */
5239                 txctrl = IXGBE_READ_REG(hw,
5240                                 IXGBE_VFDCA_TXCTRL(i));
5241                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5242                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5243                                 txctrl);
5244         }
5245 }
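/*
 * Note on the base-address split above (the VF Rx path in ixgbevf_dev_rx_init
 * does the same): a 64-bit ring address such as 0x123456000, used here purely
 * as an illustrative value, is programmed as VFTDBAL = 0x23456000 (low 32
 * bits) and VFTDBAH = 0x1 (high 32 bits).
 */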
5246
5247 /*
5248  * [VF] Start Transmit and Receive Units.
5249  */
5250 void __attribute__((cold))
5251 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5252 {
5253         struct ixgbe_hw     *hw;
5254         struct ixgbe_tx_queue *txq;
5255         struct ixgbe_rx_queue *rxq;
5256         uint32_t txdctl;
5257         uint32_t rxdctl;
5258         uint16_t i;
5259         int poll_ms;
5260
5261         PMD_INIT_FUNC_TRACE();
5262         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5263
5264         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5265                 txq = dev->data->tx_queues[i];
5266                 /* Setup Transmit Threshold Registers */
5267                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5268                 txdctl |= txq->pthresh & 0x7F;
5269                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5270                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5271                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5272         }
5273
5274         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5275
5276                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5277                 txdctl |= IXGBE_TXDCTL_ENABLE;
5278                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5279
5280                 poll_ms = 10;
5281                 /* Wait until TX Enable ready */
5282                 do {
5283                         rte_delay_ms(1);
5284                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5285                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5286                 if (!poll_ms)
5287                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5288         }
5289         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5290
5291                 rxq = dev->data->rx_queues[i];
5292
5293                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5294                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5295                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5296
5297                 /* Wait until RX Enable ready */
5298                 poll_ms = 10;
5299                 do {
5300                         rte_delay_ms(1);
5301                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5302                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5303                 if (!poll_ms)
5304                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5305                 rte_wmb();
5306                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5307
5308         }
5309 }
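/*
 * Note: each enable loop above polls at most poll_ms = 10 times with a 1 ms
 * delay, so a queue that never reports its ENABLE bit is given up on after
 * roughly 10 ms; only an error is logged, since the function returns void.
 */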
5310
5311 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
5312 int __attribute__((weak))
5313 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5314 {
5315         return -1;
5316 }
5317
5318 uint16_t __attribute__((weak))
5319 ixgbe_recv_pkts_vec(
5320         void __rte_unused *rx_queue,
5321         struct rte_mbuf __rte_unused **rx_pkts,
5322         uint16_t __rte_unused nb_pkts)
5323 {
5324         return 0;
5325 }
5326
5327 uint16_t __attribute__((weak))
5328 ixgbe_recv_scattered_pkts_vec(
5329         void __rte_unused *rx_queue,
5330         struct rte_mbuf __rte_unused **rx_pkts,
5331         uint16_t __rte_unused nb_pkts)
5332 {
5333         return 0;
5334 }
5335
5336 int __attribute__((weak))
5337 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5338 {
5339         return -1;
5340 }
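/*
 * How the weak stubs above work (illustrative sketch, not driver code): when
 * the vector Rx/Tx object is built, its strong definitions replace these at
 * link time; otherwise the weak fallbacks satisfy the linker. do_rx() below
 * is a hypothetical name used only for this example.
 *
 *     // fallback.c
 *     int __attribute__((weak)) do_rx(void) { return -1; }
 *     // fast.c - compiled only when vector support is enabled
 *     int do_rx(void) { return 0; }   // strong definition overrides the weak one
 */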