1e0789595f403691f4b40dd291a2cadc4a1059a9
[deb_dpdk.git] / drivers / net / ixgbe / ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73 #include <rte_net.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 #ifdef RTE_LIBRTE_IEEE1588
84 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
85 #else
86 #define IXGBE_TX_IEEE1588_TMST 0
87 #endif
88 /* Bit Mask to indicate what bits required for building TX context */
89 #define IXGBE_TX_OFFLOAD_MASK (                  \
90                 PKT_TX_VLAN_PKT |                \
91                 PKT_TX_IP_CKSUM |                \
92                 PKT_TX_L4_MASK |                 \
93                 PKT_TX_TCP_SEG |                 \
94                 PKT_TX_MACSEC |                  \
95                 PKT_TX_OUTER_IP_CKSUM |          \
96                 IXGBE_TX_IEEE1588_TMST)
97
98 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
99                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
100
101 #if 1
102 #define RTE_PMD_USE_PREFETCH
103 #endif
104
105 #ifdef RTE_PMD_USE_PREFETCH
106 /*
107  * Prefetch a cache line into all cache levels.
108  */
109 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
110 #else
111 #define rte_ixgbe_prefetch(p)   do {} while (0)
112 #endif
113
114 #ifdef RTE_IXGBE_INC_VECTOR
115 uint16_t ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
116                                     uint16_t nb_pkts);
117 #endif
118
119 /*********************************************************************
120  *
121  *  TX functions
122  *
123  **********************************************************************/
124
125 /*
126  * Check for descriptors with their DD bit set and free mbufs.
127  * Return the total number of buffers freed.
128  */
129 static inline int __attribute__((always_inline))
130 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
131 {
132         struct ixgbe_tx_entry *txep;
133         uint32_t status;
134         int i, nb_free = 0;
135         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
136
137         /* check DD bit on threshold descriptor */
138         status = txq->tx_ring[txq->tx_next_dd].wb.status;
139         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
140                 return 0;
141
142         /*
143          * first buffer to free from S/W ring is at index
144          * tx_next_dd - (tx_rs_thresh-1)
145          */
146         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
147
148         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
149                 /* free buffers one at a time */
150                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
151                 txep->mbuf = NULL;
152
153                 if (unlikely(m == NULL))
154                         continue;
155
156                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
157                     (nb_free > 0 && m->pool != free[0]->pool)) {
158                         rte_mempool_put_bulk(free[0]->pool,
159                                              (void **)free, nb_free);
160                         nb_free = 0;
161                 }
162
163                 free[nb_free++] = m;
164         }
165
166         if (nb_free > 0)
167                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
168
169         /* buffers were freed, update counters */
170         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
171         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
172         if (txq->tx_next_dd >= txq->nb_tx_desc)
173                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
174
175         return txq->tx_rs_thresh;
176 }
177
178 /* Populate 4 descriptors with data from 4 mbufs */
179 static inline void
180 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
181 {
182         uint64_t buf_dma_addr;
183         uint32_t pkt_len;
184         int i;
185
186         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
187                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
188                 pkt_len = (*pkts)->data_len;
189
190                 /* write data to descriptor */
191                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
192
193                 txdp->read.cmd_type_len =
194                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
195
196                 txdp->read.olinfo_status =
197                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
198
199                 rte_prefetch0(&(*pkts)->pool);
200         }
201 }
202
203 /* Populate 1 descriptor with data from 1 mbuf */
204 static inline void
205 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
206 {
207         uint64_t buf_dma_addr;
208         uint32_t pkt_len;
209
210         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
211         pkt_len = (*pkts)->data_len;
212
213         /* write data to descriptor */
214         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
215         txdp->read.cmd_type_len =
216                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
217         txdp->read.olinfo_status =
218                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
219         rte_prefetch0(&(*pkts)->pool);
220 }
221
222 /*
223  * Fill H/W descriptor ring with mbuf data.
224  * Copy mbuf pointers to the S/W ring.
225  */
226 static inline void
227 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
228                       uint16_t nb_pkts)
229 {
230         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
231         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
232         const int N_PER_LOOP = 4;
233         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
234         int mainpart, leftover;
235         int i, j;
236
237         /*
238          * Process most of the packets in chunks of N pkts.  Any
239          * leftover packets will get processed one at a time.
240          */
241         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
242         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
243         for (i = 0; i < mainpart; i += N_PER_LOOP) {
244                 /* Copy N mbuf pointers to the S/W ring */
245                 for (j = 0; j < N_PER_LOOP; ++j) {
246                         (txep + i + j)->mbuf = *(pkts + i + j);
247                 }
248                 tx4(txdp + i, pkts + i);
249         }
250
251         if (unlikely(leftover > 0)) {
252                 for (i = 0; i < leftover; ++i) {
253                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
254                         tx1(txdp + mainpart + i, pkts + mainpart + i);
255                 }
256         }
257 }
258
259 static inline uint16_t
260 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
261              uint16_t nb_pkts)
262 {
263         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
264         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
265         uint16_t n = 0;
266
267         /*
268          * Begin scanning the H/W ring for done descriptors when the
269          * number of available descriptors drops below tx_free_thresh.  For
270          * each done descriptor, free the associated buffer.
271          */
272         if (txq->nb_tx_free < txq->tx_free_thresh)
273                 ixgbe_tx_free_bufs(txq);
274
275         /* Only use descriptors that are available */
276         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
277         if (unlikely(nb_pkts == 0))
278                 return 0;
279
280         /* Use exactly nb_pkts descriptors */
281         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
282
283         /*
284          * At this point, we know there are enough descriptors in the
285          * ring to transmit all the packets.  This assumes that each
286          * mbuf contains a single segment, and that no new offloads
287          * are expected, which would require a new context descriptor.
288          */
289
290         /*
291          * See if we're going to wrap-around. If so, handle the top
292          * of the descriptor ring first, then do the bottom.  If not,
293          * the processing looks just like the "bottom" part anyway...
294          */
295         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
296                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
297                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
298
299                 /*
300                  * We know that the last descriptor in the ring will need to
301                  * have its RS bit set because tx_rs_thresh has to be
302                  * a divisor of the ring size
303                  */
304                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
305                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
306                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
307
308                 txq->tx_tail = 0;
309         }
310
311         /* Fill H/W descriptor ring with mbuf data */
312         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
313         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
314
315         /*
316          * Determine if RS bit should be set
317          * This is what we actually want:
318          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
319          * but instead of subtracting 1 and doing >=, we can just do
320          * greater than without subtracting.
321          */
322         if (txq->tx_tail > txq->tx_next_rs) {
323                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
324                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
325                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
326                                                 txq->tx_rs_thresh);
327                 if (txq->tx_next_rs >= txq->nb_tx_desc)
328                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
329         }
330
331         /*
332          * Check for wrap-around. This would only happen if we used
333          * up to the last descriptor in the ring, no more, no less.
334          */
335         if (txq->tx_tail >= txq->nb_tx_desc)
336                 txq->tx_tail = 0;
337
338         /* update tail pointer */
339         rte_wmb();
340         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
341
342         return nb_pkts;
343 }
344
345 uint16_t
346 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
347                        uint16_t nb_pkts)
348 {
349         uint16_t nb_tx;
350
351         /* Try to transmit at least chunks of TX_MAX_BURST pkts */
352         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
353                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
354
355         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
356         nb_tx = 0;
357         while (nb_pkts) {
358                 uint16_t ret, n;
359
360                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
361                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
362                 nb_tx = (uint16_t)(nb_tx + ret);
363                 nb_pkts = (uint16_t)(nb_pkts - ret);
364                 if (ret < n)
365                         break;
366         }
367
368         return nb_tx;
369 }
370
371 #ifdef RTE_IXGBE_INC_VECTOR
372 static uint16_t
373 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
374                     uint16_t nb_pkts)
375 {
376         uint16_t nb_tx = 0;
377         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
378
379         while (nb_pkts) {
380                 uint16_t ret, num;
381
382                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
383                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
384                                                  num);
385                 nb_tx += ret;
386                 nb_pkts -= ret;
387                 if (ret < num)
388                         break;
389         }
390
391         return nb_tx;
392 }
393 #endif
394
395 static inline void
396 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
397                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
398                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
399 {
400         uint32_t type_tucmd_mlhl;
401         uint32_t mss_l4len_idx = 0;
402         uint32_t ctx_idx;
403         uint32_t vlan_macip_lens;
404         union ixgbe_tx_offload tx_offload_mask;
405         uint32_t seqnum_seed = 0;
406
407         ctx_idx = txq->ctx_curr;
408         tx_offload_mask.data[0] = 0;
409         tx_offload_mask.data[1] = 0;
410         type_tucmd_mlhl = 0;
411
412         /* Specify which HW CTX to upload. */
413         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
414
415         if (ol_flags & PKT_TX_VLAN_PKT) {
416                 tx_offload_mask.vlan_tci |= ~0;
417         }
418
419         /* check if TCP segmentation required for this packet */
420         if (ol_flags & PKT_TX_TCP_SEG) {
421                 /* implies IP cksum in IPv4 */
422                 if (ol_flags & PKT_TX_IP_CKSUM)
423                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
424                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
425                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
426                 else
427                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
428                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
429                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
430
431                 tx_offload_mask.l2_len |= ~0;
432                 tx_offload_mask.l3_len |= ~0;
433                 tx_offload_mask.l4_len |= ~0;
434                 tx_offload_mask.tso_segsz |= ~0;
435                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
436                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
437         } else { /* no TSO, check if hardware checksum is needed */
438                 if (ol_flags & PKT_TX_IP_CKSUM) {
439                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
440                         tx_offload_mask.l2_len |= ~0;
441                         tx_offload_mask.l3_len |= ~0;
442                 }
443
444                 switch (ol_flags & PKT_TX_L4_MASK) {
445                 case PKT_TX_UDP_CKSUM:
446                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
447                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
448                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
449                         tx_offload_mask.l2_len |= ~0;
450                         tx_offload_mask.l3_len |= ~0;
451                         break;
452                 case PKT_TX_TCP_CKSUM:
453                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
454                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
455                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
456                         tx_offload_mask.l2_len |= ~0;
457                         tx_offload_mask.l3_len |= ~0;
458                         break;
459                 case PKT_TX_SCTP_CKSUM:
460                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
461                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
462                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
463                         tx_offload_mask.l2_len |= ~0;
464                         tx_offload_mask.l3_len |= ~0;
465                         break;
466                 default:
467                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
468                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
469                         break;
470                 }
471         }
472
473         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
474                 tx_offload_mask.outer_l2_len |= ~0;
475                 tx_offload_mask.outer_l3_len |= ~0;
476                 tx_offload_mask.l2_len |= ~0;
477                 seqnum_seed |= tx_offload.outer_l3_len
478                                << IXGBE_ADVTXD_OUTER_IPLEN;
479                 seqnum_seed |= tx_offload.l2_len
480                                << IXGBE_ADVTXD_TUNNEL_LEN;
481         }
482
483         txq->ctx_cache[ctx_idx].flags = ol_flags;
484         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
485                 tx_offload_mask.data[0] & tx_offload.data[0];
486         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
487                 tx_offload_mask.data[1] & tx_offload.data[1];
488         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
489
490         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
491         vlan_macip_lens = tx_offload.l3_len;
492         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
493                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
494                                     IXGBE_ADVTXD_MACLEN_SHIFT);
495         else
496                 vlan_macip_lens |= (tx_offload.l2_len <<
497                                     IXGBE_ADVTXD_MACLEN_SHIFT);
498         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
499         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
500         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
501         ctx_txd->seqnum_seed     = seqnum_seed;
502 }
503
504 /*
505  * Check which hardware context can be used. Use the existing match
506  * or create a new context descriptor.
507  */
508 static inline uint32_t
509 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
510                    union ixgbe_tx_offload tx_offload)
511 {
512         /* If match with the current used context */
513         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
514                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
515                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
516                      & tx_offload.data[0])) &&
517                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
518                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
519                      & tx_offload.data[1]))))
520                 return txq->ctx_curr;
521
522         /* What if match with the next context  */
523         txq->ctx_curr ^= 1;
524         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
525                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
526                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
527                      & tx_offload.data[0])) &&
528                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
529                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
530                      & tx_offload.data[1]))))
531                 return txq->ctx_curr;
532
533         /* Mismatch, use the previous context */
534         return IXGBE_CTX_NUM;
535 }
536
537 static inline uint32_t
538 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
539 {
540         uint32_t tmp = 0;
541
542         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
543                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
544         if (ol_flags & PKT_TX_IP_CKSUM)
545                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
546         if (ol_flags & PKT_TX_TCP_SEG)
547                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
548         return tmp;
549 }
550
551 static inline uint32_t
552 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
553 {
554         uint32_t cmdtype = 0;
555
556         if (ol_flags & PKT_TX_VLAN_PKT)
557                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
558         if (ol_flags & PKT_TX_TCP_SEG)
559                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
560         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
561                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
562         if (ol_flags & PKT_TX_MACSEC)
563                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
564         return cmdtype;
565 }
566
567 /* Default RS bit threshold values */
568 #ifndef DEFAULT_TX_RS_THRESH
569 #define DEFAULT_TX_RS_THRESH   32
570 #endif
571 #ifndef DEFAULT_TX_FREE_THRESH
572 #define DEFAULT_TX_FREE_THRESH 32
573 #endif
574
575 /* Reset transmit descriptors after they have been used */
576 static inline int
577 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
578 {
579         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
580         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
581         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
582         uint16_t nb_tx_desc = txq->nb_tx_desc;
583         uint16_t desc_to_clean_to;
584         uint16_t nb_tx_to_clean;
585         uint32_t status;
586
587         /* Determine the last descriptor needing to be cleaned */
588         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
589         if (desc_to_clean_to >= nb_tx_desc)
590                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
591
592         /* Check to make sure the last descriptor to clean is done */
593         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
594         status = txr[desc_to_clean_to].wb.status;
595         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
596                 PMD_TX_FREE_LOG(DEBUG,
597                                 "TX descriptor %4u is not done"
598                                 "(port=%d queue=%d)",
599                                 desc_to_clean_to,
600                                 txq->port_id, txq->queue_id);
601                 /* Failed to clean any descriptors, better luck next time */
602                 return -(1);
603         }
604
605         /* Figure out how many descriptors will be cleaned */
606         if (last_desc_cleaned > desc_to_clean_to)
607                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
608                                                         desc_to_clean_to);
609         else
610                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
611                                                 last_desc_cleaned);
612
613         PMD_TX_FREE_LOG(DEBUG,
614                         "Cleaning %4u TX descriptors: %4u to %4u "
615                         "(port=%d queue=%d)",
616                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
617                         txq->port_id, txq->queue_id);
618
619         /*
620          * The last descriptor to clean is done, so that means all the
621          * descriptors from the last descriptor that was cleaned
622          * up to the last descriptor with the RS bit set
623          * are done. Only reset the threshold descriptor.
624          */
625         txr[desc_to_clean_to].wb.status = 0;
626
627         /* Update the txq to reflect the last descriptor that was cleaned */
628         txq->last_desc_cleaned = desc_to_clean_to;
629         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
630
631         /* No Error */
632         return 0;
633 }
634
635 uint16_t
636 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
637                 uint16_t nb_pkts)
638 {
639         struct ixgbe_tx_queue *txq;
640         struct ixgbe_tx_entry *sw_ring;
641         struct ixgbe_tx_entry *txe, *txn;
642         volatile union ixgbe_adv_tx_desc *txr;
643         volatile union ixgbe_adv_tx_desc *txd, *txp;
644         struct rte_mbuf     *tx_pkt;
645         struct rte_mbuf     *m_seg;
646         uint64_t buf_dma_addr;
647         uint32_t olinfo_status;
648         uint32_t cmd_type_len;
649         uint32_t pkt_len;
650         uint16_t slen;
651         uint64_t ol_flags;
652         uint16_t tx_id;
653         uint16_t tx_last;
654         uint16_t nb_tx;
655         uint16_t nb_used;
656         uint64_t tx_ol_req;
657         uint32_t ctx = 0;
658         uint32_t new_ctx;
659         union ixgbe_tx_offload tx_offload;
660
661         tx_offload.data[0] = 0;
662         tx_offload.data[1] = 0;
663         txq = tx_queue;
664         sw_ring = txq->sw_ring;
665         txr     = txq->tx_ring;
666         tx_id   = txq->tx_tail;
667         txe = &sw_ring[tx_id];
668         txp = NULL;
669
670         /* Determine if the descriptor ring needs to be cleaned. */
671         if (txq->nb_tx_free < txq->tx_free_thresh)
672                 ixgbe_xmit_cleanup(txq);
673
674         rte_prefetch0(&txe->mbuf->pool);
675
676         /* TX loop */
677         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
678                 new_ctx = 0;
679                 tx_pkt = *tx_pkts++;
680                 pkt_len = tx_pkt->pkt_len;
681
682                 /*
683                  * Determine how many (if any) context descriptors
684                  * are needed for offload functionality.
685                  */
686                 ol_flags = tx_pkt->ol_flags;
687
688                 /* If hardware offload required */
689                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
690                 if (tx_ol_req) {
691                         tx_offload.l2_len = tx_pkt->l2_len;
692                         tx_offload.l3_len = tx_pkt->l3_len;
693                         tx_offload.l4_len = tx_pkt->l4_len;
694                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
695                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
696                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
697                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
698
699                         /* If new context need be built or reuse the exist ctx. */
700                         ctx = what_advctx_update(txq, tx_ol_req,
701                                 tx_offload);
702                         /* Only allocate context descriptor if required*/
703                         new_ctx = (ctx == IXGBE_CTX_NUM);
704                         ctx = txq->ctx_curr;
705                 }
706
707                 /*
708                  * Keep track of how many descriptors are used this loop
709                  * This will always be the number of segments + the number of
710                  * Context descriptors required to transmit the packet
711                  */
712                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
713
714                 if (txp != NULL &&
715                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
716                         /* set RS on the previous packet in the burst */
717                         txp->read.cmd_type_len |=
718                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
719
720                 /*
721                  * The number of descriptors that must be allocated for a
722                  * packet is the number of segments of that packet, plus 1
723                  * Context Descriptor for the hardware offload, if any.
724                  * Determine the last TX descriptor to allocate in the TX ring
725                  * for the packet, starting from the current position (tx_id)
726                  * in the ring.
727                  */
728                 tx_last = (uint16_t) (tx_id + nb_used - 1);
729
730                 /* Circular ring */
731                 if (tx_last >= txq->nb_tx_desc)
732                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
733
734                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
735                            " tx_first=%u tx_last=%u",
736                            (unsigned) txq->port_id,
737                            (unsigned) txq->queue_id,
738                            (unsigned) pkt_len,
739                            (unsigned) tx_id,
740                            (unsigned) tx_last);
741
742                 /*
743                  * Make sure there are enough TX descriptors available to
744                  * transmit the entire packet.
745                  * nb_used better be less than or equal to txq->tx_rs_thresh
746                  */
747                 if (nb_used > txq->nb_tx_free) {
748                         PMD_TX_FREE_LOG(DEBUG,
749                                         "Not enough free TX descriptors "
750                                         "nb_used=%4u nb_free=%4u "
751                                         "(port=%d queue=%d)",
752                                         nb_used, txq->nb_tx_free,
753                                         txq->port_id, txq->queue_id);
754
755                         if (ixgbe_xmit_cleanup(txq) != 0) {
756                                 /* Could not clean any descriptors */
757                                 if (nb_tx == 0)
758                                         return 0;
759                                 goto end_of_tx;
760                         }
761
762                         /* nb_used better be <= txq->tx_rs_thresh */
763                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
764                                 PMD_TX_FREE_LOG(DEBUG,
765                                         "The number of descriptors needed to "
766                                         "transmit the packet exceeds the "
767                                         "RS bit threshold. This will impact "
768                                         "performance."
769                                         "nb_used=%4u nb_free=%4u "
770                                         "tx_rs_thresh=%4u. "
771                                         "(port=%d queue=%d)",
772                                         nb_used, txq->nb_tx_free,
773                                         txq->tx_rs_thresh,
774                                         txq->port_id, txq->queue_id);
775                                 /*
776                                  * Loop here until there are enough TX
777                                  * descriptors or until the ring cannot be
778                                  * cleaned.
779                                  */
780                                 while (nb_used > txq->nb_tx_free) {
781                                         if (ixgbe_xmit_cleanup(txq) != 0) {
782                                                 /*
783                                                  * Could not clean any
784                                                  * descriptors
785                                                  */
786                                                 if (nb_tx == 0)
787                                                         return 0;
788                                                 goto end_of_tx;
789                                         }
790                                 }
791                         }
792                 }
793
794                 /*
795                  * By now there are enough free TX descriptors to transmit
796                  * the packet.
797                  */
798
799                 /*
800                  * Set common flags of all TX Data Descriptors.
801                  *
802                  * The following bits must be set in all Data Descriptors:
803                  *   - IXGBE_ADVTXD_DTYP_DATA
804                  *   - IXGBE_ADVTXD_DCMD_DEXT
805                  *
806                  * The following bits must be set in the first Data Descriptor
807                  * and are ignored in the other ones:
808                  *   - IXGBE_ADVTXD_DCMD_IFCS
809                  *   - IXGBE_ADVTXD_MAC_1588
810                  *   - IXGBE_ADVTXD_DCMD_VLE
811                  *
812                  * The following bits must only be set in the last Data
813                  * Descriptor:
814                  *   - IXGBE_TXD_CMD_EOP
815                  *
816                  * The following bits can be set in any Data Descriptor, but
817                  * are only set in the last Data Descriptor:
818                  *   - IXGBE_TXD_CMD_RS
819                  */
820                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
821                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
822
823 #ifdef RTE_LIBRTE_IEEE1588
824                 if (ol_flags & PKT_TX_IEEE1588_TMST)
825                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
826 #endif
827
828                 olinfo_status = 0;
829                 if (tx_ol_req) {
830
831                         if (ol_flags & PKT_TX_TCP_SEG) {
832                                 /* when TSO is on, paylen in descriptor is the
833                                  * not the packet len but the tcp payload len */
834                                 pkt_len -= (tx_offload.l2_len +
835                                         tx_offload.l3_len + tx_offload.l4_len);
836                         }
837
838                         /*
839                          * Setup the TX Advanced Context Descriptor if required
840                          */
841                         if (new_ctx) {
842                                 volatile struct ixgbe_adv_tx_context_desc *
843                                     ctx_txd;
844
845                                 ctx_txd = (volatile struct
846                                     ixgbe_adv_tx_context_desc *)
847                                     &txr[tx_id];
848
849                                 txn = &sw_ring[txe->next_id];
850                                 rte_prefetch0(&txn->mbuf->pool);
851
852                                 if (txe->mbuf != NULL) {
853                                         rte_pktmbuf_free_seg(txe->mbuf);
854                                         txe->mbuf = NULL;
855                                 }
856
857                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
858                                         tx_offload);
859
860                                 txe->last_id = tx_last;
861                                 tx_id = txe->next_id;
862                                 txe = txn;
863                         }
864
865                         /*
866                          * Setup the TX Advanced Data Descriptor,
867                          * This path will go through
868                          * whatever new/reuse the context descriptor
869                          */
870                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
871                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
872                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
873                 }
874
875                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
876
877                 m_seg = tx_pkt;
878                 do {
879                         txd = &txr[tx_id];
880                         txn = &sw_ring[txe->next_id];
881                         rte_prefetch0(&txn->mbuf->pool);
882
883                         if (txe->mbuf != NULL)
884                                 rte_pktmbuf_free_seg(txe->mbuf);
885                         txe->mbuf = m_seg;
886
887                         /*
888                          * Set up Transmit Data Descriptor.
889                          */
890                         slen = m_seg->data_len;
891                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
892                         txd->read.buffer_addr =
893                                 rte_cpu_to_le_64(buf_dma_addr);
894                         txd->read.cmd_type_len =
895                                 rte_cpu_to_le_32(cmd_type_len | slen);
896                         txd->read.olinfo_status =
897                                 rte_cpu_to_le_32(olinfo_status);
898                         txe->last_id = tx_last;
899                         tx_id = txe->next_id;
900                         txe = txn;
901                         m_seg = m_seg->next;
902                 } while (m_seg != NULL);
903
904                 /*
905                  * The last packet data descriptor needs End Of Packet (EOP)
906                  */
907                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
908                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
909                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
910
911                 /* Set RS bit only on threshold packets' last descriptor */
912                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
913                         PMD_TX_FREE_LOG(DEBUG,
914                                         "Setting RS bit on TXD id="
915                                         "%4u (port=%d queue=%d)",
916                                         tx_last, txq->port_id, txq->queue_id);
917
918                         cmd_type_len |= IXGBE_TXD_CMD_RS;
919
920                         /* Update txq RS bit counters */
921                         txq->nb_tx_used = 0;
922                         txp = NULL;
923                 } else
924                         txp = txd;
925
926                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
927         }
928
929 end_of_tx:
930         /* set RS on last packet in the burst */
931         if (txp != NULL)
932                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
933
934         rte_wmb();
935
936         /*
937          * Set the Transmit Descriptor Tail (TDT)
938          */
939         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
940                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
941                    (unsigned) tx_id, (unsigned) nb_tx);
942         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
943         txq->tx_tail = tx_id;
944
945         return nb_tx;
946 }
947
948 /*********************************************************************
949  *
950  *  TX prep functions
951  *
952  **********************************************************************/
953 uint16_t
954 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
955 {
956         int i, ret;
957         uint64_t ol_flags;
958         struct rte_mbuf *m;
959         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
960
961         for (i = 0; i < nb_pkts; i++) {
962                 m = tx_pkts[i];
963                 ol_flags = m->ol_flags;
964
965                 /**
966                  * Check if packet meets requirements for number of segments
967                  *
968                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
969                  *       non-TSO
970                  */
971
972                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
973                         rte_errno = -EINVAL;
974                         return i;
975                 }
976
977                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
978                         rte_errno = -ENOTSUP;
979                         return i;
980                 }
981
982 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
983                 ret = rte_validate_tx_offload(m);
984                 if (ret != 0) {
985                         rte_errno = ret;
986                         return i;
987                 }
988 #endif
989                 ret = rte_net_intel_cksum_prepare(m);
990                 if (ret != 0) {
991                         rte_errno = ret;
992                         return i;
993                 }
994         }
995
996         return i;
997 }
998
999 /*********************************************************************
1000  *
1001  *  RX functions
1002  *
1003  **********************************************************************/
1004
1005 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1006 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1007 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1008 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1009 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1010 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1011 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1012 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1013 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1014 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1015 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1016 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1017 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1018 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1019 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1020 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1021 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1022 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1023 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1024 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1025 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1026 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1027 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1028 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1029 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1030 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1031 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1032 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1033 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1034 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1035 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1036 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1037 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1038
1039 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1040 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1041 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1042 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1043 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1044 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1045 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1046 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1047 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1048 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1049 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1050 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1051 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1052 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1053 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1054 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1055 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1062
1063 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1064 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1065 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1066 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1067 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1068 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1069 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1070 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1071 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1072 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1073 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1074 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1075 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1076 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1077 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1078 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1079 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1086
1087 #define IXGBE_PACKET_TYPE_MAX               0X80
1088 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
1089 #define IXGBE_PACKET_TYPE_SHIFT             0X04
1090
1091 /* @note: fix ixgbe_dev_supported_ptypes_get() if any change here. */
1092 static inline uint32_t
1093 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1094 {
1095         /**
1096          * Use 2 different table for normal packet and tunnel packet
1097          * to save the space.
1098          */
1099         static const uint32_t
1100                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1101                 [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1102                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1103                         RTE_PTYPE_L3_IPV4,
1104                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1105                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1106                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1107                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1108                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1109                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1110                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1111                         RTE_PTYPE_L3_IPV4_EXT,
1112                 [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1113                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1114                 [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1115                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1116                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1117                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1118                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1119                         RTE_PTYPE_L3_IPV6,
1120                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1121                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1122                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1123                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1124                 [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1125                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1126                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1127                         RTE_PTYPE_L3_IPV6_EXT,
1128                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1129                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1130                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1131                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1132                 [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1133                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1134                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1135                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1136                         RTE_PTYPE_INNER_L3_IPV6,
1137                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1138                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1139                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1140                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1141                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1142                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1143                 [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1144                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1145                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1146                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1147                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1148                         RTE_PTYPE_INNER_L3_IPV6,
1149                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1150                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1151                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1152                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1153                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1154                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1155                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1156                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1157                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1158                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1159                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1160                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1161                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1162                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1163                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1164                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1165                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1166                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1167                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1168                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1169                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1170                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1171                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1172                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1173                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1174                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1175                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1176                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1177                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1178                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1179                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1180                         RTE_PTYPE_L2_ETHER |
1181                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1182                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1183         };
1184
1185         static const uint32_t
1186                 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1187                 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1188                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1189                         RTE_PTYPE_INNER_L2_ETHER,
1190                 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1191                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1192                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1193                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1194                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1195                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1196                 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1197                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1198                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1199                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1200                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1201                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1202                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1203                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1204                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1205                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1206                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1207                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1208                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1209                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1210                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1211                         RTE_PTYPE_INNER_L4_TCP,
1212                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1213                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1214                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1215                         RTE_PTYPE_INNER_L4_TCP,
1216                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1217                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1218                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1219                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1220                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1221                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1222                         RTE_PTYPE_INNER_L4_TCP,
1223                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1224                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1225                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1226                         RTE_PTYPE_INNER_L3_IPV4,
1227                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1228                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1229                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1230                         RTE_PTYPE_INNER_L4_UDP,
1231                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1232                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1233                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1234                         RTE_PTYPE_INNER_L4_UDP,
1235                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1236                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1237                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1238                         RTE_PTYPE_INNER_L4_SCTP,
1239                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1240                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1241                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1242                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1243                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1244                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1245                         RTE_PTYPE_INNER_L4_UDP,
1246                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1247                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1248                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1249                         RTE_PTYPE_INNER_L4_SCTP,
1250                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1251                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1252                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1253                         RTE_PTYPE_INNER_L3_IPV4,
1254                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1255                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1256                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1257                         RTE_PTYPE_INNER_L4_SCTP,
1258                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1259                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1260                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1261                         RTE_PTYPE_INNER_L4_SCTP,
1262                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1263                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1264                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1265                         RTE_PTYPE_INNER_L4_TCP,
1266                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1267                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1268                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1269                         RTE_PTYPE_INNER_L4_UDP,
1270
1271                 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1272                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1273                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1274                 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1275                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1276                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1277                         RTE_PTYPE_INNER_L3_IPV4,
1278                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1279                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1280                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1281                         RTE_PTYPE_INNER_L3_IPV4_EXT,
1282                 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1283                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1284                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1285                         RTE_PTYPE_INNER_L3_IPV6,
1286                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1287                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1288                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1289                         RTE_PTYPE_INNER_L3_IPV4,
1290                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1291                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1292                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1293                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1294                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1295                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1296                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1297                         RTE_PTYPE_INNER_L3_IPV4,
1298                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1299                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1300                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1301                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1302                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1303                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1304                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1305                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1306                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1307                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1308                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1309                         RTE_PTYPE_INNER_L3_IPV4,
1310                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1311                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1312                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1313                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1314                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1315                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1316                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1317                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1318                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1319                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1320                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1321                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1322                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1323                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1324                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1325                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1326                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1327                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1328                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1329                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1330                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1331                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1332                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1333                         RTE_PTYPE_INNER_L3_IPV4,
1334                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1335                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1336                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1337                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1338                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1339                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1340                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1341                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1342                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1343                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1344                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1345                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1346                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1347                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1348                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1349                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1350                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1351                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1352                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1353                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1354                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1355                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1356                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1357                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1358                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1359                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1360                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1361                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1362         };
1363
1364         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1365                 return RTE_PTYPE_UNKNOWN;
1366
1367         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1368
1369         /* For tunnel packet */
1370         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1371                 /* Remove the tunnel bit to save the space. */
1372                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1373                 return ptype_table_tn[pkt_info];
1374         }
1375
1376         /**
1377          * For x550, if it's not tunnel,
1378          * tunnel type bit should be set to 0.
1379          * Reuse 82599's mask.
1380          */
1381         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1382
1383         return ptype_table[pkt_info];
1384 }
1385
1386 static inline uint64_t
1387 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1388 {
1389         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1390                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1391                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1392                 PKT_RX_RSS_HASH, 0, 0, 0,
1393                 0, 0, 0,  PKT_RX_FDIR,
1394         };
1395 #ifdef RTE_LIBRTE_IEEE1588
1396         static uint64_t ip_pkt_etqf_map[8] = {
1397                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1398                 0, 0, 0, 0,
1399         };
1400
1401         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1402                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1403                                 ip_rss_types_map[pkt_info & 0XF];
1404         else
1405                 return ip_rss_types_map[pkt_info & 0XF];
1406 #else
1407         return ip_rss_types_map[pkt_info & 0XF];
1408 #endif
1409 }
1410
1411 static inline uint64_t
1412 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1413 {
1414         uint64_t pkt_flags;
1415
1416         /*
1417          * Check if VLAN present only.
1418          * Do not check whether L3/L4 rx checksum done by NIC or not,
1419          * That can be found from rte_eth_rxmode.hw_ip_checksum flag
1420          */
1421         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1422
1423 #ifdef RTE_LIBRTE_IEEE1588
1424         if (rx_status & IXGBE_RXD_STAT_TMST)
1425                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1426 #endif
1427         return pkt_flags;
1428 }
1429
1430 static inline uint64_t
1431 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1432 {
1433         uint64_t pkt_flags;
1434
1435         /*
1436          * Bit 31: IPE, IPv4 checksum error
1437          * Bit 30: L4I, L4I integrity error
1438          */
1439         static uint64_t error_to_pkt_flags_map[4] = {
1440                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1441                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1442                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1443                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1444         };
1445         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1446                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1447
1448         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1449             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1450                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1451         }
1452
1453         return pkt_flags;
1454 }
1455
1456 /*
1457  * LOOK_AHEAD defines how many desc statuses to check beyond the
1458  * current descriptor.
1459  * It must be a pound define for optimal performance.
1460  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1461  * function only works with LOOK_AHEAD=8.
1462  */
1463 #define LOOK_AHEAD 8
1464 #if (LOOK_AHEAD != 8)
1465 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1466 #endif
1467 static inline int
1468 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1469 {
1470         volatile union ixgbe_adv_rx_desc *rxdp;
1471         struct ixgbe_rx_entry *rxep;
1472         struct rte_mbuf *mb;
1473         uint16_t pkt_len;
1474         uint64_t pkt_flags;
1475         int nb_dd;
1476         uint32_t s[LOOK_AHEAD];
1477         uint32_t pkt_info[LOOK_AHEAD];
1478         int i, j, nb_rx = 0;
1479         uint32_t status;
1480         uint64_t vlan_flags = rxq->vlan_flags;
1481
1482         /* get references to current descriptor and S/W ring entry */
1483         rxdp = &rxq->rx_ring[rxq->rx_tail];
1484         rxep = &rxq->sw_ring[rxq->rx_tail];
1485
1486         status = rxdp->wb.upper.status_error;
1487         /* check to make sure there is at least 1 packet to receive */
1488         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1489                 return 0;
1490
1491         /*
1492          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1493          * reference packets that are ready to be received.
1494          */
1495         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1496              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1497                 /* Read desc statuses backwards to avoid race condition */
1498                 for (j = 0; j < LOOK_AHEAD; j++)
1499                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1500
1501                 rte_smp_rmb();
1502
1503                 /* Compute how many status bits were set */
1504                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1505                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1506                         ;
1507
1508                 for (j = 0; j < nb_dd; j++)
1509                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1510                                                        lo_dword.data);
1511
1512                 nb_rx += nb_dd;
1513
1514                 /* Translate descriptor info to mbuf format */
1515                 for (j = 0; j < nb_dd; ++j) {
1516                         mb = rxep[j].mbuf;
1517                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1518                                   rxq->crc_len;
1519                         mb->data_len = pkt_len;
1520                         mb->pkt_len = pkt_len;
1521                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1522
1523                         /* convert descriptor fields to rte mbuf flags */
1524                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1525                                 vlan_flags);
1526                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1527                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1528                                         ((uint16_t)pkt_info[j]);
1529                         mb->ol_flags = pkt_flags;
1530                         mb->packet_type =
1531                                 ixgbe_rxd_pkt_info_to_pkt_type
1532                                         (pkt_info[j], rxq->pkt_type_mask);
1533
1534                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1535                                 mb->hash.rss = rte_le_to_cpu_32(
1536                                     rxdp[j].wb.lower.hi_dword.rss);
1537                         else if (pkt_flags & PKT_RX_FDIR) {
1538                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1539                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1540                                     IXGBE_ATR_HASH_MASK;
1541                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1542                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1543                         }
1544                 }
1545
1546                 /* Move mbuf pointers from the S/W ring to the stage */
1547                 for (j = 0; j < LOOK_AHEAD; ++j) {
1548                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1549                 }
1550
1551                 /* stop if all requested packets could not be received */
1552                 if (nb_dd != LOOK_AHEAD)
1553                         break;
1554         }
1555
1556         /* clear software ring entries so we can cleanup correctly */
1557         for (i = 0; i < nb_rx; ++i) {
1558                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1559         }
1560
1561
1562         return nb_rx;
1563 }
1564
1565 static inline int
1566 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1567 {
1568         volatile union ixgbe_adv_rx_desc *rxdp;
1569         struct ixgbe_rx_entry *rxep;
1570         struct rte_mbuf *mb;
1571         uint16_t alloc_idx;
1572         __le64 dma_addr;
1573         int diag, i;
1574
1575         /* allocate buffers in bulk directly into the S/W ring */
1576         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1577         rxep = &rxq->sw_ring[alloc_idx];
1578         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1579                                     rxq->rx_free_thresh);
1580         if (unlikely(diag != 0))
1581                 return -ENOMEM;
1582
1583         rxdp = &rxq->rx_ring[alloc_idx];
1584         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1585                 /* populate the static rte mbuf fields */
1586                 mb = rxep[i].mbuf;
1587                 if (reset_mbuf) {
1588                         mb->port = rxq->port_id;
1589                 }
1590
1591                 rte_mbuf_refcnt_set(mb, 1);
1592                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1593
1594                 /* populate the descriptors */
1595                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1596                 rxdp[i].read.hdr_addr = 0;
1597                 rxdp[i].read.pkt_addr = dma_addr;
1598         }
1599
1600         /* update state of internal queue structure */
1601         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1602         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1603                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1604
1605         /* no errors */
1606         return 0;
1607 }
1608
1609 static inline uint16_t
1610 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1611                          uint16_t nb_pkts)
1612 {
1613         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1614         int i;
1615
1616         /* how many packets are ready to return? */
1617         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1618
1619         /* copy mbuf pointers to the application's packet list */
1620         for (i = 0; i < nb_pkts; ++i)
1621                 rx_pkts[i] = stage[i];
1622
1623         /* update internal queue state */
1624         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1625         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1626
1627         return nb_pkts;
1628 }
1629
1630 static inline uint16_t
1631 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1632              uint16_t nb_pkts)
1633 {
1634         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1635         uint16_t nb_rx = 0;
1636
1637         /* Any previously recv'd pkts will be returned from the Rx stage */
1638         if (rxq->rx_nb_avail)
1639                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1640
1641         /* Scan the H/W ring for packets to receive */
1642         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1643
1644         /* update internal queue state */
1645         rxq->rx_next_avail = 0;
1646         rxq->rx_nb_avail = nb_rx;
1647         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1648
1649         /* if required, allocate new buffers to replenish descriptors */
1650         if (rxq->rx_tail > rxq->rx_free_trigger) {
1651                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1652
1653                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1654                         int i, j;
1655
1656                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1657                                    "queue_id=%u", (unsigned) rxq->port_id,
1658                                    (unsigned) rxq->queue_id);
1659
1660                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1661                                 rxq->rx_free_thresh;
1662
1663                         /*
1664                          * Need to rewind any previous receives if we cannot
1665                          * allocate new buffers to replenish the old ones.
1666                          */
1667                         rxq->rx_nb_avail = 0;
1668                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1669                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1670                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1671
1672                         return 0;
1673                 }
1674
1675                 /* update tail pointer */
1676                 rte_wmb();
1677                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1678                                             cur_free_trigger);
1679         }
1680
1681         if (rxq->rx_tail >= rxq->nb_rx_desc)
1682                 rxq->rx_tail = 0;
1683
1684         /* received any packets this loop? */
1685         if (rxq->rx_nb_avail)
1686                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1687
1688         return 0;
1689 }
1690
1691 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1692 uint16_t
1693 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1694                            uint16_t nb_pkts)
1695 {
1696         uint16_t nb_rx;
1697
1698         if (unlikely(nb_pkts == 0))
1699                 return 0;
1700
1701         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1702                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1703
1704         /* request is relatively large, chunk it up */
1705         nb_rx = 0;
1706         while (nb_pkts) {
1707                 uint16_t ret, n;
1708
1709                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1710                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1711                 nb_rx = (uint16_t)(nb_rx + ret);
1712                 nb_pkts = (uint16_t)(nb_pkts - ret);
1713                 if (ret < n)
1714                         break;
1715         }
1716
1717         return nb_rx;
1718 }
1719
1720 uint16_t
1721 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1722                 uint16_t nb_pkts)
1723 {
1724         struct ixgbe_rx_queue *rxq;
1725         volatile union ixgbe_adv_rx_desc *rx_ring;
1726         volatile union ixgbe_adv_rx_desc *rxdp;
1727         struct ixgbe_rx_entry *sw_ring;
1728         struct ixgbe_rx_entry *rxe;
1729         struct rte_mbuf *rxm;
1730         struct rte_mbuf *nmb;
1731         union ixgbe_adv_rx_desc rxd;
1732         uint64_t dma_addr;
1733         uint32_t staterr;
1734         uint32_t pkt_info;
1735         uint16_t pkt_len;
1736         uint16_t rx_id;
1737         uint16_t nb_rx;
1738         uint16_t nb_hold;
1739         uint64_t pkt_flags;
1740         uint64_t vlan_flags;
1741
1742         nb_rx = 0;
1743         nb_hold = 0;
1744         rxq = rx_queue;
1745         rx_id = rxq->rx_tail;
1746         rx_ring = rxq->rx_ring;
1747         sw_ring = rxq->sw_ring;
1748         vlan_flags = rxq->vlan_flags;
1749         while (nb_rx < nb_pkts) {
1750                 /*
1751                  * The order of operations here is important as the DD status
1752                  * bit must not be read after any other descriptor fields.
1753                  * rx_ring and rxdp are pointing to volatile data so the order
1754                  * of accesses cannot be reordered by the compiler. If they were
1755                  * not volatile, they could be reordered which could lead to
1756                  * using invalid descriptor fields when read from rxd.
1757                  */
1758                 rxdp = &rx_ring[rx_id];
1759                 staterr = rxdp->wb.upper.status_error;
1760                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1761                         break;
1762                 rxd = *rxdp;
1763
1764                 /*
1765                  * End of packet.
1766                  *
1767                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1768                  * is likely to be invalid and to be dropped by the various
1769                  * validation checks performed by the network stack.
1770                  *
1771                  * Allocate a new mbuf to replenish the RX ring descriptor.
1772                  * If the allocation fails:
1773                  *    - arrange for that RX descriptor to be the first one
1774                  *      being parsed the next time the receive function is
1775                  *      invoked [on the same queue].
1776                  *
1777                  *    - Stop parsing the RX ring and return immediately.
1778                  *
1779                  * This policy do not drop the packet received in the RX
1780                  * descriptor for which the allocation of a new mbuf failed.
1781                  * Thus, it allows that packet to be later retrieved if
1782                  * mbuf have been freed in the mean time.
1783                  * As a side effect, holding RX descriptors instead of
1784                  * systematically giving them back to the NIC may lead to
1785                  * RX ring exhaustion situations.
1786                  * However, the NIC can gracefully prevent such situations
1787                  * to happen by sending specific "back-pressure" flow control
1788                  * frames to its peer(s).
1789                  */
1790                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1791                            "ext_err_stat=0x%08x pkt_len=%u",
1792                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1793                            (unsigned) rx_id, (unsigned) staterr,
1794                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1795
1796                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1797                 if (nmb == NULL) {
1798                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1799                                    "queue_id=%u", (unsigned) rxq->port_id,
1800                                    (unsigned) rxq->queue_id);
1801                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1802                         break;
1803                 }
1804
1805                 nb_hold++;
1806                 rxe = &sw_ring[rx_id];
1807                 rx_id++;
1808                 if (rx_id == rxq->nb_rx_desc)
1809                         rx_id = 0;
1810
1811                 /* Prefetch next mbuf while processing current one. */
1812                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1813
1814                 /*
1815                  * When next RX descriptor is on a cache-line boundary,
1816                  * prefetch the next 4 RX descriptors and the next 8 pointers
1817                  * to mbufs.
1818                  */
1819                 if ((rx_id & 0x3) == 0) {
1820                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1821                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1822                 }
1823
1824                 rxm = rxe->mbuf;
1825                 rxe->mbuf = nmb;
1826                 dma_addr =
1827                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1828                 rxdp->read.hdr_addr = 0;
1829                 rxdp->read.pkt_addr = dma_addr;
1830
1831                 /*
1832                  * Initialize the returned mbuf.
1833                  * 1) setup generic mbuf fields:
1834                  *    - number of segments,
1835                  *    - next segment,
1836                  *    - packet length,
1837                  *    - RX port identifier.
1838                  * 2) integrate hardware offload data, if any:
1839                  *    - RSS flag & hash,
1840                  *    - IP checksum flag,
1841                  *    - VLAN TCI, if any,
1842                  *    - error flags.
1843                  */
1844                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1845                                       rxq->crc_len);
1846                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1847                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1848                 rxm->nb_segs = 1;
1849                 rxm->next = NULL;
1850                 rxm->pkt_len = pkt_len;
1851                 rxm->data_len = pkt_len;
1852                 rxm->port = rxq->port_id;
1853
1854                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1855                 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
1856                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1857
1858                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1859                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1860                 pkt_flags = pkt_flags |
1861                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1862                 rxm->ol_flags = pkt_flags;
1863                 rxm->packet_type =
1864                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1865                                                        rxq->pkt_type_mask);
1866
1867                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1868                         rxm->hash.rss = rte_le_to_cpu_32(
1869                                                 rxd.wb.lower.hi_dword.rss);
1870                 else if (pkt_flags & PKT_RX_FDIR) {
1871                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1872                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1873                                         IXGBE_ATR_HASH_MASK;
1874                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1875                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1876                 }
1877                 /*
1878                  * Store the mbuf address into the next entry of the array
1879                  * of returned packets.
1880                  */
1881                 rx_pkts[nb_rx++] = rxm;
1882         }
1883         rxq->rx_tail = rx_id;
1884
1885         /*
1886          * If the number of free RX descriptors is greater than the RX free
1887          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1888          * register.
1889          * Update the RDT with the value of the last processed RX descriptor
1890          * minus 1, to guarantee that the RDT register is never equal to the
1891          * RDH register, which creates a "full" ring situtation from the
1892          * hardware point of view...
1893          */
1894         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1895         if (nb_hold > rxq->rx_free_thresh) {
1896                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1897                            "nb_hold=%u nb_rx=%u",
1898                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1899                            (unsigned) rx_id, (unsigned) nb_hold,
1900                            (unsigned) nb_rx);
1901                 rx_id = (uint16_t) ((rx_id == 0) ?
1902                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1903                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1904                 nb_hold = 0;
1905         }
1906         rxq->nb_rx_hold = nb_hold;
1907         return nb_rx;
1908 }
1909
1910 /**
1911  * Detect an RSC descriptor.
1912  */
1913 static inline uint32_t
1914 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1915 {
1916         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1917                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1918 }
1919
1920 /**
1921  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1922  *
1923  * Fill the following info in the HEAD buffer of the Rx cluster:
1924  *    - RX port identifier
1925  *    - hardware offload data, if any:
1926  *      - RSS flag & hash
1927  *      - IP checksum flag
1928  *      - VLAN TCI, if any
1929  *      - error flags
1930  * @head HEAD of the packet cluster
1931  * @desc HW descriptor to get data from
1932  * @rxq Pointer to the Rx queue
1933  */
1934 static inline void
1935 ixgbe_fill_cluster_head_buf(
1936         struct rte_mbuf *head,
1937         union ixgbe_adv_rx_desc *desc,
1938         struct ixgbe_rx_queue *rxq,
1939         uint32_t staterr)
1940 {
1941         uint32_t pkt_info;
1942         uint64_t pkt_flags;
1943
1944         head->port = rxq->port_id;
1945
1946         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1947          * set in the pkt_flags field.
1948          */
1949         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1950         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1951         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1952         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1953         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1954         head->ol_flags = pkt_flags;
1955         head->packet_type =
1956                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1957
1958         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1959                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1960         else if (pkt_flags & PKT_RX_FDIR) {
1961                 head->hash.fdir.hash =
1962                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1963                                                           & IXGBE_ATR_HASH_MASK;
1964                 head->hash.fdir.id =
1965                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1966         }
1967 }
1968
1969 /**
1970  * ixgbe_recv_pkts_lro - receive handler for and LRO case.
1971  *
1972  * @rx_queue Rx queue handle
1973  * @rx_pkts table of received packets
1974  * @nb_pkts size of rx_pkts table
1975  * @bulk_alloc if TRUE bulk allocation is used for a HW ring refilling
1976  *
1977  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1978  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1979  *
1980  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1981  * 1) When non-EOP RSC completion arrives:
1982  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1983  *       segment's data length.
1984  *    b) Set the "next" pointer of the current segment to point to the segment
1985  *       at the NEXTP index.
1986  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1987  *       in the sw_rsc_ring.
1988  * 2) When EOP arrives we just update the cluster's total length and offload
1989  *    flags and deliver the cluster up to the upper layers. In our case - put it
1990  *    in the rx_pkts table.
1991  *
1992  * Returns the number of received packets/clusters (according to the "bulk
1993  * receive" interface).
1994  */
1995 static inline uint16_t
1996 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1997                     bool bulk_alloc)
1998 {
1999         struct ixgbe_rx_queue *rxq = rx_queue;
2000         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2001         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2002         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2003         uint16_t rx_id = rxq->rx_tail;
2004         uint16_t nb_rx = 0;
2005         uint16_t nb_hold = rxq->nb_rx_hold;
2006         uint16_t prev_id = rxq->rx_tail;
2007
2008         while (nb_rx < nb_pkts) {
2009                 bool eop;
2010                 struct ixgbe_rx_entry *rxe;
2011                 struct ixgbe_scattered_rx_entry *sc_entry;
2012                 struct ixgbe_scattered_rx_entry *next_sc_entry;
2013                 struct ixgbe_rx_entry *next_rxe = NULL;
2014                 struct rte_mbuf *first_seg;
2015                 struct rte_mbuf *rxm;
2016                 struct rte_mbuf *nmb;
2017                 union ixgbe_adv_rx_desc rxd;
2018                 uint16_t data_len;
2019                 uint16_t next_id;
2020                 volatile union ixgbe_adv_rx_desc *rxdp;
2021                 uint32_t staterr;
2022
2023 next_desc:
2024                 /*
2025                  * The code in this whole file uses the volatile pointer to
2026                  * ensure the read ordering of the status and the rest of the
2027                  * descriptor fields (on the compiler level only!!!). This is so
2028                  * UGLY - why not to just use the compiler barrier instead? DPDK
2029                  * even has the rte_compiler_barrier() for that.
2030                  *
2031                  * But most importantly this is just wrong because this doesn't
2032                  * ensure memory ordering in a general case at all. For
2033                  * instance, DPDK is supposed to work on Power CPUs where
2034                  * compiler barrier may just not be enough!
2035                  *
2036                  * I tried to write only this function properly to have a
2037                  * starting point (as a part of an LRO/RSC series) but the
2038                  * compiler cursed at me when I tried to cast away the
2039                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2040                  * keeping it the way it is for now.
2041                  *
2042                  * The code in this file is broken in so many other places and
2043                  * will just not work on a big endian CPU anyway therefore the
2044                  * lines below will have to be revisited together with the rest
2045                  * of the ixgbe PMD.
2046                  *
2047                  * TODO:
2048                  *    - Get rid of "volatile" crap and let the compiler do its
2049                  *      job.
2050                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2051                  *      memory ordering below.
2052                  */
2053                 rxdp = &rx_ring[rx_id];
2054                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2055
2056                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2057                         break;
2058
2059                 rxd = *rxdp;
2060
2061                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2062                                   "staterr=0x%x data_len=%u",
2063                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2064                            rte_le_to_cpu_16(rxd.wb.upper.length));
2065
2066                 if (!bulk_alloc) {
2067                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2068                         if (nmb == NULL) {
2069                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2070                                                   "port_id=%u queue_id=%u",
2071                                            rxq->port_id, rxq->queue_id);
2072
2073                                 rte_eth_devices[rxq->port_id].data->
2074                                                         rx_mbuf_alloc_failed++;
2075                                 break;
2076                         }
2077                 } else if (nb_hold > rxq->rx_free_thresh) {
2078                         uint16_t next_rdt = rxq->rx_free_trigger;
2079
2080                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2081                                 rte_wmb();
2082                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2083                                                             next_rdt);
2084                                 nb_hold -= rxq->rx_free_thresh;
2085                         } else {
2086                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2087                                                   "port_id=%u queue_id=%u",
2088                                            rxq->port_id, rxq->queue_id);
2089
2090                                 rte_eth_devices[rxq->port_id].data->
2091                                                         rx_mbuf_alloc_failed++;
2092                                 break;
2093                         }
2094                 }
2095
2096                 nb_hold++;
2097                 rxe = &sw_ring[rx_id];
2098                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2099
2100                 next_id = rx_id + 1;
2101                 if (next_id == rxq->nb_rx_desc)
2102                         next_id = 0;
2103
2104                 /* Prefetch next mbuf while processing current one. */
2105                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2106
2107                 /*
2108                  * When next RX descriptor is on a cache-line boundary,
2109                  * prefetch the next 4 RX descriptors and the next 4 pointers
2110                  * to mbufs.
2111                  */
2112                 if ((next_id & 0x3) == 0) {
2113                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2114                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2115                 }
2116
2117                 rxm = rxe->mbuf;
2118
2119                 if (!bulk_alloc) {
2120                         __le64 dma =
2121                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2122                         /*
2123                          * Update RX descriptor with the physical address of the
2124                          * new data buffer of the new allocated mbuf.
2125                          */
2126                         rxe->mbuf = nmb;
2127
2128                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2129                         rxdp->read.hdr_addr = 0;
2130                         rxdp->read.pkt_addr = dma;
2131                 } else
2132                         rxe->mbuf = NULL;
2133
2134                 /*
2135                  * Set data length & data buffer address of mbuf.
2136                  */
2137                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2138                 rxm->data_len = data_len;
2139
2140                 if (!eop) {
2141                         uint16_t nextp_id;
2142                         /*
2143                          * Get next descriptor index:
2144                          *  - For RSC it's in the NEXTP field.
2145                          *  - For a scattered packet - it's just a following
2146                          *    descriptor.
2147                          */
2148                         if (ixgbe_rsc_count(&rxd))
2149                                 nextp_id =
2150                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2151                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2152                         else
2153                                 nextp_id = next_id;
2154
2155                         next_sc_entry = &sw_sc_ring[nextp_id];
2156                         next_rxe = &sw_ring[nextp_id];
2157                         rte_ixgbe_prefetch(next_rxe);
2158                 }
2159
2160                 sc_entry = &sw_sc_ring[rx_id];
2161                 first_seg = sc_entry->fbuf;
2162                 sc_entry->fbuf = NULL;
2163
2164                 /*
2165                  * If this is the first buffer of the received packet,
2166                  * set the pointer to the first mbuf of the packet and
2167                  * initialize its context.
2168                  * Otherwise, update the total length and the number of segments
2169                  * of the current scattered packet, and update the pointer to
2170                  * the last mbuf of the current packet.
2171                  */
2172                 if (first_seg == NULL) {
2173                         first_seg = rxm;
2174                         first_seg->pkt_len = data_len;
2175                         first_seg->nb_segs = 1;
2176                 } else {
2177                         first_seg->pkt_len += data_len;
2178                         first_seg->nb_segs++;
2179                 }
2180
2181                 prev_id = rx_id;
2182                 rx_id = next_id;
2183
2184                 /*
2185                  * If this is not the last buffer of the received packet, update
2186                  * the pointer to the first mbuf at the NEXTP entry in the
2187                  * sw_sc_ring and continue to parse the RX ring.
2188                  */
2189                 if (!eop && next_rxe) {
2190                         rxm->next = next_rxe->mbuf;
2191                         next_sc_entry->fbuf = first_seg;
2192                         goto next_desc;
2193                 }
2194
2195                 /* Initialize the first mbuf of the returned packet */
2196                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2197
2198                 /*
2199                  * Deal with the case, when HW CRC srip is disabled.
2200                  * That can't happen when LRO is enabled, but still could
2201                  * happen for scattered RX mode.
2202                  */
2203                 first_seg->pkt_len -= rxq->crc_len;
2204                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2205                         struct rte_mbuf *lp;
2206
2207                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2208                                 ;
2209
2210                         first_seg->nb_segs--;
2211                         lp->data_len -= rxq->crc_len - rxm->data_len;
2212                         lp->next = NULL;
2213                         rte_pktmbuf_free_seg(rxm);
2214                 } else
2215                         rxm->data_len -= rxq->crc_len;
2216
2217                 /* Prefetch data of first segment, if configured to do so. */
2218                 rte_packet_prefetch((char *)first_seg->buf_addr +
2219                         first_seg->data_off);
2220
2221                 /*
2222                  * Store the mbuf address into the next entry of the array
2223                  * of returned packets.
2224                  */
2225                 rx_pkts[nb_rx++] = first_seg;
2226         }
2227
2228         /*
2229          * Record index of the next RX descriptor to probe.
2230          */
2231         rxq->rx_tail = rx_id;
2232
2233         /*
2234          * If the number of free RX descriptors is greater than the RX free
2235          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2236          * register.
2237          * Update the RDT with the value of the last processed RX descriptor
2238          * minus 1, to guarantee that the RDT register is never equal to the
2239          * RDH register, which creates a "full" ring situtation from the
2240          * hardware point of view...
2241          */
2242         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2243                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2244                            "nb_hold=%u nb_rx=%u",
2245                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2246
2247                 rte_wmb();
2248                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2249                 nb_hold = 0;
2250         }
2251
2252         rxq->nb_rx_hold = nb_hold;
2253         return nb_rx;
2254 }
2255
2256 uint16_t
2257 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2258                                  uint16_t nb_pkts)
2259 {
2260         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2261 }
2262
2263 uint16_t
2264 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2265                                uint16_t nb_pkts)
2266 {
2267         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2268 }
2269
2270 /*********************************************************************
2271  *
2272  *  Queue management functions
2273  *
2274  **********************************************************************/
2275
2276 static void __attribute__((cold))
2277 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2278 {
2279         unsigned i;
2280
2281         if (txq->sw_ring != NULL) {
2282                 for (i = 0; i < txq->nb_tx_desc; i++) {
2283                         if (txq->sw_ring[i].mbuf != NULL) {
2284                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2285                                 txq->sw_ring[i].mbuf = NULL;
2286                         }
2287                 }
2288         }
2289 }
2290
2291 static void __attribute__((cold))
2292 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2293 {
2294         if (txq != NULL &&
2295             txq->sw_ring != NULL)
2296                 rte_free(txq->sw_ring);
2297 }
2298
2299 static void __attribute__((cold))
2300 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2301 {
2302         if (txq != NULL && txq->ops != NULL) {
2303                 txq->ops->release_mbufs(txq);
2304                 txq->ops->free_swring(txq);
2305                 rte_free(txq);
2306         }
2307 }
2308
2309 void __attribute__((cold))
2310 ixgbe_dev_tx_queue_release(void *txq)
2311 {
2312         ixgbe_tx_queue_release(txq);
2313 }
2314
2315 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2316 static void __attribute__((cold))
2317 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2318 {
2319         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2320         struct ixgbe_tx_entry *txe = txq->sw_ring;
2321         uint16_t prev, i;
2322
2323         /* Zero out HW ring memory */
2324         for (i = 0; i < txq->nb_tx_desc; i++) {
2325                 txq->tx_ring[i] = zeroed_desc;
2326         }
2327
2328         /* Initialize SW ring entries */
2329         prev = (uint16_t) (txq->nb_tx_desc - 1);
2330         for (i = 0; i < txq->nb_tx_desc; i++) {
2331                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2332
2333                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2334                 txe[i].mbuf = NULL;
2335                 txe[i].last_id = i;
2336                 txe[prev].next_id = i;
2337                 prev = i;
2338         }
2339
2340         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2341         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2342
2343         txq->tx_tail = 0;
2344         txq->nb_tx_used = 0;
2345         /*
2346          * Always allow 1 descriptor to be un-allocated to avoid
2347          * a H/W race condition
2348          */
2349         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2350         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2351         txq->ctx_curr = 0;
2352         memset((void *)&txq->ctx_cache, 0,
2353                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2354 }
2355
2356 static const struct ixgbe_txq_ops def_txq_ops = {
2357         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2358         .free_swring = ixgbe_tx_free_swring,
2359         .reset = ixgbe_reset_tx_queue,
2360 };
2361
2362 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2363  * the queue parameters. Used in tx_queue_setup by primary process and then
2364  * in dev_init by secondary process when attaching to an existing ethdev.
2365  */
2366 void __attribute__((cold))
2367 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2368 {
2369         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2370         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2371                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2372                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2373                 dev->tx_pkt_prepare = NULL;
2374 #ifdef RTE_IXGBE_INC_VECTOR
2375                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2376                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2377                                         ixgbe_txq_vec_setup(txq) == 0)) {
2378                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2379                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2380                 } else
2381 #endif
2382                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2383         } else {
2384                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2385                 PMD_INIT_LOG(DEBUG,
2386                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2387                                 (unsigned long)txq->txq_flags,
2388                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2389                 PMD_INIT_LOG(DEBUG,
2390                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2391                                 (unsigned long)txq->tx_rs_thresh,
2392                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2393                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2394                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2395         }
2396 }
2397
2398 int __attribute__((cold))
2399 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2400                          uint16_t queue_idx,
2401                          uint16_t nb_desc,
2402                          unsigned int socket_id,
2403                          const struct rte_eth_txconf *tx_conf)
2404 {
2405         const struct rte_memzone *tz;
2406         struct ixgbe_tx_queue *txq;
2407         struct ixgbe_hw     *hw;
2408         uint16_t tx_rs_thresh, tx_free_thresh;
2409
2410         PMD_INIT_FUNC_TRACE();
2411         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2412
2413         /*
2414          * Validate number of transmit descriptors.
2415          * It must not exceed hardware maximum, and must be multiple
2416          * of IXGBE_ALIGN.
2417          */
2418         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2419                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2420                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2421                 return -EINVAL;
2422         }
2423
2424         /*
2425          * The following two parameters control the setting of the RS bit on
2426          * transmit descriptors.
2427          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2428          * descriptors have been used.
2429          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2430          * descriptors are used or if the number of descriptors required
2431          * to transmit a packet is greater than the number of free TX
2432          * descriptors.
2433          * The following constraints must be satisfied:
2434          *  tx_rs_thresh must be greater than 0.
2435          *  tx_rs_thresh must be less than the size of the ring minus 2.
2436          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2437          *  tx_rs_thresh must be a divisor of the ring size.
2438          *  tx_free_thresh must be greater than 0.
2439          *  tx_free_thresh must be less than the size of the ring minus 3.
2440          * One descriptor in the TX ring is used as a sentinel to avoid a
2441          * H/W race condition, hence the maximum threshold constraints.
2442          * When set to zero use default values.
2443          */
2444         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2445                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2446         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2447                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2448         if (tx_rs_thresh >= (nb_desc - 2)) {
2449                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2450                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2451                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2452                         (int)dev->data->port_id, (int)queue_idx);
2453                 return -(EINVAL);
2454         }
2455         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2456                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less or equal than %u. "
2457                         "(tx_rs_thresh=%u port=%d queue=%d)",
2458                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2459                         (int)dev->data->port_id, (int)queue_idx);
2460                 return -(EINVAL);
2461         }
2462         if (tx_free_thresh >= (nb_desc - 3)) {
2463                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the "
2464                              "tx_free_thresh must be less than the number of "
2465                              "TX descriptors minus 3. (tx_free_thresh=%u "
2466                              "port=%d queue=%d)",
2467                              (unsigned int)tx_free_thresh,
2468                              (int)dev->data->port_id, (int)queue_idx);
2469                 return -(EINVAL);
2470         }
2471         if (tx_rs_thresh > tx_free_thresh) {
2472                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2473                              "tx_free_thresh. (tx_free_thresh=%u "
2474                              "tx_rs_thresh=%u port=%d queue=%d)",
2475                              (unsigned int)tx_free_thresh,
2476                              (unsigned int)tx_rs_thresh,
2477                              (int)dev->data->port_id,
2478                              (int)queue_idx);
2479                 return -(EINVAL);
2480         }
2481         if ((nb_desc % tx_rs_thresh) != 0) {
2482                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2483                              "number of TX descriptors. (tx_rs_thresh=%u "
2484                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2485                              (int)dev->data->port_id, (int)queue_idx);
2486                 return -(EINVAL);
2487         }
2488
2489         /*
2490          * If rs_bit_thresh is greater than 1, then TX WTHRESH should be
2491          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2492          * by the NIC and all descriptors are written back after the NIC
2493          * accumulates WTHRESH descriptors.
2494          */
2495         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2496                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2497                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2498                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2499                              (int)dev->data->port_id, (int)queue_idx);
2500                 return -(EINVAL);
2501         }
2502
2503         /* Free memory prior to re-allocation if needed... */
2504         if (dev->data->tx_queues[queue_idx] != NULL) {
2505                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2506                 dev->data->tx_queues[queue_idx] = NULL;
2507         }
2508
2509         /* First allocate the tx queue data structure */
2510         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2511                                  RTE_CACHE_LINE_SIZE, socket_id);
2512         if (txq == NULL)
2513                 return -ENOMEM;
2514
2515         /*
2516          * Allocate TX ring hardware descriptors. A memzone large enough to
2517          * handle the maximum ring size is allocated in order to allow for
2518          * resizing in later calls to the queue setup function.
2519          */
2520         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2521                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2522                         IXGBE_ALIGN, socket_id);
2523         if (tz == NULL) {
2524                 ixgbe_tx_queue_release(txq);
2525                 return -ENOMEM;
2526         }
2527
2528         txq->nb_tx_desc = nb_desc;
2529         txq->tx_rs_thresh = tx_rs_thresh;
2530         txq->tx_free_thresh = tx_free_thresh;
2531         txq->pthresh = tx_conf->tx_thresh.pthresh;
2532         txq->hthresh = tx_conf->tx_thresh.hthresh;
2533         txq->wthresh = tx_conf->tx_thresh.wthresh;
2534         txq->queue_id = queue_idx;
2535         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2536                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2537         txq->port_id = dev->data->port_id;
2538         txq->txq_flags = tx_conf->txq_flags;
2539         txq->ops = &def_txq_ops;
2540         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2541
2542         /*
2543          * Modification to set VFTDT for virtual function if vf is detected
2544          */
2545         if (hw->mac.type == ixgbe_mac_82599_vf ||
2546             hw->mac.type == ixgbe_mac_X540_vf ||
2547             hw->mac.type == ixgbe_mac_X550_vf ||
2548             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2549             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2550                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2551         else
2552                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2553
2554         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2555         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2556
2557         /* Allocate software ring */
2558         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2559                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2560                                 RTE_CACHE_LINE_SIZE, socket_id);
2561         if (txq->sw_ring == NULL) {
2562                 ixgbe_tx_queue_release(txq);
2563                 return -ENOMEM;
2564         }
2565         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2566                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2567
2568         /* set up vector or scalar TX function as appropriate */
2569         ixgbe_set_tx_function(dev, txq);
2570
2571         txq->ops->reset(txq);
2572
2573         dev->data->tx_queues[queue_idx] = txq;
2574
2575
2576         return 0;
2577 }
2578
2579 /**
2580  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2581  *
2582  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2583  * in the sw_rsc_ring is not set to NULL but rather points to the next
2584  * mbuf of this RSC aggregation (that has not been completed yet and still
2585  * resides on the HW ring). So, instead of calling for rte_pktmbuf_free() we
2586  * will just free first "nb_segs" segments of the cluster explicitly by calling
2587  * an rte_pktmbuf_free_seg().
2588  *
2589  * @m scattered cluster head
2590  */
2591 static void __attribute__((cold))
2592 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2593 {
2594         uint8_t i, nb_segs = m->nb_segs;
2595         struct rte_mbuf *next_seg;
2596
2597         for (i = 0; i < nb_segs; i++) {
2598                 next_seg = m->next;
2599                 rte_pktmbuf_free_seg(m);
2600                 m = next_seg;
2601         }
2602 }
2603
2604 static void __attribute__((cold))
2605 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2606 {
2607         unsigned i;
2608
2609 #ifdef RTE_IXGBE_INC_VECTOR
2610         /* SSE Vector driver has a different way of releasing mbufs. */
2611         if (rxq->rx_using_sse) {
2612                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2613                 return;
2614         }
2615 #endif
2616
2617         if (rxq->sw_ring != NULL) {
2618                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2619                         if (rxq->sw_ring[i].mbuf != NULL) {
2620                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2621                                 rxq->sw_ring[i].mbuf = NULL;
2622                         }
2623                 }
2624                 if (rxq->rx_nb_avail) {
2625                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2626                                 struct rte_mbuf *mb;
2627
2628                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2629                                 rte_pktmbuf_free_seg(mb);
2630                         }
2631                         rxq->rx_nb_avail = 0;
2632                 }
2633         }
2634
2635         if (rxq->sw_sc_ring)
2636                 for (i = 0; i < rxq->nb_rx_desc; i++)
2637                         if (rxq->sw_sc_ring[i].fbuf) {
2638                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2639                                 rxq->sw_sc_ring[i].fbuf = NULL;
2640                         }
2641 }
2642
2643 static void __attribute__((cold))
2644 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2645 {
2646         if (rxq != NULL) {
2647                 ixgbe_rx_queue_release_mbufs(rxq);
2648                 rte_free(rxq->sw_ring);
2649                 rte_free(rxq->sw_sc_ring);
2650                 rte_free(rxq);
2651         }
2652 }
2653
2654 void __attribute__((cold))
2655 ixgbe_dev_rx_queue_release(void *rxq)
2656 {
2657         ixgbe_rx_queue_release(rxq);
2658 }
2659
2660 /*
2661  * Check if Rx Burst Bulk Alloc function can be used.
2662  * Return
2663  *        0: the preconditions are satisfied and the bulk allocation function
2664  *           can be used.
2665  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2666  *           function must be used.
2667  */
2668 static inline int __attribute__((cold))
2669 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2670 {
2671         int ret = 0;
2672
2673         /*
2674          * Make sure the following pre-conditions are satisfied:
2675          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2676          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2677          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2678          * Scattered packets are not supported.  This should be checked
2679          * outside of this function.
2680          */
2681         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2682                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2683                              "rxq->rx_free_thresh=%d, "
2684                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2685                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2686                 ret = -EINVAL;
2687         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2688                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2689                              "rxq->rx_free_thresh=%d, "
2690                              "rxq->nb_rx_desc=%d",
2691                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2692                 ret = -EINVAL;
2693         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2694                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2695                              "rxq->nb_rx_desc=%d, "
2696                              "rxq->rx_free_thresh=%d",
2697                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2698                 ret = -EINVAL;
2699         }
2700
2701         return ret;
2702 }
2703
2704 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2705 static void __attribute__((cold))
2706 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2707 {
2708         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2709         unsigned i;
2710         uint16_t len = rxq->nb_rx_desc;
2711
2712         /*
2713          * By default, the Rx queue setup function allocates enough memory for
2714          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2715          * extra memory at the end of the descriptor ring to be zero'd out.
2716          */
2717         if (adapter->rx_bulk_alloc_allowed)
2718                 /* zero out extra memory */
2719                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2720
2721         /*
2722          * Zero out HW ring memory. Zero out extra memory at the end of
2723          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2724          * reads extra memory as zeros.
2725          */
2726         for (i = 0; i < len; i++) {
2727                 rxq->rx_ring[i] = zeroed_desc;
2728         }
2729
2730         /*
2731          * initialize extra software ring entries. Space for these extra
2732          * entries is always allocated
2733          */
2734         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2735         for (i = rxq->nb_rx_desc; i < len; ++i) {
2736                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2737         }
2738
2739         rxq->rx_nb_avail = 0;
2740         rxq->rx_next_avail = 0;
2741         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2742         rxq->rx_tail = 0;
2743         rxq->nb_rx_hold = 0;
2744         rxq->pkt_first_seg = NULL;
2745         rxq->pkt_last_seg = NULL;
2746
2747 #ifdef RTE_IXGBE_INC_VECTOR
2748         rxq->rxrearm_start = 0;
2749         rxq->rxrearm_nb = 0;
2750 #endif
2751 }
2752
2753 int __attribute__((cold))
2754 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2755                          uint16_t queue_idx,
2756                          uint16_t nb_desc,
2757                          unsigned int socket_id,
2758                          const struct rte_eth_rxconf *rx_conf,
2759                          struct rte_mempool *mp)
2760 {
2761         const struct rte_memzone *rz;
2762         struct ixgbe_rx_queue *rxq;
2763         struct ixgbe_hw     *hw;
2764         uint16_t len;
2765         struct ixgbe_adapter *adapter =
2766                 (struct ixgbe_adapter *)dev->data->dev_private;
2767
2768         PMD_INIT_FUNC_TRACE();
2769         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2770
2771         /*
2772          * Validate number of receive descriptors.
2773          * It must not exceed hardware maximum, and must be multiple
2774          * of IXGBE_ALIGN.
2775          */
2776         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2777                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2778                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2779                 return -EINVAL;
2780         }
2781
2782         /* Free memory prior to re-allocation if needed... */
2783         if (dev->data->rx_queues[queue_idx] != NULL) {
2784                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2785                 dev->data->rx_queues[queue_idx] = NULL;
2786         }
2787
2788         /* First allocate the rx queue data structure */
2789         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2790                                  RTE_CACHE_LINE_SIZE, socket_id);
2791         if (rxq == NULL)
2792                 return -ENOMEM;
2793         rxq->mb_pool = mp;
2794         rxq->nb_rx_desc = nb_desc;
2795         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2796         rxq->queue_id = queue_idx;
2797         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2798                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2799         rxq->port_id = dev->data->port_id;
2800         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2801                                                         0 : ETHER_CRC_LEN);
2802         rxq->drop_en = rx_conf->rx_drop_en;
2803         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2804
2805         /*
2806          * The packet type in RX descriptor is different for different NICs.
2807          * Some bits are used for x550 but reserved for other NICS.
2808          * So set different masks for different NICs.
2809          */
2810         if (hw->mac.type == ixgbe_mac_X550 ||
2811             hw->mac.type == ixgbe_mac_X550EM_x ||
2812             hw->mac.type == ixgbe_mac_X550EM_a ||
2813             hw->mac.type == ixgbe_mac_X550_vf ||
2814             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2815             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2816                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2817         else
2818                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2819
2820         /*
2821          * Allocate RX ring hardware descriptors. A memzone large enough to
2822          * handle the maximum ring size is allocated in order to allow for
2823          * resizing in later calls to the queue setup function.
2824          */
2825         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2826                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2827         if (rz == NULL) {
2828                 ixgbe_rx_queue_release(rxq);
2829                 return -ENOMEM;
2830         }
2831
2832         /*
2833          * Zero init all the descriptors in the ring.
2834          */
2835         memset(rz->addr, 0, RX_RING_SZ);
2836
2837         /*
2838          * Modified to setup VFRDT for Virtual Function
2839          */
2840         if (hw->mac.type == ixgbe_mac_82599_vf ||
2841             hw->mac.type == ixgbe_mac_X540_vf ||
2842             hw->mac.type == ixgbe_mac_X550_vf ||
2843             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2844             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2845                 rxq->rdt_reg_addr =
2846                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2847                 rxq->rdh_reg_addr =
2848                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2849         } else {
2850                 rxq->rdt_reg_addr =
2851                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2852                 rxq->rdh_reg_addr =
2853                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2854         }
2855
2856         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2857         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2858
2859         /*
2860          * Certain constraints must be met in order to use the bulk buffer
2861          * allocation Rx burst function. If any of Rx queues doesn't meet them
2862          * the feature should be disabled for the whole port.
2863          */
2864         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2865                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2866                                     "preconditions - canceling the feature for "
2867                                     "the whole port[%d]",
2868                              rxq->queue_id, rxq->port_id);
2869                 adapter->rx_bulk_alloc_allowed = false;
2870         }
2871
2872         /*
2873          * Allocate software ring. Allow for space at the end of the
2874          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2875          * function does not access an invalid memory region.
2876          */
2877         len = nb_desc;
2878         if (adapter->rx_bulk_alloc_allowed)
2879                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2880
2881         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2882                                           sizeof(struct ixgbe_rx_entry) * len,
2883                                           RTE_CACHE_LINE_SIZE, socket_id);
2884         if (!rxq->sw_ring) {
2885                 ixgbe_rx_queue_release(rxq);
2886                 return -ENOMEM;
2887         }
2888
2889         /*
2890          * Always allocate even if it's not going to be needed in order to
2891          * simplify the code.
2892          *
2893          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2894          * be requested in ixgbe_dev_rx_init(), which is called later from
2895          * dev_start() flow.
2896          */
2897         rxq->sw_sc_ring =
2898                 rte_zmalloc_socket("rxq->sw_sc_ring",
2899                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2900                                    RTE_CACHE_LINE_SIZE, socket_id);
2901         if (!rxq->sw_sc_ring) {
2902                 ixgbe_rx_queue_release(rxq);
2903                 return -ENOMEM;
2904         }
2905
2906         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2907                             "dma_addr=0x%"PRIx64,
2908                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2909                      rxq->rx_ring_phys_addr);
2910
2911         if (!rte_is_power_of_2(nb_desc)) {
2912                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2913                                     "preconditions - canceling the feature for "
2914                                     "the whole port[%d]",
2915                              rxq->queue_id, rxq->port_id);
2916                 adapter->rx_vec_allowed = false;
2917         } else
2918                 ixgbe_rxq_vec_setup(rxq);
2919
2920         dev->data->rx_queues[queue_idx] = rxq;
2921
2922         ixgbe_reset_rx_queue(adapter, rxq);
2923
2924         return 0;
2925 }
2926
2927 uint32_t
2928 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2929 {
2930 #define IXGBE_RXQ_SCAN_INTERVAL 4
2931         volatile union ixgbe_adv_rx_desc *rxdp;
2932         struct ixgbe_rx_queue *rxq;
2933         uint32_t desc = 0;
2934
2935         rxq = dev->data->rx_queues[rx_queue_id];
2936         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2937
2938         while ((desc < rxq->nb_rx_desc) &&
2939                 (rxdp->wb.upper.status_error &
2940                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2941                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2942                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2943                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2944                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2945                                 desc - rxq->nb_rx_desc]);
2946         }
2947
2948         return desc;
2949 }
2950
2951 int
2952 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2953 {
2954         volatile union ixgbe_adv_rx_desc *rxdp;
2955         struct ixgbe_rx_queue *rxq = rx_queue;
2956         uint32_t desc;
2957
2958         if (unlikely(offset >= rxq->nb_rx_desc))
2959                 return 0;
2960         desc = rxq->rx_tail + offset;
2961         if (desc >= rxq->nb_rx_desc)
2962                 desc -= rxq->nb_rx_desc;
2963
2964         rxdp = &rxq->rx_ring[desc];
2965         return !!(rxdp->wb.upper.status_error &
2966                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2967 }
2968
2969 int
2970 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
2971 {
2972         struct ixgbe_rx_queue *rxq = rx_queue;
2973         volatile uint32_t *status;
2974         uint32_t nb_hold, desc;
2975
2976         if (unlikely(offset >= rxq->nb_rx_desc))
2977                 return -EINVAL;
2978
2979 #ifdef RTE_IXGBE_INC_VECTOR
2980         if (rxq->rx_using_sse)
2981                 nb_hold = rxq->rxrearm_nb;
2982         else
2983 #endif
2984                 nb_hold = rxq->nb_rx_hold;
2985         if (offset >= rxq->nb_rx_desc - nb_hold)
2986                 return RTE_ETH_RX_DESC_UNAVAIL;
2987
2988         desc = rxq->rx_tail + offset;
2989         if (desc >= rxq->nb_rx_desc)
2990                 desc -= rxq->nb_rx_desc;
2991
2992         status = &rxq->rx_ring[desc].wb.upper.status_error;
2993         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
2994                 return RTE_ETH_RX_DESC_DONE;
2995
2996         return RTE_ETH_RX_DESC_AVAIL;
2997 }
2998
2999 int
3000 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3001 {
3002         struct ixgbe_tx_queue *txq = tx_queue;
3003         volatile uint32_t *status;
3004         uint32_t desc;
3005
3006         if (unlikely(offset >= txq->nb_tx_desc))
3007                 return -EINVAL;
3008
3009         desc = txq->tx_tail + offset;
3010         /* go to next desc that has the RS bit */
3011         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3012                 txq->tx_rs_thresh;
3013         if (desc >= txq->nb_tx_desc) {
3014                 desc -= txq->nb_tx_desc;
3015                 if (desc >= txq->nb_tx_desc)
3016                         desc -= txq->nb_tx_desc;
3017         }
3018
3019         status = &txq->tx_ring[desc].wb.status;
3020         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3021                 return RTE_ETH_TX_DESC_DONE;
3022
3023         return RTE_ETH_TX_DESC_FULL;
3024 }
3025
3026 void __attribute__((cold))
3027 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3028 {
3029         unsigned i;
3030         struct ixgbe_adapter *adapter =
3031                 (struct ixgbe_adapter *)dev->data->dev_private;
3032
3033         PMD_INIT_FUNC_TRACE();
3034
3035         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3036                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3037
3038                 if (txq != NULL) {
3039                         txq->ops->release_mbufs(txq);
3040                         txq->ops->reset(txq);
3041                 }
3042         }
3043
3044         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3045                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3046
3047                 if (rxq != NULL) {
3048                         ixgbe_rx_queue_release_mbufs(rxq);
3049                         ixgbe_reset_rx_queue(adapter, rxq);
3050                 }
3051         }
3052 }
3053
3054 void
3055 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3056 {
3057         unsigned i;
3058
3059         PMD_INIT_FUNC_TRACE();
3060
3061         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3062                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3063                 dev->data->rx_queues[i] = NULL;
3064         }
3065         dev->data->nb_rx_queues = 0;
3066
3067         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3068                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3069                 dev->data->tx_queues[i] = NULL;
3070         }
3071         dev->data->nb_tx_queues = 0;
3072 }
3073
3074 /*********************************************************************
3075  *
3076  *  Device RX/TX init functions
3077  *
3078  **********************************************************************/
3079
3080 /**
3081  * Receive Side Scaling (RSS)
3082  * See section 7.1.2.8 in the following document:
3083  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3084  *
3085  * Principles:
3086  * The source and destination IP addresses of the IP header and the source
3087  * and destination ports of TCP/UDP headers, if any, of received packets are
3088  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3089  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3090  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3091  * RSS output index which is used as the RX queue index where to store the
3092  * received packets.
3093  * The following output is supplied in the RX write-back descriptor:
3094  *     - 32-bit result of the Microsoft RSS hash function,
3095  *     - 4-bit RSS type field.
3096  */
3097
3098 /*
3099  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3100  * Used as the default key.
3101  */
3102 static uint8_t rss_intel_key[40] = {
3103         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3104         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3105         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3106         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3107         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3108 };
3109
3110 static void
3111 ixgbe_rss_disable(struct rte_eth_dev *dev)
3112 {
3113         struct ixgbe_hw *hw;
3114         uint32_t mrqc;
3115         uint32_t mrqc_reg;
3116
3117         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3118         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3119         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3120         mrqc &= ~IXGBE_MRQC_RSSEN;
3121         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3122 }
3123
3124 static void
3125 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3126 {
3127         uint8_t  *hash_key;
3128         uint32_t mrqc;
3129         uint32_t rss_key;
3130         uint64_t rss_hf;
3131         uint16_t i;
3132         uint32_t mrqc_reg;
3133         uint32_t rssrk_reg;
3134
3135         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3136         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3137
3138         hash_key = rss_conf->rss_key;
3139         if (hash_key != NULL) {
3140                 /* Fill in RSS hash key */
3141                 for (i = 0; i < 10; i++) {
3142                         rss_key  = hash_key[(i * 4)];
3143                         rss_key |= hash_key[(i * 4) + 1] << 8;
3144                         rss_key |= hash_key[(i * 4) + 2] << 16;
3145                         rss_key |= hash_key[(i * 4) + 3] << 24;
3146                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3147                 }
3148         }
3149
3150         /* Set configured hashing protocols in MRQC register */
3151         rss_hf = rss_conf->rss_hf;
3152         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3153         if (rss_hf & ETH_RSS_IPV4)
3154                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3155         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3156                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3157         if (rss_hf & ETH_RSS_IPV6)
3158                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3159         if (rss_hf & ETH_RSS_IPV6_EX)
3160                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3161         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3162                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3163         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3164                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3165         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3166                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3167         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3168                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3169         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3170                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3171         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3172 }
3173
3174 int
3175 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3176                           struct rte_eth_rss_conf *rss_conf)
3177 {
3178         struct ixgbe_hw *hw;
3179         uint32_t mrqc;
3180         uint64_t rss_hf;
3181         uint32_t mrqc_reg;
3182
3183         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3184
3185         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3186                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3187                         "NIC.");
3188                 return -ENOTSUP;
3189         }
3190         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3191
3192         /*
3193          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3194          *     "RSS enabling cannot be done dynamically while it must be
3195          *      preceded by a software reset"
3196          * Before changing anything, first check that the update RSS operation
3197          * does not attempt to disable RSS, if RSS was enabled at
3198          * initialization time, or does not attempt to enable RSS, if RSS was
3199          * disabled at initialization time.
3200          */
3201         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3202         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3203         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3204                 if (rss_hf != 0) /* Enable RSS */
3205                         return -(EINVAL);
3206                 return 0; /* Nothing to do */
3207         }
3208         /* RSS enabled */
3209         if (rss_hf == 0) /* Disable RSS */
3210                 return -(EINVAL);
3211         ixgbe_hw_rss_hash_set(hw, rss_conf);
3212         return 0;
3213 }
3214
3215 int
3216 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3217                             struct rte_eth_rss_conf *rss_conf)
3218 {
3219         struct ixgbe_hw *hw;
3220         uint8_t *hash_key;
3221         uint32_t mrqc;
3222         uint32_t rss_key;
3223         uint64_t rss_hf;
3224         uint16_t i;
3225         uint32_t mrqc_reg;
3226         uint32_t rssrk_reg;
3227
3228         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3229         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3230         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3231         hash_key = rss_conf->rss_key;
3232         if (hash_key != NULL) {
3233                 /* Return RSS hash key */
3234                 for (i = 0; i < 10; i++) {
3235                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3236                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3237                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3238                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3239                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3240                 }
3241         }
3242
3243         /* Get RSS functions configured in MRQC register */
3244         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3245         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3246                 rss_conf->rss_hf = 0;
3247                 return 0;
3248         }
3249         rss_hf = 0;
3250         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3251                 rss_hf |= ETH_RSS_IPV4;
3252         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3253                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3254         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3255                 rss_hf |= ETH_RSS_IPV6;
3256         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3257                 rss_hf |= ETH_RSS_IPV6_EX;
3258         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3259                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3260         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3261                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3262         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3263                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3264         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3265                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3266         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3267                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3268         rss_conf->rss_hf = rss_hf;
3269         return 0;
3270 }
3271
3272 static void
3273 ixgbe_rss_configure(struct rte_eth_dev *dev)
3274 {
3275         struct rte_eth_rss_conf rss_conf;
3276         struct ixgbe_hw *hw;
3277         uint32_t reta;
3278         uint16_t i;
3279         uint16_t j;
3280         uint16_t sp_reta_size;
3281         uint32_t reta_reg;
3282
3283         PMD_INIT_FUNC_TRACE();
3284         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3285
3286         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3287
3288         /*
3289          * Fill in redirection table
3290          * The byte-swap is needed because NIC registers are in
3291          * little-endian order.
3292          */
3293         reta = 0;
3294         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3295                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3296
3297                 if (j == dev->data->nb_rx_queues)
3298                         j = 0;
3299                 reta = (reta << 8) | j;
3300                 if ((i & 3) == 3)
3301                         IXGBE_WRITE_REG(hw, reta_reg,
3302                                         rte_bswap32(reta));
3303         }
3304
3305         /*
3306          * Configure the RSS key and the RSS protocols used to compute
3307          * the RSS hash of input packets.
3308          */
3309         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3310         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3311                 ixgbe_rss_disable(dev);
3312                 return;
3313         }
3314         if (rss_conf.rss_key == NULL)
3315                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3316         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3317 }
3318
3319 #define NUM_VFTA_REGISTERS 128
3320 #define NIC_RX_BUFFER_SIZE 0x200
3321 #define X550_RX_BUFFER_SIZE 0x180
3322
3323 static void
3324 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3325 {
3326         struct rte_eth_vmdq_dcb_conf *cfg;
3327         struct ixgbe_hw *hw;
3328         enum rte_eth_nb_pools num_pools;
3329         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3330         uint16_t pbsize;
3331         uint8_t nb_tcs; /* number of traffic classes */
3332         int i;
3333
3334         PMD_INIT_FUNC_TRACE();
3335         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3336         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3337         num_pools = cfg->nb_queue_pools;
3338         /* Check we have a valid number of pools */
3339         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3340                 ixgbe_rss_disable(dev);
3341                 return;
3342         }
3343         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3344         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3345
3346         /*
3347          * RXPBSIZE
3348          * split rx buffer up into sections, each for 1 traffic class
3349          */
3350         switch (hw->mac.type) {
3351         case ixgbe_mac_X550:
3352         case ixgbe_mac_X550EM_x:
3353         case ixgbe_mac_X550EM_a:
3354                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3355                 break;
3356         default:
3357                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3358                 break;
3359         }
3360         for (i = 0; i < nb_tcs; i++) {
3361                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3362
3363                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3364                 /* clear 10 bits. */
3365                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3366                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3367         }
3368         /* zero alloc all unused TCs */
3369         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3370                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3371
3372                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3373                 /* clear 10 bits. */
3374                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3375         }
3376
3377         /* MRQC: enable vmdq and dcb */
3378         mrqc = (num_pools == ETH_16_POOLS) ?
3379                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3380         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3381
3382         /* PFVTCTL: turn on virtualisation and set the default pool */
3383         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3384         if (cfg->enable_default_pool) {
3385                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3386         } else {
3387                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3388         }
3389
3390         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3391
3392         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3393         queue_mapping = 0;
3394         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3395                 /*
3396                  * mapping is done with 3 bits per priority,
3397                  * so shift by i*3 each time
3398                  */
3399                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3400
3401         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3402
3403         /* RTRPCS: DCB related */
3404         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3405
3406         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3407         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3408         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3409         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3410
3411         /* VFTA - enable all vlan filters */
3412         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3413                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3414         }
3415
3416         /* VFRE: pool enabling for receive - 16 or 32 */
3417         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3418                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3419
3420         /*
3421          * MPSAR - allow pools to read specific mac addresses
3422          * In this case, all pools should be able to read from mac addr 0
3423          */
3424         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3425         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3426
3427         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3428         for (i = 0; i < cfg->nb_pool_maps; i++) {
3429                 /* set vlan id in VF register and set the valid bit */
3430                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3431                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3432                 /*
3433                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3434                  * pools, we only need to use the first half of the register
3435                  * i.e. bits 0-31
3436                  */
3437                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3438         }
3439 }
3440
3441 /**
3442  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3443  * @dev: pointer to eth_dev structure
3444  * @dcb_config: pointer to ixgbe_dcb_config structure
3445  */
3446 static void
3447 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3448                        struct ixgbe_dcb_config *dcb_config)
3449 {
3450         uint32_t reg;
3451         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3452
3453         PMD_INIT_FUNC_TRACE();
3454         if (hw->mac.type != ixgbe_mac_82598EB) {
3455                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3456                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3457                 reg |= IXGBE_RTTDCS_ARBDIS;
3458                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3459
3460                 /* Enable DCB for Tx with 8 TCs */
3461                 if (dcb_config->num_tcs.pg_tcs == 8) {
3462                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3463                 } else {
3464                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3465                 }
3466                 if (dcb_config->vt_mode)
3467                         reg |= IXGBE_MTQC_VT_ENA;
3468                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3469
3470                 /* Enable the Tx desc arbiter */
3471                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3472                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3473                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3474
3475                 /* Enable Security TX Buffer IFG for DCB */
3476                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3477                 reg |= IXGBE_SECTX_DCB;
3478                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3479         }
3480 }
3481
3482 /**
3483  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3484  * @dev: pointer to rte_eth_dev structure
3485  * @dcb_config: pointer to ixgbe_dcb_config structure
3486  */
3487 static void
3488 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3489                         struct ixgbe_dcb_config *dcb_config)
3490 {
3491         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3492                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3493         struct ixgbe_hw *hw =
3494                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3495
3496         PMD_INIT_FUNC_TRACE();
3497         if (hw->mac.type != ixgbe_mac_82598EB)
3498                 /*PF VF Transmit Enable*/
3499                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3500                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3501
3502         /*Configure general DCB TX parameters*/
3503         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3504 }
3505
3506 static void
3507 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3508                         struct ixgbe_dcb_config *dcb_config)
3509 {
3510         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3511                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3512         struct ixgbe_dcb_tc_config *tc;
3513         uint8_t i, j;
3514
3515         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3516         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3517                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3518                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3519         } else {
3520                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3521                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3522         }
3523         /* User Priority to Traffic Class mapping */
3524         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3525                 j = vmdq_rx_conf->dcb_tc[i];
3526                 tc = &dcb_config->tc_config[j];
3527                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3528                                                 (uint8_t)(1 << j);
3529         }
3530 }
3531
3532 static void
3533 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3534                         struct ixgbe_dcb_config *dcb_config)
3535 {
3536         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3537                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3538         struct ixgbe_dcb_tc_config *tc;
3539         uint8_t i, j;
3540
3541         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3542         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3543                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3544                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3545         } else {
3546                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3547                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3548         }
3549
3550         /* User Priority to Traffic Class mapping */
3551         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3552                 j = vmdq_tx_conf->dcb_tc[i];
3553                 tc = &dcb_config->tc_config[j];
3554                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3555                                                 (uint8_t)(1 << j);
3556         }
3557 }
3558
3559 static void
3560 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3561                 struct ixgbe_dcb_config *dcb_config)
3562 {
3563         struct rte_eth_dcb_rx_conf *rx_conf =
3564                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3565         struct ixgbe_dcb_tc_config *tc;
3566         uint8_t i, j;
3567
3568         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3569         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3570
3571         /* User Priority to Traffic Class mapping */
3572         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3573                 j = rx_conf->dcb_tc[i];
3574                 tc = &dcb_config->tc_config[j];
3575                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3576                                                 (uint8_t)(1 << j);
3577         }
3578 }
3579
3580 static void
3581 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3582                 struct ixgbe_dcb_config *dcb_config)
3583 {
3584         struct rte_eth_dcb_tx_conf *tx_conf =
3585                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3586         struct ixgbe_dcb_tc_config *tc;
3587         uint8_t i, j;
3588
3589         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3590         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3591
3592         /* User Priority to Traffic Class mapping */
3593         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3594                 j = tx_conf->dcb_tc[i];
3595                 tc = &dcb_config->tc_config[j];
3596                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3597                                                 (uint8_t)(1 << j);
3598         }
3599 }
3600
3601 /**
3602  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3603  * @dev: pointer to eth_dev structure
3604  * @dcb_config: pointer to ixgbe_dcb_config structure
3605  */
3606 static void
3607 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3608                        struct ixgbe_dcb_config *dcb_config)
3609 {
3610         uint32_t reg;
3611         uint32_t vlanctrl;
3612         uint8_t i;
3613         uint32_t q;
3614         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3615
3616         PMD_INIT_FUNC_TRACE();
3617         /*
3618          * Disable the arbiter before changing parameters
3619          * (always enable recycle mode; WSP)
3620          */
3621         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3622         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3623
3624         if (hw->mac.type != ixgbe_mac_82598EB) {
3625                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3626                 if (dcb_config->num_tcs.pg_tcs == 4) {
3627                         if (dcb_config->vt_mode)
3628                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3629                                         IXGBE_MRQC_VMDQRT4TCEN;
3630                         else {
3631                                 /* no matter the mode is DCB or DCB_RSS, just
3632                                  * set the MRQE to RSSXTCEN. RSS is controlled
3633                                  * by RSS_FIELD
3634                                  */
3635                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3636                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3637                                         IXGBE_MRQC_RTRSS4TCEN;
3638                         }
3639                 }
3640                 if (dcb_config->num_tcs.pg_tcs == 8) {
3641                         if (dcb_config->vt_mode)
3642                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3643                                         IXGBE_MRQC_VMDQRT8TCEN;
3644                         else {
3645                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3646                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3647                                         IXGBE_MRQC_RTRSS8TCEN;
3648                         }
3649                 }
3650
3651                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3652
3653                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3654                         /* Disable drop for all queues in VMDQ mode*/
3655                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3656                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3657                                                 (IXGBE_QDE_WRITE |
3658                                                  (q << IXGBE_QDE_IDX_SHIFT)));
3659                 } else {
3660                         /* Enable drop for all queues in SRIOV mode */
3661                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3662                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3663                                                 (IXGBE_QDE_WRITE |
3664                                                  (q << IXGBE_QDE_IDX_SHIFT) |
3665                                                  IXGBE_QDE_ENABLE));
3666                 }
3667         }
3668
3669         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3670         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3671         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3672         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3673
3674         /* VFTA - enable all vlan filters */
3675         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3676                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3677         }
3678
3679         /*
3680          * Configure Rx packet plane (recycle mode; WSP) and
3681          * enable arbiter
3682          */
3683         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3684         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3685 }
3686
3687 static void
3688 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3689                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3690 {
3691         switch (hw->mac.type) {
3692         case ixgbe_mac_82598EB:
3693                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3694                 break;
3695         case ixgbe_mac_82599EB:
3696         case ixgbe_mac_X540:
3697         case ixgbe_mac_X550:
3698         case ixgbe_mac_X550EM_x:
3699         case ixgbe_mac_X550EM_a:
3700                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3701                                                   tsa, map);
3702                 break;
3703         default:
3704                 break;
3705         }
3706 }
3707
3708 static void
3709 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3710                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3711 {
3712         switch (hw->mac.type) {
3713         case ixgbe_mac_82598EB:
3714                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3715                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3716                 break;
3717         case ixgbe_mac_82599EB:
3718         case ixgbe_mac_X540:
3719         case ixgbe_mac_X550:
3720         case ixgbe_mac_X550EM_x:
3721         case ixgbe_mac_X550EM_a:
3722                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3723                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3724                 break;
3725         default:
3726                 break;
3727         }
3728 }
3729
3730 #define DCB_RX_CONFIG  1
3731 #define DCB_TX_CONFIG  1
3732 #define DCB_TX_PB      1024
3733 /**
3734  * ixgbe_dcb_hw_configure - Enable DCB and configure
3735  * general DCB in VT mode and non-VT mode parameters
3736  * @dev: pointer to rte_eth_dev structure
3737  * @dcb_config: pointer to ixgbe_dcb_config structure
3738  */
3739 static int
3740 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3741                         struct ixgbe_dcb_config *dcb_config)
3742 {
3743         int     ret = 0;
3744         uint8_t i, pfc_en, nb_tcs;
3745         uint16_t pbsize, rx_buffer_size;
3746         uint8_t config_dcb_rx = 0;
3747         uint8_t config_dcb_tx = 0;
3748         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3749         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3750         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3751         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3752         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3753         struct ixgbe_dcb_tc_config *tc;
3754         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3755         struct ixgbe_hw *hw =
3756                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3757         struct ixgbe_bw_conf *bw_conf =
3758                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
3759
3760         switch (dev->data->dev_conf.rxmode.mq_mode) {
3761         case ETH_MQ_RX_VMDQ_DCB:
3762                 dcb_config->vt_mode = true;
3763                 if (hw->mac.type != ixgbe_mac_82598EB) {
3764                         config_dcb_rx = DCB_RX_CONFIG;
3765                         /*
3766                          *get dcb and VT rx configuration parameters
3767                          *from rte_eth_conf
3768                          */
3769                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3770                         /*Configure general VMDQ and DCB RX parameters*/
3771                         ixgbe_vmdq_dcb_configure(dev);
3772                 }
3773                 break;
3774         case ETH_MQ_RX_DCB:
3775         case ETH_MQ_RX_DCB_RSS:
3776                 dcb_config->vt_mode = false;
3777                 config_dcb_rx = DCB_RX_CONFIG;
3778                 /* Get dcb TX configuration parameters from rte_eth_conf */
3779                 ixgbe_dcb_rx_config(dev, dcb_config);
3780                 /*Configure general DCB RX parameters*/
3781                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
3782                 break;
3783         default:
3784                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3785                 break;
3786         }
3787         switch (dev->data->dev_conf.txmode.mq_mode) {
3788         case ETH_MQ_TX_VMDQ_DCB:
3789                 dcb_config->vt_mode = true;
3790                 config_dcb_tx = DCB_TX_CONFIG;
3791                 /* get DCB and VT TX configuration parameters
3792                  * from rte_eth_conf
3793                  */
3794                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3795                 /*Configure general VMDQ and DCB TX parameters*/
3796                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3797                 break;
3798
3799         case ETH_MQ_TX_DCB:
3800                 dcb_config->vt_mode = false;
3801                 config_dcb_tx = DCB_TX_CONFIG;
3802                 /*get DCB TX configuration parameters from rte_eth_conf*/
3803                 ixgbe_dcb_tx_config(dev, dcb_config);
3804                 /*Configure general DCB TX parameters*/
3805                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3806                 break;
3807         default:
3808                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3809                 break;
3810         }
3811
3812         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3813         /* Unpack map */
3814         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3815         if (nb_tcs == ETH_4_TCS) {
3816                 /* Avoid un-configured priority mapping to TC0 */
3817                 uint8_t j = 4;
3818                 uint8_t mask = 0xFF;
3819
3820                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3821                         mask = (uint8_t)(mask & (~(1 << map[i])));
3822                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3823                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3824                                 map[j++] = i;
3825                         mask >>= 1;
3826                 }
3827                 /* Re-configure 4 TCs BW */
3828                 for (i = 0; i < nb_tcs; i++) {
3829                         tc = &dcb_config->tc_config[i];
3830                         if (bw_conf->tc_num != nb_tcs)
3831                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3832                                         (uint8_t)(100 / nb_tcs);
3833                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3834                                                 (uint8_t)(100 / nb_tcs);
3835                 }
3836                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3837                         tc = &dcb_config->tc_config[i];
3838                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3839                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3840                 }
3841         } else {
3842                 /* Re-configure 8 TCs BW */
3843                 for (i = 0; i < nb_tcs; i++) {
3844                         tc = &dcb_config->tc_config[i];
3845                         if (bw_conf->tc_num != nb_tcs)
3846                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3847                                         (uint8_t)(100 / nb_tcs + (i & 1));
3848                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3849                                 (uint8_t)(100 / nb_tcs + (i & 1));
3850                 }
3851         }
3852
3853         switch (hw->mac.type) {
3854         case ixgbe_mac_X550:
3855         case ixgbe_mac_X550EM_x:
3856         case ixgbe_mac_X550EM_a:
3857                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3858                 break;
3859         default:
3860                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3861                 break;
3862         }
3863
3864         if (config_dcb_rx) {
3865                 /* Set RX buffer size */
3866                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3867                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3868
3869                 for (i = 0; i < nb_tcs; i++) {
3870                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3871                 }
3872                 /* zero alloc all unused TCs */
3873                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3874                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3875                 }
3876         }
3877         if (config_dcb_tx) {
3878                 /* Only support an equally distributed
3879                  *  Tx packet buffer strategy.
3880                  */
3881                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3882                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3883
3884                 for (i = 0; i < nb_tcs; i++) {
3885                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3886                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3887                 }
3888                 /* Clear unused TCs, if any, to zero buffer size*/
3889                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3890                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3891                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3892                 }
3893         }
3894
3895         /*Calculates traffic class credits*/
3896         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3897                                 IXGBE_DCB_TX_CONFIG);
3898         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3899                                 IXGBE_DCB_RX_CONFIG);
3900
3901         if (config_dcb_rx) {
3902                 /* Unpack CEE standard containers */
3903                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3904                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3905                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3906                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3907                 /* Configure PG(ETS) RX */
3908                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3909         }
3910
3911         if (config_dcb_tx) {
3912                 /* Unpack CEE standard containers */
3913                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3914                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3915                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3916                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3917                 /* Configure PG(ETS) TX */
3918                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3919         }
3920
3921         /*Configure queue statistics registers*/
3922         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3923
3924         /* Check if the PFC is supported */
3925         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3926                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3927                 for (i = 0; i < nb_tcs; i++) {
3928                         /*
3929                         * If the TC count is 8,and the default high_water is 48,
3930                         * the low_water is 16 as default.
3931                         */
3932                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3933                         hw->fc.low_water[i] = pbsize / 4;
3934                         /* Enable pfc for this TC */
3935                         tc = &dcb_config->tc_config[i];
3936                         tc->pfc = ixgbe_dcb_pfc_enabled;
3937                 }
3938                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3939                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3940                         pfc_en &= 0x0F;
3941                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3942         }
3943
3944         return ret;
3945 }
3946
3947 /**
3948  * ixgbe_configure_dcb - Configure DCB  Hardware
3949  * @dev: pointer to rte_eth_dev
3950  */
3951 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3952 {
3953         struct ixgbe_dcb_config *dcb_cfg =
3954                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3955         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3956
3957         PMD_INIT_FUNC_TRACE();
3958
3959         /* check support mq_mode for DCB */
3960         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3961             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3962             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3963                 return;
3964
3965         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
3966                 return;
3967
3968         /** Configure DCB hardware **/
3969         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3970 }
3971
3972 /*
3973  * VMDq only support for 10 GbE NIC.
3974  */
3975 static void
3976 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3977 {
3978         struct rte_eth_vmdq_rx_conf *cfg;
3979         struct ixgbe_hw *hw;
3980         enum rte_eth_nb_pools num_pools;
3981         uint32_t mrqc, vt_ctl, vlanctrl;
3982         uint32_t vmolr = 0;
3983         int i;
3984
3985         PMD_INIT_FUNC_TRACE();
3986         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3987         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3988         num_pools = cfg->nb_queue_pools;
3989
3990         ixgbe_rss_disable(dev);
3991
3992         /* MRQC: enable vmdq */
3993         mrqc = IXGBE_MRQC_VMDQEN;
3994         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3995
3996         /* PFVTCTL: turn on virtualisation and set the default pool */
3997         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3998         if (cfg->enable_default_pool)
3999                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4000         else
4001                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4002
4003         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4004
4005         for (i = 0; i < (int)num_pools; i++) {
4006                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4007                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4008         }
4009
4010         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4011         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4012         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4013         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4014
4015         /* VFTA - enable all vlan filters */
4016         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4017                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4018
4019         /* VFRE: pool enabling for receive - 64 */
4020         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4021         if (num_pools == ETH_64_POOLS)
4022                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4023
4024         /*
4025          * MPSAR - allow pools to read specific mac addresses
4026          * In this case, all pools should be able to read from mac addr 0
4027          */
4028         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4029         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4030
4031         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4032         for (i = 0; i < cfg->nb_pool_maps; i++) {
4033                 /* set vlan id in VF register and set the valid bit */
4034                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4035                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4036                 /*
4037                  * Put the allowed pools in VFB reg. As we only have 16 or 64
4038                  * pools, we only need to use the first half of the register
4039                  * i.e. bits 0-31
4040                  */
4041                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4042                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4043                                         (cfg->pool_map[i].pools & UINT32_MAX));
4044                 else
4045                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4046                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4047
4048         }
4049
4050         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4051         if (cfg->enable_loop_back) {
4052                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4053                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4054                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4055         }
4056
4057         IXGBE_WRITE_FLUSH(hw);
4058 }
4059
4060 /*
4061  * ixgbe_dcb_config_tx_hw_config - Configure general VMDq TX parameters
4062  * @hw: pointer to hardware structure
4063  */
4064 static void
4065 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4066 {
4067         uint32_t reg;
4068         uint32_t q;
4069
4070         PMD_INIT_FUNC_TRACE();
4071         /*PF VF Transmit Enable*/
4072         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4073         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4074
4075         /* Disable the Tx desc arbiter so that MTQC can be changed */
4076         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4077         reg |= IXGBE_RTTDCS_ARBDIS;
4078         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4079
4080         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4081         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4082
4083         /* Disable drop for all queues */
4084         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4085                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4086                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4087
4088         /* Enable the Tx desc arbiter */
4089         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4090         reg &= ~IXGBE_RTTDCS_ARBDIS;
4091         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4092
4093         IXGBE_WRITE_FLUSH(hw);
4094 }
4095
4096 static int __attribute__((cold))
4097 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4098 {
4099         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4100         uint64_t dma_addr;
4101         unsigned int i;
4102
4103         /* Initialize software ring entries */
4104         for (i = 0; i < rxq->nb_rx_desc; i++) {
4105                 volatile union ixgbe_adv_rx_desc *rxd;
4106                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4107
4108                 if (mbuf == NULL) {
4109                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4110                                      (unsigned) rxq->queue_id);
4111                         return -ENOMEM;
4112                 }
4113
4114                 rte_mbuf_refcnt_set(mbuf, 1);
4115                 mbuf->next = NULL;
4116                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4117                 mbuf->nb_segs = 1;
4118                 mbuf->port = rxq->port_id;
4119
4120                 dma_addr =
4121                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
4122                 rxd = &rxq->rx_ring[i];
4123                 rxd->read.hdr_addr = 0;
4124                 rxd->read.pkt_addr = dma_addr;
4125                 rxe[i].mbuf = mbuf;
4126         }
4127
4128         return 0;
4129 }
4130
4131 static int
4132 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4133 {
4134         struct ixgbe_hw *hw;
4135         uint32_t mrqc;
4136
4137         ixgbe_rss_configure(dev);
4138
4139         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4140
4141         /* MRQC: enable VF RSS */
4142         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4143         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4144         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4145         case ETH_64_POOLS:
4146                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4147                 break;
4148
4149         case ETH_32_POOLS:
4150                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4151                 break;
4152
4153         default:
4154                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4155                 return -EINVAL;
4156         }
4157
4158         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4159
4160         return 0;
4161 }
4162
4163 static int
4164 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4165 {
4166         struct ixgbe_hw *hw =
4167                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4168
4169         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4170         case ETH_64_POOLS:
4171                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4172                         IXGBE_MRQC_VMDQEN);
4173                 break;
4174
4175         case ETH_32_POOLS:
4176                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4177                         IXGBE_MRQC_VMDQRT4TCEN);
4178                 break;
4179
4180         case ETH_16_POOLS:
4181                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4182                         IXGBE_MRQC_VMDQRT8TCEN);
4183                 break;
4184         default:
4185                 PMD_INIT_LOG(ERR,
4186                         "invalid pool number in IOV mode");
4187                 break;
4188         }
4189         return 0;
4190 }
4191
4192 static int
4193 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4194 {
4195         struct ixgbe_hw *hw =
4196                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4197
4198         if (hw->mac.type == ixgbe_mac_82598EB)
4199                 return 0;
4200
4201         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4202                 /*
4203                  * SRIOV inactive scheme
4204                  * any DCB/RSS w/o VMDq multi-queue setting
4205                  */
4206                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4207                 case ETH_MQ_RX_RSS:
4208                 case ETH_MQ_RX_DCB_RSS:
4209                 case ETH_MQ_RX_VMDQ_RSS:
4210                         ixgbe_rss_configure(dev);
4211                         break;
4212
4213                 case ETH_MQ_RX_VMDQ_DCB:
4214                         ixgbe_vmdq_dcb_configure(dev);
4215                         break;
4216
4217                 case ETH_MQ_RX_VMDQ_ONLY:
4218                         ixgbe_vmdq_rx_hw_configure(dev);
4219                         break;
4220
4221                 case ETH_MQ_RX_NONE:
4222                 default:
4223                         /* if mq_mode is none, disable rss mode.*/
4224                         ixgbe_rss_disable(dev);
4225                         break;
4226                 }
4227         } else {
4228                 /* SRIOV active scheme
4229                  * Support RSS together with SRIOV.
4230                  */
4231                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4232                 case ETH_MQ_RX_RSS:
4233                 case ETH_MQ_RX_VMDQ_RSS:
4234                         ixgbe_config_vf_rss(dev);
4235                         break;
4236                 case ETH_MQ_RX_VMDQ_DCB:
4237                 case ETH_MQ_RX_DCB:
4238                 /* In SRIOV, the configuration is the same as VMDq case */
4239                         ixgbe_vmdq_dcb_configure(dev);
4240                         break;
4241                 /* DCB/RSS together with SRIOV is not supported */
4242                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4243                 case ETH_MQ_RX_DCB_RSS:
4244                         PMD_INIT_LOG(ERR,
4245                                 "Could not support DCB/RSS with VMDq & SRIOV");
4246                         return -1;
4247                 default:
4248                         ixgbe_config_vf_default(dev);
4249                         break;
4250                 }
4251         }
4252
4253         return 0;
4254 }
4255
4256 static int
4257 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4258 {
4259         struct ixgbe_hw *hw =
4260                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4261         uint32_t mtqc;
4262         uint32_t rttdcs;
4263
4264         if (hw->mac.type == ixgbe_mac_82598EB)
4265                 return 0;
4266
4267         /* disable arbiter before setting MTQC */
4268         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4269         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4270         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4271
4272         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4273                 /*
4274                  * SRIOV inactive scheme
4275                  * any DCB w/o VMDq multi-queue setting
4276                  */
4277                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4278                         ixgbe_vmdq_tx_hw_configure(hw);
4279                 else {
4280                         mtqc = IXGBE_MTQC_64Q_1PB;
4281                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4282                 }
4283         } else {
4284                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4285
4286                 /*
4287                  * SRIOV active scheme
4288                  * FIXME if support DCB together with VMDq & SRIOV
4289                  */
4290                 case ETH_64_POOLS:
4291                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4292                         break;
4293                 case ETH_32_POOLS:
4294                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4295                         break;
4296                 case ETH_16_POOLS:
4297                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4298                                 IXGBE_MTQC_8TC_8TQ;
4299                         break;
4300                 default:
4301                         mtqc = IXGBE_MTQC_64Q_1PB;
4302                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4303                 }
4304                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4305         }
4306
4307         /* re-enable arbiter */
4308         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4309         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4310
4311         return 0;
4312 }
4313
4314 /**
4315  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4316  *
4317  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4318  * spec rev. 3.0 chapter 8.2.3.8.13.
4319  *
4320  * @pool Memory pool of the Rx queue
4321  */
4322 static inline uint32_t
4323 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4324 {
4325         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4326
4327         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4328         uint16_t maxdesc =
4329                 IPV4_MAX_PKT_LEN /
4330                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4331
4332         if (maxdesc >= 16)
4333                 return IXGBE_RSCCTL_MAXDESC_16;
4334         else if (maxdesc >= 8)
4335                 return IXGBE_RSCCTL_MAXDESC_8;
4336         else if (maxdesc >= 4)
4337                 return IXGBE_RSCCTL_MAXDESC_4;
4338         else
4339                 return IXGBE_RSCCTL_MAXDESC_1;
4340 }
4341
4342 /**
4343  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4344  * interrupt
4345  *
4346  * (Taken from FreeBSD tree)
4347  * (yes this is all very magic and confusing :)
4348  *
4349  * @dev port handle
4350  * @entry the register array entry
4351  * @vector the MSIX vector for this queue
4352  * @type RX/TX/MISC
4353  */
4354 static void
4355 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4356 {
4357         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4358         u32 ivar, index;
4359
4360         vector |= IXGBE_IVAR_ALLOC_VAL;
4361
4362         switch (hw->mac.type) {
4363
4364         case ixgbe_mac_82598EB:
4365                 if (type == -1)
4366                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4367                 else
4368                         entry += (type * 64);
4369                 index = (entry >> 2) & 0x1F;
4370                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4371                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4372                 ivar |= (vector << (8 * (entry & 0x3)));
4373                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4374                 break;
4375
4376         case ixgbe_mac_82599EB:
4377         case ixgbe_mac_X540:
4378                 if (type == -1) { /* MISC IVAR */
4379                         index = (entry & 1) * 8;
4380                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4381                         ivar &= ~(0xFF << index);
4382                         ivar |= (vector << index);
4383                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4384                 } else {        /* RX/TX IVARS */
4385                         index = (16 * (entry & 1)) + (8 * type);
4386                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4387                         ivar &= ~(0xFF << index);
4388                         ivar |= (vector << index);
4389                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4390                 }
4391
4392                 break;
4393
4394         default:
4395                 break;
4396         }
4397 }
4398
4399 void __attribute__((cold))
4400 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4401 {
4402         uint16_t i, rx_using_sse;
4403         struct ixgbe_adapter *adapter =
4404                 (struct ixgbe_adapter *)dev->data->dev_private;
4405
4406         /*
4407          * In order to allow Vector Rx there are a few configuration
4408          * conditions to be met and Rx Bulk Allocation should be allowed.
4409          */
4410         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4411             !adapter->rx_bulk_alloc_allowed) {
4412                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4413                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4414                                     "not enabled",
4415                              dev->data->port_id);
4416
4417                 adapter->rx_vec_allowed = false;
4418         }
4419
4420         /*
4421          * Initialize the appropriate LRO callback.
4422          *
4423          * If all queues satisfy the bulk allocation preconditions
4424          * (hw->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4425          * Otherwise use a single allocation version.
4426          */
4427         if (dev->data->lro) {
4428                 if (adapter->rx_bulk_alloc_allowed) {
4429                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4430                                            "allocation version");
4431                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4432                 } else {
4433                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4434                                            "allocation version");
4435                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4436                 }
4437         } else if (dev->data->scattered_rx) {
4438                 /*
4439                  * Set the non-LRO scattered callback: there are Vector and
4440                  * single allocation versions.
4441                  */
4442                 if (adapter->rx_vec_allowed) {
4443                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4444                                             "callback (port=%d).",
4445                                      dev->data->port_id);
4446
4447                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4448                 } else if (adapter->rx_bulk_alloc_allowed) {
4449                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4450                                            "allocation callback (port=%d).",
4451                                      dev->data->port_id);
4452                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4453                 } else {
4454                         PMD_INIT_LOG(DEBUG, "Using Regualr (non-vector, "
4455                                             "single allocation) "
4456                                             "Scattered Rx callback "
4457                                             "(port=%d).",
4458                                      dev->data->port_id);
4459
4460                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4461                 }
4462         /*
4463          * Below we set "simple" callbacks according to port/queues parameters.
4464          * If parameters allow we are going to choose between the following
4465          * callbacks:
4466          *    - Vector
4467          *    - Bulk Allocation
4468          *    - Single buffer allocation (the simplest one)
4469          */
4470         } else if (adapter->rx_vec_allowed) {
4471                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4472                                     "burst size no less than %d (port=%d).",
4473                              RTE_IXGBE_DESCS_PER_LOOP,
4474                              dev->data->port_id);
4475
4476                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4477         } else if (adapter->rx_bulk_alloc_allowed) {
4478                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4479                                     "satisfied. Rx Burst Bulk Alloc function "
4480                                     "will be used on port=%d.",
4481                              dev->data->port_id);
4482
4483                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4484         } else {
4485                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4486                                     "satisfied, or Scattered Rx is requested "
4487                                     "(port=%d).",
4488                              dev->data->port_id);
4489
4490                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4491         }
4492
4493         /* Propagate information about RX function choice through all queues. */
4494
4495         rx_using_sse =
4496                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4497                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4498
4499         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4500                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4501
4502                 rxq->rx_using_sse = rx_using_sse;
4503         }
4504 }
4505
4506 /**
4507  * ixgbe_set_rsc - configure RSC related port HW registers
4508  *
4509  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4510  * of 82599 Spec (x540 configuration is virtually the same).
4511  *
4512  * @dev port handle
4513  *
4514  * Returns 0 in case of success or a non-zero error code
4515  */
4516 static int
4517 ixgbe_set_rsc(struct rte_eth_dev *dev)
4518 {
4519         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4520         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4521         struct rte_eth_dev_info dev_info = { 0 };
4522         bool rsc_capable = false;
4523         uint16_t i;
4524         uint32_t rdrxctl;
4525         uint32_t rfctl;
4526
4527         /* Sanity check */
4528         dev->dev_ops->dev_infos_get(dev, &dev_info);
4529         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4530                 rsc_capable = true;
4531
4532         if (!rsc_capable && rx_conf->enable_lro) {
4533                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4534                                    "support it");
4535                 return -EINVAL;
4536         }
4537
4538         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4539
4540         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4541                 /*
4542                  * According to chapter of 4.6.7.2.1 of the Spec Rev.
4543                  * 3.0 RSC configuration requires HW CRC stripping being
4544                  * enabled. If user requested both HW CRC stripping off
4545                  * and RSC on - return an error.
4546                  */
4547                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4548                                     "is disabled");
4549                 return -EINVAL;
4550         }
4551
4552         /* RFCTL configuration  */
4553         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4554         if ((rsc_capable) && (rx_conf->enable_lro))
4555                 /*
4556                  * Since NFS packets coalescing is not supported - clear
4557                  * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4558                  * enabled.
4559                  */
4560                 rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4561                            IXGBE_RFCTL_NFSR_DIS);
4562         else
4563                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4564         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4565
4566         /* If LRO hasn't been requested - we are done here. */
4567         if (!rx_conf->enable_lro)
4568                 return 0;
4569
4570         /* Set RDRXCTL.RSCACKC bit */
4571         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4572         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4573         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4574
4575         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4576         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4577                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4578                 uint32_t srrctl =
4579                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4580                 uint32_t rscctl =
4581                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4582                 uint32_t psrtype =
4583                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4584                 uint32_t eitr =
4585                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4586
4587                 /*
4588                  * ixgbe PMD doesn't support header-split at the moment.
4589                  *
4590                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4591                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4592                  * should be configured even if header split is not
4593                  * enabled. We will configure it 128 bytes following the
4594                  * recommendation in the spec.
4595                  */
4596                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4597                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4598                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4599
4600                 /*
4601                  * TODO: Consider setting the Receive Descriptor Minimum
4602                  * Threshold Size for an RSC case. This is not an obviously
4603                  * beneficiary option but the one worth considering...
4604                  */
4605
4606                 rscctl |= IXGBE_RSCCTL_RSCEN;
4607                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4608                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4609
4610                 /*
4611                  * RSC: Set ITR interval corresponding to 2K ints/s.
4612                  *
4613                  * Full-sized RSC aggregations for a 10Gb/s link will
4614                  * arrive at about 20K aggregation/s rate.
4615                  *
4616                  * 2K inst/s rate will make only 10% of the
4617                  * aggregations to be closed due to the interrupt timer
4618                  * expiration for a streaming at wire-speed case.
4619                  *
4620                  * For a sparse streaming case this setting will yield
4621                  * at most 500us latency for a single RSC aggregation.
4622                  */
4623                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4624                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4625
4626                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4627                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4628                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4629                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4630
4631                 /*
4632                  * RSC requires the mapping of the queue to the
4633                  * interrupt vector.
4634                  */
4635                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4636         }
4637
4638         dev->data->lro = 1;
4639
4640         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4641
4642         return 0;
4643 }
4644
4645 /*
4646  * Initializes Receive Unit.
4647  */
4648 int __attribute__((cold))
4649 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4650 {
4651         struct ixgbe_hw     *hw;
4652         struct ixgbe_rx_queue *rxq;
4653         uint64_t bus_addr;
4654         uint32_t rxctrl;
4655         uint32_t fctrl;
4656         uint32_t hlreg0;
4657         uint32_t maxfrs;
4658         uint32_t srrctl;
4659         uint32_t rdrxctl;
4660         uint32_t rxcsum;
4661         uint16_t buf_size;
4662         uint16_t i;
4663         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4664         int rc;
4665
4666         PMD_INIT_FUNC_TRACE();
4667         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4668
4669         /*
4670          * Make sure receives are disabled while setting
4671          * up the RX context (registers, descriptor rings, etc.).
4672          */
4673         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4674         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4675
4676         /* Enable receipt of broadcasted frames */
4677         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4678         fctrl |= IXGBE_FCTRL_BAM;
4679         fctrl |= IXGBE_FCTRL_DPF;
4680         fctrl |= IXGBE_FCTRL_PMCF;
4681         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4682
4683         /*
4684          * Configure CRC stripping, if any.
4685          */
4686         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4687         if (rx_conf->hw_strip_crc)
4688                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4689         else
4690                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4691
4692         /*
4693          * Configure jumbo frame support, if any.
4694          */
4695         if (rx_conf->jumbo_frame == 1) {
4696                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4697                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4698                 maxfrs &= 0x0000FFFF;
4699                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4700                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4701         } else
4702                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4703
4704         /*
4705          * If loopback mode is configured for 82599, set LPBK bit.
4706          */
4707         if (hw->mac.type == ixgbe_mac_82599EB &&
4708                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4709                 hlreg0 |= IXGBE_HLREG0_LPBK;
4710         else
4711                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4712
4713         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4714
4715         /* Setup RX queues */
4716         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4717                 rxq = dev->data->rx_queues[i];
4718
4719                 /*
4720                  * Reset crc_len in case it was changed after queue setup by a
4721                  * call to configure.
4722                  */
4723                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4724
4725                 /* Setup the Base and Length of the Rx Descriptor Rings */
4726                 bus_addr = rxq->rx_ring_phys_addr;
4727                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4728                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4729                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4730                                 (uint32_t)(bus_addr >> 32));
4731                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4732                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4733                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4734                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4735
4736                 /* Configure the SRRCTL register */
4737 #ifdef RTE_HEADER_SPLIT_ENABLE
4738                 /*
4739                  * Configure Header Split
4740                  */
4741                 if (rx_conf->header_split) {
4742                         if (hw->mac.type == ixgbe_mac_82599EB) {
4743                                 /* Must setup the PSRTYPE register */
4744                                 uint32_t psrtype;
4745
4746                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4747                                         IXGBE_PSRTYPE_UDPHDR   |
4748                                         IXGBE_PSRTYPE_IPV4HDR  |
4749                                         IXGBE_PSRTYPE_IPV6HDR;
4750                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4751                         }
4752                         srrctl = ((rx_conf->split_hdr_size <<
4753                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4754                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4755                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4756                 } else
4757 #endif
4758                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4759
4760                 /* Set if packets are dropped when no descriptors available */
4761                 if (rxq->drop_en)
4762                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4763
4764                 /*
4765                  * Configure the RX buffer size in the BSIZEPACKET field of
4766                  * the SRRCTL register of the queue.
4767                  * The value is in 1 KB resolution. Valid values can be from
4768                  * 1 KB to 16 KB.
4769                  */
4770                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4771                         RTE_PKTMBUF_HEADROOM);
4772                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4773                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4774
4775                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4776
4777                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4778                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4779
4780                 /* It adds dual VLAN length for supporting dual VLAN */
4781                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4782                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4783                         dev->data->scattered_rx = 1;
4784         }
4785
4786         if (rx_conf->enable_scatter)
4787                 dev->data->scattered_rx = 1;
4788
4789         /*
4790          * Device configured with multiple RX queues.
4791          */
4792         ixgbe_dev_mq_rx_configure(dev);
4793
4794         /*
4795          * Setup the Checksum Register.
4796          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4797          * Enable IP/L4 checkum computation by hardware if requested to do so.
4798          */
4799         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4800         rxcsum |= IXGBE_RXCSUM_PCSD;
4801         if (rx_conf->hw_ip_checksum)
4802                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4803         else
4804                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4805
4806         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4807
4808         if (hw->mac.type == ixgbe_mac_82599EB ||
4809             hw->mac.type == ixgbe_mac_X540) {
4810                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4811                 if (rx_conf->hw_strip_crc)
4812                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4813                 else
4814                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4815                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4816                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4817         }
4818
4819         rc = ixgbe_set_rsc(dev);
4820         if (rc)
4821                 return rc;
4822
4823         ixgbe_set_rx_function(dev);
4824
4825         return 0;
4826 }
4827
4828 /*
4829  * Initializes Transmit Unit.
4830  */
4831 void __attribute__((cold))
4832 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4833 {
4834         struct ixgbe_hw     *hw;
4835         struct ixgbe_tx_queue *txq;
4836         uint64_t bus_addr;
4837         uint32_t hlreg0;
4838         uint32_t txctrl;
4839         uint16_t i;
4840
4841         PMD_INIT_FUNC_TRACE();
4842         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4843
4844         /* Enable TX CRC (checksum offload requirement) and hw padding
4845          * (TSO requirement)
4846          */
4847         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4848         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4849         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4850
4851         /* Setup the Base and Length of the Tx Descriptor Rings */
4852         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4853                 txq = dev->data->tx_queues[i];
4854
4855                 bus_addr = txq->tx_ring_phys_addr;
4856                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4857                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4858                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4859                                 (uint32_t)(bus_addr >> 32));
4860                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4861                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4862                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4863                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4864                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4865
4866                 /*
4867                  * Disable Tx Head Writeback RO bit, since this hoses
4868                  * bookkeeping if things aren't delivered in order.
4869                  */
4870                 switch (hw->mac.type) {
4871                 case ixgbe_mac_82598EB:
4872                         txctrl = IXGBE_READ_REG(hw,
4873                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4874                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4875                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4876                                         txctrl);
4877                         break;
4878
4879                 case ixgbe_mac_82599EB:
4880                 case ixgbe_mac_X540:
4881                 case ixgbe_mac_X550:
4882                 case ixgbe_mac_X550EM_x:
4883                 case ixgbe_mac_X550EM_a:
4884                 default:
4885                         txctrl = IXGBE_READ_REG(hw,
4886                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4887                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4888                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4889                                         txctrl);
4890                         break;
4891                 }
4892         }
4893
4894         /* Device configured with multiple TX queues. */
4895         ixgbe_dev_mq_tx_configure(dev);
4896 }
4897
4898 /*
4899  * Set up link for 82599 loopback mode Tx->Rx.
4900  */
4901 static inline void __attribute__((cold))
4902 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4903 {
4904         PMD_INIT_FUNC_TRACE();
4905
4906         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4907                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4908                                 IXGBE_SUCCESS) {
4909                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4910                         /* ignore error */
4911                         return;
4912                 }
4913         }
4914
4915         /* Restart link */
4916         IXGBE_WRITE_REG(hw,
4917                         IXGBE_AUTOC,
4918                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4919         ixgbe_reset_pipeline_82599(hw);
4920
4921         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4922         msec_delay(50);
4923 }
4924
4925
4926 /*
4927  * Start Transmit and Receive Units.
4928  */
4929 int __attribute__((cold))
4930 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4931 {
4932         struct ixgbe_hw     *hw;
4933         struct ixgbe_tx_queue *txq;
4934         struct ixgbe_rx_queue *rxq;
4935         uint32_t txdctl;
4936         uint32_t dmatxctl;
4937         uint32_t rxctrl;
4938         uint16_t i;
4939         int ret = 0;
4940
4941         PMD_INIT_FUNC_TRACE();
4942         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4943
4944         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4945                 txq = dev->data->tx_queues[i];
4946                 /* Setup Transmit Threshold Registers */
4947                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4948                 txdctl |= txq->pthresh & 0x7F;
4949                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4950                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4951                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4952         }
4953
4954         if (hw->mac.type != ixgbe_mac_82598EB) {
4955                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4956                 dmatxctl |= IXGBE_DMATXCTL_TE;
4957                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4958         }
4959
4960         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4961                 txq = dev->data->tx_queues[i];
4962                 if (!txq->tx_deferred_start) {
4963                         ret = ixgbe_dev_tx_queue_start(dev, i);
4964                         if (ret < 0)
4965                                 return ret;
4966                 }
4967         }
4968
4969         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4970                 rxq = dev->data->rx_queues[i];
4971                 if (!rxq->rx_deferred_start) {
4972                         ret = ixgbe_dev_rx_queue_start(dev, i);
4973                         if (ret < 0)
4974                                 return ret;
4975                 }
4976         }
4977
4978         /* Enable Receive engine */
4979         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4980         if (hw->mac.type == ixgbe_mac_82598EB)
4981                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4982         rxctrl |= IXGBE_RXCTRL_RXEN;
4983         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4984
4985         /* If loopback mode is enabled for 82599, set up the link accordingly */
4986         if (hw->mac.type == ixgbe_mac_82599EB &&
4987                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4988                 ixgbe_setup_loopback_link_82599(hw);
4989
4990         return 0;
4991 }
4992
4993 /*
4994  * Start Receive Units for specified queue.
4995  */
4996 int __attribute__((cold))
4997 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4998 {
4999         struct ixgbe_hw     *hw;
5000         struct ixgbe_rx_queue *rxq;
5001         uint32_t rxdctl;
5002         int poll_ms;
5003
5004         PMD_INIT_FUNC_TRACE();
5005         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5006
5007         if (rx_queue_id < dev->data->nb_rx_queues) {
5008                 rxq = dev->data->rx_queues[rx_queue_id];
5009
5010                 /* Allocate buffers for descriptor rings */
5011                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5012                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5013                                      rx_queue_id);
5014                         return -1;
5015                 }
5016                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5017                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5018                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5019
5020                 /* Wait until RX Enable ready */
5021                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5022                 do {
5023                         rte_delay_ms(1);
5024                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5025                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5026                 if (!poll_ms)
5027                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
5028                                      rx_queue_id);
5029                 rte_wmb();
5030                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5031                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5032                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5033         } else
5034                 return -1;
5035
5036         return 0;
5037 }
5038
5039 /*
5040  * Stop Receive Units for specified queue.
5041  */
5042 int __attribute__((cold))
5043 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5044 {
5045         struct ixgbe_hw     *hw;
5046         struct ixgbe_adapter *adapter =
5047                 (struct ixgbe_adapter *)dev->data->dev_private;
5048         struct ixgbe_rx_queue *rxq;
5049         uint32_t rxdctl;
5050         int poll_ms;
5051
5052         PMD_INIT_FUNC_TRACE();
5053         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5054
5055         if (rx_queue_id < dev->data->nb_rx_queues) {
5056                 rxq = dev->data->rx_queues[rx_queue_id];
5057
5058                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5059                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5060                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5061
5062                 /* Wait until RX Enable bit clear */
5063                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5064                 do {
5065                         rte_delay_ms(1);
5066                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5067                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5068                 if (!poll_ms)
5069                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
5070                                      rx_queue_id);
5071
5072                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
5073
5074                 ixgbe_rx_queue_release_mbufs(rxq);
5075                 ixgbe_reset_rx_queue(adapter, rxq);
5076                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5077         } else
5078                 return -1;
5079
5080         return 0;
5081 }
5082
5083
5084 /*
5085  * Start Transmit Units for specified queue.
5086  */
5087 int __attribute__((cold))
5088 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5089 {
5090         struct ixgbe_hw     *hw;
5091         struct ixgbe_tx_queue *txq;
5092         uint32_t txdctl;
5093         int poll_ms;
5094
5095         PMD_INIT_FUNC_TRACE();
5096         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5097
5098         if (tx_queue_id < dev->data->nb_tx_queues) {
5099                 txq = dev->data->tx_queues[tx_queue_id];
5100                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5101                 txdctl |= IXGBE_TXDCTL_ENABLE;
5102                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5103
5104                 /* Wait until TX Enable ready */
5105                 if (hw->mac.type == ixgbe_mac_82599EB) {
5106                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5107                         do {
5108                                 rte_delay_ms(1);
5109                                 txdctl = IXGBE_READ_REG(hw,
5110                                         IXGBE_TXDCTL(txq->reg_idx));
5111                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5112                         if (!poll_ms)
5113                                 PMD_INIT_LOG(ERR, "Could not enable "
5114                                              "Tx Queue %d", tx_queue_id);
5115                 }
5116                 rte_wmb();
5117                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5118                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5119                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5120         } else
5121                 return -1;
5122
5123         return 0;
5124 }
5125
5126 /*
5127  * Stop Transmit Units for specified queue.
5128  */
5129 int __attribute__((cold))
5130 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5131 {
5132         struct ixgbe_hw     *hw;
5133         struct ixgbe_tx_queue *txq;
5134         uint32_t txdctl;
5135         uint32_t txtdh, txtdt;
5136         int poll_ms;
5137
5138         PMD_INIT_FUNC_TRACE();
5139         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5140
5141         if (tx_queue_id >= dev->data->nb_tx_queues)
5142                 return -1;
5143
5144         txq = dev->data->tx_queues[tx_queue_id];
5145
5146         /* Wait until TX queue is empty */
5147         if (hw->mac.type == ixgbe_mac_82599EB) {
5148                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5149                 do {
5150                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5151                         txtdh = IXGBE_READ_REG(hw,
5152                                                IXGBE_TDH(txq->reg_idx));
5153                         txtdt = IXGBE_READ_REG(hw,
5154                                                IXGBE_TDT(txq->reg_idx));
5155                 } while (--poll_ms && (txtdh != txtdt));
5156                 if (!poll_ms)
5157                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
5158                                      "when stopping.", tx_queue_id);
5159         }
5160
5161         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5162         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5163         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5164
5165         /* Wait until TX Enable bit clear */
5166         if (hw->mac.type == ixgbe_mac_82599EB) {
5167                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5168                 do {
5169                         rte_delay_ms(1);
5170                         txdctl = IXGBE_READ_REG(hw,
5171                                                 IXGBE_TXDCTL(txq->reg_idx));
5172                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5173                 if (!poll_ms)
5174                         PMD_INIT_LOG(ERR, "Could not disable "
5175                                      "Tx Queue %d", tx_queue_id);
5176         }
5177
5178         if (txq->ops != NULL) {
5179                 txq->ops->release_mbufs(txq);
5180                 txq->ops->reset(txq);
5181         }
5182         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5183
5184         return 0;
5185 }
5186
5187 void
5188 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5189         struct rte_eth_rxq_info *qinfo)
5190 {
5191         struct ixgbe_rx_queue *rxq;
5192
5193         rxq = dev->data->rx_queues[queue_id];
5194
5195         qinfo->mp = rxq->mb_pool;
5196         qinfo->scattered_rx = dev->data->scattered_rx;
5197         qinfo->nb_desc = rxq->nb_rx_desc;
5198
5199         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5200         qinfo->conf.rx_drop_en = rxq->drop_en;
5201         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5202 }
5203
5204 void
5205 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5206         struct rte_eth_txq_info *qinfo)
5207 {
5208         struct ixgbe_tx_queue *txq;
5209
5210         txq = dev->data->tx_queues[queue_id];
5211
5212         qinfo->nb_desc = txq->nb_tx_desc;
5213
5214         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5215         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5216         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5217
5218         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5219         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5220         qinfo->conf.txq_flags = txq->txq_flags;
5221         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5222 }
5223
5224 /*
5225  * [VF] Initializes Receive Unit.
5226  */
5227 int __attribute__((cold))
5228 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5229 {
5230         struct ixgbe_hw     *hw;
5231         struct ixgbe_rx_queue *rxq;
5232         uint64_t bus_addr;
5233         uint32_t srrctl, psrtype = 0;
5234         uint16_t buf_size;
5235         uint16_t i;
5236         int ret;
5237
5238         PMD_INIT_FUNC_TRACE();
5239         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5240
5241         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5242                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
5243                         "it should be power of 2");
5244                 return -1;
5245         }
5246
5247         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5248                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
5249                         "it should be equal to or less than %d",
5250                         hw->mac.max_rx_queues);
5251                 return -1;
5252         }
5253
5254         /*
5255          * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
5256          * disables the VF receipt of packets if the PF MTU is > 1500.
5257          * This is done to deal with 82599 limitations that imposes
5258          * the PF and all VFs to share the same MTU.
5259          * Then, the PF driver enables again the VF receipt of packet when
5260          * the VF driver issues a IXGBE_VF_SET_LPE request.
5261          * In the meantime, the VF device cannot be used, even if the VF driver
5262          * and the Guest VM network stack are ready to accept packets with a
5263          * size up to the PF MTU.
5264          * As a work-around to this PF behaviour, force the call to
5265          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5266          * VF packets received can work in all cases.
5267          */
5268         ixgbevf_rlpml_set_vf(hw,
5269                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5270
5271         /* Setup RX queues */
5272         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5273                 rxq = dev->data->rx_queues[i];
5274
5275                 /* Allocate buffers for descriptor rings */
5276                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5277                 if (ret)
5278                         return ret;
5279
5280                 /* Setup the Base and Length of the Rx Descriptor Rings */
5281                 bus_addr = rxq->rx_ring_phys_addr;
5282
5283                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5284                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5285                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5286                                 (uint32_t)(bus_addr >> 32));
5287                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5288                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5289                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5290                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5291
5292
5293                 /* Configure the SRRCTL register */
5294 #ifdef RTE_HEADER_SPLIT_ENABLE
5295                 /*
5296                  * Configure Header Split
5297                  */
5298                 if (dev->data->dev_conf.rxmode.header_split) {
5299                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5300                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5301                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5302                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5303                 } else
5304 #endif
5305                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5306
5307                 /* Set if packets are dropped when no descriptors available */
5308                 if (rxq->drop_en)
5309                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5310
5311                 /*
5312                  * Configure the RX buffer size in the BSIZEPACKET field of
5313                  * the SRRCTL register of the queue.
5314                  * The value is in 1 KB resolution. Valid values can be from
5315                  * 1 KB to 16 KB.
5316                  */
5317                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5318                         RTE_PKTMBUF_HEADROOM);
5319                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5320                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5321
5322                 /*
5323                  * VF modification to write virtual function SRRCTL register
5324                  */
5325                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5326
5327                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5328                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5329
5330                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5331                     /* It adds dual VLAN length for supporting dual VLAN */
5332                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5333                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5334                         if (!dev->data->scattered_rx)
5335                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5336                         dev->data->scattered_rx = 1;
5337                 }
5338         }
5339
5340 #ifdef RTE_HEADER_SPLIT_ENABLE
5341         if (dev->data->dev_conf.rxmode.header_split)
5342                 /* Must setup the PSRTYPE register */
5343                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5344                         IXGBE_PSRTYPE_UDPHDR   |
5345                         IXGBE_PSRTYPE_IPV4HDR  |
5346                         IXGBE_PSRTYPE_IPV6HDR;
5347 #endif
5348
5349         /* Set RQPL for VF RSS according to max Rx queue */
5350         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5351                 IXGBE_PSRTYPE_RQPL_SHIFT;
5352         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5353
5354         ixgbe_set_rx_function(dev);
5355
5356         return 0;
5357 }
5358
5359 /*
5360  * [VF] Initializes Transmit Unit.
5361  */
5362 void __attribute__((cold))
5363 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5364 {
5365         struct ixgbe_hw     *hw;
5366         struct ixgbe_tx_queue *txq;
5367         uint64_t bus_addr;
5368         uint32_t txctrl;
5369         uint16_t i;
5370
5371         PMD_INIT_FUNC_TRACE();
5372         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5373
5374         /* Setup the Base and Length of the Tx Descriptor Rings */
5375         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5376                 txq = dev->data->tx_queues[i];
5377                 bus_addr = txq->tx_ring_phys_addr;
5378                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5379                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5380                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5381                                 (uint32_t)(bus_addr >> 32));
5382                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5383                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5384                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5385                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5386                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5387
5388                 /*
5389                  * Disable Tx Head Writeback RO bit, since this hoses
5390                  * bookkeeping if things aren't delivered in order.
5391                  */
5392                 txctrl = IXGBE_READ_REG(hw,
5393                                 IXGBE_VFDCA_TXCTRL(i));
5394                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5395                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5396                                 txctrl);
5397         }
5398 }
5399
5400 /*
5401  * [VF] Start Transmit and Receive Units.
5402  */
5403 void __attribute__((cold))
5404 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5405 {
5406         struct ixgbe_hw     *hw;
5407         struct ixgbe_tx_queue *txq;
5408         struct ixgbe_rx_queue *rxq;
5409         uint32_t txdctl;
5410         uint32_t rxdctl;
5411         uint16_t i;
5412         int poll_ms;
5413
5414         PMD_INIT_FUNC_TRACE();
5415         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5416
5417         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5418                 txq = dev->data->tx_queues[i];
5419                 /* Setup Transmit Threshold Registers */
5420                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5421                 txdctl |= txq->pthresh & 0x7F;
5422                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5423                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5424                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5425         }
5426
5427         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5428
5429                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5430                 txdctl |= IXGBE_TXDCTL_ENABLE;
5431                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5432
5433                 poll_ms = 10;
5434                 /* Wait until TX Enable ready */
5435                 do {
5436                         rte_delay_ms(1);
5437                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5438                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5439                 if (!poll_ms)
5440                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5441         }
5442         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5443
5444                 rxq = dev->data->rx_queues[i];
5445
5446                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5447                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5448                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5449
5450                 /* Wait until RX Enable ready */
5451                 poll_ms = 10;
5452                 do {
5453                         rte_delay_ms(1);
5454                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5455                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5456                 if (!poll_ms)
5457                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5458                 rte_wmb();
5459                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5460
5461         }
5462 }
5463
5464 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
5465 int __attribute__((weak))
5466 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5467 {
5468         return -1;
5469 }
5470
5471 uint16_t __attribute__((weak))
5472 ixgbe_recv_pkts_vec(
5473         void __rte_unused *rx_queue,
5474         struct rte_mbuf __rte_unused **rx_pkts,
5475         uint16_t __rte_unused nb_pkts)
5476 {
5477         return 0;
5478 }
5479
5480 uint16_t __attribute__((weak))
5481 ixgbe_recv_scattered_pkts_vec(
5482         void __rte_unused *rx_queue,
5483         struct rte_mbuf __rte_unused **rx_pkts,
5484         uint16_t __rte_unused nb_pkts)
5485 {
5486         return 0;
5487 }
5488
5489 int __attribute__((weak))
5490 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5491 {
5492         return -1;
5493 }