deb_dpdk.git: drivers/net/ixgbe/ixgbe_rxtx.c (commit 64bff2584b6f8c4160fd936ee1710fa631ed3306)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73 #include <rte_net.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 #ifdef RTE_LIBRTE_IEEE1588
84 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
85 #else
86 #define IXGBE_TX_IEEE1588_TMST 0
87 #endif
88 /* Bit mask to indicate which bits are required for building the TX context */
89 #define IXGBE_TX_OFFLOAD_MASK (                  \
90                 PKT_TX_VLAN_PKT |                \
91                 PKT_TX_IP_CKSUM |                \
92                 PKT_TX_L4_MASK |                 \
93                 PKT_TX_TCP_SEG |                 \
94                 PKT_TX_MACSEC |                  \
95                 PKT_TX_OUTER_IP_CKSUM |          \
96                 IXGBE_TX_IEEE1588_TMST)
97
98 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
99                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
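/*
 * Every TX offload flag in PKT_TX_OFFLOAD_MASK that is not part of
 * IXGBE_TX_OFFLOAD_MASK is unsupported by this driver; ixgbe_prep_pkts()
 * below uses this mask to reject packets carrying such flags.
 */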
100
101 #if 1
102 #define RTE_PMD_USE_PREFETCH
103 #endif
104
105 #ifdef RTE_PMD_USE_PREFETCH
106 /*
107  * Prefetch a cache line into all cache levels.
108  */
109 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
110 #else
111 #define rte_ixgbe_prefetch(p)   do {} while (0)
112 #endif
113
114 #ifdef RTE_IXGBE_INC_VECTOR
115 uint16_t ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
116                                     uint16_t nb_pkts);
117 #endif
118
119 /*********************************************************************
120  *
121  *  TX functions
122  *
123  **********************************************************************/
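/*
 * These burst functions are not called directly by applications; they are
 * typically reached through the ethdev API, e.g.:
 *
 *     uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);
 *
 * which dispatches to whichever ixgbe_xmit_pkts* variant was selected when
 * the TX queue was set up.
 */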
124
125 /*
126  * Check for descriptors with their DD bit set and free mbufs.
127  * Return the total number of buffers freed.
128  */
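/*
 * Mbufs are returned to their mempool in bulk; the batch collected so far
 * is flushed early when it is full (RTE_IXGBE_TX_MAX_FREE_BUF_SZ) or when
 * the next mbuf belongs to a different mempool.  Freeing proceeds in units
 * of tx_rs_thresh descriptors, starting at index
 * tx_next_dd - (tx_rs_thresh - 1).
 */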
129 static __rte_always_inline int
130 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
131 {
132         struct ixgbe_tx_entry *txep;
133         uint32_t status;
134         int i, nb_free = 0;
135         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
136
137         /* check DD bit on threshold descriptor */
138         status = txq->tx_ring[txq->tx_next_dd].wb.status;
139         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
140                 return 0;
141
142         /*
143          * first buffer to free from S/W ring is at index
144          * tx_next_dd - (tx_rs_thresh-1)
145          */
146         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
147
148         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
149                 /* free buffers one at a time */
150                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
151                 txep->mbuf = NULL;
152
153                 if (unlikely(m == NULL))
154                         continue;
155
156                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
157                     (nb_free > 0 && m->pool != free[0]->pool)) {
158                         rte_mempool_put_bulk(free[0]->pool,
159                                              (void **)free, nb_free);
160                         nb_free = 0;
161                 }
162
163                 free[nb_free++] = m;
164         }
165
166         if (nb_free > 0)
167                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
168
169         /* buffers were freed, update counters */
170         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
171         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
172         if (txq->tx_next_dd >= txq->nb_tx_desc)
173                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
174
175         return txq->tx_rs_thresh;
176 }
177
178 /* Populate 4 descriptors with data from 4 mbufs */
179 static inline void
180 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
181 {
182         uint64_t buf_dma_addr;
183         uint32_t pkt_len;
184         int i;
185
186         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
187                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
188                 pkt_len = (*pkts)->data_len;
189
190                 /* write data to descriptor */
191                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
192
193                 txdp->read.cmd_type_len =
194                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
195
196                 txdp->read.olinfo_status =
197                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
198
199                 rte_prefetch0(&(*pkts)->pool);
200         }
201 }
202
203 /* Populate 1 descriptor with data from 1 mbuf */
204 static inline void
205 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
206 {
207         uint64_t buf_dma_addr;
208         uint32_t pkt_len;
209
210         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
211         pkt_len = (*pkts)->data_len;
212
213         /* write data to descriptor */
214         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
215         txdp->read.cmd_type_len =
216                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
217         txdp->read.olinfo_status =
218                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
219         rte_prefetch0(&(*pkts)->pool);
220 }
221
222 /*
223  * Fill H/W descriptor ring with mbuf data.
224  * Copy mbuf pointers to the S/W ring.
225  */
226 static inline void
227 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
228                       uint16_t nb_pkts)
229 {
230         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
231         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
232         const int N_PER_LOOP = 4;
233         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
234         int mainpart, leftover;
235         int i, j;
236
237         /*
238          * Process most of the packets in chunks of N pkts.  Any
239          * leftover packets will get processed one at a time.
240          */
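        /*
         * Example: with nb_pkts = 10 and N_PER_LOOP = 4, mainpart = 8
         * (two tx4() calls) and leftover = 2 (two tx1() calls).
         */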
241         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
242         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
243         for (i = 0; i < mainpart; i += N_PER_LOOP) {
244                 /* Copy N mbuf pointers to the S/W ring */
245                 for (j = 0; j < N_PER_LOOP; ++j) {
246                         (txep + i + j)->mbuf = *(pkts + i + j);
247                 }
248                 tx4(txdp + i, pkts + i);
249         }
250
251         if (unlikely(leftover > 0)) {
252                 for (i = 0; i < leftover; ++i) {
253                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
254                         tx1(txdp + mainpart + i, pkts + mainpart + i);
255                 }
256         }
257 }
258
259 static inline uint16_t
260 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
261              uint16_t nb_pkts)
262 {
263         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
264         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
265         uint16_t n = 0;
266
267         /*
268          * Begin scanning the H/W ring for done descriptors when the
269          * number of available descriptors drops below tx_free_thresh.  For
270          * each done descriptor, free the associated buffer.
271          */
272         if (txq->nb_tx_free < txq->tx_free_thresh)
273                 ixgbe_tx_free_bufs(txq);
274
275         /* Only use descriptors that are available */
276         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
277         if (unlikely(nb_pkts == 0))
278                 return 0;
279
280         /* Use exactly nb_pkts descriptors */
281         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
282
283         /*
284          * At this point, we know there are enough descriptors in the
285          * ring to transmit all the packets.  This assumes that each
286          * mbuf contains a single segment, and that no new offloads
287          * are expected, which would require a new context descriptor.
288          */
289
290         /*
291          * See if we're going to wrap-around. If so, handle the top
292          * of the descriptor ring first, then do the bottom.  If not,
293          * the processing looks just like the "bottom" part anyway...
294          */
295         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
296                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
297                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
298
299                 /*
300                  * We know that the last descriptor in the ring will need to
301                  * have its RS bit set because tx_rs_thresh has to be
302                  * a divisor of the ring size
303                  */
304                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
305                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
306                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
307
308                 txq->tx_tail = 0;
309         }
310
311         /* Fill H/W descriptor ring with mbuf data */
312         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
313         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
314
315         /*
316          * Determine if RS bit should be set
317          * This is what we actually want:
318          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
319          * but instead of subtracting 1 and doing >=, we can just do
320          * greater than without subtracting.
321          */
322         if (txq->tx_tail > txq->tx_next_rs) {
323                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
324                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
325                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
326                                                 txq->tx_rs_thresh);
327                 if (txq->tx_next_rs >= txq->nb_tx_desc)
328                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
329         }
330
331         /*
332          * Check for wrap-around. This would only happen if we used
333          * up to the last descriptor in the ring, no more, no less.
334          */
335         if (txq->tx_tail >= txq->nb_tx_desc)
336                 txq->tx_tail = 0;
337
338         /* update tail pointer */
339         rte_wmb();
340         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
341
342         return nb_pkts;
343 }
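/*
 * Simple transmit entry point: bursts larger than RTE_PMD_IXGBE_TX_MAX_BURST
 * are split into chunks of at most that size, and transmission stops early
 * if a chunk cannot be sent in full (the ring ran out of free descriptors).
 * This path assumes single-segment mbufs and no offloads that would need a
 * context descriptor.
 */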
344
345 uint16_t
346 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
347                        uint16_t nb_pkts)
348 {
349         uint16_t nb_tx;
350
351         /* Transmit the whole burst directly if it fits within TX_MAX_BURST pkts */
352         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
353                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
354
355         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
356         nb_tx = 0;
357         while (nb_pkts) {
358                 uint16_t ret, n;
359
360                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
361                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
362                 nb_tx = (uint16_t)(nb_tx + ret);
363                 nb_pkts = (uint16_t)(nb_pkts - ret);
364                 if (ret < n)
365                         break;
366         }
367
368         return nb_tx;
369 }
370
371 #ifdef RTE_IXGBE_INC_VECTOR
372 static uint16_t
373 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
374                     uint16_t nb_pkts)
375 {
376         uint16_t nb_tx = 0;
377         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
378
379         while (nb_pkts) {
380                 uint16_t ret, num;
381
382                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
383                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
384                                                  num);
385                 nb_tx += ret;
386                 nb_pkts -= ret;
387                 if (ret < num)
388                         break;
389         }
390
391         return nb_tx;
392 }
393 #endif
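/*
 * The vector wrapper above limits each ixgbe_xmit_fixed_burst_vec() call to
 * at most tx_rs_thresh packets and stops as soon as a burst is only
 * partially accepted.
 */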
394
395 static inline void
396 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
397                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
398                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
399 {
400         uint32_t type_tucmd_mlhl;
401         uint32_t mss_l4len_idx = 0;
402         uint32_t ctx_idx;
403         uint32_t vlan_macip_lens;
404         union ixgbe_tx_offload tx_offload_mask;
405         uint32_t seqnum_seed = 0;
406
407         ctx_idx = txq->ctx_curr;
408         tx_offload_mask.data[0] = 0;
409         tx_offload_mask.data[1] = 0;
410         type_tucmd_mlhl = 0;
411
412         /* Specify which HW CTX to upload. */
413         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
414
415         if (ol_flags & PKT_TX_VLAN_PKT) {
416                 tx_offload_mask.vlan_tci |= ~0;
417         }
418
419         /* check if TCP segmentation is required for this packet */
420         if (ol_flags & PKT_TX_TCP_SEG) {
421                 /* implies IP cksum in IPv4 */
422                 if (ol_flags & PKT_TX_IP_CKSUM)
423                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
424                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
425                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
426                 else
427                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
428                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
429                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
430
431                 tx_offload_mask.l2_len |= ~0;
432                 tx_offload_mask.l3_len |= ~0;
433                 tx_offload_mask.l4_len |= ~0;
434                 tx_offload_mask.tso_segsz |= ~0;
435                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
436                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
437         } else { /* no TSO, check if hardware checksum is needed */
438                 if (ol_flags & PKT_TX_IP_CKSUM) {
439                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
440                         tx_offload_mask.l2_len |= ~0;
441                         tx_offload_mask.l3_len |= ~0;
442                 }
443
444                 switch (ol_flags & PKT_TX_L4_MASK) {
445                 case PKT_TX_UDP_CKSUM:
446                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
447                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
448                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
449                         tx_offload_mask.l2_len |= ~0;
450                         tx_offload_mask.l3_len |= ~0;
451                         break;
452                 case PKT_TX_TCP_CKSUM:
453                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
454                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
455                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
456                         tx_offload_mask.l2_len |= ~0;
457                         tx_offload_mask.l3_len |= ~0;
458                         break;
459                 case PKT_TX_SCTP_CKSUM:
460                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
461                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
462                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
463                         tx_offload_mask.l2_len |= ~0;
464                         tx_offload_mask.l3_len |= ~0;
465                         break;
466                 default:
467                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
468                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
469                         break;
470                 }
471         }
472
473         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
474                 tx_offload_mask.outer_l2_len |= ~0;
475                 tx_offload_mask.outer_l3_len |= ~0;
476                 tx_offload_mask.l2_len |= ~0;
477                 seqnum_seed |= tx_offload.outer_l3_len
478                                << IXGBE_ADVTXD_OUTER_IPLEN;
479                 seqnum_seed |= tx_offload.l2_len
480                                << IXGBE_ADVTXD_TUNNEL_LEN;
481         }
482
483         txq->ctx_cache[ctx_idx].flags = ol_flags;
484         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
485                 tx_offload_mask.data[0] & tx_offload.data[0];
486         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
487                 tx_offload_mask.data[1] & tx_offload.data[1];
488         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
489
490         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
491         vlan_macip_lens = tx_offload.l3_len;
492         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
493                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
494                                     IXGBE_ADVTXD_MACLEN_SHIFT);
495         else
496                 vlan_macip_lens |= (tx_offload.l2_len <<
497                                     IXGBE_ADVTXD_MACLEN_SHIFT);
498         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
499         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
500         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
501         ctx_txd->seqnum_seed     = seqnum_seed;
502 }
503
504 /*
505  * Check which hardware context can be used. Use the existing match
506  * or create a new context descriptor.
507  */
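/*
 * The queue caches two hardware contexts; txq->ctx_curr toggles between
 * them.  A return value of IXGBE_CTX_NUM means that neither cached context
 * matches, so the caller must program a new context descriptor.
 */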
508 static inline uint32_t
509 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
510                    union ixgbe_tx_offload tx_offload)
511 {
512         /* Check for a match with the currently used context */
513         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
514                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
515                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
516                      & tx_offload.data[0])) &&
517                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
518                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
519                      & tx_offload.data[1]))))
520                 return txq->ctx_curr;
521
522         /* Otherwise, check for a match with the other (next) context */
523         txq->ctx_curr ^= 1;
524         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
525                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
526                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
527                      & tx_offload.data[0])) &&
528                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
529                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
530                      & tx_offload.data[1]))))
531                 return txq->ctx_curr;
532
533         /* Neither context matches: a new context descriptor must be built */
534         return IXGBE_CTX_NUM;
535 }
536
537 static inline uint32_t
538 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
539 {
540         uint32_t tmp = 0;
541
542         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
543                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
544         if (ol_flags & PKT_TX_IP_CKSUM)
545                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
546         if (ol_flags & PKT_TX_TCP_SEG)
547                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
548         return tmp;
549 }
550
551 static inline uint32_t
552 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
553 {
554         uint32_t cmdtype = 0;
555
556         if (ol_flags & PKT_TX_VLAN_PKT)
557                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
558         if (ol_flags & PKT_TX_TCP_SEG)
559                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
560         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
561                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
562         if (ol_flags & PKT_TX_MACSEC)
563                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
564         return cmdtype;
565 }
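/*
 * The two helpers above translate mbuf ol_flags into, respectively, the
 * POPTS bits of a data descriptor's olinfo_status field and the DCMD bits
 * of its cmd_type_len field.
 */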
566
567 /* Default RS bit threshold values */
568 #ifndef DEFAULT_TX_RS_THRESH
569 #define DEFAULT_TX_RS_THRESH   32
570 #endif
571 #ifndef DEFAULT_TX_FREE_THRESH
572 #define DEFAULT_TX_FREE_THRESH 32
573 #endif
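/*
 * tx_rs_thresh bounds how many descriptors are written before the RS
 * (Report Status) bit is requested; tx_free_thresh is the number of free
 * descriptors below which the driver starts reclaiming transmitted buffers.
 */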
574
575 /* Reset transmit descriptors after they have been used */
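/*
 * Cleanup advances in steps of tx_rs_thresh descriptors: the DD bit is
 * checked on the descriptor tx_rs_thresh entries past last_desc_cleaned
 * (adjusted via last_id to the final descriptor of that packet); if it is
 * not yet set, -1 is returned and nothing is freed.
 */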
576 static inline int
577 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
578 {
579         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
580         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
581         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
582         uint16_t nb_tx_desc = txq->nb_tx_desc;
583         uint16_t desc_to_clean_to;
584         uint16_t nb_tx_to_clean;
585         uint32_t status;
586
587         /* Determine the last descriptor needing to be cleaned */
588         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
589         if (desc_to_clean_to >= nb_tx_desc)
590                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
591
592         /* Check to make sure the last descriptor to clean is done */
593         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
594         status = txr[desc_to_clean_to].wb.status;
595         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
596                 PMD_TX_FREE_LOG(DEBUG,
597                                 "TX descriptor %4u is not done "
598                                 "(port=%d queue=%d)",
599                                 desc_to_clean_to,
600                                 txq->port_id, txq->queue_id);
601                 /* Failed to clean any descriptors, better luck next time */
602                 return -(1);
603         }
604
605         /* Figure out how many descriptors will be cleaned */
606         if (last_desc_cleaned > desc_to_clean_to)
607                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
608                                                         desc_to_clean_to);
609         else
610                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
611                                                 last_desc_cleaned);
612
613         PMD_TX_FREE_LOG(DEBUG,
614                         "Cleaning %4u TX descriptors: %4u to %4u "
615                         "(port=%d queue=%d)",
616                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
617                         txq->port_id, txq->queue_id);
618
619         /*
620          * The last descriptor to clean is done, so that means all the
621          * descriptors from the last descriptor that was cleaned
622          * up to the last descriptor with the RS bit set
623          * are done. Only reset the threshold descriptor.
624          */
625         txr[desc_to_clean_to].wb.status = 0;
626
627         /* Update the txq to reflect the last descriptor that was cleaned */
628         txq->last_desc_cleaned = desc_to_clean_to;
629         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
630
631         /* No Error */
632         return 0;
633 }
634
635 uint16_t
636 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
637                 uint16_t nb_pkts)
638 {
639         struct ixgbe_tx_queue *txq;
640         struct ixgbe_tx_entry *sw_ring;
641         struct ixgbe_tx_entry *txe, *txn;
642         volatile union ixgbe_adv_tx_desc *txr;
643         volatile union ixgbe_adv_tx_desc *txd, *txp;
644         struct rte_mbuf     *tx_pkt;
645         struct rte_mbuf     *m_seg;
646         uint64_t buf_dma_addr;
647         uint32_t olinfo_status;
648         uint32_t cmd_type_len;
649         uint32_t pkt_len;
650         uint16_t slen;
651         uint64_t ol_flags;
652         uint16_t tx_id;
653         uint16_t tx_last;
654         uint16_t nb_tx;
655         uint16_t nb_used;
656         uint64_t tx_ol_req;
657         uint32_t ctx = 0;
658         uint32_t new_ctx;
659         union ixgbe_tx_offload tx_offload;
660
661         tx_offload.data[0] = 0;
662         tx_offload.data[1] = 0;
663         txq = tx_queue;
664         sw_ring = txq->sw_ring;
665         txr     = txq->tx_ring;
666         tx_id   = txq->tx_tail;
667         txe = &sw_ring[tx_id];
668         txp = NULL;
669
670         /* Determine if the descriptor ring needs to be cleaned. */
671         if (txq->nb_tx_free < txq->tx_free_thresh)
672                 ixgbe_xmit_cleanup(txq);
673
674         rte_prefetch0(&txe->mbuf->pool);
675
676         /* TX loop */
677         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
678                 new_ctx = 0;
679                 tx_pkt = *tx_pkts++;
680                 pkt_len = tx_pkt->pkt_len;
681
682                 /*
683                  * Determine how many (if any) context descriptors
684                  * are needed for offload functionality.
685                  */
686                 ol_flags = tx_pkt->ol_flags;
687
688                 /* If hardware offload required */
689                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
690                 if (tx_ol_req) {
691                         tx_offload.l2_len = tx_pkt->l2_len;
692                         tx_offload.l3_len = tx_pkt->l3_len;
693                         tx_offload.l4_len = tx_pkt->l4_len;
694                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
695                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
696                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
697                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
698
699                         /* Decide whether a new context descriptor must be built or an existing one can be reused. */
700                         ctx = what_advctx_update(txq, tx_ol_req,
701                                 tx_offload);
702                         /* Only allocate a context descriptor if required */
703                         new_ctx = (ctx == IXGBE_CTX_NUM);
704                         ctx = txq->ctx_curr;
705                 }
706
707                 /*
708                  * Keep track of how many descriptors are used in this loop:
709                  * always the number of packet segments plus the number of
710                  * context descriptors required to transmit the packet.
711                  */
712                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
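                /*
                 * e.g. a three-segment packet that also needs a new context
                 * descriptor consumes four descriptors in this iteration.
                 */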
713
714                 if (txp != NULL &&
715                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
716                         /* set RS on the previous packet in the burst */
717                         txp->read.cmd_type_len |=
718                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
719
720                 /*
721                  * The number of descriptors that must be allocated for a
722                  * packet is the number of segments of that packet, plus 1
723                  * Context Descriptor for the hardware offload, if any.
724                  * Determine the last TX descriptor to allocate in the TX ring
725                  * for the packet, starting from the current position (tx_id)
726                  * in the ring.
727                  */
728                 tx_last = (uint16_t) (tx_id + nb_used - 1);
729
730                 /* Circular ring */
731                 if (tx_last >= txq->nb_tx_desc)
732                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
733
734                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
735                            " tx_first=%u tx_last=%u",
736                            (unsigned) txq->port_id,
737                            (unsigned) txq->queue_id,
738                            (unsigned) pkt_len,
739                            (unsigned) tx_id,
740                            (unsigned) tx_last);
741
742                 /*
743                  * Make sure there are enough TX descriptors available to
744                  * transmit the entire packet.
745                  * nb_used better be less than or equal to txq->tx_rs_thresh
746                  */
747                 if (nb_used > txq->nb_tx_free) {
748                         PMD_TX_FREE_LOG(DEBUG,
749                                         "Not enough free TX descriptors "
750                                         "nb_used=%4u nb_free=%4u "
751                                         "(port=%d queue=%d)",
752                                         nb_used, txq->nb_tx_free,
753                                         txq->port_id, txq->queue_id);
754
755                         if (ixgbe_xmit_cleanup(txq) != 0) {
756                                 /* Could not clean any descriptors */
757                                 if (nb_tx == 0)
758                                         return 0;
759                                 goto end_of_tx;
760                         }
761
762                         /* nb_used better be <= txq->tx_rs_thresh */
763                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
764                                 PMD_TX_FREE_LOG(DEBUG,
765                                         "The number of descriptors needed to "
766                                         "transmit the packet exceeds the "
767                                         "RS bit threshold. This will impact "
768                                         "performance."
769                                         "nb_used=%4u nb_free=%4u "
770                                         "tx_rs_thresh=%4u. "
771                                         "(port=%d queue=%d)",
772                                         nb_used, txq->nb_tx_free,
773                                         txq->tx_rs_thresh,
774                                         txq->port_id, txq->queue_id);
775                                 /*
776                                  * Loop here until there are enough TX
777                                  * descriptors or until the ring cannot be
778                                  * cleaned.
779                                  */
780                                 while (nb_used > txq->nb_tx_free) {
781                                         if (ixgbe_xmit_cleanup(txq) != 0) {
782                                                 /*
783                                                  * Could not clean any
784                                                  * descriptors
785                                                  */
786                                                 if (nb_tx == 0)
787                                                         return 0;
788                                                 goto end_of_tx;
789                                         }
790                                 }
791                         }
792                 }
793
794                 /*
795                  * By now there are enough free TX descriptors to transmit
796                  * the packet.
797                  */
798
799                 /*
800                  * Set common flags of all TX Data Descriptors.
801                  *
802                  * The following bits must be set in all Data Descriptors:
803                  *   - IXGBE_ADVTXD_DTYP_DATA
804                  *   - IXGBE_ADVTXD_DCMD_DEXT
805                  *
806                  * The following bits must be set in the first Data Descriptor
807                  * and are ignored in the other ones:
808                  *   - IXGBE_ADVTXD_DCMD_IFCS
809                  *   - IXGBE_ADVTXD_MAC_1588
810                  *   - IXGBE_ADVTXD_DCMD_VLE
811                  *
812                  * The following bits must only be set in the last Data
813                  * Descriptor:
814                  *   - IXGBE_TXD_CMD_EOP
815                  *
816                  * The following bits can be set in any Data Descriptor, but
817                  * are only set in the last Data Descriptor:
818                  *   - IXGBE_TXD_CMD_RS
819                  */
820                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
821                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
822
823 #ifdef RTE_LIBRTE_IEEE1588
824                 if (ol_flags & PKT_TX_IEEE1588_TMST)
825                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
826 #endif
827
828                 olinfo_status = 0;
829                 if (tx_ol_req) {
830
831                         if (ol_flags & PKT_TX_TCP_SEG) {
832                                 /* when TSO is on, the paylen in the descriptor is
833                                  * not the packet length but the TCP payload length */
834                                 pkt_len -= (tx_offload.l2_len +
835                                         tx_offload.l3_len + tx_offload.l4_len);
836                         }
837
838                         /*
839                          * Setup the TX Advanced Context Descriptor if required
840                          */
841                         if (new_ctx) {
842                                 volatile struct ixgbe_adv_tx_context_desc *
843                                     ctx_txd;
844
845                                 ctx_txd = (volatile struct
846                                     ixgbe_adv_tx_context_desc *)
847                                     &txr[tx_id];
848
849                                 txn = &sw_ring[txe->next_id];
850                                 rte_prefetch0(&txn->mbuf->pool);
851
852                                 if (txe->mbuf != NULL) {
853                                         rte_pktmbuf_free_seg(txe->mbuf);
854                                         txe->mbuf = NULL;
855                                 }
856
857                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
858                                         tx_offload);
859
860                                 txe->last_id = tx_last;
861                                 tx_id = txe->next_id;
862                                 txe = txn;
863                         }
864
865                         /*
866                          * Set up the TX Advanced Data Descriptor.
867                          * This path is taken whether a new context descriptor
868                          * was built or an existing one is reused.
869                          */
870                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
871                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
872                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
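                        /*
                         * The IDX field ties this data descriptor to the
                         * hardware context slot programmed above.
                         */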
873                 }
874
875                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
876
877                 m_seg = tx_pkt;
878                 do {
879                         txd = &txr[tx_id];
880                         txn = &sw_ring[txe->next_id];
881                         rte_prefetch0(&txn->mbuf->pool);
882
883                         if (txe->mbuf != NULL)
884                                 rte_pktmbuf_free_seg(txe->mbuf);
885                         txe->mbuf = m_seg;
886
887                         /*
888                          * Set up Transmit Data Descriptor.
889                          */
890                         slen = m_seg->data_len;
891                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
892                         txd->read.buffer_addr =
893                                 rte_cpu_to_le_64(buf_dma_addr);
894                         txd->read.cmd_type_len =
895                                 rte_cpu_to_le_32(cmd_type_len | slen);
896                         txd->read.olinfo_status =
897                                 rte_cpu_to_le_32(olinfo_status);
898                         txe->last_id = tx_last;
899                         tx_id = txe->next_id;
900                         txe = txn;
901                         m_seg = m_seg->next;
902                 } while (m_seg != NULL);
903
904                 /*
905                  * The last packet data descriptor needs End Of Packet (EOP)
906                  */
907                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
908                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
909                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
910
911                 /* Set RS bit only on threshold packets' last descriptor */
912                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
913                         PMD_TX_FREE_LOG(DEBUG,
914                                         "Setting RS bit on TXD id="
915                                         "%4u (port=%d queue=%d)",
916                                         tx_last, txq->port_id, txq->queue_id);
917
918                         cmd_type_len |= IXGBE_TXD_CMD_RS;
919
920                         /* Update txq RS bit counters */
921                         txq->nb_tx_used = 0;
922                         txp = NULL;
923                 } else
924                         txp = txd;
925
926                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
927         }
928
929 end_of_tx:
930         /* set RS on last packet in the burst */
931         if (txp != NULL)
932                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
933
934         rte_wmb();
935
936         /*
937          * Set the Transmit Descriptor Tail (TDT)
938          */
939         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
940                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
941                    (unsigned) tx_id, (unsigned) nb_tx);
942         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
943         txq->tx_tail = tx_id;
944
945         return nb_tx;
946 }
947
948 /*********************************************************************
949  *
950  *  TX prep functions
951  *
952  **********************************************************************/
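/*
 * ixgbe_prep_pkts() is reached through rte_eth_tx_prepare(): it validates
 * the segment count and offload flags of each packet and, on the first
 * failure, sets rte_errno and returns the index of the offending packet.
 */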
953 uint16_t
954 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
955 {
956         int i, ret;
957         uint64_t ol_flags;
958         struct rte_mbuf *m;
959         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
960
961         for (i = 0; i < nb_pkts; i++) {
962                 m = tx_pkts[i];
963                 ol_flags = m->ol_flags;
964
965                 /**
966                  * Check if packet meets requirements for number of segments
967                  *
968                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
969                  *       non-TSO
970                  */
971
972                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
973                         rte_errno = -EINVAL;
974                         return i;
975                 }
976
977                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
978                         rte_errno = -ENOTSUP;
979                         return i;
980                 }
981
982 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
983                 ret = rte_validate_tx_offload(m);
984                 if (ret != 0) {
985                         rte_errno = ret;
986                         return i;
987                 }
988 #endif
989                 ret = rte_net_intel_cksum_prepare(m);
990                 if (ret != 0) {
991                         rte_errno = ret;
992                         return i;
993                 }
994         }
995
996         return i;
997 }
998
999 /*********************************************************************
1000  *
1001  *  RX functions
1002  *
1003  **********************************************************************/
1004
1005 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1006 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1007 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1008 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1009 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1010 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1011 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1012 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1013 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1014 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1015 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1016 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1017 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1018 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1019 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1020 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1021 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1022 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1023 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1024 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1025 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1026 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1027 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1028 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1029 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1030 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1031 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1032 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1033 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1034 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1035 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1036 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1037 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1038
1039 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1040 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1041 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1042 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1043 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1044 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1045 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1046 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1047 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1048 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1049 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1050 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1051 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1052 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1053 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1054 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1055 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1062
1063 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1064 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1065 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1066 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1067 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1068 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1069 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1070 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1071 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1072 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1073 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1074 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1075 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1076 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1077 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1078 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1079 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1086
1087 /**
1088  * Use two different tables for normal packets and tunnel packets
1089  * to save space.
1090  */
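/*
 * The IXGBE_PACKET_TYPE_* values used as indexes encode the packet type:
 * the low nibble identifies the L3 header (0x1 IPv4, 0x3 IPv4 with
 * extensions, 0x4 IPv6, 0xC IPv6 with extensions, combined values for
 * IP-in-IP tunnels), bits 4-6 select TCP/UDP/SCTP, and in the tunnel table
 * bit 7 distinguishes VXLAN (set) from NVGRE (clear).
 */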
1091 const uint32_t
1092         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1093         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1094         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1095                 RTE_PTYPE_L3_IPV4,
1096         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1097                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1098         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1099                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1100         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1101                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1102         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1103                 RTE_PTYPE_L3_IPV4_EXT,
1104         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1105                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1106         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1107                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1108         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1109                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1110         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1111                 RTE_PTYPE_L3_IPV6,
1112         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1113                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1114         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1115                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1116         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1117                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1118         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1119                 RTE_PTYPE_L3_IPV6_EXT,
1120         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1121                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1122         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1123                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1124         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1125                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1126         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1127                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1128                 RTE_PTYPE_INNER_L3_IPV6,
1129         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1130                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1131                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1132         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1133                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1134                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1135         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1136                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1137                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1138         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1139                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1140                 RTE_PTYPE_INNER_L3_IPV6,
1141         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1142                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1143                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1144         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1145                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1146                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1147         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1148                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1149                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1150         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1151                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1152                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1153         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1154                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1155                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1156         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1157                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1158                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1159         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1160                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1161                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1162         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1163                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1164                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1165         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1166                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1167                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1168         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1169                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1170                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1171         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1172                 RTE_PTYPE_L2_ETHER |
1173                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1174                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1175 };
1176
1177 const uint32_t
1178         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1179         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1180                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1181                 RTE_PTYPE_INNER_L2_ETHER,
1182         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1183                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1184                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1185         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1186                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1187                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1188         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1189                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1190                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1191         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1192                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1193                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1194         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1195                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1196                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1197         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1198                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1199                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1200         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1201                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1202                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1203                 RTE_PTYPE_INNER_L4_TCP,
1204         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1205                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1206                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1207                 RTE_PTYPE_INNER_L4_TCP,
1208         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1209                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1210                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1211         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1212                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1213                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1214                 RTE_PTYPE_INNER_L4_TCP,
1215         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1216                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1217                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1218                 RTE_PTYPE_INNER_L3_IPV4,
1219         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1220                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1221                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1222                 RTE_PTYPE_INNER_L4_UDP,
1223         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1224                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1225                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1226                 RTE_PTYPE_INNER_L4_UDP,
1227         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1228                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1229                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1230                 RTE_PTYPE_INNER_L4_SCTP,
1231         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1232                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1233                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1234         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1235                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1236                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1237                 RTE_PTYPE_INNER_L4_UDP,
1238         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1239                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1240                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1241                 RTE_PTYPE_INNER_L4_SCTP,
1242         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1243                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1244                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1245                 RTE_PTYPE_INNER_L3_IPV4,
1246         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1247                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1248                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1249                 RTE_PTYPE_INNER_L4_SCTP,
1250         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1251                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1252                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1253                 RTE_PTYPE_INNER_L4_SCTP,
1254         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1255                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1256                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1257                 RTE_PTYPE_INNER_L4_TCP,
1258         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1259                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1260                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1261                 RTE_PTYPE_INNER_L4_UDP,
1262
1263         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1264                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1265                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1266         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1267                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1268                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1269                 RTE_PTYPE_INNER_L3_IPV4,
1270         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1271                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1272                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1273                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1274         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1275                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1276                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1277                 RTE_PTYPE_INNER_L3_IPV6,
1278         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1279                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1280                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1281                 RTE_PTYPE_INNER_L3_IPV4,
1282         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1283                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1284                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1285                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1286         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1287                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1288                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1289                 RTE_PTYPE_INNER_L3_IPV4,
1290         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1291                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1292                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1293                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1294         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1295                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1296                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1297                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1298         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1299                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1300                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1301                 RTE_PTYPE_INNER_L3_IPV4,
1302         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1303                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1304                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1305                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1306         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1307                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1308                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1309                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1310         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1311                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1312                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1313                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1314         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1315                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1316                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1317                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1318         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1319                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1320                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1321                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1322         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1323                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1324                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1325                 RTE_PTYPE_INNER_L3_IPV4,
1326         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1327                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1328                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1329                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1330         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1331                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1332                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1333                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1334         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1335                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1336                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1337                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1338         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1339                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1340                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1341                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1342         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1343                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1344                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1345                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1346         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1347                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1348                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1349                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1350         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1351                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1352                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1353                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1354 };
1355
1356 /* @note: update ixgbe_dev_supported_ptypes_get() if anything changes here. */
1357 static inline uint32_t
1358 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1359 {
1360
1361         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1362                 return RTE_PTYPE_UNKNOWN;
1363
1364         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1365
1366         /* For tunnel packet */
1367         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1368                 /* Remove the tunnel bit to save table space. */
1369                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1370                 return ptype_table_tn[pkt_info];
1371         }
1372
1373         /**
1374          * For x550, if it's not tunnel,
1375          * tunnel type bit should be set to 0.
1376          * Reuse 82599's mask.
1377          */
1378         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1379
1380         return ptype_table[pkt_info];
1381 }
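
/*
 * Editor's note (illustrative, not part of the driver): the lookup above is
 * a pure table walk.  Once the packet-type field is shifted and masked, the
 * tunnel bit selects between ptype_table_tn and the 82599-style ptype_table.
 * Callers in this file use it as:
 *
 *     pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
 *     mb->packet_type =
 *             ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
 */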
1382
1383 static inline uint64_t
1384 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1385 {
1386         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1387                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1388                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1389                 PKT_RX_RSS_HASH, 0, 0, 0,
1390                 0, 0, 0,  PKT_RX_FDIR,
1391         };
1392 #ifdef RTE_LIBRTE_IEEE1588
1393         static uint64_t ip_pkt_etqf_map[8] = {
1394                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1395                 0, 0, 0, 0,
1396         };
1397
1398         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1399                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1400                                 ip_rss_types_map[pkt_info & 0XF];
1401         else
1402                 return ip_rss_types_map[pkt_info & 0XF];
1403 #else
1404         return ip_rss_types_map[pkt_info & 0XF];
1405 #endif
1406 }
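
/*
 * Editor's note (illustrative): the low nibble of pkt_info is the RSS type
 * reported by the NIC.  Reading the map above, a nibble of 0x1 yields
 * PKT_RX_RSS_HASH, 0x4 yields no flag, and 0xF yields PKT_RX_FDIR; with
 * RTE_LIBRTE_IEEE1588 enabled, bits 4-6 additionally index an ETQF entry
 * that may add PKT_RX_IEEE1588_PTP.
 */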
1407
1408 static inline uint64_t
1409 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1410 {
1411         uint64_t pkt_flags;
1412
1413         /*
1414          * Check only whether a VLAN is present.
1415          * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1416          * that is indicated by the rte_eth_rxmode.hw_ip_checksum flag.
1417          */
1418         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1419
1420 #ifdef RTE_LIBRTE_IEEE1588
1421         if (rx_status & IXGBE_RXD_STAT_TMST)
1422                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1423 #endif
1424         return pkt_flags;
1425 }
1426
1427 static inline uint64_t
1428 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1429 {
1430         uint64_t pkt_flags;
1431
1432         /*
1433          * Bit 31: IPE, IPv4 checksum error
1434          * Bit 30: L4I, L4 integrity error
1435          */
1436         static uint64_t error_to_pkt_flags_map[4] = {
1437                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1438                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1439                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1440                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1441         };
1442         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1443                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1444
1445         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1446             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1447                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1448         }
1449
1450         return pkt_flags;
1451 }
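
/*
 * Editor's note (illustrative): the two checksum error bits are consumed as
 * a 2-bit index into the map above.  For example, an index of 1 (only the
 * L4 error bit set) yields PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD, while
 * an index of 3 marks both checksums as bad.
 */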
1452
1453 /*
1454  * LOOK_AHEAD defines how many desc statuses to check beyond the
1455  * current descriptor.
1456  * It must be a pound define for optimal performance.
1457  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1458  * function only works with LOOK_AHEAD=8.
1459  */
1460 #define LOOK_AHEAD 8
1461 #if (LOOK_AHEAD != 8)
1462 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1463 #endif
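
/*
 * Editor's sketch of the scan below (assuming LOOK_AHEAD == 8): descriptors
 * are examined in groups of eight, roughly
 *
 *     for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST; i += 8)
 *             count contiguous DD bits in rxdp[0..7], convert those
 *             descriptors to mbufs, stage them, and stop at the first
 *             group that is not fully completed;
 *
 * so at most RTE_PMD_IXGBE_RX_MAX_BURST packets are staged per call.
 */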
1464 static inline int
1465 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1466 {
1467         volatile union ixgbe_adv_rx_desc *rxdp;
1468         struct ixgbe_rx_entry *rxep;
1469         struct rte_mbuf *mb;
1470         uint16_t pkt_len;
1471         uint64_t pkt_flags;
1472         int nb_dd;
1473         uint32_t s[LOOK_AHEAD];
1474         uint32_t pkt_info[LOOK_AHEAD];
1475         int i, j, nb_rx = 0;
1476         uint32_t status;
1477         uint64_t vlan_flags = rxq->vlan_flags;
1478
1479         /* get references to current descriptor and S/W ring entry */
1480         rxdp = &rxq->rx_ring[rxq->rx_tail];
1481         rxep = &rxq->sw_ring[rxq->rx_tail];
1482
1483         status = rxdp->wb.upper.status_error;
1484         /* check to make sure there is at least 1 packet to receive */
1485         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1486                 return 0;
1487
1488         /*
1489          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1490          * reference packets that are ready to be received.
1491          */
1492         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1493              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1494                 /* Read desc statuses before the payload (see rte_smp_rmb below) */
1495                 for (j = 0; j < LOOK_AHEAD; j++)
1496                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1497
1498                 rte_smp_rmb();
1499
1500                 /* Compute how many status bits were set */
1501                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1502                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1503                         ;
1504
1505                 for (j = 0; j < nb_dd; j++)
1506                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1507                                                        lo_dword.data);
1508
1509                 nb_rx += nb_dd;
1510
1511                 /* Translate descriptor info to mbuf format */
1512                 for (j = 0; j < nb_dd; ++j) {
1513                         mb = rxep[j].mbuf;
1514                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1515                                   rxq->crc_len;
1516                         mb->data_len = pkt_len;
1517                         mb->pkt_len = pkt_len;
1518                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1519
1520                         /* convert descriptor fields to rte mbuf flags */
1521                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1522                                 vlan_flags);
1523                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1524                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1525                                         ((uint16_t)pkt_info[j]);
1526                         mb->ol_flags = pkt_flags;
1527                         mb->packet_type =
1528                                 ixgbe_rxd_pkt_info_to_pkt_type
1529                                         (pkt_info[j], rxq->pkt_type_mask);
1530
1531                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1532                                 mb->hash.rss = rte_le_to_cpu_32(
1533                                     rxdp[j].wb.lower.hi_dword.rss);
1534                         else if (pkt_flags & PKT_RX_FDIR) {
1535                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1536                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1537                                     IXGBE_ATR_HASH_MASK;
1538                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1539                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1540                         }
1541                 }
1542
1543                 /* Move mbuf pointers from the S/W ring to the stage */
1544                 for (j = 0; j < LOOK_AHEAD; ++j) {
1545                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1546                 }
1547
1548                 /* stop if all requested packets could not be received */
1549                 if (nb_dd != LOOK_AHEAD)
1550                         break;
1551         }
1552
1553         /* clear software ring entries so we can cleanup correctly */
1554         for (i = 0; i < nb_rx; ++i) {
1555                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1556         }
1557
1558
1559         return nb_rx;
1560 }
1561
1562 static inline int
1563 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1564 {
1565         volatile union ixgbe_adv_rx_desc *rxdp;
1566         struct ixgbe_rx_entry *rxep;
1567         struct rte_mbuf *mb;
1568         uint16_t alloc_idx;
1569         __le64 dma_addr;
1570         int diag, i;
1571
1572         /* allocate buffers in bulk directly into the S/W ring */
1573         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1574         rxep = &rxq->sw_ring[alloc_idx];
1575         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1576                                     rxq->rx_free_thresh);
1577         if (unlikely(diag != 0))
1578                 return -ENOMEM;
1579
1580         rxdp = &rxq->rx_ring[alloc_idx];
1581         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1582                 /* populate the static rte mbuf fields */
1583                 mb = rxep[i].mbuf;
1584                 if (reset_mbuf) {
1585                         mb->port = rxq->port_id;
1586                 }
1587
1588                 rte_mbuf_refcnt_set(mb, 1);
1589                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1590
1591                 /* populate the descriptors */
1592                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1593                 rxdp[i].read.hdr_addr = 0;
1594                 rxdp[i].read.pkt_addr = dma_addr;
1595         }
1596
1597         /* update state of internal queue structure */
1598         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1599         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1600                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1601
1602         /* no errors */
1603         return 0;
1604 }
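
/*
 * Editor's note on the refill arithmetic above (illustrative, assuming
 * rx_free_trigger is initialised to rx_free_thresh - 1 at queue reset):
 * with nb_rx_desc = 128 and rx_free_thresh = 32, the first refill fills
 * sw_ring[0..31] and rx_free_trigger then advances 31 -> 63 -> 95 -> 127
 * before wrapping back to 31.
 */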
1605
1606 static inline uint16_t
1607 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1608                          uint16_t nb_pkts)
1609 {
1610         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1611         int i;
1612
1613         /* how many packets are ready to return? */
1614         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1615
1616         /* copy mbuf pointers to the application's packet list */
1617         for (i = 0; i < nb_pkts; ++i)
1618                 rx_pkts[i] = stage[i];
1619
1620         /* update internal queue state */
1621         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1622         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1623
1624         return nb_pkts;
1625 }
1626
1627 static inline uint16_t
1628 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1629              uint16_t nb_pkts)
1630 {
1631         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1632         uint16_t nb_rx = 0;
1633
1634         /* Any previously recv'd pkts will be returned from the Rx stage */
1635         if (rxq->rx_nb_avail)
1636                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1637
1638         /* Scan the H/W ring for packets to receive */
1639         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1640
1641         /* update internal queue state */
1642         rxq->rx_next_avail = 0;
1643         rxq->rx_nb_avail = nb_rx;
1644         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1645
1646         /* if required, allocate new buffers to replenish descriptors */
1647         if (rxq->rx_tail > rxq->rx_free_trigger) {
1648                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1649
1650                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1651                         int i, j;
1652
1653                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1654                                    "queue_id=%u", (unsigned) rxq->port_id,
1655                                    (unsigned) rxq->queue_id);
1656
1657                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1658                                 rxq->rx_free_thresh;
1659
1660                         /*
1661                          * Need to rewind any previous receives if we cannot
1662                          * allocate new buffers to replenish the old ones.
1663                          */
1664                         rxq->rx_nb_avail = 0;
1665                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1666                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1667                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1668
1669                         return 0;
1670                 }
1671
1672                 /* update tail pointer */
1673                 rte_wmb();
1674                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1675                                             cur_free_trigger);
1676         }
1677
1678         if (rxq->rx_tail >= rxq->nb_rx_desc)
1679                 rxq->rx_tail = 0;
1680
1681         /* received any packets this loop? */
1682         if (rxq->rx_nb_avail)
1683                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1684
1685         return 0;
1686 }
1687
1688 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1689 uint16_t
1690 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1691                            uint16_t nb_pkts)
1692 {
1693         uint16_t nb_rx;
1694
1695         if (unlikely(nb_pkts == 0))
1696                 return 0;
1697
1698         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1699                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1700
1701         /* request is relatively large, chunk it up */
1702         nb_rx = 0;
1703         while (nb_pkts) {
1704                 uint16_t ret, n;
1705
1706                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1707                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1708                 nb_rx = (uint16_t)(nb_rx + ret);
1709                 nb_pkts = (uint16_t)(nb_pkts - ret);
1710                 if (ret < n)
1711                         break;
1712         }
1713
1714         return nb_rx;
1715 }
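
/*
 * Editor's example of the chunking above (assuming the usual
 * RTE_PMD_IXGBE_RX_MAX_BURST of 32): a request for 100 packets is served as
 * bursts of 32 + 32 + 32 + 4, and the loop stops early as soon as a burst
 * returns fewer packets than it asked for.
 */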
1716
1717 uint16_t
1718 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1719                 uint16_t nb_pkts)
1720 {
1721         struct ixgbe_rx_queue *rxq;
1722         volatile union ixgbe_adv_rx_desc *rx_ring;
1723         volatile union ixgbe_adv_rx_desc *rxdp;
1724         struct ixgbe_rx_entry *sw_ring;
1725         struct ixgbe_rx_entry *rxe;
1726         struct rte_mbuf *rxm;
1727         struct rte_mbuf *nmb;
1728         union ixgbe_adv_rx_desc rxd;
1729         uint64_t dma_addr;
1730         uint32_t staterr;
1731         uint32_t pkt_info;
1732         uint16_t pkt_len;
1733         uint16_t rx_id;
1734         uint16_t nb_rx;
1735         uint16_t nb_hold;
1736         uint64_t pkt_flags;
1737         uint64_t vlan_flags;
1738
1739         nb_rx = 0;
1740         nb_hold = 0;
1741         rxq = rx_queue;
1742         rx_id = rxq->rx_tail;
1743         rx_ring = rxq->rx_ring;
1744         sw_ring = rxq->sw_ring;
1745         vlan_flags = rxq->vlan_flags;
1746         while (nb_rx < nb_pkts) {
1747                 /*
1748                  * The order of operations here is important as the DD status
1749                  * bit must not be read after any other descriptor fields.
1750                  * rx_ring and rxdp are pointing to volatile data so the order
1751                  * of accesses cannot be reordered by the compiler. If they were
1752                  * not volatile, they could be reordered which could lead to
1753                  * using invalid descriptor fields when read from rxd.
1754                  */
1755                 rxdp = &rx_ring[rx_id];
1756                 staterr = rxdp->wb.upper.status_error;
1757                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1758                         break;
1759                 rxd = *rxdp;
1760
1761                 /*
1762                  * End of packet.
1763                  *
1764                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1765                  * is likely to be invalid and to be dropped by the various
1766                  * validation checks performed by the network stack.
1767                  *
1768                  * Allocate a new mbuf to replenish the RX ring descriptor.
1769                  * If the allocation fails:
1770                  *    - arrange for that RX descriptor to be the first one
1771                  *      being parsed the next time the receive function is
1772                  *      invoked [on the same queue].
1773                  *
1774                  *    - Stop parsing the RX ring and return immediately.
1775                  *
1776                  * This policy does not drop the packet received in the RX
1777                  * descriptor for which the allocation of a new mbuf failed.
1778                  * Thus, it allows that packet to be retrieved later, once
1779                  * mbufs have been freed in the meantime.
1780                  * As a side effect, holding RX descriptors instead of
1781                  * systematically giving them back to the NIC may lead to
1782                  * RX ring exhaustion situations.
1783                  * However, the NIC can gracefully prevent such situations
1784                  * from happening by sending specific "back-pressure" flow
1785                  * control frames to its peer(s).
1786                  */
1787                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1788                            "ext_err_stat=0x%08x pkt_len=%u",
1789                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1790                            (unsigned) rx_id, (unsigned) staterr,
1791                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1792
1793                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1794                 if (nmb == NULL) {
1795                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1796                                    "queue_id=%u", (unsigned) rxq->port_id,
1797                                    (unsigned) rxq->queue_id);
1798                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1799                         break;
1800                 }
1801
1802                 nb_hold++;
1803                 rxe = &sw_ring[rx_id];
1804                 rx_id++;
1805                 if (rx_id == rxq->nb_rx_desc)
1806                         rx_id = 0;
1807
1808                 /* Prefetch next mbuf while processing current one. */
1809                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1810
1811                 /*
1812                  * When next RX descriptor is on a cache-line boundary,
1813                  * prefetch the next 4 RX descriptors and the next 8 pointers
1814                  * to mbufs.
1815                  */
1816                 if ((rx_id & 0x3) == 0) {
1817                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1818                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1819                 }
1820
1821                 rxm = rxe->mbuf;
1822                 rxe->mbuf = nmb;
1823                 dma_addr =
1824                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1825                 rxdp->read.hdr_addr = 0;
1826                 rxdp->read.pkt_addr = dma_addr;
1827
1828                 /*
1829                  * Initialize the returned mbuf.
1830                  * 1) setup generic mbuf fields:
1831                  *    - number of segments,
1832                  *    - next segment,
1833                  *    - packet length,
1834                  *    - RX port identifier.
1835                  * 2) integrate hardware offload data, if any:
1836                  *    - RSS flag & hash,
1837                  *    - IP checksum flag,
1838                  *    - VLAN TCI, if any,
1839                  *    - error flags.
1840                  */
1841                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1842                                       rxq->crc_len);
1843                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1844                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1845                 rxm->nb_segs = 1;
1846                 rxm->next = NULL;
1847                 rxm->pkt_len = pkt_len;
1848                 rxm->data_len = pkt_len;
1849                 rxm->port = rxq->port_id;
1850
1851                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1852                 /* Only valid if PKT_RX_VLAN_PKT is set in pkt_flags */
1853                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1854
1855                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1856                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1857                 pkt_flags = pkt_flags |
1858                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1859                 rxm->ol_flags = pkt_flags;
1860                 rxm->packet_type =
1861                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1862                                                        rxq->pkt_type_mask);
1863
1864                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1865                         rxm->hash.rss = rte_le_to_cpu_32(
1866                                                 rxd.wb.lower.hi_dword.rss);
1867                 else if (pkt_flags & PKT_RX_FDIR) {
1868                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1869                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1870                                         IXGBE_ATR_HASH_MASK;
1871                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1872                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1873                 }
1874                 /*
1875                  * Store the mbuf address into the next entry of the array
1876                  * of returned packets.
1877                  */
1878                 rx_pkts[nb_rx++] = rxm;
1879         }
1880         rxq->rx_tail = rx_id;
1881
1882         /*
1883          * If the number of free RX descriptors is greater than the RX free
1884          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1885          * register.
1886          * Update the RDT with the value of the last processed RX descriptor
1887          * minus 1, to guarantee that the RDT register is never equal to the
1888          * RDH register, which creates a "full" ring situation from the
1889          * hardware point of view...
1890          */
1891         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1892         if (nb_hold > rxq->rx_free_thresh) {
1893                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1894                            "nb_hold=%u nb_rx=%u",
1895                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1896                            (unsigned) rx_id, (unsigned) nb_hold,
1897                            (unsigned) nb_rx);
1898                 rx_id = (uint16_t) ((rx_id == 0) ?
1899                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1900                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1901                 nb_hold = 0;
1902         }
1903         rxq->nb_rx_hold = nb_hold;
1904         return nb_rx;
1905 }
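
/*
 * Editor's note on the RDT update above (illustrative): descriptors are
 * returned to the NIC lazily.  With rx_free_thresh = 32, for example, the
 * tail register is rewritten only after more than 32 descriptors have been
 * consumed, and the value written is "last processed index - 1" so that RDT
 * never catches up with RDH.
 */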
1906
1907 /**
1908  * Detect an RSC descriptor.
1909  */
1910 static inline uint32_t
1911 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1912 {
1913         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1914                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1915 }
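
/*
 * Editor's note: a non-zero RSC count marks a hardware receive-side
 * coalescing completion; ixgbe_recv_pkts_lro() below uses the result only
 * as a boolean to decide whether the next segment index comes from the
 * descriptor's NEXTP field or is simply the next ring entry.
 */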
1916
1917 /**
1918  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1919  *
1920  * Fill the following info in the HEAD buffer of the Rx cluster:
1921  *    - RX port identifier
1922  *    - hardware offload data, if any:
1923  *      - RSS flag & hash
1924  *      - IP checksum flag
1925  *      - VLAN TCI, if any
1926  *      - error flags
1927  * @head HEAD of the packet cluster
1928  * @desc HW descriptor to get data from
1929  * @rxq Pointer to the Rx queue
1930  */
1931 static inline void
1932 ixgbe_fill_cluster_head_buf(
1933         struct rte_mbuf *head,
1934         union ixgbe_adv_rx_desc *desc,
1935         struct ixgbe_rx_queue *rxq,
1936         uint32_t staterr)
1937 {
1938         uint32_t pkt_info;
1939         uint64_t pkt_flags;
1940
1941         head->port = rxq->port_id;
1942
1943         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1944          * set in the pkt_flags field.
1945          */
1946         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1947         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1948         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1949         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1950         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1951         head->ol_flags = pkt_flags;
1952         head->packet_type =
1953                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1954
1955         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1956                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1957         else if (pkt_flags & PKT_RX_FDIR) {
1958                 head->hash.fdir.hash =
1959                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1960                                                           & IXGBE_ATR_HASH_MASK;
1961                 head->hash.fdir.id =
1962                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1963         }
1964 }
1965
1966 /**
1967  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1968  *
1969  * @rx_queue Rx queue handle
1970  * @rx_pkts table of received packets
1971  * @nb_pkts size of rx_pkts table
1972  * @bulk_alloc if TRUE, bulk allocation is used to refill the HW ring
1973  *
1974  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1975  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1976  *
1977  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1978  * 1) When non-EOP RSC completion arrives:
1979  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1980  *       segment's data length.
1981  *    b) Set the "next" pointer of the current segment to point to the segment
1982  *       at the NEXTP index.
1983  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1984  *       in the sw_rsc_ring.
1985  * 2) When EOP arrives we just update the cluster's total length and offload
1986  *    flags and deliver the cluster up to the upper layers. In our case - put it
1987  *    in the rx_pkts table.
1988  *
1989  * Returns the number of received packets/clusters (according to the "bulk
1990  * receive" interface).
1991  */
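/*
 * Editor's sketch of the non-EOP handling described above (illustrative
 * only, simplified from the code below):
 *
 *     first_seg->pkt_len += data_len;                // (a) grow the HEAD
 *     rxm->next = sw_ring[nextp_id].mbuf;            // (b) chain segments
 *     sw_sc_ring[nextp_id].fbuf = first_seg;         // (c) pass HEAD along
 *
 * where nextp_id is taken from the NEXTP field for RSC completions and is
 * simply rx_id + 1 (modulo the ring size) for plain scattered Rx.
 */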
1992 static inline uint16_t
1993 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1994                     bool bulk_alloc)
1995 {
1996         struct ixgbe_rx_queue *rxq = rx_queue;
1997         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1998         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1999         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2000         uint16_t rx_id = rxq->rx_tail;
2001         uint16_t nb_rx = 0;
2002         uint16_t nb_hold = rxq->nb_rx_hold;
2003         uint16_t prev_id = rxq->rx_tail;
2004
2005         while (nb_rx < nb_pkts) {
2006                 bool eop;
2007                 struct ixgbe_rx_entry *rxe;
2008                 struct ixgbe_scattered_rx_entry *sc_entry;
2009                 struct ixgbe_scattered_rx_entry *next_sc_entry;
2010                 struct ixgbe_rx_entry *next_rxe = NULL;
2011                 struct rte_mbuf *first_seg;
2012                 struct rte_mbuf *rxm;
2013                 struct rte_mbuf *nmb;
2014                 union ixgbe_adv_rx_desc rxd;
2015                 uint16_t data_len;
2016                 uint16_t next_id;
2017                 volatile union ixgbe_adv_rx_desc *rxdp;
2018                 uint32_t staterr;
2019
2020 next_desc:
2021                 /*
2022                  * The code in this whole file uses the volatile pointer to
2023                  * ensure the read ordering of the status and the rest of the
2024                  * descriptor fields (on the compiler level only!!!). This is so
2025                  * UGLY - why not just use the compiler barrier instead? DPDK
2026                  * even has the rte_compiler_barrier() for that.
2027                  *
2028                  * But most importantly this is just wrong because this doesn't
2029                  * ensure memory ordering in a general case at all. For
2030                  * instance, DPDK is supposed to work on Power CPUs where
2031                  * compiler barrier may just not be enough!
2032                  *
2033                  * I tried to write only this function properly to have a
2034                  * starting point (as a part of an LRO/RSC series) but the
2035                  * compiler cursed at me when I tried to cast away the
2036                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2037                  * keeping it the way it is for now.
2038                  *
2039                  * The code in this file is broken in so many other places and
2040                  * will just not work on a big endian CPU anyway therefore the
2041                  * lines below will have to be revisited together with the rest
2042                  * of the ixgbe PMD.
2043                  *
2044                  * TODO:
2045                  *    - Get rid of "volatile" crap and let the compiler do its
2046                  *      job.
2047                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2048                  *      memory ordering below.
2049                  */
2050                 rxdp = &rx_ring[rx_id];
2051                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2052
2053                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2054                         break;
2055
2056                 rxd = *rxdp;
2057
2058                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2059                                   "staterr=0x%x data_len=%u",
2060                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2061                            rte_le_to_cpu_16(rxd.wb.upper.length));
2062
2063                 if (!bulk_alloc) {
2064                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2065                         if (nmb == NULL) {
2066                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2067                                                   "port_id=%u queue_id=%u",
2068                                            rxq->port_id, rxq->queue_id);
2069
2070                                 rte_eth_devices[rxq->port_id].data->
2071                                                         rx_mbuf_alloc_failed++;
2072                                 break;
2073                         }
2074                 } else if (nb_hold > rxq->rx_free_thresh) {
2075                         uint16_t next_rdt = rxq->rx_free_trigger;
2076
2077                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2078                                 rte_wmb();
2079                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2080                                                             next_rdt);
2081                                 nb_hold -= rxq->rx_free_thresh;
2082                         } else {
2083                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2084                                                   "port_id=%u queue_id=%u",
2085                                            rxq->port_id, rxq->queue_id);
2086
2087                                 rte_eth_devices[rxq->port_id].data->
2088                                                         rx_mbuf_alloc_failed++;
2089                                 break;
2090                         }
2091                 }
2092
2093                 nb_hold++;
2094                 rxe = &sw_ring[rx_id];
2095                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2096
2097                 next_id = rx_id + 1;
2098                 if (next_id == rxq->nb_rx_desc)
2099                         next_id = 0;
2100
2101                 /* Prefetch next mbuf while processing current one. */
2102                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2103
2104                 /*
2105                  * When next RX descriptor is on a cache-line boundary,
2106                  * prefetch the next 4 RX descriptors and the next 8 pointers
2107                  * to mbufs.
2108                  */
2109                 if ((next_id & 0x3) == 0) {
2110                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2111                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2112                 }
2113
2114                 rxm = rxe->mbuf;
2115
2116                 if (!bulk_alloc) {
2117                         __le64 dma =
2118                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2119                         /*
2120                          * Update RX descriptor with the physical address of the
2121                          * new data buffer of the new allocated mbuf.
2122                          */
2123                         rxe->mbuf = nmb;
2124
2125                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2126                         rxdp->read.hdr_addr = 0;
2127                         rxdp->read.pkt_addr = dma;
2128                 } else
2129                         rxe->mbuf = NULL;
2130
2131                 /*
2132                  * Set data length & data buffer address of mbuf.
2133                  */
2134                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2135                 rxm->data_len = data_len;
2136
2137                 if (!eop) {
2138                         uint16_t nextp_id;
2139                         /*
2140                          * Get next descriptor index:
2141                          *  - For RSC it's in the NEXTP field.
2142                          *  - For a scattered packet - it's just a following
2143                          *    descriptor.
2144                          */
2145                         if (ixgbe_rsc_count(&rxd))
2146                                 nextp_id =
2147                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2148                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2149                         else
2150                                 nextp_id = next_id;
2151
2152                         next_sc_entry = &sw_sc_ring[nextp_id];
2153                         next_rxe = &sw_ring[nextp_id];
2154                         rte_ixgbe_prefetch(next_rxe);
2155                 }
2156
2157                 sc_entry = &sw_sc_ring[rx_id];
2158                 first_seg = sc_entry->fbuf;
2159                 sc_entry->fbuf = NULL;
2160
2161                 /*
2162                  * If this is the first buffer of the received packet,
2163                  * set the pointer to the first mbuf of the packet and
2164                  * initialize its context.
2165                  * Otherwise, update the total length and the number of segments
2166                  * of the current scattered packet, and update the pointer to
2167                  * the last mbuf of the current packet.
2168                  */
2169                 if (first_seg == NULL) {
2170                         first_seg = rxm;
2171                         first_seg->pkt_len = data_len;
2172                         first_seg->nb_segs = 1;
2173                 } else {
2174                         first_seg->pkt_len += data_len;
2175                         first_seg->nb_segs++;
2176                 }
2177
2178                 prev_id = rx_id;
2179                 rx_id = next_id;
2180
2181                 /*
2182                  * If this is not the last buffer of the received packet, update
2183                  * the pointer to the first mbuf at the NEXTP entry in the
2184                  * sw_sc_ring and continue to parse the RX ring.
2185                  */
2186                 if (!eop && next_rxe) {
2187                         rxm->next = next_rxe->mbuf;
2188                         next_sc_entry->fbuf = first_seg;
2189                         goto next_desc;
2190                 }
2191
2192                 /* Initialize the first mbuf of the returned packet */
2193                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2194
2195                 /*
2196                  * Deal with the case when HW CRC stripping is disabled.
2197                  * That can't happen when LRO is enabled, but still could
2198                  * happen for scattered RX mode.
2199                  */
2200                 first_seg->pkt_len -= rxq->crc_len;
2201                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2202                         struct rte_mbuf *lp;
2203
2204                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2205                                 ;
2206
2207                         first_seg->nb_segs--;
2208                         lp->data_len -= rxq->crc_len - rxm->data_len;
2209                         lp->next = NULL;
2210                         rte_pktmbuf_free_seg(rxm);
2211                 } else
2212                         rxm->data_len -= rxq->crc_len;
2213
2214                 /* Prefetch data of first segment, if configured to do so. */
2215                 rte_packet_prefetch((char *)first_seg->buf_addr +
2216                         first_seg->data_off);
2217
2218                 /*
2219                  * Store the mbuf address into the next entry of the array
2220                  * of returned packets.
2221                  */
2222                 rx_pkts[nb_rx++] = first_seg;
2223         }
2224
2225         /*
2226          * Record index of the next RX descriptor to probe.
2227          */
2228         rxq->rx_tail = rx_id;
2229
2230         /*
2231          * If the number of free RX descriptors is greater than the RX free
2232          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2233          * register.
2234          * Update the RDT with the value of the last processed RX descriptor
2235          * minus 1, to guarantee that the RDT register is never equal to the
2236          * RDH register, which creates a "full" ring situation from the
2237          * hardware point of view...
2238          */
2239         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2240                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2241                            "nb_hold=%u nb_rx=%u",
2242                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2243
2244                 rte_wmb();
2245                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2246                 nb_hold = 0;
2247         }
2248
2249         rxq->nb_rx_hold = nb_hold;
2250         return nb_rx;
2251 }
2252
2253 uint16_t
2254 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2255                                  uint16_t nb_pkts)
2256 {
2257         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2258 }
2259
2260 uint16_t
2261 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2262                                uint16_t nb_pkts)
2263 {
2264         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2265 }
2266
2267 /*********************************************************************
2268  *
2269  *  Queue management functions
2270  *
2271  **********************************************************************/
2272
2273 static void __attribute__((cold))
2274 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2275 {
2276         unsigned i;
2277
2278         if (txq->sw_ring != NULL) {
2279                 for (i = 0; i < txq->nb_tx_desc; i++) {
2280                         if (txq->sw_ring[i].mbuf != NULL) {
2281                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2282                                 txq->sw_ring[i].mbuf = NULL;
2283                         }
2284                 }
2285         }
2286 }
2287
2288 static void __attribute__((cold))
2289 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2290 {
2291         if (txq != NULL &&
2292             txq->sw_ring != NULL)
2293                 rte_free(txq->sw_ring);
2294 }
2295
2296 static void __attribute__((cold))
2297 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2298 {
2299         if (txq != NULL && txq->ops != NULL) {
2300                 txq->ops->release_mbufs(txq);
2301                 txq->ops->free_swring(txq);
2302                 rte_free(txq);
2303         }
2304 }
2305
2306 void __attribute__((cold))
2307 ixgbe_dev_tx_queue_release(void *txq)
2308 {
2309         ixgbe_tx_queue_release(txq);
2310 }
2311
2312 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2313 static void __attribute__((cold))
2314 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2315 {
2316         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2317         struct ixgbe_tx_entry *txe = txq->sw_ring;
2318         uint16_t prev, i;
2319
2320         /* Zero out HW ring memory */
2321         for (i = 0; i < txq->nb_tx_desc; i++) {
2322                 txq->tx_ring[i] = zeroed_desc;
2323         }
2324
2325         /* Initialize SW ring entries */
2326         prev = (uint16_t) (txq->nb_tx_desc - 1);
2327         for (i = 0; i < txq->nb_tx_desc; i++) {
2328                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2329
2330                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2331                 txe[i].mbuf = NULL;
2332                 txe[i].last_id = i;
2333                 txe[prev].next_id = i;
2334                 prev = i;
2335         }
2336
2337         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2338         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2339
2340         txq->tx_tail = 0;
2341         txq->nb_tx_used = 0;
2342         /*
2343          * Always allow 1 descriptor to be un-allocated to avoid
2344          * a H/W race condition
2345          */
2346         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2347         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2348         txq->ctx_curr = 0;
2349         memset((void *)&txq->ctx_cache, 0,
2350                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2351 }
2352
2353 static const struct ixgbe_txq_ops def_txq_ops = {
2354         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2355         .free_swring = ixgbe_tx_free_swring,
2356         .reset = ixgbe_reset_tx_queue,
2357 };
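
/*
 * Editor's note: this is the default vtable for ixgbe Tx queues;
 * ixgbe_tx_queue_release() above invokes release_mbufs() and free_swring()
 * through it before freeing the queue structure itself.
 */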
2358
2359 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2360  * the queue parameters. Used in tx_queue_setup by primary process and then
2361  * in dev_init by secondary process when attaching to an existing ethdev.
2362  */
2363 void __attribute__((cold))
2364 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2365 {
2366         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2367         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2368                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2369                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2370                 dev->tx_pkt_prepare = NULL;
2371 #ifdef RTE_IXGBE_INC_VECTOR
2372                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2373                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2374                                         ixgbe_txq_vec_setup(txq) == 0)) {
2375                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2376                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2377                 } else
2378 #endif
2379                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2380         } else {
2381                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2382                 PMD_INIT_LOG(DEBUG,
2383                                 " - txq_flags = %lx [IXGBE_SIMPLE_FLAGS=%lx]",
2384                                 (unsigned long)txq->txq_flags,
2385                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2386                 PMD_INIT_LOG(DEBUG,
2387                                 " - tx_rs_thresh = %lu [RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2388                                 (unsigned long)txq->tx_rs_thresh,
2389                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2390                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2391                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2392         }
2393 }
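
/*
 * Editor's example for the selection above (assuming the usual
 * RTE_PMD_IXGBE_TX_MAX_BURST of 32): a queue configured with
 * txq_flags = IXGBE_SIMPLE_FLAGS and tx_rs_thresh = 32 gets the simple (or
 * vector, when available) transmit path with no tx_pkt_prepare callback,
 * while a queue requesting offloads or multi-segment Tx falls back to
 * ixgbe_xmit_pkts with ixgbe_prep_pkts as its prepare callback.
 */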
2394
2395 int __attribute__((cold))
2396 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2397                          uint16_t queue_idx,
2398                          uint16_t nb_desc,
2399                          unsigned int socket_id,
2400                          const struct rte_eth_txconf *tx_conf)
2401 {
2402         const struct rte_memzone *tz;
2403         struct ixgbe_tx_queue *txq;
2404         struct ixgbe_hw     *hw;
2405         uint16_t tx_rs_thresh, tx_free_thresh;
2406
2407         PMD_INIT_FUNC_TRACE();
2408         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2409
2410         /*
2411          * Validate the number of transmit descriptors.
2412          * It must lie between the hardware minimum and maximum, and must
2413          * be a multiple of IXGBE_TXD_ALIGN.
2414          */
2415         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2416                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2417                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2418                 return -EINVAL;
2419         }
2420
2421         /*
2422          * The following two parameters control the setting of the RS bit on
2423          * transmit descriptors.
2424          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2425          * descriptors have been used.
2426          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2427          * descriptors are used or if the number of descriptors required
2428          * to transmit a packet is greater than the number of free TX
2429          * descriptors.
2430          * The following constraints must be satisfied:
2431          *  tx_rs_thresh must be greater than 0.
2432          *  tx_rs_thresh must be less than the size of the ring minus 2.
2433          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2434          *  tx_rs_thresh must be a divisor of the ring size.
2435          *  tx_free_thresh must be greater than 0.
2436          *  tx_free_thresh must be less than the size of the ring minus 3.
2437          * One descriptor in the TX ring is used as a sentinel to avoid a
2438          * H/W race condition, hence the maximum threshold constraints.
2439          * When set to zero use default values.
2440          */
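        /*
         * Editor's example of a combination that satisfies all of the rules
         * above (illustrative only): nb_desc = 512, tx_rs_thresh = 32,
         * tx_free_thresh = 64: 32 > 0, 32 < 510, 32 <= 64, 512 % 32 == 0,
         * 64 > 0 and 64 < 509.
         */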
2441         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2442                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2443         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2444                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2445         if (tx_rs_thresh >= (nb_desc - 2)) {
2446                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2447                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2448                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2449                         (int)dev->data->port_id, (int)queue_idx);
2450                 return -(EINVAL);
2451         }
2452         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2453                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2454                         "(tx_rs_thresh=%u port=%d queue=%d)",
2455                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2456                         (int)dev->data->port_id, (int)queue_idx);
2457                 return -(EINVAL);
2458         }
2459         if (tx_free_thresh >= (nb_desc - 3)) {
2460                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the number of "
2462                              "TX descriptors minus 3. (tx_free_thresh=%u "
2463                              "port=%d queue=%d)",
2464                              (unsigned int)tx_free_thresh,
2465                              (int)dev->data->port_id, (int)queue_idx);
2466                 return -(EINVAL);
2467         }
2468         if (tx_rs_thresh > tx_free_thresh) {
2469                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2470                              "tx_free_thresh. (tx_free_thresh=%u "
2471                              "tx_rs_thresh=%u port=%d queue=%d)",
2472                              (unsigned int)tx_free_thresh,
2473                              (unsigned int)tx_rs_thresh,
2474                              (int)dev->data->port_id,
2475                              (int)queue_idx);
2476                 return -(EINVAL);
2477         }
2478         if ((nb_desc % tx_rs_thresh) != 0) {
2479                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2480                              "number of TX descriptors. (tx_rs_thresh=%u "
2481                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2482                              (int)dev->data->port_id, (int)queue_idx);
2483                 return -(EINVAL);
2484         }
2485
2486         /*
2487          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2488          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2489          * by the NIC and all descriptors are written back after the NIC
2490          * accumulates WTHRESH descriptors.
2491          */
2492         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2493                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2494                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2495                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2496                              (int)dev->data->port_id, (int)queue_idx);
2497                 return -(EINVAL);
2498         }
2499
2500         /* Free memory prior to re-allocation if needed... */
2501         if (dev->data->tx_queues[queue_idx] != NULL) {
2502                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2503                 dev->data->tx_queues[queue_idx] = NULL;
2504         }
2505
2506         /* First allocate the tx queue data structure */
2507         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2508                                  RTE_CACHE_LINE_SIZE, socket_id);
2509         if (txq == NULL)
2510                 return -ENOMEM;
2511
2512         /*
2513          * Allocate TX ring hardware descriptors. A memzone large enough to
2514          * handle the maximum ring size is allocated in order to allow for
2515          * resizing in later calls to the queue setup function.
2516          */
2517         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2518                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2519                         IXGBE_ALIGN, socket_id);
2520         if (tz == NULL) {
2521                 ixgbe_tx_queue_release(txq);
2522                 return -ENOMEM;
2523         }
2524
2525         txq->nb_tx_desc = nb_desc;
2526         txq->tx_rs_thresh = tx_rs_thresh;
2527         txq->tx_free_thresh = tx_free_thresh;
2528         txq->pthresh = tx_conf->tx_thresh.pthresh;
2529         txq->hthresh = tx_conf->tx_thresh.hthresh;
2530         txq->wthresh = tx_conf->tx_thresh.wthresh;
2531         txq->queue_id = queue_idx;
2532         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2533                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2534         txq->port_id = dev->data->port_id;
2535         txq->txq_flags = tx_conf->txq_flags;
2536         txq->ops = &def_txq_ops;
2537         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2538
2539         /*
2540          * Use VFTDT as the TX tail register when running as a virtual function
2541          */
2542         if (hw->mac.type == ixgbe_mac_82599_vf ||
2543             hw->mac.type == ixgbe_mac_X540_vf ||
2544             hw->mac.type == ixgbe_mac_X550_vf ||
2545             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2546             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2547                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2548         else
2549                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2550
2551         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2552         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2553
2554         /* Allocate software ring */
2555         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2556                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2557                                 RTE_CACHE_LINE_SIZE, socket_id);
2558         if (txq->sw_ring == NULL) {
2559                 ixgbe_tx_queue_release(txq);
2560                 return -ENOMEM;
2561         }
2562         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2563                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2564
2565         /* set up vector or scalar TX function as appropriate */
2566         ixgbe_set_tx_function(dev, txq);
2567
2568         txq->ops->reset(txq);
2569
2570         dev->data->tx_queues[queue_idx] = txq;
2571
2572
2573         return 0;
2574 }
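/*
 * Minimal usage sketch (application side, simplified; exact prototypes depend
 * on the DPDK release). An application reaches the setup function above
 * through the generic ethdev API, assuming dev_info was filled in by
 * rte_eth_dev_info_get():
 *
 *     struct rte_eth_txconf txconf = dev_info.default_txconf;
 *     txconf.tx_rs_thresh = 32;
 *     ret = rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(), &txconf);
 *
 * The values are only illustrative; they must satisfy the threshold
 * constraints validated above.
 */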
2575
2576 /**
2577  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2578  *
2579  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2580  * in the sw_rsc_ring is not set to NULL but rather points to the next
2581  * mbuf of this RSC aggregation (that has not been completed yet and still
2582  * resides on the HW ring). So, instead of calling for rte_pktmbuf_free() we
2583  * will just free first "nb_segs" segments of the cluster explicitly by calling
2584  * an rte_pktmbuf_free_seg().
2585  *
2586  * @m scattered cluster head
2587  */
2588 static void __attribute__((cold))
2589 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2590 {
2591         uint8_t i, nb_segs = m->nb_segs;
2592         struct rte_mbuf *next_seg;
2593
2594         for (i = 0; i < nb_segs; i++) {
2595                 next_seg = m->next;
2596                 rte_pktmbuf_free_seg(m);
2597                 m = next_seg;
2598         }
2599 }
2600
2601 static void __attribute__((cold))
2602 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2603 {
2604         unsigned i;
2605
2606 #ifdef RTE_IXGBE_INC_VECTOR
2607         /* SSE Vector driver has a different way of releasing mbufs. */
2608         if (rxq->rx_using_sse) {
2609                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2610                 return;
2611         }
2612 #endif
2613
2614         if (rxq->sw_ring != NULL) {
2615                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2616                         if (rxq->sw_ring[i].mbuf != NULL) {
2617                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2618                                 rxq->sw_ring[i].mbuf = NULL;
2619                         }
2620                 }
2621                 if (rxq->rx_nb_avail) {
2622                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2623                                 struct rte_mbuf *mb;
2624
2625                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2626                                 rte_pktmbuf_free_seg(mb);
2627                         }
2628                         rxq->rx_nb_avail = 0;
2629                 }
2630         }
2631
2632         if (rxq->sw_sc_ring)
2633                 for (i = 0; i < rxq->nb_rx_desc; i++)
2634                         if (rxq->sw_sc_ring[i].fbuf) {
2635                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2636                                 rxq->sw_sc_ring[i].fbuf = NULL;
2637                         }
2638 }
2639
2640 static void __attribute__((cold))
2641 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2642 {
2643         if (rxq != NULL) {
2644                 ixgbe_rx_queue_release_mbufs(rxq);
2645                 rte_free(rxq->sw_ring);
2646                 rte_free(rxq->sw_sc_ring);
2647                 rte_free(rxq);
2648         }
2649 }
2650
2651 void __attribute__((cold))
2652 ixgbe_dev_rx_queue_release(void *rxq)
2653 {
2654         ixgbe_rx_queue_release(rxq);
2655 }
2656
2657 /*
2658  * Check if Rx Burst Bulk Alloc function can be used.
2659  * Return
2660  *        0: the preconditions are satisfied and the bulk allocation function
2661  *           can be used.
2662  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2663  *           function must be used.
2664  */
2665 static inline int __attribute__((cold))
2666 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2667 {
2668         int ret = 0;
2669
2670         /*
2671          * Make sure the following pre-conditions are satisfied:
2672          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2673          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2674          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2675          * Scattered packets are not supported.  This should be checked
2676          * outside of this function.
2677          */
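        /*
         * Example (sketch, assuming RTE_PMD_IXGBE_RX_MAX_BURST is 32): a queue
         * with nb_rx_desc = 128 and rx_free_thresh = 32 passes all three
         * checks (32 >= 32, 32 < 128, 128 % 32 == 0), whereas
         * rx_free_thresh = 16 fails the first one and forces the default
         * Rx burst function for the whole port.
         */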
2678         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2679                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2680                              "rxq->rx_free_thresh=%d, "
2681                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2682                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2683                 ret = -EINVAL;
2684         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2685                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2686                              "rxq->rx_free_thresh=%d, "
2687                              "rxq->nb_rx_desc=%d",
2688                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2689                 ret = -EINVAL;
2690         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2691                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2692                              "rxq->nb_rx_desc=%d, "
2693                              "rxq->rx_free_thresh=%d",
2694                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2695                 ret = -EINVAL;
2696         }
2697
2698         return ret;
2699 }
2700
2701 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2702 static void __attribute__((cold))
2703 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2704 {
2705         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2706         unsigned i;
2707         uint16_t len = rxq->nb_rx_desc;
2708
2709         /*
2710          * By default, the Rx queue setup function allocates enough memory for
2711          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2712          * extra memory at the end of the descriptor ring to be zeroed out.
2713          */
2714         if (adapter->rx_bulk_alloc_allowed)
2715                 /* zero out extra memory */
2716                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
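        /*
         * Example (sketch): with nb_rx_desc = 512 and an assumed
         * RTE_PMD_IXGBE_RX_MAX_BURST of 32, the loop below zeroes 544
         * descriptors, so the bulk-alloc look-ahead never reads stale data
         * past the real ring.
         */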
2717
2718         /*
2719          * Zero out HW ring memory. Zero out extra memory at the end of
2720          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2721          * reads extra memory as zeros.
2722          */
2723         for (i = 0; i < len; i++) {
2724                 rxq->rx_ring[i] = zeroed_desc;
2725         }
2726
2727         /*
2728          * initialize extra software ring entries. Space for these extra
2729          * entries is always allocated
2730          */
2731         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2732         for (i = rxq->nb_rx_desc; i < len; ++i) {
2733                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2734         }
2735
2736         rxq->rx_nb_avail = 0;
2737         rxq->rx_next_avail = 0;
2738         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2739         rxq->rx_tail = 0;
2740         rxq->nb_rx_hold = 0;
2741         rxq->pkt_first_seg = NULL;
2742         rxq->pkt_last_seg = NULL;
2743
2744 #ifdef RTE_IXGBE_INC_VECTOR
2745         rxq->rxrearm_start = 0;
2746         rxq->rxrearm_nb = 0;
2747 #endif
2748 }
2749
2750 int __attribute__((cold))
2751 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2752                          uint16_t queue_idx,
2753                          uint16_t nb_desc,
2754                          unsigned int socket_id,
2755                          const struct rte_eth_rxconf *rx_conf,
2756                          struct rte_mempool *mp)
2757 {
2758         const struct rte_memzone *rz;
2759         struct ixgbe_rx_queue *rxq;
2760         struct ixgbe_hw     *hw;
2761         uint16_t len;
2762         struct ixgbe_adapter *adapter =
2763                 (struct ixgbe_adapter *)dev->data->dev_private;
2764
2765         PMD_INIT_FUNC_TRACE();
2766         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2767
2768         /*
2769          * Validate number of receive descriptors.
2770          * It must not exceed hardware maximum, and must be multiple
2771          * of IXGBE_ALIGN.
2772          */
2773         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2774                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2775                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2776                 return -EINVAL;
2777         }
2778
2779         /* Free memory prior to re-allocation if needed... */
2780         if (dev->data->rx_queues[queue_idx] != NULL) {
2781                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2782                 dev->data->rx_queues[queue_idx] = NULL;
2783         }
2784
2785         /* First allocate the rx queue data structure */
2786         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2787                                  RTE_CACHE_LINE_SIZE, socket_id);
2788         if (rxq == NULL)
2789                 return -ENOMEM;
2790         rxq->mb_pool = mp;
2791         rxq->nb_rx_desc = nb_desc;
2792         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2793         rxq->queue_id = queue_idx;
2794         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2795                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2796         rxq->port_id = dev->data->port_id;
2797         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2798                                                         0 : ETHER_CRC_LEN);
2799         rxq->drop_en = rx_conf->rx_drop_en;
2800         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2801
2802         /*
2803          * The packet type in RX descriptor is different for different NICs.
2804          * Some bits are used for x550 but reserved for other NICS.
2805          * So set different masks for different NICs.
2806          */
2807         if (hw->mac.type == ixgbe_mac_X550 ||
2808             hw->mac.type == ixgbe_mac_X550EM_x ||
2809             hw->mac.type == ixgbe_mac_X550EM_a ||
2810             hw->mac.type == ixgbe_mac_X550_vf ||
2811             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2812             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2813                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2814         else
2815                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2816
2817         /*
2818          * Allocate RX ring hardware descriptors. A memzone large enough to
2819          * handle the maximum ring size is allocated in order to allow for
2820          * resizing in later calls to the queue setup function.
2821          */
2822         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2823                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2824         if (rz == NULL) {
2825                 ixgbe_rx_queue_release(rxq);
2826                 return -ENOMEM;
2827         }
2828
2829         /*
2830          * Zero init all the descriptors in the ring.
2831          */
2832         memset(rz->addr, 0, RX_RING_SZ);
2833
2834         /*
2835          * Use VFRDT/VFRDH as the RX tail/head registers when running as a VF
2836          */
2837         if (hw->mac.type == ixgbe_mac_82599_vf ||
2838             hw->mac.type == ixgbe_mac_X540_vf ||
2839             hw->mac.type == ixgbe_mac_X550_vf ||
2840             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2841             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2842                 rxq->rdt_reg_addr =
2843                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2844                 rxq->rdh_reg_addr =
2845                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2846         } else {
2847                 rxq->rdt_reg_addr =
2848                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2849                 rxq->rdh_reg_addr =
2850                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2851         }
2852
2853         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2854         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2855
2856         /*
2857          * Certain constraints must be met in order to use the bulk buffer
2858          * allocation Rx burst function. If any of the Rx queues doesn't meet them,
2859          * the feature should be disabled for the whole port.
2860          */
2861         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2862                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2863                                     "preconditions - canceling the feature for "
2864                                     "the whole port[%d]",
2865                              rxq->queue_id, rxq->port_id);
2866                 adapter->rx_bulk_alloc_allowed = false;
2867         }
2868
2869         /*
2870          * Allocate software ring. Allow for space at the end of the
2871          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2872          * function does not access an invalid memory region.
2873          */
2874         len = nb_desc;
2875         if (adapter->rx_bulk_alloc_allowed)
2876                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2877
2878         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2879                                           sizeof(struct ixgbe_rx_entry) * len,
2880                                           RTE_CACHE_LINE_SIZE, socket_id);
2881         if (!rxq->sw_ring) {
2882                 ixgbe_rx_queue_release(rxq);
2883                 return -ENOMEM;
2884         }
2885
2886         /*
2887          * Always allocate even if it's not going to be needed in order to
2888          * simplify the code.
2889          *
2890          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2891          * be requested in ixgbe_dev_rx_init(), which is called later from
2892          * dev_start() flow.
2893          */
2894         rxq->sw_sc_ring =
2895                 rte_zmalloc_socket("rxq->sw_sc_ring",
2896                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2897                                    RTE_CACHE_LINE_SIZE, socket_id);
2898         if (!rxq->sw_sc_ring) {
2899                 ixgbe_rx_queue_release(rxq);
2900                 return -ENOMEM;
2901         }
2902
2903         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2904                             "dma_addr=0x%"PRIx64,
2905                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2906                      rxq->rx_ring_phys_addr);
2907
2908         if (!rte_is_power_of_2(nb_desc)) {
2909                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2910                                     "preconditions - canceling the feature for "
2911                                     "the whole port[%d]",
2912                              rxq->queue_id, rxq->port_id);
2913                 adapter->rx_vec_allowed = false;
2914         } else
2915                 ixgbe_rxq_vec_setup(rxq);
2916
2917         dev->data->rx_queues[queue_idx] = rxq;
2918
2919         ixgbe_reset_rx_queue(adapter, rxq);
2920
2921         return 0;
2922 }
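/*
 * Usage sketch (application side, simplified; prototypes depend on the DPDK
 * release): the function above is reached through
 * rte_eth_rx_queue_setup(port_id, queue_id, nb_desc, socket_id, &rxconf, mp),
 * where mp is the mbuf mempool stored in rxq->mb_pool above.
 */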
2923
2924 uint32_t
2925 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2926 {
2927 #define IXGBE_RXQ_SCAN_INTERVAL 4
2928         volatile union ixgbe_adv_rx_desc *rxdp;
2929         struct ixgbe_rx_queue *rxq;
2930         uint32_t desc = 0;
2931
2932         rxq = dev->data->rx_queues[rx_queue_id];
2933         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2934
2935         while ((desc < rxq->nb_rx_desc) &&
2936                 (rxdp->wb.upper.status_error &
2937                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2938                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2939                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2940                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2941                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2942                                 desc - rxq->nb_rx_desc]);
2943         }
2944
2945         return desc;
2946 }
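/*
 * Note: because the scan above advances IXGBE_RXQ_SCAN_INTERVAL (4)
 * descriptors at a time, the returned count is a multiple of 4 and is
 * therefore an approximation of the number of completed descriptors.
 */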
2947
2948 int
2949 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2950 {
2951         volatile union ixgbe_adv_rx_desc *rxdp;
2952         struct ixgbe_rx_queue *rxq = rx_queue;
2953         uint32_t desc;
2954
2955         if (unlikely(offset >= rxq->nb_rx_desc))
2956                 return 0;
2957         desc = rxq->rx_tail + offset;
2958         if (desc >= rxq->nb_rx_desc)
2959                 desc -= rxq->nb_rx_desc;
2960
2961         rxdp = &rxq->rx_ring[desc];
2962         return !!(rxdp->wb.upper.status_error &
2963                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2964 }
2965
2966 int
2967 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
2968 {
2969         struct ixgbe_rx_queue *rxq = rx_queue;
2970         volatile uint32_t *status;
2971         uint32_t nb_hold, desc;
2972
2973         if (unlikely(offset >= rxq->nb_rx_desc))
2974                 return -EINVAL;
2975
2976 #ifdef RTE_IXGBE_INC_VECTOR
2977         if (rxq->rx_using_sse)
2978                 nb_hold = rxq->rxrearm_nb;
2979         else
2980 #endif
2981                 nb_hold = rxq->nb_rx_hold;
2982         if (offset >= rxq->nb_rx_desc - nb_hold)
2983                 return RTE_ETH_RX_DESC_UNAVAIL;
2984
2985         desc = rxq->rx_tail + offset;
2986         if (desc >= rxq->nb_rx_desc)
2987                 desc -= rxq->nb_rx_desc;
2988
2989         status = &rxq->rx_ring[desc].wb.upper.status_error;
2990         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
2991                 return RTE_ETH_RX_DESC_DONE;
2992
2993         return RTE_ETH_RX_DESC_AVAIL;
2994 }
2995
2996 int
2997 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
2998 {
2999         struct ixgbe_tx_queue *txq = tx_queue;
3000         volatile uint32_t *status;
3001         uint32_t desc;
3002
3003         if (unlikely(offset >= txq->nb_tx_desc))
3004                 return -EINVAL;
3005
3006         desc = txq->tx_tail + offset;
3007         /* go to next desc that has the RS bit */
3008         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3009                 txq->tx_rs_thresh;
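        /*
         * Example (sketch): with tx_rs_thresh = 32, tx_tail = 10 and
         * offset = 5, desc = 15 is rounded up to 32 above, and the DD bit of
         * that descriptor is what gets sampled below.
         */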
3010         if (desc >= txq->nb_tx_desc) {
3011                 desc -= txq->nb_tx_desc;
3012                 if (desc >= txq->nb_tx_desc)
3013                         desc -= txq->nb_tx_desc;
3014         }
3015
3016         status = &txq->tx_ring[desc].wb.status;
3017         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3018                 return RTE_ETH_TX_DESC_DONE;
3019
3020         return RTE_ETH_TX_DESC_FULL;
3021 }
3022
3023 void __attribute__((cold))
3024 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3025 {
3026         unsigned i;
3027         struct ixgbe_adapter *adapter =
3028                 (struct ixgbe_adapter *)dev->data->dev_private;
3029
3030         PMD_INIT_FUNC_TRACE();
3031
3032         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3033                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3034
3035                 if (txq != NULL) {
3036                         txq->ops->release_mbufs(txq);
3037                         txq->ops->reset(txq);
3038                 }
3039         }
3040
3041         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3042                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3043
3044                 if (rxq != NULL) {
3045                         ixgbe_rx_queue_release_mbufs(rxq);
3046                         ixgbe_reset_rx_queue(adapter, rxq);
3047                 }
3048         }
3049 }
3050
3051 void
3052 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3053 {
3054         unsigned i;
3055
3056         PMD_INIT_FUNC_TRACE();
3057
3058         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3059                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3060                 dev->data->rx_queues[i] = NULL;
3061         }
3062         dev->data->nb_rx_queues = 0;
3063
3064         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3065                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3066                 dev->data->tx_queues[i] = NULL;
3067         }
3068         dev->data->nb_tx_queues = 0;
3069 }
3070
3071 /*********************************************************************
3072  *
3073  *  Device RX/TX init functions
3074  *
3075  **********************************************************************/
3076
3077 /**
3078  * Receive Side Scaling (RSS)
3079  * See section 7.1.2.8 in the following document:
3080  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3081  *
3082  * Principles:
3083  * The source and destination IP addresses of the IP header and the source
3084  * and destination ports of TCP/UDP headers, if any, of received packets are
3085  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3086  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3087  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
3088  * RSS output index which is used as the RX queue index where to store the
3089  * received packets.
3090  * The following output is supplied in the RX write-back descriptor:
3091  *     - 32-bit result of the Microsoft RSS hash function,
3092  *     - 4-bit RSS type field.
3093  */
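/*
 * Example (sketch): if the RSS hash of a packet is 0x12345678, the RETA index
 * is 0x12345678 & 0x7F = 120, and RETA entry 120 selects the Rx queue that
 * receives the packet.
 */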
3094
3095 /*
3096  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3097  * Used as the default key.
3098  */
3099 static uint8_t rss_intel_key[40] = {
3100         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3101         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3102         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3103         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3104         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3105 };
3106
3107 static void
3108 ixgbe_rss_disable(struct rte_eth_dev *dev)
3109 {
3110         struct ixgbe_hw *hw;
3111         uint32_t mrqc;
3112         uint32_t mrqc_reg;
3113
3114         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3115         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3116         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3117         mrqc &= ~IXGBE_MRQC_RSSEN;
3118         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3119 }
3120
3121 static void
3122 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3123 {
3124         uint8_t  *hash_key;
3125         uint32_t mrqc;
3126         uint32_t rss_key;
3127         uint64_t rss_hf;
3128         uint16_t i;
3129         uint32_t mrqc_reg;
3130         uint32_t rssrk_reg;
3131
3132         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3133         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3134
3135         hash_key = rss_conf->rss_key;
3136         if (hash_key != NULL) {
3137                 /* Fill in RSS hash key */
3138                 for (i = 0; i < 10; i++) {
3139                         rss_key  = hash_key[(i * 4)];
3140                         rss_key |= hash_key[(i * 4) + 1] << 8;
3141                         rss_key |= hash_key[(i * 4) + 2] << 16;
3142                         rss_key |= hash_key[(i * 4) + 3] << 24;
3143                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3144                 }
3145         }
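        /*
         * Example (sketch): with the default rss_intel_key defined above, the
         * first four key bytes 6D 5A 56 DA are packed little-endian, so
         * RSSRK(0) is written with 0xDA565A6D.
         */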
3146
3147         /* Set configured hashing protocols in MRQC register */
3148         rss_hf = rss_conf->rss_hf;
3149         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3150         if (rss_hf & ETH_RSS_IPV4)
3151                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3152         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3153                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3154         if (rss_hf & ETH_RSS_IPV6)
3155                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3156         if (rss_hf & ETH_RSS_IPV6_EX)
3157                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3158         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3159                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3160         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3161                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3162         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3163                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3164         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3165                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3166         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3167                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3168         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3169 }
3170
3171 int
3172 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3173                           struct rte_eth_rss_conf *rss_conf)
3174 {
3175         struct ixgbe_hw *hw;
3176         uint32_t mrqc;
3177         uint64_t rss_hf;
3178         uint32_t mrqc_reg;
3179
3180         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3181
3182         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3183                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3184                         "NIC.");
3185                 return -ENOTSUP;
3186         }
3187         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3188
3189         /*
3190          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3191          *     "RSS enabling cannot be done dynamically while it must be
3192          *      preceded by a software reset"
3193          * Before changing anything, first check that the update RSS operation
3194          * does not attempt to disable RSS, if RSS was enabled at
3195          * initialization time, or does not attempt to enable RSS, if RSS was
3196          * disabled at initialization time.
3197          */
3198         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3199         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3200         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3201                 if (rss_hf != 0) /* Enable RSS */
3202                         return -(EINVAL);
3203                 return 0; /* Nothing to do */
3204         }
3205         /* RSS enabled */
3206         if (rss_hf == 0) /* Disable RSS */
3207                 return -(EINVAL);
3208         ixgbe_hw_rss_hash_set(hw, rss_conf);
3209         return 0;
3210 }
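/*
 * Usage sketch (application side, simplified; the exact prototype depends on
 * the DPDK release):
 *
 *     struct rte_eth_rss_conf conf = {
 *             .rss_key = NULL,   (keep the key already programmed)
 *             .rss_hf  = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *     };
 *     ret = rte_eth_dev_rss_hash_update(port_id, &conf);
 *
 * As checked above, this only succeeds if RSS was already enabled when the
 * port was configured; it cannot toggle RSS on or off at runtime.
 */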
3211
3212 int
3213 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3214                             struct rte_eth_rss_conf *rss_conf)
3215 {
3216         struct ixgbe_hw *hw;
3217         uint8_t *hash_key;
3218         uint32_t mrqc;
3219         uint32_t rss_key;
3220         uint64_t rss_hf;
3221         uint16_t i;
3222         uint32_t mrqc_reg;
3223         uint32_t rssrk_reg;
3224
3225         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3226         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3227         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3228         hash_key = rss_conf->rss_key;
3229         if (hash_key != NULL) {
3230                 /* Return RSS hash key */
3231                 for (i = 0; i < 10; i++) {
3232                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3233                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3234                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3235                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3236                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3237                 }
3238         }
3239
3240         /* Get RSS functions configured in MRQC register */
3241         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3242         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3243                 rss_conf->rss_hf = 0;
3244                 return 0;
3245         }
3246         rss_hf = 0;
3247         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3248                 rss_hf |= ETH_RSS_IPV4;
3249         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3250                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3251         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3252                 rss_hf |= ETH_RSS_IPV6;
3253         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3254                 rss_hf |= ETH_RSS_IPV6_EX;
3255         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3256                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3257         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3258                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3259         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3260                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3261         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3262                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3263         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3264                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3265         rss_conf->rss_hf = rss_hf;
3266         return 0;
3267 }
3268
3269 static void
3270 ixgbe_rss_configure(struct rte_eth_dev *dev)
3271 {
3272         struct rte_eth_rss_conf rss_conf;
3273         struct ixgbe_hw *hw;
3274         uint32_t reta;
3275         uint16_t i;
3276         uint16_t j;
3277         uint16_t sp_reta_size;
3278         uint32_t reta_reg;
3279
3280         PMD_INIT_FUNC_TRACE();
3281         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3282
3283         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3284
3285         /*
3286          * Fill in redirection table
3287          * The byte-swap is needed because NIC registers are in
3288          * little-endian order.
3289          */
3290         reta = 0;
3291         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3292                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3293
3294                 if (j == dev->data->nb_rx_queues)
3295                         j = 0;
3296                 reta = (reta << 8) | j;
3297                 if ((i & 3) == 3)
3298                         IXGBE_WRITE_REG(hw, reta_reg,
3299                                         rte_bswap32(reta));
3300         }
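        /*
         * Example (sketch): with 4 Rx queues the loop above cycles
         * j = 0,1,2,3,... After four iterations reta holds 0x00010203;
         * rte_bswap32() turns that into 0x03020100 so that, in the
         * little-endian register, entry 0 maps to queue 0, entry 1 to
         * queue 1, and so on.
         */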
3301
3302         /*
3303          * Configure the RSS key and the RSS protocols used to compute
3304          * the RSS hash of input packets.
3305          */
3306         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3307         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3308                 ixgbe_rss_disable(dev);
3309                 return;
3310         }
3311         if (rss_conf.rss_key == NULL)
3312                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3313         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3314 }
3315
3316 #define NUM_VFTA_REGISTERS 128
3317 #define NIC_RX_BUFFER_SIZE 0x200
3318 #define X550_RX_BUFFER_SIZE 0x180
3319
3320 static void
3321 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3322 {
3323         struct rte_eth_vmdq_dcb_conf *cfg;
3324         struct ixgbe_hw *hw;
3325         enum rte_eth_nb_pools num_pools;
3326         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3327         uint16_t pbsize;
3328         uint8_t nb_tcs; /* number of traffic classes */
3329         int i;
3330
3331         PMD_INIT_FUNC_TRACE();
3332         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3333         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3334         num_pools = cfg->nb_queue_pools;
3335         /* Check we have a valid number of pools */
3336         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3337                 ixgbe_rss_disable(dev);
3338                 return;
3339         }
3340         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3341         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3342
3343         /*
3344          * RXPBSIZE
3345          * split rx buffer up into sections, each for 1 traffic class
3346          */
3347         switch (hw->mac.type) {
3348         case ixgbe_mac_X550:
3349         case ixgbe_mac_X550EM_x:
3350         case ixgbe_mac_X550EM_a:
3351                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3352                 break;
3353         default:
3354                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3355                 break;
3356         }
3357         for (i = 0; i < nb_tcs; i++) {
3358                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3359
3360                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3361                 /* clear 10 bits. */
3362                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3363                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3364         }
3365         /* zero alloc all unused TCs */
3366         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3367                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3368
3369                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3370                 /* clear 10 bits. */
3371                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3372         }
3373
3374         /* MRQC: enable vmdq and dcb */
3375         mrqc = (num_pools == ETH_16_POOLS) ?
3376                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3377         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3378
3379         /* PFVTCTL: turn on virtualisation and set the default pool */
3380         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3381         if (cfg->enable_default_pool) {
3382                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3383         } else {
3384                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3385         }
3386
3387         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3388
3389         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3390         queue_mapping = 0;
3391         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3392                 /*
3393                  * mapping is done with 3 bits per priority,
3394                  * so shift by i*3 each time
3395                  */
3396                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
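        /*
         * Example (sketch): a dcb_tc mapping of {0, 0, 1, 1, 2, 2, 3, 3}
         * packs to queue_mapping = 0x006D2240, with priority i occupying
         * bits [3i + 2 : 3i].
         */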
3397
3398         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3399
3400         /* RTRPCS: DCB related */
3401         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3402
3403         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3404         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3405         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3406         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3407
3408         /* VFTA - enable all vlan filters */
3409         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3410                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3411         }
3412
3413         /* VFRE: pool enabling for receive - 16 or 32 */
3414         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3415                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3416
3417         /*
3418          * MPSAR - allow pools to read specific mac addresses
3419          * In this case, all pools should be able to read from mac addr 0
3420          */
3421         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3422         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3423
3424         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3425         for (i = 0; i < cfg->nb_pool_maps; i++) {
3426                 /* set vlan id in VF register and set the valid bit */
3427                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3428                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3429                 /*
3430                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3431                  * pools, we only need to use the first half of the register
3432                  * i.e. bits 0-31
3433                  */
3434                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3435         }
3436 }
3437
3438 /**
3439  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3440  * @dev: pointer to eth_dev structure
3441  * @dcb_config: pointer to ixgbe_dcb_config structure
3442  */
3443 static void
3444 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3445                        struct ixgbe_dcb_config *dcb_config)
3446 {
3447         uint32_t reg;
3448         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3449
3450         PMD_INIT_FUNC_TRACE();
3451         if (hw->mac.type != ixgbe_mac_82598EB) {
3452                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3453                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3454                 reg |= IXGBE_RTTDCS_ARBDIS;
3455                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3456
3457                 /* Enable DCB for Tx with 8 TCs; otherwise use 4 TCs */
3458                 if (dcb_config->num_tcs.pg_tcs == 8) {
3459                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3460                 } else {
3461                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3462                 }
3463                 if (dcb_config->vt_mode)
3464                         reg |= IXGBE_MTQC_VT_ENA;
3465                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3466
3467                 /* Enable the Tx desc arbiter */
3468                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3469                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3470                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3471
3472                 /* Enable Security TX Buffer IFG for DCB */
3473                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3474                 reg |= IXGBE_SECTX_DCB;
3475                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3476         }
3477 }
3478
3479 /**
3480  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3481  * @dev: pointer to rte_eth_dev structure
3482  * @dcb_config: pointer to ixgbe_dcb_config structure
3483  */
3484 static void
3485 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3486                         struct ixgbe_dcb_config *dcb_config)
3487 {
3488         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3489                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3490         struct ixgbe_hw *hw =
3491                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3492
3493         PMD_INIT_FUNC_TRACE();
3494         if (hw->mac.type != ixgbe_mac_82598EB)
3495                 /*PF VF Transmit Enable*/
3496                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3497                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3498
3499         /*Configure general DCB TX parameters*/
3500         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3501 }
3502
3503 static void
3504 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3505                         struct ixgbe_dcb_config *dcb_config)
3506 {
3507         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3508                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3509         struct ixgbe_dcb_tc_config *tc;
3510         uint8_t i, j;
3511
3512         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3513         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3514                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3515                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3516         } else {
3517                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3518                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3519         }
3520         /* User Priority to Traffic Class mapping */
3521         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3522                 j = vmdq_rx_conf->dcb_tc[i];
3523                 tc = &dcb_config->tc_config[j];
3524                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3525                                                 (uint8_t)(1 << j);
3526         }
3527 }
3528
3529 static void
3530 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3531                         struct ixgbe_dcb_config *dcb_config)
3532 {
3533         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3534                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3535         struct ixgbe_dcb_tc_config *tc;
3536         uint8_t i, j;
3537
3538         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3539         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3540                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3541                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3542         } else {
3543                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3544                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3545         }
3546
3547         /* User Priority to Traffic Class mapping */
3548         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3549                 j = vmdq_tx_conf->dcb_tc[i];
3550                 tc = &dcb_config->tc_config[j];
3551                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3552                                                 (uint8_t)(1 << j);
3553         }
3554 }
3555
3556 static void
3557 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3558                 struct ixgbe_dcb_config *dcb_config)
3559 {
3560         struct rte_eth_dcb_rx_conf *rx_conf =
3561                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3562         struct ixgbe_dcb_tc_config *tc;
3563         uint8_t i, j;
3564
3565         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3566         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3567
3568         /* User Priority to Traffic Class mapping */
3569         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3570                 j = rx_conf->dcb_tc[i];
3571                 tc = &dcb_config->tc_config[j];
3572                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3573                                                 (uint8_t)(1 << j);
3574         }
3575 }
3576
3577 static void
3578 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3579                 struct ixgbe_dcb_config *dcb_config)
3580 {
3581         struct rte_eth_dcb_tx_conf *tx_conf =
3582                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3583         struct ixgbe_dcb_tc_config *tc;
3584         uint8_t i, j;
3585
3586         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3587         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3588
3589         /* User Priority to Traffic Class mapping */
3590         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3591                 j = tx_conf->dcb_tc[i];
3592                 tc = &dcb_config->tc_config[j];
3593                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3594                                                 (uint8_t)(1 << j);
3595         }
3596 }
3597
3598 /**
3599  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3600  * @dev: pointer to eth_dev structure
3601  * @dcb_config: pointer to ixgbe_dcb_config structure
3602  */
3603 static void
3604 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3605                        struct ixgbe_dcb_config *dcb_config)
3606 {
3607         uint32_t reg;
3608         uint32_t vlanctrl;
3609         uint8_t i;
3610         uint32_t q;
3611         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3612
3613         PMD_INIT_FUNC_TRACE();
3614         /*
3615          * Disable the arbiter before changing parameters
3616          * (always enable recycle mode; WSP)
3617          */
3618         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3619         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3620
3621         if (hw->mac.type != ixgbe_mac_82598EB) {
3622                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3623                 if (dcb_config->num_tcs.pg_tcs == 4) {
3624                         if (dcb_config->vt_mode)
3625                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3626                                         IXGBE_MRQC_VMDQRT4TCEN;
3627                         else {
3628                                 /* No matter whether the mode is DCB or
3629                                  * DCB_RSS, set MRQE to RTRSSxTCEN; RSS
3630                                  * itself is controlled by RSS_FIELD.
3631                                  */
3632                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3633                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3634                                         IXGBE_MRQC_RTRSS4TCEN;
3635                         }
3636                 }
3637                 if (dcb_config->num_tcs.pg_tcs == 8) {
3638                         if (dcb_config->vt_mode)
3639                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3640                                         IXGBE_MRQC_VMDQRT8TCEN;
3641                         else {
3642                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3643                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3644                                         IXGBE_MRQC_RTRSS8TCEN;
3645                         }
3646                 }
3647
3648                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3649
3650                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3651                         /* Disable drop for all queues in VMDQ mode*/
3652                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3653                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3654                                                 (IXGBE_QDE_WRITE |
3655                                                  (q << IXGBE_QDE_IDX_SHIFT)));
3656                 } else {
3657                         /* Enable drop for all queues in SRIOV mode */
3658                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3659                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3660                                                 (IXGBE_QDE_WRITE |
3661                                                  (q << IXGBE_QDE_IDX_SHIFT) |
3662                                                  IXGBE_QDE_ENABLE));
3663                 }
3664         }
3665
3666         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3667         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3668         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3669         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3670
3671         /* VFTA - enable all vlan filters */
3672         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3673                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3674         }
3675
3676         /*
3677          * Configure Rx packet plane (recycle mode; WSP) and
3678          * enable arbiter
3679          */
3680         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3681         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3682 }
3683
3684 static void
3685 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3686                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3687 {
3688         switch (hw->mac.type) {
3689         case ixgbe_mac_82598EB:
3690                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3691                 break;
3692         case ixgbe_mac_82599EB:
3693         case ixgbe_mac_X540:
3694         case ixgbe_mac_X550:
3695         case ixgbe_mac_X550EM_x:
3696         case ixgbe_mac_X550EM_a:
3697                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3698                                                   tsa, map);
3699                 break;
3700         default:
3701                 break;
3702         }
3703 }
3704
3705 static void
3706 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3707                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3708 {
3709         switch (hw->mac.type) {
3710         case ixgbe_mac_82598EB:
3711                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3712                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3713                 break;
3714         case ixgbe_mac_82599EB:
3715         case ixgbe_mac_X540:
3716         case ixgbe_mac_X550:
3717         case ixgbe_mac_X550EM_x:
3718         case ixgbe_mac_X550EM_a:
3719                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3720                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3721                 break;
3722         default:
3723                 break;
3724         }
3725 }
3726
3727 #define DCB_RX_CONFIG  1
3728 #define DCB_TX_CONFIG  1
3729 #define DCB_TX_PB      1024
3730 /**
3731  * ixgbe_dcb_hw_configure - Enable DCB and configure
3732  * general DCB in VT mode and non-VT mode parameters
3733  * @dev: pointer to rte_eth_dev structure
3734  * @dcb_config: pointer to ixgbe_dcb_config structure
3735  */
3736 static int
3737 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3738                         struct ixgbe_dcb_config *dcb_config)
3739 {
3740         int     ret = 0;
3741         uint8_t i, pfc_en, nb_tcs;
3742         uint16_t pbsize, rx_buffer_size;
3743         uint8_t config_dcb_rx = 0;
3744         uint8_t config_dcb_tx = 0;
3745         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3746         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3747         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3748         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3749         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3750         struct ixgbe_dcb_tc_config *tc;
3751         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3752         struct ixgbe_hw *hw =
3753                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3754         struct ixgbe_bw_conf *bw_conf =
3755                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
3756
3757         switch (dev->data->dev_conf.rxmode.mq_mode) {
3758         case ETH_MQ_RX_VMDQ_DCB:
3759                 dcb_config->vt_mode = true;
3760                 if (hw->mac.type != ixgbe_mac_82598EB) {
3761                         config_dcb_rx = DCB_RX_CONFIG;
3762                         /*
3763                          * Get DCB and VT RX configuration parameters
3764                          * from rte_eth_conf.
3765                          */
3766                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3767                         /* Configure general VMDQ and DCB RX parameters */
3768                         ixgbe_vmdq_dcb_configure(dev);
3769                 }
3770                 break;
3771         case ETH_MQ_RX_DCB:
3772         case ETH_MQ_RX_DCB_RSS:
3773                 dcb_config->vt_mode = false;
3774                 config_dcb_rx = DCB_RX_CONFIG;
3775                 /* Get DCB RX configuration parameters from rte_eth_conf */
3776                 ixgbe_dcb_rx_config(dev, dcb_config);
3777                 /* Configure general DCB RX parameters */
3778                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
3779                 break;
3780         default:
3781                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3782                 break;
3783         }
3784         switch (dev->data->dev_conf.txmode.mq_mode) {
3785         case ETH_MQ_TX_VMDQ_DCB:
3786                 dcb_config->vt_mode = true;
3787                 config_dcb_tx = DCB_TX_CONFIG;
3788                 /* Get DCB and VT TX configuration parameters
3789                  * from rte_eth_conf.
3790                  */
3791                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3792                 /* Configure general VMDQ and DCB TX parameters */
3793                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3794                 break;
3795
3796         case ETH_MQ_TX_DCB:
3797                 dcb_config->vt_mode = false;
3798                 config_dcb_tx = DCB_TX_CONFIG;
3799                 /* Get DCB TX configuration parameters from rte_eth_conf */
3800                 ixgbe_dcb_tx_config(dev, dcb_config);
3801                 /* Configure general DCB TX parameters */
3802                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3803                 break;
3804         default:
3805                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3806                 break;
3807         }
3808
3809         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3810         /* Unpack map */
3811         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3812         if (nb_tcs == ETH_4_TCS) {
3813                 /* Avoid un-configured priority mapping to TC0 */
3814                 uint8_t j = 4;
3815                 uint8_t mask = 0xFF;
3816
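                /*
                 * The first loop clears the mask bits of TCs already used by
                 * priorities 0-3; the second assigns the remaining priorities
                 * (4-7) to still-unused TCs so they do not all map to TC0.
                 */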
3817                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3818                         mask = (uint8_t)(mask & (~(1 << map[i])));
3819                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3820                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3821                                 map[j++] = i;
3822                         mask >>= 1;
3823                 }
3824                 /* Re-configure 4 TCs BW */
3825                 for (i = 0; i < nb_tcs; i++) {
3826                         tc = &dcb_config->tc_config[i];
3827                         if (bw_conf->tc_num != nb_tcs)
3828                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3829                                         (uint8_t)(100 / nb_tcs);
3830                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3831                                                 (uint8_t)(100 / nb_tcs);
3832                 }
3833                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3834                         tc = &dcb_config->tc_config[i];
3835                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3836                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3837                 }
3838         } else {
3839                 /* Re-configure 8 TCs BW */
3840                 for (i = 0; i < nb_tcs; i++) {
3841                         tc = &dcb_config->tc_config[i];
3842                         if (bw_conf->tc_num != nb_tcs)
3843                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3844                                         (uint8_t)(100 / nb_tcs + (i & 1));
3845                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3846                                 (uint8_t)(100 / nb_tcs + (i & 1));
3847                 }
3848         }
3849
3850         switch (hw->mac.type) {
3851         case ixgbe_mac_X550:
3852         case ixgbe_mac_X550EM_x:
3853         case ixgbe_mac_X550EM_a:
3854                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3855                 break;
3856         default:
3857                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3858                 break;
3859         }
3860
3861         if (config_dcb_rx) {
3862                 /* Set RX buffer size */
3863                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3864                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3865
3866                 for (i = 0; i < nb_tcs; i++) {
3867                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3868                 }
3869                 /* zero alloc all unused TCs */
3870                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3871                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3872                 }
3873         }
3874         if (config_dcb_tx) {
3875                 /* Only an equally distributed Tx packet
3876                  * buffer strategy is supported.
3877                  */
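                /* TXPBTHRESH is programmed in KB: convert the per-TC packet
                 * buffer size from bytes (DCB_TX_PB = 1024) and leave
                 * IXGBE_TXPKT_SIZE_MAX (KB) of headroom for a maximum-sized
                 * packet.
                 */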
3878                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3879                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3880
3881                 for (i = 0; i < nb_tcs; i++) {
3882                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3883                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3884                 }
3885                 /* Clear unused TCs, if any, to zero buffer size*/
3886                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3887                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3888                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3889                 }
3890         }
3891
3892         /* Calculate traffic class credits */
3893         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3894                                 IXGBE_DCB_TX_CONFIG);
3895         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3896                                 IXGBE_DCB_RX_CONFIG);
3897
3898         if (config_dcb_rx) {
3899                 /* Unpack CEE standard containers */
3900                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3901                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3902                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3903                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3904                 /* Configure PG(ETS) RX */
3905                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3906         }
3907
3908         if (config_dcb_tx) {
3909                 /* Unpack CEE standard containers */
3910                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3911                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3912                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3913                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3914                 /* Configure PG(ETS) TX */
3915                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3916         }
3917
3918         /* Configure queue statistics registers */
3919         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3920
3921         /* Check if the PFC is supported */
3922         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3923                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3924                 for (i = 0; i < nb_tcs; i++) {
3925                         /*
3926                          * E.g. with a TC count of 8, the default high_water is 48 (KB)
3927                          * and the default low_water is 16, i.e. 3/4 and 1/4 of the per-TC buffer.
3928                          */
3929                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3930                         hw->fc.low_water[i] = pbsize / 4;
3931                         /* Enable pfc for this TC */
3932                         tc = &dcb_config->tc_config[i];
3933                         tc->pfc = ixgbe_dcb_pfc_enabled;
3934                 }
3935                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3936                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3937                         pfc_en &= 0x0F;
3938                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3939         }
3940
3941         return ret;
3942 }
3943
3944 /**
3945  * ixgbe_configure_dcb - Configure DCB hardware
3946  * @dev: pointer to rte_eth_dev
3947  */
3948 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3949 {
3950         struct ixgbe_dcb_config *dcb_cfg =
3951                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3952         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3953
3954         PMD_INIT_FUNC_TRACE();
3955
3956         /* Check whether the mq_mode is supported for DCB */
3957         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3958             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3959             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3960                 return;
3961
3962         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
3963                 return;
3964
3965         /* Configure DCB hardware */
3966         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3967 }
3968
3969 /*
3970  * VMDq is only supported on 10 GbE NICs.
3971  */
3972 static void
3973 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3974 {
3975         struct rte_eth_vmdq_rx_conf *cfg;
3976         struct ixgbe_hw *hw;
3977         enum rte_eth_nb_pools num_pools;
3978         uint32_t mrqc, vt_ctl, vlanctrl;
3979         uint32_t vmolr = 0;
3980         int i;
3981
3982         PMD_INIT_FUNC_TRACE();
3983         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3984         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3985         num_pools = cfg->nb_queue_pools;
3986
3987         ixgbe_rss_disable(dev);
3988
3989         /* MRQC: enable vmdq */
3990         mrqc = IXGBE_MRQC_VMDQEN;
3991         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3992
3993         /* PFVTCTL: turn on virtualisation and set the default pool */
3994         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
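        /* If no default pool is configured, DIS_DEFPL disables the default
         * pool so frames that match no pool are dropped rather than being
         * delivered to a default pool.
         */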
3995         if (cfg->enable_default_pool)
3996                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3997         else
3998                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3999
4000         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4001
4002         for (i = 0; i < (int)num_pools; i++) {
4003                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4004                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4005         }
4006
4007         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4008         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4009         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4010         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4011
4012         /* VFTA - enable all vlan filters */
4013         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4014                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4015
4016         /* VFRE: pool enabling for receive - 64 */
4017         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4018         if (num_pools == ETH_64_POOLS)
4019                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4020
4021         /*
4022          * MPSAR - allow pools to read specific mac addresses
4023          * In this case, all pools should be able to read from mac addr 0
4024          */
4025         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4026         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4027
4028         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4029         for (i = 0; i < cfg->nb_pool_maps; i++) {
4030                 /* set vlan id in VF register and set the valid bit */
4031                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4032                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4033                 /*
4034                  * Put the allowed pools in VFB reg. As we only have 16 or 64
4035                  * pools, we only need to use the first half of the register
4036                  * i.e. bits 0-31
4037                  */
4038                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4039                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4040                                         (cfg->pool_map[i].pools & UINT32_MAX));
4041                 else
4042                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4043                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4044
4045         }
4046
4047         /* PFDMA Tx General Switch Control: enable VMDq loopback */
4048         if (cfg->enable_loop_back) {
4049                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4050                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4051                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4052         }
4053
4054         IXGBE_WRITE_FLUSH(hw);
4055 }
4056
4057 /*
4058  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4059  * @hw: pointer to hardware structure
4060  */
4061 static void
4062 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4063 {
4064         uint32_t reg;
4065         uint32_t q;
4066
4067         PMD_INIT_FUNC_TRACE();
4068         /* PF VF Transmit Enable */
4069         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4070         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4071
4072         /* Disable the Tx desc arbiter so that MTQC can be changed */
4073         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4074         reg |= IXGBE_RTTDCS_ARBDIS;
4075         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4076
4077         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4078         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4079
4080         /* Disable drop for all queues */
4081         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4082                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4083                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4084
4085         /* Enable the Tx desc arbiter */
4086         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4087         reg &= ~IXGBE_RTTDCS_ARBDIS;
4088         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4089
4090         IXGBE_WRITE_FLUSH(hw);
4091 }
4092
4093 static int __attribute__((cold))
4094 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4095 {
4096         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4097         uint64_t dma_addr;
4098         unsigned int i;
4099
4100         /* Initialize software ring entries */
4101         for (i = 0; i < rxq->nb_rx_desc; i++) {
4102                 volatile union ixgbe_adv_rx_desc *rxd;
4103                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4104
4105                 if (mbuf == NULL) {
4106                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4107                                      (unsigned) rxq->queue_id);
4108                         return -ENOMEM;
4109                 }
4110
4111                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4112                 mbuf->port = rxq->port_id;
4113
4114                 dma_addr =
4115                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
4116                 rxd = &rxq->rx_ring[i];
4117                 rxd->read.hdr_addr = 0;
4118                 rxd->read.pkt_addr = dma_addr;
4119                 rxe[i].mbuf = mbuf;
4120         }
4121
4122         return 0;
4123 }
4124
4125 static int
4126 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4127 {
4128         struct ixgbe_hw *hw;
4129         uint32_t mrqc;
4130
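        /* Configure plain RSS first (hash key, redirection table), then
         * switch MRQC to the VMDq + RSS mode matching the number of active
         * pools.
         */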
4131         ixgbe_rss_configure(dev);
4132
4133         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4134
4135         /* MRQC: enable VF RSS */
4136         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4137         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4138         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4139         case ETH_64_POOLS:
4140                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4141                 break;
4142
4143         case ETH_32_POOLS:
4144                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4145                 break;
4146
4147         default:
4148                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4149                 return -EINVAL;
4150         }
4151
4152         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4153
4154         return 0;
4155 }
4156
4157 static int
4158 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4159 {
4160         struct ixgbe_hw *hw =
4161                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4162
4163         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4164         case ETH_64_POOLS:
4165                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4166                         IXGBE_MRQC_VMDQEN);
4167                 break;
4168
4169         case ETH_32_POOLS:
4170                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4171                         IXGBE_MRQC_VMDQRT4TCEN);
4172                 break;
4173
4174         case ETH_16_POOLS:
4175                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4176                         IXGBE_MRQC_VMDQRT8TCEN);
4177                 break;
4178         default:
4179                 PMD_INIT_LOG(ERR,
4180                         "invalid pool number in IOV mode");
4181                 break;
4182         }
4183         return 0;
4184 }
4185
4186 static int
4187 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4188 {
4189         struct ixgbe_hw *hw =
4190                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4191
4192         if (hw->mac.type == ixgbe_mac_82598EB)
4193                 return 0;
4194
4195         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4196                 /*
4197                  * SRIOV inactive scheme
4198                  * any DCB/RSS w/o VMDq multi-queue setting
4199                  */
4200                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4201                 case ETH_MQ_RX_RSS:
4202                 case ETH_MQ_RX_DCB_RSS:
4203                 case ETH_MQ_RX_VMDQ_RSS:
4204                         ixgbe_rss_configure(dev);
4205                         break;
4206
4207                 case ETH_MQ_RX_VMDQ_DCB:
4208                         ixgbe_vmdq_dcb_configure(dev);
4209                         break;
4210
4211                 case ETH_MQ_RX_VMDQ_ONLY:
4212                         ixgbe_vmdq_rx_hw_configure(dev);
4213                         break;
4214
4215                 case ETH_MQ_RX_NONE:
4216                 default:
4217                         /* If mq_mode is none, disable RSS mode. */
4218                         ixgbe_rss_disable(dev);
4219                         break;
4220                 }
4221         } else {
4222                 /* SRIOV active scheme
4223                  * Support RSS together with SRIOV.
4224                  */
4225                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4226                 case ETH_MQ_RX_RSS:
4227                 case ETH_MQ_RX_VMDQ_RSS:
4228                         ixgbe_config_vf_rss(dev);
4229                         break;
4230                 case ETH_MQ_RX_VMDQ_DCB:
4231                 case ETH_MQ_RX_DCB:
4232                 /* In SRIOV, the configuration is the same as VMDq case */
4233                         ixgbe_vmdq_dcb_configure(dev);
4234                         break;
4235                 /* DCB/RSS together with SRIOV is not supported */
4236                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4237                 case ETH_MQ_RX_DCB_RSS:
4238                         PMD_INIT_LOG(ERR,
4239                                 "Could not support DCB/RSS with VMDq & SRIOV");
4240                         return -1;
4241                 default:
4242                         ixgbe_config_vf_default(dev);
4243                         break;
4244                 }
4245         }
4246
4247         return 0;
4248 }
4249
4250 static int
4251 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4252 {
4253         struct ixgbe_hw *hw =
4254                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4255         uint32_t mtqc;
4256         uint32_t rttdcs;
4257
4258         if (hw->mac.type == ixgbe_mac_82598EB)
4259                 return 0;
4260
4261         /* disable arbiter before setting MTQC */
4262         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4263         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4264         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4265
4266         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4267                 /*
4268                  * SRIOV inactive scheme
4269                  * any DCB w/o VMDq multi-queue setting
4270                  */
4271                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4272                         ixgbe_vmdq_tx_hw_configure(hw);
4273                 else {
4274                         mtqc = IXGBE_MTQC_64Q_1PB;
4275                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4276                 }
4277         } else {
4278                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4279
4280                 /*
4281                  * SRIOV active scheme
4282                  * FIXME if support DCB together with VMDq & SRIOV
4283                  */
4284                 case ETH_64_POOLS:
4285                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4286                         break;
4287                 case ETH_32_POOLS:
4288                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4289                         break;
4290                 case ETH_16_POOLS:
4291                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4292                                 IXGBE_MTQC_8TC_8TQ;
4293                         break;
4294                 default:
4295                         mtqc = IXGBE_MTQC_64Q_1PB;
4296                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4297                 }
4298                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4299         }
4300
4301         /* re-enable arbiter */
4302         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4303         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4304
4305         return 0;
4306 }
4307
4308 /**
4309  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4310  *
4311  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4312  * spec rev. 3.0 chapter 8.2.3.8.13.
4313  *
4314  * @pool Memory pool of the Rx queue
4315  */
4316 static inline uint32_t
4317 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4318 {
4319         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4320
4321         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4322         uint16_t maxdesc =
4323                 IPV4_MAX_PKT_LEN /
4324                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4325
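        /*
         * Round down to the largest MAXDESC value the hardware supports
         * (16, 8, 4 or 1 descriptors per RSC).
         */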
4326         if (maxdesc >= 16)
4327                 return IXGBE_RSCCTL_MAXDESC_16;
4328         else if (maxdesc >= 8)
4329                 return IXGBE_RSCCTL_MAXDESC_8;
4330         else if (maxdesc >= 4)
4331                 return IXGBE_RSCCTL_MAXDESC_4;
4332         else
4333                 return IXGBE_RSCCTL_MAXDESC_1;
4334 }
4335
4336 /**
4337  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4338  * interrupt
4339  *
4340  * (Taken from FreeBSD tree)
4341  * (yes this is all very magic and confusing :)
4342  *
4343  * @dev port handle
4344  * @entry the register array entry
4345  * @vector the MSIX vector for this queue
4346  * @type RX/TX/MISC
4347  */
4348 static void
4349 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4350 {
4351         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4352         u32 ivar, index;
4353
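        /*
         * IXGBE_IVAR_ALLOC_VAL marks the entry as valid; each 32-bit IVAR
         * register packs four 8-bit vector entries.
         */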
4354         vector |= IXGBE_IVAR_ALLOC_VAL;
4355
4356         switch (hw->mac.type) {
4357
4358         case ixgbe_mac_82598EB:
4359                 if (type == -1)
4360                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4361                 else
4362                         entry += (type * 64);
4363                 index = (entry >> 2) & 0x1F;
4364                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4365                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4366                 ivar |= (vector << (8 * (entry & 0x3)));
4367                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4368                 break;
4369
4370         case ixgbe_mac_82599EB:
4371         case ixgbe_mac_X540:
4372                 if (type == -1) { /* MISC IVAR */
4373                         index = (entry & 1) * 8;
4374                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4375                         ivar &= ~(0xFF << index);
4376                         ivar |= (vector << index);
4377                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4378                 } else {        /* RX/TX IVARS */
4379                         index = (16 * (entry & 1)) + (8 * type);
4380                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4381                         ivar &= ~(0xFF << index);
4382                         ivar |= (vector << index);
4383                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4384                 }
4385
4386                 break;
4387
4388         default:
4389                 break;
4390         }
4391 }
4392
4393 void __attribute__((cold))
4394 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4395 {
4396         uint16_t i, rx_using_sse;
4397         struct ixgbe_adapter *adapter =
4398                 (struct ixgbe_adapter *)dev->data->dev_private;
4399
4400         /*
4401          * In order to allow Vector Rx there are a few configuration
4402          * conditions to be met and Rx Bulk Allocation should be allowed.
4403          */
4404         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4405             !adapter->rx_bulk_alloc_allowed) {
4406                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4407                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4408                                     "not enabled",
4409                              dev->data->port_id);
4410
4411                 adapter->rx_vec_allowed = false;
4412         }
4413
4414         /*
4415          * Initialize the appropriate LRO callback.
4416          *
4417          * If all queues satisfy the bulk allocation preconditions
4418  * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4419          * Otherwise use a single allocation version.
4420          */
4421         if (dev->data->lro) {
4422                 if (adapter->rx_bulk_alloc_allowed) {
4423                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4424                                            "allocation version");
4425                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4426                 } else {
4427                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4428                                            "allocation version");
4429                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4430                 }
4431         } else if (dev->data->scattered_rx) {
4432                 /*
4433                  * Set the non-LRO scattered callback: there are Vector,
4434                  * bulk and single allocation versions.
4435                  */
4436                 if (adapter->rx_vec_allowed) {
4437                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4438                                             "callback (port=%d).",
4439                                      dev->data->port_id);
4440
4441                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4442                 } else if (adapter->rx_bulk_alloc_allowed) {
4443                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4444                                            "allocation callback (port=%d).",
4445                                      dev->data->port_id);
4446                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4447                 } else {
4448                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4449                                             "single allocation) "
4450                                             "Scattered Rx callback "
4451                                             "(port=%d).",
4452                                      dev->data->port_id);
4453
4454                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4455                 }
4456         /*
4457          * Below we set "simple" callbacks according to port/queues parameters.
4458          * If parameters allow we are going to choose between the following
4459          * callbacks:
4460          *    - Vector
4461          *    - Bulk Allocation
4462          *    - Single buffer allocation (the simplest one)
4463          */
4464         } else if (adapter->rx_vec_allowed) {
4465                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4466                                     "burst size no less than %d (port=%d).",
4467                              RTE_IXGBE_DESCS_PER_LOOP,
4468                              dev->data->port_id);
4469
4470                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4471         } else if (adapter->rx_bulk_alloc_allowed) {
4472                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4473                                     "satisfied. Rx Burst Bulk Alloc function "
4474                                     "will be used on port=%d.",
4475                              dev->data->port_id);
4476
4477                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4478         } else {
4479                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4480                                     "satisfied, or Scattered Rx is requested "
4481                                     "(port=%d).",
4482                              dev->data->port_id);
4483
4484                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4485         }
4486
4487         /* Propagate information about RX function choice through all queues. */
4488
4489         rx_using_sse =
4490                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4491                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4492
4493         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4494                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4495
4496                 rxq->rx_using_sse = rx_using_sse;
4497         }
4498 }
4499
4500 /**
4501  * ixgbe_set_rsc - configure RSC related port HW registers
4502  *
4503  * Configures the port's RSC-related registers according to chapter 4.6.7.2
4504  * of the 82599 Spec (x540 configuration is virtually the same).
4505  *
4506  * @dev port handle
4507  *
4508  * Returns 0 in case of success or a non-zero error code
4509  */
4510 static int
4511 ixgbe_set_rsc(struct rte_eth_dev *dev)
4512 {
4513         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4514         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4515         struct rte_eth_dev_info dev_info = { 0 };
4516         bool rsc_capable = false;
4517         uint16_t i;
4518         uint32_t rdrxctl;
4519         uint32_t rfctl;
4520
4521         /* Sanity check */
4522         dev->dev_ops->dev_infos_get(dev, &dev_info);
4523         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4524                 rsc_capable = true;
4525
4526         if (!rsc_capable && rx_conf->enable_lro) {
4527                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4528                                    "support it");
4529                 return -EINVAL;
4530         }
4531
4532         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4533
4534         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4535                 /*
4536                  * According to chapter 4.6.7.2.1 of the Spec Rev. 3.0,
4537                  * RSC configuration requires HW CRC stripping to be
4538                  * enabled. If the user requested HW CRC stripping off
4539                  * and RSC on, return an error.
4540                  */
4541                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4542                                     "is disabled");
4543                 return -EINVAL;
4544         }
4545
4546         /* RFCTL configuration  */
4547         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4548         if ((rsc_capable) && (rx_conf->enable_lro))
4549                 /*
4550                  * Since NFS packet coalescing is not supported, clear
4551                  * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4552                  * enabled.
4553                  */
4554                 rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4555                            IXGBE_RFCTL_NFSR_DIS);
4556         else
4557                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4558         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4559
4560         /* If LRO hasn't been requested - we are done here. */
4561         if (!rx_conf->enable_lro)
4562                 return 0;
4563
4564         /* Set RDRXCTL.RSCACKC bit */
4565         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4566         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4567         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4568
4569         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4570         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4571                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4572                 uint32_t srrctl =
4573                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4574                 uint32_t rscctl =
4575                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4576                 uint32_t psrtype =
4577                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4578                 uint32_t eitr =
4579                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4580
4581                 /*
4582                  * ixgbe PMD doesn't support header-split at the moment.
4583                  *
4584                  * Following chapter 4.6.7.2.1 of the 82599/x540 Spec,
4585                  * if RSC is enabled the SRRCTL[n].BSIZEHEADER should be
4586                  * configured even if header split is not enabled. We
4587                  * will configure it to 128 bytes, following the
4588                  * recommendation in the spec.
4589                  */
4590                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4591                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4592                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4593
4594                 /*
4595                  * TODO: Consider setting the Receive Descriptor Minimum
4596                  * Threshold Size for the RSC case. This is not an obviously
4597                  * beneficial option, but it is worth considering...
4598                  */
4599
4600                 rscctl |= IXGBE_RSCCTL_RSCEN;
4601                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4602                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4603
4604                 /*
4605                  * RSC: Set ITR interval corresponding to 2K ints/s.
4606                  *
4607                  * Full-sized RSC aggregations for a 10Gb/s link will
4608                  * arrive at about 20K aggregation/s rate.
4609                  *
4610                  * A 2K ints/s rate will cause only 10% of the
4611                  * aggregations to be closed due to interrupt timer
4612                  * expiration when streaming at wire speed.
4613                  *
4614                  * For a sparse streaming case this setting will yield
4615                  * at most 500us latency for a single RSC aggregation.
4616                  */
4617                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4618                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4619
4620                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4621                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4622                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4623                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4624
4625                 /*
4626                  * RSC requires the mapping of the queue to the
4627                  * interrupt vector.
4628                  */
4629                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4630         }
4631
4632         dev->data->lro = 1;
4633
4634         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4635
4636         return 0;
4637 }
4638
4639 /*
4640  * Initializes Receive Unit.
4641  */
4642 int __attribute__((cold))
4643 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4644 {
4645         struct ixgbe_hw     *hw;
4646         struct ixgbe_rx_queue *rxq;
4647         uint64_t bus_addr;
4648         uint32_t rxctrl;
4649         uint32_t fctrl;
4650         uint32_t hlreg0;
4651         uint32_t maxfrs;
4652         uint32_t srrctl;
4653         uint32_t rdrxctl;
4654         uint32_t rxcsum;
4655         uint16_t buf_size;
4656         uint16_t i;
4657         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4658         int rc;
4659
4660         PMD_INIT_FUNC_TRACE();
4661         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4662
4663         /*
4664          * Make sure receives are disabled while setting
4665          * up the RX context (registers, descriptor rings, etc.).
4666          */
4667         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4668         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4669
4670         /* Enable receipt of broadcast frames */
4671         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4672         fctrl |= IXGBE_FCTRL_BAM;
4673         fctrl |= IXGBE_FCTRL_DPF;
4674         fctrl |= IXGBE_FCTRL_PMCF;
4675         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4676
4677         /*
4678          * Configure CRC stripping, if any.
4679          */
4680         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4681         if (rx_conf->hw_strip_crc)
4682                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4683         else
4684                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4685
4686         /*
4687          * Configure jumbo frame support, if any.
4688          */
4689         if (rx_conf->jumbo_frame == 1) {
4690                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
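                /* MAXFRS holds the maximum frame size in its upper 16 bits;
                 * keep the lower half of the register unchanged.
                 */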
4691                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4692                 maxfrs &= 0x0000FFFF;
4693                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4694                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4695         } else
4696                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4697
4698         /*
4699          * If loopback mode is configured for 82599, set LPBK bit.
4700          */
4701         if (hw->mac.type == ixgbe_mac_82599EB &&
4702                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4703                 hlreg0 |= IXGBE_HLREG0_LPBK;
4704         else
4705                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4706
4707         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4708
4709         /* Setup RX queues */
4710         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4711                 rxq = dev->data->rx_queues[i];
4712
4713                 /*
4714                  * Reset crc_len in case it was changed after queue setup by a
4715                  * call to configure.
4716                  */
4717                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4718
4719                 /* Setup the Base and Length of the Rx Descriptor Rings */
4720                 bus_addr = rxq->rx_ring_phys_addr;
4721                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4722                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4723                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4724                                 (uint32_t)(bus_addr >> 32));
4725                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4726                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4727                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4728                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4729
4730                 /* Configure the SRRCTL register */
4731 #ifdef RTE_HEADER_SPLIT_ENABLE
4732                 /*
4733                  * Configure Header Split
4734                  */
4735                 if (rx_conf->header_split) {
4736                         if (hw->mac.type == ixgbe_mac_82599EB) {
4737                                 /* Must setup the PSRTYPE register */
4738                                 uint32_t psrtype;
4739
4740                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4741                                         IXGBE_PSRTYPE_UDPHDR   |
4742                                         IXGBE_PSRTYPE_IPV4HDR  |
4743                                         IXGBE_PSRTYPE_IPV6HDR;
4744                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4745                         }
4746                         srrctl = ((rx_conf->split_hdr_size <<
4747                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4748                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4749                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4750                 } else
4751 #endif
4752                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4753
4754                 /* Set if packets are dropped when no descriptors available */
4755                 if (rxq->drop_en)
4756                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4757
4758                 /*
4759                  * Configure the RX buffer size in the BSIZEPACKET field of
4760                  * the SRRCTL register of the queue.
4761                  * The value is in 1 KB resolution. Valid values can be from
4762                  * 1 KB to 16 KB.
4763                  */
4764                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4765                         RTE_PKTMBUF_HEADROOM);
4766                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4767                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4768
4769                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4770
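                /* Re-derive the buffer size actually programmed (BSIZEPKT has
                 * 1 KB granularity) to decide below whether scattered Rx is
                 * needed.
                 */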
4771                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4772                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4773
4774                 /* Account for two VLAN tag lengths to support double VLAN */
4775                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4776                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4777                         dev->data->scattered_rx = 1;
4778         }
4779
4780         if (rx_conf->enable_scatter)
4781                 dev->data->scattered_rx = 1;
4782
4783         /*
4784          * Device configured with multiple RX queues.
4785          */
4786         ixgbe_dev_mq_rx_configure(dev);
4787
4788         /*
4789          * Setup the Checksum Register.
4790          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4791          * Enable IP/L4 checksum computation by hardware if requested to do so.
4792          */
4793         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4794         rxcsum |= IXGBE_RXCSUM_PCSD;
4795         if (rx_conf->hw_ip_checksum)
4796                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4797         else
4798                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4799
4800         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4801
4802         if (hw->mac.type == ixgbe_mac_82599EB ||
4803             hw->mac.type == ixgbe_mac_X540) {
4804                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4805                 if (rx_conf->hw_strip_crc)
4806                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4807                 else
4808                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4809                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4810                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4811         }
4812
4813         rc = ixgbe_set_rsc(dev);
4814         if (rc)
4815                 return rc;
4816
4817         ixgbe_set_rx_function(dev);
4818
4819         return 0;
4820 }
4821
4822 /*
4823  * Initializes Transmit Unit.
4824  */
4825 void __attribute__((cold))
4826 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4827 {
4828         struct ixgbe_hw     *hw;
4829         struct ixgbe_tx_queue *txq;
4830         uint64_t bus_addr;
4831         uint32_t hlreg0;
4832         uint32_t txctrl;
4833         uint16_t i;
4834
4835         PMD_INIT_FUNC_TRACE();
4836         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4837
4838         /* Enable TX CRC (checksum offload requirement) and hw padding
4839          * (TSO requirement)
4840          */
4841         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4842         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4843         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4844
4845         /* Setup the Base and Length of the Tx Descriptor Rings */
4846         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4847                 txq = dev->data->tx_queues[i];
4848
4849                 bus_addr = txq->tx_ring_phys_addr;
4850                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4851                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4852                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4853                                 (uint32_t)(bus_addr >> 32));
4854                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4855                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4856                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4857                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4858                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4859
4860                 /*
4861                  * Disable Tx Head Writeback RO bit, since this hoses
4862                  * bookkeeping if things aren't delivered in order.
4863                  */
4864                 switch (hw->mac.type) {
4865                 case ixgbe_mac_82598EB:
4866                         txctrl = IXGBE_READ_REG(hw,
4867                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4868                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4869                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4870                                         txctrl);
4871                         break;
4872
4873                 case ixgbe_mac_82599EB:
4874                 case ixgbe_mac_X540:
4875                 case ixgbe_mac_X550:
4876                 case ixgbe_mac_X550EM_x:
4877                 case ixgbe_mac_X550EM_a:
4878                 default:
4879                         txctrl = IXGBE_READ_REG(hw,
4880                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4881                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4882                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4883                                         txctrl);
4884                         break;
4885                 }
4886         }
4887
4888         /* Device configured with multiple TX queues. */
4889         ixgbe_dev_mq_tx_configure(dev);
4890 }
4891
4892 /*
4893  * Set up link for 82599 loopback mode Tx->Rx.
4894  */
4895 static inline void __attribute__((cold))
4896 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4897 {
4898         PMD_INIT_FUNC_TRACE();
4899
4900         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4901                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4902                                 IXGBE_SUCCESS) {
4903                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4904                         /* ignore error */
4905                         return;
4906                 }
4907         }
4908
4909         /* Restart link */
4910         IXGBE_WRITE_REG(hw,
4911                         IXGBE_AUTOC,
4912                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4913         ixgbe_reset_pipeline_82599(hw);
4914
4915         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4916         msec_delay(50);
4917 }
4918
4919
4920 /*
4921  * Start Transmit and Receive Units.
4922  */
4923 int __attribute__((cold))
4924 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4925 {
4926         struct ixgbe_hw     *hw;
4927         struct ixgbe_tx_queue *txq;
4928         struct ixgbe_rx_queue *rxq;
4929         uint32_t txdctl;
4930         uint32_t dmatxctl;
4931         uint32_t rxctrl;
4932         uint16_t i;
4933         int ret = 0;
4934
4935         PMD_INIT_FUNC_TRACE();
4936         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4937
4938         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4939                 txq = dev->data->tx_queues[i];
4940                 /* Setup Transmit Threshold Registers */
4941                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
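                /* TXDCTL packs PTHRESH, HTHRESH and WTHRESH into bits 6:0,
                 * 14:8 and 22:16 respectively.
                 */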
4942                 txdctl |= txq->pthresh & 0x7F;
4943                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4944                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4945                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4946         }
4947
4948         if (hw->mac.type != ixgbe_mac_82598EB) {
4949                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4950                 dmatxctl |= IXGBE_DMATXCTL_TE;
4951                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4952         }
4953
4954         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4955                 txq = dev->data->tx_queues[i];
4956                 if (!txq->tx_deferred_start) {
4957                         ret = ixgbe_dev_tx_queue_start(dev, i);
4958                         if (ret < 0)
4959                                 return ret;
4960                 }
4961         }
4962
4963         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4964                 rxq = dev->data->rx_queues[i];
4965                 if (!rxq->rx_deferred_start) {
4966                         ret = ixgbe_dev_rx_queue_start(dev, i);
4967                         if (ret < 0)
4968                                 return ret;
4969                 }
4970         }
4971
4972         /* Enable Receive engine */
4973         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4974         if (hw->mac.type == ixgbe_mac_82598EB)
4975                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4976         rxctrl |= IXGBE_RXCTRL_RXEN;
4977         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4978
4979         /* If loopback mode is enabled for 82599, set up the link accordingly */
4980         if (hw->mac.type == ixgbe_mac_82599EB &&
4981                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4982                 ixgbe_setup_loopback_link_82599(hw);
4983
4984         return 0;
4985 }
4986
4987 /*
4988  * Start Receive Units for specified queue.
4989  */
4990 int __attribute__((cold))
4991 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4992 {
4993         struct ixgbe_hw     *hw;
4994         struct ixgbe_rx_queue *rxq;
4995         uint32_t rxdctl;
4996         int poll_ms;
4997
4998         PMD_INIT_FUNC_TRACE();
4999         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5000
5001         if (rx_queue_id < dev->data->nb_rx_queues) {
5002                 rxq = dev->data->rx_queues[rx_queue_id];
5003
5004                 /* Allocate buffers for descriptor rings */
5005                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5006                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5007                                      rx_queue_id);
5008                         return -1;
5009                 }
5010                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5011                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5012                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5013
5014                 /* Wait until RX Enable ready */
5015                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5016                 do {
5017                         rte_delay_ms(1);
5018                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5019                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5020                 if (!poll_ms)
5021                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
5022                                      rx_queue_id);
5023                 rte_wmb();
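                /* Publish the ring to the hardware: head starts at 0 and the
                 * tail is set to the end of the descriptor ring.
                 */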
5024                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5025                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5026                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5027         } else
5028                 return -1;
5029
5030         return 0;
5031 }
5032
5033 /*
5034  * Stop Receive Units for specified queue.
5035  */
5036 int __attribute__((cold))
5037 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5038 {
5039         struct ixgbe_hw     *hw;
5040         struct ixgbe_adapter *adapter =
5041                 (struct ixgbe_adapter *)dev->data->dev_private;
5042         struct ixgbe_rx_queue *rxq;
5043         uint32_t rxdctl;
5044         int poll_ms;
5045
5046         PMD_INIT_FUNC_TRACE();
5047         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5048
5049         if (rx_queue_id < dev->data->nb_rx_queues) {
5050                 rxq = dev->data->rx_queues[rx_queue_id];
5051
5052                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5053                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5054                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5055
5056                 /* Wait until RX Enable bit clear */
5057                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5058                 do {
5059                         rte_delay_ms(1);
5060                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5061                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5062                 if (!poll_ms)
5063                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
5064                                      rx_queue_id);
5065
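                /* Brief delay to let any in-flight packet reception settle
                 * before releasing the queue's mbufs.
                 */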
5066                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
5067
5068                 ixgbe_rx_queue_release_mbufs(rxq);
5069                 ixgbe_reset_rx_queue(adapter, rxq);
5070                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5071         } else
5072                 return -1;
5073
5074         return 0;
5075 }
5076
5077
5078 /*
5079  * Start Transmit Units for specified queue.
5080  */
5081 int __attribute__((cold))
5082 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5083 {
5084         struct ixgbe_hw     *hw;
5085         struct ixgbe_tx_queue *txq;
5086         uint32_t txdctl;
5087         int poll_ms;
5088
5089         PMD_INIT_FUNC_TRACE();
5090         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5091
5092         if (tx_queue_id < dev->data->nb_tx_queues) {
5093                 txq = dev->data->tx_queues[tx_queue_id];
5094                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5095                 txdctl |= IXGBE_TXDCTL_ENABLE;
5096                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5097
5098                 /* Wait until TX Enable ready */
5099                 if (hw->mac.type == ixgbe_mac_82599EB) {
5100                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5101                         do {
5102                                 rte_delay_ms(1);
5103                                 txdctl = IXGBE_READ_REG(hw,
5104                                         IXGBE_TXDCTL(txq->reg_idx));
5105                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5106                         if (!poll_ms)
5107                                 PMD_INIT_LOG(ERR, "Could not enable "
5108                                              "Tx Queue %d", tx_queue_id);
5109                 }
5110                 rte_wmb();
5111                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5112                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5113                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5114         } else
5115                 return -1;
5116
5117         return 0;
5118 }
5119
5120 /*
5121  * Stop Transmit Units for specified queue.
5122  */
5123 int __attribute__((cold))
5124 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5125 {
5126         struct ixgbe_hw     *hw;
5127         struct ixgbe_tx_queue *txq;
5128         uint32_t txdctl;
5129         uint32_t txtdh, txtdt;
5130         int poll_ms;
5131
5132         PMD_INIT_FUNC_TRACE();
5133         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5134
5135         if (tx_queue_id >= dev->data->nb_tx_queues)
5136                 return -1;
5137
5138         txq = dev->data->tx_queues[tx_queue_id];
5139
5140         /* Wait until TX queue is empty */
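        /*
         * TDH is the hardware head pointer (next descriptor to fetch) and
         * TDT is the software tail pointer; when they are equal, the ring
         * is empty and all submitted descriptors have been processed.
         */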
5141         if (hw->mac.type == ixgbe_mac_82599EB) {
5142                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5143                 do {
5144                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5145                         txtdh = IXGBE_READ_REG(hw,
5146                                                IXGBE_TDH(txq->reg_idx));
5147                         txtdt = IXGBE_READ_REG(hw,
5148                                                IXGBE_TDT(txq->reg_idx));
5149                 } while (--poll_ms && (txtdh != txtdt));
5150                 if (!poll_ms)
5151                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
5152                                      "when stopping.", tx_queue_id);
5153         }
5154
5155         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5156         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5157         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5158
5159         /* Wait until TX Enable bit clear */
5160         if (hw->mac.type == ixgbe_mac_82599EB) {
5161                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5162                 do {
5163                         rte_delay_ms(1);
5164                         txdctl = IXGBE_READ_REG(hw,
5165                                                 IXGBE_TXDCTL(txq->reg_idx));
5166                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5167                 if (!poll_ms)
5168                         PMD_INIT_LOG(ERR, "Could not disable "
5169                                      "Tx Queue %d", tx_queue_id);
5170         }
5171
5172         if (txq->ops != NULL) {
5173                 txq->ops->release_mbufs(txq);
5174                 txq->ops->reset(txq);
5175         }
5176         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5177
5178         return 0;
5179 }
5180
5181 void
5182 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5183         struct rte_eth_rxq_info *qinfo)
5184 {
5185         struct ixgbe_rx_queue *rxq;
5186
5187         rxq = dev->data->rx_queues[queue_id];
5188
5189         qinfo->mp = rxq->mb_pool;
5190         qinfo->scattered_rx = dev->data->scattered_rx;
5191         qinfo->nb_desc = rxq->nb_rx_desc;
5192
5193         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5194         qinfo->conf.rx_drop_en = rxq->drop_en;
5195         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5196 }
5197
5198 void
5199 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5200         struct rte_eth_txq_info *qinfo)
5201 {
5202         struct ixgbe_tx_queue *txq;
5203
5204         txq = dev->data->tx_queues[queue_id];
5205
5206         qinfo->nb_desc = txq->nb_tx_desc;
5207
5208         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5209         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5210         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5211
5212         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5213         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5214         qinfo->conf.txq_flags = txq->txq_flags;
5215         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5216 }
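
/*
 * Illustrative sketch (not part of the driver): the two info helpers above
 * are reached through rte_eth_rx_queue_info_get()/rte_eth_tx_queue_info_get().
 * The port and queue ids are placeholders and the snippet is not compiled
 * here.
 */
#if 0
static void
example_dump_rxq_info(uint8_t port_id, uint16_t queue_id)
{
        struct rte_eth_rxq_info qinfo;

        if (rte_eth_rx_queue_info_get(port_id, queue_id, &qinfo) == 0)
                printf("rxq %u: %u descriptors, drop_en=%u\n",
                       queue_id, qinfo.nb_desc, qinfo.conf.rx_drop_en);
}
#endif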
5217
5218 /*
5219  * [VF] Initializes Receive Unit.
5220  */
5221 int __attribute__((cold))
5222 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5223 {
5224         struct ixgbe_hw     *hw;
5225         struct ixgbe_rx_queue *rxq;
5226         uint64_t bus_addr;
5227         uint32_t srrctl, psrtype = 0;
5228         uint16_t buf_size;
5229         uint16_t i;
5230         int ret;
5231
5232         PMD_INIT_FUNC_TRACE();
5233         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5234
5235         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5236                 PMD_INIT_LOG(ERR, "Invalid number of Rx queues, "
5237                         "it must be a power of 2");
5238                 return -1;
5239         }
5240
5241         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5242                 PMD_INIT_LOG(ERR, "Invalid number of Rx queues, "
5243                         "it must be less than or equal to %d",
5244                         hw->mac.max_rx_queues);
5245                 return -1;
5246         }
5247
5248         /*
5249          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5250          * disables VF packet reception if the PF MTU is > 1500.
5251          * This is done to deal with an 82599 limitation that forces
5252          * the PF and all VFs to share the same MTU.
5253          * The PF driver re-enables VF packet reception only when the VF
5254          * driver issues an IXGBE_VF_SET_LPE request.
5255          * In the meantime, the VF device cannot be used, even if the VF driver
5256          * and the Guest VM network stack are ready to accept packets with a
5257          * size up to the PF MTU.
5258          * As a workaround for this PF behaviour, always call
5259          * ixgbevf_rlpml_set_vf, even if jumbo frames are not used, so that
5260          * VF packet reception works in all cases.
5261          */
5262         ixgbevf_rlpml_set_vf(hw,
5263                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5264
5265         /* Setup RX queues */
5266         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5267                 rxq = dev->data->rx_queues[i];
5268
5269                 /* Allocate buffers for descriptor rings */
5270                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5271                 if (ret)
5272                         return ret;
5273
5274                 /* Setup the Base and Length of the Rx Descriptor Rings */
5275                 bus_addr = rxq->rx_ring_phys_addr;
5276
5277                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5278                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5279                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5280                                 (uint32_t)(bus_addr >> 32));
5281                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5282                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5283                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5284                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5285
5286
5287                 /* Configure the SRRCTL register */
5288 #ifdef RTE_HEADER_SPLIT_ENABLE
5289                 /*
5290                  * Configure Header Split
5291                  */
5292                 if (dev->data->dev_conf.rxmode.header_split) {
5293                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5294                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5295                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5296                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5297                 } else
5298 #endif
5299                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5300
5301                 /* Set whether packets are dropped when no descriptors are available */
5302                 if (rxq->drop_en)
5303                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5304
5305                 /*
5306                  * Configure the RX buffer size in the BSIZEPACKET field of
5307                  * the SRRCTL register of the queue.
5308                  * The value is in 1 KB resolution. Valid values can be from
5309                  * 1 KB to 16 KB.
5310                  */
5311                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5312                         RTE_PKTMBUF_HEADROOM);
5313                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5314                            IXGBE_SRRCTL_BSIZEPKT_MASK);
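                /*
                 * For example, with the default mbuf layout (2176 bytes of
                 * data room, 128 bytes of headroom) buf_size is 2048, so
                 * BSIZEPACKET becomes 2048 >> 10 = 2, i.e. a 2 KB Rx buffer.
                 */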
5315
5316                 /*
5317                  * Write the VF-specific copy of the SRRCTL register.
5318                  */
5319                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5320
5321                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5322                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5323
5324                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5325                     /* Account for two VLAN tags (dual VLAN) in the frame length */
5326                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5327                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5328                         if (!dev->data->scattered_rx)
5329                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5330                         dev->data->scattered_rx = 1;
5331                 }
5332         }
5333
5334 #ifdef RTE_HEADER_SPLIT_ENABLE
5335         if (dev->data->dev_conf.rxmode.header_split)
5336                 /* Must setup the PSRTYPE register */
5337                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5338                         IXGBE_PSRTYPE_UDPHDR   |
5339                         IXGBE_PSRTYPE_IPV4HDR  |
5340                         IXGBE_PSRTYPE_IPV6HDR;
5341 #endif
5342
5343         /* Set RQPL for VF RSS according to the number of Rx queues */
5344         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5345                 IXGBE_PSRTYPE_RQPL_SHIFT;
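        /*
         * For example, 4 Rx queues yield (4 >> 1) = 2 in the RQPL field;
         * the shift relies on the power-of-2 queue count enforced above.
         */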
5346         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5347
5348         ixgbe_set_rx_function(dev);
5349
5350         return 0;
5351 }
5352
5353 /*
5354  * [VF] Initializes Transmit Unit.
5355  */
5356 void __attribute__((cold))
5357 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5358 {
5359         struct ixgbe_hw     *hw;
5360         struct ixgbe_tx_queue *txq;
5361         uint64_t bus_addr;
5362         uint32_t txctrl;
5363         uint16_t i;
5364
5365         PMD_INIT_FUNC_TRACE();
5366         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5367
5368         /* Setup the Base and Length of the Tx Descriptor Rings */
5369         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5370                 txq = dev->data->tx_queues[i];
5371                 bus_addr = txq->tx_ring_phys_addr;
5372                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5373                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5374                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5375                                 (uint32_t)(bus_addr >> 32));
5376                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5377                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5378                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5379                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5380                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5381
5382                 /*
5383                  * Disable the Tx Head Writeback relaxed-ordering (RO) bit,
5384                  * since out-of-order write-back breaks the bookkeeping.
5385                  */
5386                 txctrl = IXGBE_READ_REG(hw,
5387                                 IXGBE_VFDCA_TXCTRL(i));
5388                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5389                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5390                                 txctrl);
5391         }
5392 }
5393
5394 /*
5395  * [VF] Start Transmit and Receive Units.
5396  */
5397 void __attribute__((cold))
5398 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5399 {
5400         struct ixgbe_hw     *hw;
5401         struct ixgbe_tx_queue *txq;
5402         struct ixgbe_rx_queue *rxq;
5403         uint32_t txdctl;
5404         uint32_t rxdctl;
5405         uint16_t i;
5406         int poll_ms;
5407
5408         PMD_INIT_FUNC_TRACE();
5409         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5410
5411         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5412                 txq = dev->data->tx_queues[i];
5413                 /* Setup Transmit Threshold Registers */
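                /*
                 * TXDCTL packs the prefetch, host and write-back thresholds
                 * into bits [6:0], [14:8] and [22:16], as the masks and
                 * shifts below show.
                 */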
5414                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5415                 txdctl |= txq->pthresh & 0x7F;
5416                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5417                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5418                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5419         }
5420
5421         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5422
5423                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5424                 txdctl |= IXGBE_TXDCTL_ENABLE;
5425                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5426
5427                 poll_ms = 10;
5428                 /* Wait until TX Enable ready */
5429                 do {
5430                         rte_delay_ms(1);
5431                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5432                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5433                 if (!poll_ms)
5434                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5435         }
5436         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5437
5438                 rxq = dev->data->rx_queues[i];
5439
5440                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5441                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5442                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5443
5444                 /* Wait until RX Enable ready */
5445                 poll_ms = 10;
5446                 do {
5447                         rte_delay_ms(1);
5448                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5449                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5450                 if (!poll_ms)
5451                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5452                 rte_wmb();
5453                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5454
5455         }
5456 }
5457
5458 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
5459 int __attribute__((weak))
5460 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5461 {
5462         return -1;
5463 }
5464
5465 uint16_t __attribute__((weak))
5466 ixgbe_recv_pkts_vec(
5467         void __rte_unused *rx_queue,
5468         struct rte_mbuf __rte_unused **rx_pkts,
5469         uint16_t __rte_unused nb_pkts)
5470 {
5471         return 0;
5472 }
5473
5474 uint16_t __attribute__((weak))
5475 ixgbe_recv_scattered_pkts_vec(
5476         void __rte_unused *rx_queue,
5477         struct rte_mbuf __rte_unused **rx_pkts,
5478         uint16_t __rte_unused nb_pkts)
5479 {
5480         return 0;
5481 }
5482
5483 int __attribute__((weak))
5484 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5485 {
5486         return -1;
5487 }
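
/*
 * Illustrative sketch (not part of the driver) of the weak-symbol technique
 * used above: a weak default is kept only until a strong definition of the
 * same symbol is linked in, e.g. from the vectorized Rx path built when
 * CONFIG_RTE_IXGBE_INC_VECTOR is enabled. File names are placeholders and
 * the snippet is not compiled here.
 */
#if 0
/* fallback.c: weak default, always linked */
int __attribute__((weak)) do_work(void) { return -1; }

/* fast.c: strong definition, built only with the optional feature */
int do_work(void) { return 0; }

/* callers get 0 when fast.o is linked in, -1 otherwise */
#endif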