1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73
74 #include "ixgbe_logs.h"
75 #include "base/ixgbe_api.h"
76 #include "base/ixgbe_vf.h"
77 #include "ixgbe_ethdev.h"
78 #include "base/ixgbe_dcb.h"
79 #include "base/ixgbe_common.h"
80 #include "ixgbe_rxtx.h"
81
82 /* Bit mask to indicate which bits are required for building the TX context */
83 #define IXGBE_TX_OFFLOAD_MASK (                  \
84                 PKT_TX_OUTER_IPV6 |              \
85                 PKT_TX_OUTER_IPV4 |              \
86                 PKT_TX_IPV6 |                    \
87                 PKT_TX_IPV4 |                    \
88                 PKT_TX_VLAN_PKT |                \
89                 PKT_TX_IP_CKSUM |                \
90                 PKT_TX_L4_MASK |                 \
91                 PKT_TX_TCP_SEG |                 \
92                 PKT_TX_OUTER_IP_CKSUM)
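/*
 * Illustrative sketch (not part of the driver): an application that wants
 * the offloads covered by IXGBE_TX_OFFLOAD_MASK would typically set the
 * relevant flags and header lengths on each mbuf before handing it to
 * rte_eth_tx_burst(), e.g.:
 *
 *   m->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM;
 *   m->l2_len = sizeof(struct ether_hdr);
 *   m->l3_len = sizeof(struct ipv4_hdr);
 *
 * Only flags inside this mask feed the context-descriptor logic below.
 */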
93
94 #if 1
95 #define RTE_PMD_USE_PREFETCH
96 #endif
97
98 #ifdef RTE_PMD_USE_PREFETCH
99 /*
100  * Prefetch a cache line into all cache levels.
101  */
102 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
103 #else
104 #define rte_ixgbe_prefetch(p)   do {} while (0)
105 #endif
106
107 /*********************************************************************
108  *
109  *  TX functions
110  *
111  **********************************************************************/
112
113 /*
114  * Check for descriptors with their DD bit set and free mbufs.
115  * Return the total number of buffers freed.
116  */
117 static inline int __attribute__((always_inline))
118 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
119 {
120         struct ixgbe_tx_entry *txep;
121         uint32_t status;
122         int i, nb_free = 0;
123         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
124
125         /* check DD bit on threshold descriptor */
126         status = txq->tx_ring[txq->tx_next_dd].wb.status;
127         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
128                 return 0;
129
130         /*
131          * first buffer to free from S/W ring is at index
132          * tx_next_dd - (tx_rs_thresh-1)
133          */
134         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
135
136         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
137                 /* free buffers one at a time */
138                 m = __rte_pktmbuf_prefree_seg(txep->mbuf);
139                 txep->mbuf = NULL;
140
141                 if (unlikely(m == NULL))
142                         continue;
143
144                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
145                     (nb_free > 0 && m->pool != free[0]->pool)) {
146                         rte_mempool_put_bulk(free[0]->pool,
147                                              (void **)free, nb_free);
148                         nb_free = 0;
149                 }
150
151                 free[nb_free++] = m;
152         }
153
154         if (nb_free > 0)
155                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
156
157         /* buffers were freed, update counters */
158         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
159         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
160         if (txq->tx_next_dd >= txq->nb_tx_desc)
161                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
162
163         return txq->tx_rs_thresh;
164 }
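/*
 * Worked example (illustrative only): with nb_tx_desc = 512 and
 * tx_rs_thresh = 32, tx_next_dd advances in steps of 32 (31 -> 63 ->
 * 95 -> ...), so descriptors are recycled in fixed blocks of 32 and at
 * most 32 mbufs are returned to the mempool per call, bulked through the
 * free[] array whenever consecutive mbufs share the same pool.
 */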
165
166 /* Populate 4 descriptors with data from 4 mbufs */
167 static inline void
168 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
169 {
170         uint64_t buf_dma_addr;
171         uint32_t pkt_len;
172         int i;
173
174         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
175                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
176                 pkt_len = (*pkts)->data_len;
177
178                 /* write data to descriptor */
179                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
180
181                 txdp->read.cmd_type_len =
182                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
183
184                 txdp->read.olinfo_status =
185                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
186
187                 rte_prefetch0(&(*pkts)->pool);
188         }
189 }
190
191 /* Populate 1 descriptor with data from 1 mbuf */
192 static inline void
193 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
194 {
195         uint64_t buf_dma_addr;
196         uint32_t pkt_len;
197
198         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
199         pkt_len = (*pkts)->data_len;
200
201         /* write data to descriptor */
202         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
203         txdp->read.cmd_type_len =
204                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
205         txdp->read.olinfo_status =
206                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
207         rte_prefetch0(&(*pkts)->pool);
208 }
209
210 /*
211  * Fill H/W descriptor ring with mbuf data.
212  * Copy mbuf pointers to the S/W ring.
213  */
214 static inline void
215 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
216                       uint16_t nb_pkts)
217 {
218         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
219         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
220         const int N_PER_LOOP = 4;
221         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
222         int mainpart, leftover;
223         int i, j;
224
225         /*
226          * Process most of the packets in chunks of N pkts.  Any
227          * leftover packets will get processed one at a time.
228          */
229         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
230         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
231         for (i = 0; i < mainpart; i += N_PER_LOOP) {
232                 /* Copy N mbuf pointers to the S/W ring */
233                 for (j = 0; j < N_PER_LOOP; ++j) {
234                         (txep + i + j)->mbuf = *(pkts + i + j);
235                 }
236                 tx4(txdp + i, pkts + i);
237         }
238
239         if (unlikely(leftover > 0)) {
240                 for (i = 0; i < leftover; ++i) {
241                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
242                         tx1(txdp + mainpart + i, pkts + mainpart + i);
243                 }
244         }
245 }
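/*
 * Worked example (illustrative only): for nb_pkts = 11 and N_PER_LOOP = 4,
 * mainpart = 11 & ~3 = 8 and leftover = 11 & 3 = 3, so two tx4() calls
 * fill descriptors 0-7 and three tx1() calls fill descriptors 8-10.
 */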
246
247 static inline uint16_t
248 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
249              uint16_t nb_pkts)
250 {
251         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
252         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
253         uint16_t n = 0;
254
255         /*
256          * Begin scanning the H/W ring for done descriptors when the
257          * number of available descriptors drops below tx_free_thresh.  For
258          * each done descriptor, free the associated buffer.
259          */
260         if (txq->nb_tx_free < txq->tx_free_thresh)
261                 ixgbe_tx_free_bufs(txq);
262
263         /* Only use descriptors that are available */
264         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
265         if (unlikely(nb_pkts == 0))
266                 return 0;
267
268         /* Use exactly nb_pkts descriptors */
269         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
270
271         /*
272          * At this point, we know there are enough descriptors in the
273          * ring to transmit all the packets.  This assumes that each
274          * mbuf contains a single segment, and that no new offloads
275          * are expected, which would require a new context descriptor.
276          */
277
278         /*
279          * See if we're going to wrap around. If so, handle the top
280          * of the descriptor ring first, then do the bottom.  If not,
281          * the processing looks just like the "bottom" part anyway...
282          */
283         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
284                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
285                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
286
287                 /*
288                  * We know that the last descriptor in the ring will need to
289                  * have its RS bit set because tx_rs_thresh has to be
290                  * a divisor of the ring size
291                  */
292                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
293                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
294                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
295
296                 txq->tx_tail = 0;
297         }
298
299         /* Fill H/W descriptor ring with mbuf data */
300         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
301         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
302
303         /*
304          * Determine if RS bit should be set
305          * This is what we actually want:
306          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
307          * but instead of subtracting 1 and doing >=, we can just do
308          * greater than without subtracting.
309          */
310         if (txq->tx_tail > txq->tx_next_rs) {
311                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
312                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
313                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
314                                                 txq->tx_rs_thresh);
315                 if (txq->tx_next_rs >= txq->nb_tx_desc)
316                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
317         }
318
319         /*
320          * Check for wrap-around. This would only happen if we used
321          * up to the last descriptor in the ring, no more, no less.
322          */
323         if (txq->tx_tail >= txq->nb_tx_desc)
324                 txq->tx_tail = 0;
325
326         /* update tail pointer */
327         rte_wmb();
328         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
329
330         return nb_pkts;
331 }
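/*
 * Worked example (illustrative only): with nb_tx_desc = 512,
 * tx_rs_thresh = 32 and tx_next_rs = 31, a burst of 40 packets sent from
 * tx_tail = 0 moves tx_tail to 40; since 40 > 31 the RS bit is set on
 * descriptor 31 and tx_next_rs advances to 63.  The wrap-around branch
 * relies on tx_rs_thresh dividing the ring size, so the last descriptor
 * of the ring always coincides with an RS threshold.
 */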
332
333 uint16_t
334 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
335                        uint16_t nb_pkts)
336 {
337         uint16_t nb_tx;
338
339         /* Transmit the whole burst at once if it fits within TX_MAX_BURST pkts */
340         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
341                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
342
343         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
344         nb_tx = 0;
345         while (nb_pkts) {
346                 uint16_t ret, n;
347
348                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
349                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
350                 nb_tx = (uint16_t)(nb_tx + ret);
351                 nb_pkts = (uint16_t)(nb_pkts - ret);
352                 if (ret < n)
353                         break;
354         }
355
356         return nb_tx;
357 }
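/*
 * Illustrative usage (application side, not part of the driver): both the
 * simple and the full-featured TX paths are reached through the generic
 * burst API, e.g.:
 *
 *   uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);
 *
 * Which path backs the call depends on how the TX queue was configured;
 * roughly, single-segment traffic with no offloads and a large enough
 * tx_rs_thresh selects this simple path.
 */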
358
359 static inline void
360 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
361                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
362                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
363 {
364         uint32_t type_tucmd_mlhl;
365         uint32_t mss_l4len_idx = 0;
366         uint32_t ctx_idx;
367         uint32_t vlan_macip_lens;
368         union ixgbe_tx_offload tx_offload_mask;
369         uint32_t seqnum_seed = 0;
370
371         ctx_idx = txq->ctx_curr;
372         tx_offload_mask.data[0] = 0;
373         tx_offload_mask.data[1] = 0;
374         type_tucmd_mlhl = 0;
375
376         /* Specify which HW CTX to upload. */
377         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
378
379         if (ol_flags & PKT_TX_VLAN_PKT) {
380                 tx_offload_mask.vlan_tci |= ~0;
381         }
382
383         /* check if TCP segmentation is required for this packet */
384         if (ol_flags & PKT_TX_TCP_SEG) {
385                 /* implies IP cksum in IPv4 */
386                 if (ol_flags & PKT_TX_IP_CKSUM)
387                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
388                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
389                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
390                 else
391                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
392                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
393                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
394
395                 tx_offload_mask.l2_len |= ~0;
396                 tx_offload_mask.l3_len |= ~0;
397                 tx_offload_mask.l4_len |= ~0;
398                 tx_offload_mask.tso_segsz |= ~0;
399                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
400                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
401         } else { /* no TSO, check if hardware checksum is needed */
402                 if (ol_flags & PKT_TX_IP_CKSUM) {
403                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
404                         tx_offload_mask.l2_len |= ~0;
405                         tx_offload_mask.l3_len |= ~0;
406                 }
407
408                 switch (ol_flags & PKT_TX_L4_MASK) {
409                 case PKT_TX_UDP_CKSUM:
410                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
411                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
412                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
413                         tx_offload_mask.l2_len |= ~0;
414                         tx_offload_mask.l3_len |= ~0;
415                         break;
416                 case PKT_TX_TCP_CKSUM:
417                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
418                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
419                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
420                         tx_offload_mask.l2_len |= ~0;
421                         tx_offload_mask.l3_len |= ~0;
422                         break;
423                 case PKT_TX_SCTP_CKSUM:
424                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
425                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
426                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
427                         tx_offload_mask.l2_len |= ~0;
428                         tx_offload_mask.l3_len |= ~0;
429                         break;
430                 default:
431                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
432                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
433                         break;
434                 }
435         }
436
437         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
438                 tx_offload_mask.outer_l2_len |= ~0;
439                 tx_offload_mask.outer_l3_len |= ~0;
440                 tx_offload_mask.l2_len |= ~0;
441                 seqnum_seed |= tx_offload.outer_l3_len
442                                << IXGBE_ADVTXD_OUTER_IPLEN;
443                 seqnum_seed |= tx_offload.l2_len
444                                << IXGBE_ADVTXD_TUNNEL_LEN;
445         }
446
447         txq->ctx_cache[ctx_idx].flags = ol_flags;
448         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
449                 tx_offload_mask.data[0] & tx_offload.data[0];
450         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
451                 tx_offload_mask.data[1] & tx_offload.data[1];
452         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
453
454         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
455         vlan_macip_lens = tx_offload.l3_len;
456         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
457                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
458                                     IXGBE_ADVTXD_MACLEN_SHIFT);
459         else
460                 vlan_macip_lens |= (tx_offload.l2_len <<
461                                     IXGBE_ADVTXD_MACLEN_SHIFT);
462         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
463         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
464         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
465         ctx_txd->seqnum_seed     = seqnum_seed;
466 }
467
468 /*
469  * Check which hardware context can be used. Use the existing match
470  * or create a new context descriptor.
471  */
472 static inline uint32_t
473 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
474                    union ixgbe_tx_offload tx_offload)
475 {
476         /* If it matches the currently used context */
477         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
478                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
479                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
480                      & tx_offload.data[0])) &&
481                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
482                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
483                      & tx_offload.data[1]))))
484                 return txq->ctx_curr;
485
486         /* Otherwise, check whether it matches the other cached context */
487         txq->ctx_curr ^= 1;
488         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
489                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
490                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
491                      & tx_offload.data[0])) &&
492                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
493                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
494                      & tx_offload.data[1]))))
495                 return txq->ctx_curr;
496
497         /* Neither cached context matches: a new context descriptor is needed */
498         return IXGBE_CTX_NUM;
499 }
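/*
 * Illustrative note: the queue caches IXGBE_CTX_NUM (two) recently used
 * contexts.  If a burst alternates between, say, plain IPv4/TCP checksum
 * packets and VLAN-tagged ones, each flavour settles into one of the two
 * slots and no further context descriptors are needed; a third distinct
 * offload combination returns IXGBE_CTX_NUM, forcing a rebuild into the
 * slot that ctx_curr was just flipped to.
 */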
500
501 static inline uint32_t
502 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
503 {
504         uint32_t tmp = 0;
505
506         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
507                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
508         if (ol_flags & PKT_TX_IP_CKSUM)
509                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
510         if (ol_flags & PKT_TX_TCP_SEG)
511                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
512         return tmp;
513 }
514
515 static inline uint32_t
516 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
517 {
518         uint32_t cmdtype = 0;
519
520         if (ol_flags & PKT_TX_VLAN_PKT)
521                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
522         if (ol_flags & PKT_TX_TCP_SEG)
523                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
524         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
525                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
526         return cmdtype;
527 }
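/*
 * Worked example (illustrative only): a packet carrying
 * PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM gets IXGBE_ADVTXD_POPTS_IXSM |
 * IXGBE_ADVTXD_POPTS_TXSM in olinfo_status from the first helper above,
 * while the second helper contributes nothing because no VLAN, TSO or
 * outer-IP flag is set.
 */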
528
529 /* Default RS bit threshold values */
530 #ifndef DEFAULT_TX_RS_THRESH
531 #define DEFAULT_TX_RS_THRESH   32
532 #endif
533 #ifndef DEFAULT_TX_FREE_THRESH
534 #define DEFAULT_TX_FREE_THRESH 32
535 #endif
536
537 /* Reset transmit descriptors after they have been used */
538 static inline int
539 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
540 {
541         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
542         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
543         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
544         uint16_t nb_tx_desc = txq->nb_tx_desc;
545         uint16_t desc_to_clean_to;
546         uint16_t nb_tx_to_clean;
547         uint32_t status;
548
549         /* Determine the last descriptor needing to be cleaned */
550         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
551         if (desc_to_clean_to >= nb_tx_desc)
552                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
553
554         /* Check to make sure the last descriptor to clean is done */
555         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
556         status = txr[desc_to_clean_to].wb.status;
557         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
558                 PMD_TX_FREE_LOG(DEBUG,
559                                 "TX descriptor %4u is not done "
560                                 "(port=%d queue=%d)",
561                                 desc_to_clean_to,
562                                 txq->port_id, txq->queue_id);
563                 /* Failed to clean any descriptors, better luck next time */
564                 return -(1);
565         }
566
567         /* Figure out how many descriptors will be cleaned */
568         if (last_desc_cleaned > desc_to_clean_to)
569                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
570                                                         desc_to_clean_to);
571         else
572                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
573                                                 last_desc_cleaned);
574
575         PMD_TX_FREE_LOG(DEBUG,
576                         "Cleaning %4u TX descriptors: %4u to %4u "
577                         "(port=%d queue=%d)",
578                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
579                         txq->port_id, txq->queue_id);
580
581         /*
582          * The last descriptor to clean is done, so that means all the
583          * descriptors from the last descriptor that was cleaned
584          * up to the last descriptor with the RS bit set
585          * are done. Only reset the threshold descriptor.
586          */
587         txr[desc_to_clean_to].wb.status = 0;
588
589         /* Update the txq to reflect the last descriptor that was cleaned */
590         txq->last_desc_cleaned = desc_to_clean_to;
591         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
592
593         /* No Error */
594         return 0;
595 }
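/*
 * Worked example (illustrative only): with nb_tx_desc = 512,
 * tx_rs_thresh = 32 and last_desc_cleaned = 479, desc_to_clean_to is
 * first computed as 511 and then adjusted to the last descriptor of the
 * packet ending there.  If that descriptor's DD bit is set, 32
 * descriptors are reclaimed and last_desc_cleaned becomes 511; on the
 * next call the computation wraps around (511 + 32 - 512 = 31).
 */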
596
597 uint16_t
598 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
599                 uint16_t nb_pkts)
600 {
601         struct ixgbe_tx_queue *txq;
602         struct ixgbe_tx_entry *sw_ring;
603         struct ixgbe_tx_entry *txe, *txn;
604         volatile union ixgbe_adv_tx_desc *txr;
605         volatile union ixgbe_adv_tx_desc *txd, *txp;
606         struct rte_mbuf     *tx_pkt;
607         struct rte_mbuf     *m_seg;
608         uint64_t buf_dma_addr;
609         uint32_t olinfo_status;
610         uint32_t cmd_type_len;
611         uint32_t pkt_len;
612         uint16_t slen;
613         uint64_t ol_flags;
614         uint16_t tx_id;
615         uint16_t tx_last;
616         uint16_t nb_tx;
617         uint16_t nb_used;
618         uint64_t tx_ol_req;
619         uint32_t ctx = 0;
620         uint32_t new_ctx;
621         union ixgbe_tx_offload tx_offload;
622
623         tx_offload.data[0] = 0;
624         tx_offload.data[1] = 0;
625         txq = tx_queue;
626         sw_ring = txq->sw_ring;
627         txr     = txq->tx_ring;
628         tx_id   = txq->tx_tail;
629         txe = &sw_ring[tx_id];
630         txp = NULL;
631
632         /* Determine if the descriptor ring needs to be cleaned. */
633         if (txq->nb_tx_free < txq->tx_free_thresh)
634                 ixgbe_xmit_cleanup(txq);
635
636         rte_prefetch0(&txe->mbuf->pool);
637
638         /* TX loop */
639         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
640                 new_ctx = 0;
641                 tx_pkt = *tx_pkts++;
642                 pkt_len = tx_pkt->pkt_len;
643
644                 /*
645                  * Determine how many (if any) context descriptors
646                  * are needed for offload functionality.
647                  */
648                 ol_flags = tx_pkt->ol_flags;
649
650                 /* If hardware offload required */
651                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
652                 if (tx_ol_req) {
653                         tx_offload.l2_len = tx_pkt->l2_len;
654                         tx_offload.l3_len = tx_pkt->l3_len;
655                         tx_offload.l4_len = tx_pkt->l4_len;
656                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
657                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
658                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
659                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
660
661                         /* Check whether a new context must be built or the existing one reused. */
662                         ctx = what_advctx_update(txq, tx_ol_req,
663                                 tx_offload);
664                         /* Only allocate a context descriptor if required */
665                         new_ctx = (ctx == IXGBE_CTX_NUM);
666                         ctx = txq->ctx_curr;
667                 }
668
669                 /*
670                  * Keep track of how many descriptors are used this loop.
671                  * This will always be the number of segments plus the number
672                  * of context descriptors required to transmit the packet.
673                  */
674                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
675
676                 if (txp != NULL &&
677                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
678                         /* set RS on the previous packet in the burst */
679                         txp->read.cmd_type_len |=
680                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
681
682                 /*
683                  * The number of descriptors that must be allocated for a
684                  * packet is the number of segments of that packet, plus 1
685                  * Context Descriptor for the hardware offload, if any.
686                  * Determine the last TX descriptor to allocate in the TX ring
687                  * for the packet, starting from the current position (tx_id)
688                  * in the ring.
689                  */
690                 tx_last = (uint16_t) (tx_id + nb_used - 1);
691
692                 /* Circular ring */
693                 if (tx_last >= txq->nb_tx_desc)
694                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
695
696                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
697                            " tx_first=%u tx_last=%u",
698                            (unsigned) txq->port_id,
699                            (unsigned) txq->queue_id,
700                            (unsigned) pkt_len,
701                            (unsigned) tx_id,
702                            (unsigned) tx_last);
703
704                 /*
705                  * Make sure there are enough TX descriptors available to
706                  * transmit the entire packet.
707                  * nb_used better be less than or equal to txq->tx_rs_thresh
708                  */
709                 if (nb_used > txq->nb_tx_free) {
710                         PMD_TX_FREE_LOG(DEBUG,
711                                         "Not enough free TX descriptors "
712                                         "nb_used=%4u nb_free=%4u "
713                                         "(port=%d queue=%d)",
714                                         nb_used, txq->nb_tx_free,
715                                         txq->port_id, txq->queue_id);
716
717                         if (ixgbe_xmit_cleanup(txq) != 0) {
718                                 /* Could not clean any descriptors */
719                                 if (nb_tx == 0)
720                                         return 0;
721                                 goto end_of_tx;
722                         }
723
724                         /* nb_used better be <= txq->tx_rs_thresh */
725                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
726                                 PMD_TX_FREE_LOG(DEBUG,
727                                         "The number of descriptors needed to "
728                                         "transmit the packet exceeds the "
729                                         "RS bit threshold. This will impact "
730                                         "performance. "
731                                         "nb_used=%4u nb_free=%4u "
732                                         "tx_rs_thresh=%4u. "
733                                         "(port=%d queue=%d)",
734                                         nb_used, txq->nb_tx_free,
735                                         txq->tx_rs_thresh,
736                                         txq->port_id, txq->queue_id);
737                                 /*
738                                  * Loop here until there are enough TX
739                                  * descriptors or until the ring cannot be
740                                  * cleaned.
741                                  */
742                                 while (nb_used > txq->nb_tx_free) {
743                                         if (ixgbe_xmit_cleanup(txq) != 0) {
744                                                 /*
745                                                  * Could not clean any
746                                                  * descriptors
747                                                  */
748                                                 if (nb_tx == 0)
749                                                         return 0;
750                                                 goto end_of_tx;
751                                         }
752                                 }
753                         }
754                 }
755
756                 /*
757                  * By now there are enough free TX descriptors to transmit
758                  * the packet.
759                  */
760
761                 /*
762                  * Set common flags of all TX Data Descriptors.
763                  *
764                  * The following bits must be set in all Data Descriptors:
765                  *   - IXGBE_ADVTXD_DTYP_DATA
766                  *   - IXGBE_ADVTXD_DCMD_DEXT
767                  *
768                  * The following bits must be set in the first Data Descriptor
769                  * and are ignored in the other ones:
770                  *   - IXGBE_ADVTXD_DCMD_IFCS
771                  *   - IXGBE_ADVTXD_MAC_1588
772                  *   - IXGBE_ADVTXD_DCMD_VLE
773                  *
774                  * The following bits must only be set in the last Data
775                  * Descriptor:
776                  *   - IXGBE_TXD_CMD_EOP
777                  *
778                  * The following bits can be set in any Data Descriptor, but
779                  * are only set in the last Data Descriptor:
780                  *   - IXGBE_TXD_CMD_RS
781                  */
782                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
783                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
784
785 #ifdef RTE_LIBRTE_IEEE1588
786                 if (ol_flags & PKT_TX_IEEE1588_TMST)
787                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
788 #endif
789
790                 olinfo_status = 0;
791                 if (tx_ol_req) {
792
793                         if (ol_flags & PKT_TX_TCP_SEG) {
794                                 /* when TSO is on, the paylen in the descriptor
795                                  * is not the packet len but the TCP payload len */
796                                 pkt_len -= (tx_offload.l2_len +
797                                         tx_offload.l3_len + tx_offload.l4_len);
798                         }
799
800                         /*
801                          * Setup the TX Advanced Context Descriptor if required
802                          */
803                         if (new_ctx) {
804                                 volatile struct ixgbe_adv_tx_context_desc *
805                                     ctx_txd;
806
807                                 ctx_txd = (volatile struct
808                                     ixgbe_adv_tx_context_desc *)
809                                     &txr[tx_id];
810
811                                 txn = &sw_ring[txe->next_id];
812                                 rte_prefetch0(&txn->mbuf->pool);
813
814                                 if (txe->mbuf != NULL) {
815                                         rte_pktmbuf_free_seg(txe->mbuf);
816                                         txe->mbuf = NULL;
817                                 }
818
819                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
820                                         tx_offload);
821
822                                 txe->last_id = tx_last;
823                                 tx_id = txe->next_id;
824                                 txe = txn;
825                         }
826
827                         /*
828                          * Set up the TX Advanced Data Descriptor.
829                          * This path is taken whether a new context
830                          * descriptor was built or an existing one is reused.
831                          */
832                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
833                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
834                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
835                 }
836
837                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
838
839                 m_seg = tx_pkt;
840                 do {
841                         txd = &txr[tx_id];
842                         txn = &sw_ring[txe->next_id];
843                         rte_prefetch0(&txn->mbuf->pool);
844
845                         if (txe->mbuf != NULL)
846                                 rte_pktmbuf_free_seg(txe->mbuf);
847                         txe->mbuf = m_seg;
848
849                         /*
850                          * Set up Transmit Data Descriptor.
851                          */
852                         slen = m_seg->data_len;
853                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
854                         txd->read.buffer_addr =
855                                 rte_cpu_to_le_64(buf_dma_addr);
856                         txd->read.cmd_type_len =
857                                 rte_cpu_to_le_32(cmd_type_len | slen);
858                         txd->read.olinfo_status =
859                                 rte_cpu_to_le_32(olinfo_status);
860                         txe->last_id = tx_last;
861                         tx_id = txe->next_id;
862                         txe = txn;
863                         m_seg = m_seg->next;
864                 } while (m_seg != NULL);
865
866                 /*
867                  * The last packet data descriptor needs End Of Packet (EOP)
868                  */
869                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
870                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
871                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
872
873                 /* Set RS bit only on threshold packets' last descriptor */
874                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
875                         PMD_TX_FREE_LOG(DEBUG,
876                                         "Setting RS bit on TXD id="
877                                         "%4u (port=%d queue=%d)",
878                                         tx_last, txq->port_id, txq->queue_id);
879
880                         cmd_type_len |= IXGBE_TXD_CMD_RS;
881
882                         /* Update txq RS bit counters */
883                         txq->nb_tx_used = 0;
884                         txp = NULL;
885                 } else
886                         txp = txd;
887
888                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
889         }
890
891 end_of_tx:
892         /* set RS on last packet in the burst */
893         if (txp != NULL)
894                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
895
896         rte_wmb();
897
898         /*
899          * Set the Transmit Descriptor Tail (TDT)
900          */
901         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
902                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
903                    (unsigned) tx_id, (unsigned) nb_tx);
904         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
905         txq->tx_tail = tx_id;
906
907         return nb_tx;
908 }
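/*
 * Illustrative application-side sketch (not part of the driver): to reach
 * the TSO path handled above, a packet would be prepared roughly as
 * follows before calling rte_eth_tx_burst() (header checksum preparation
 * omitted, and 1460 is just an example MSS):
 *
 *   m->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM | PKT_TX_TCP_SEG;
 *   m->l2_len = sizeof(struct ether_hdr);
 *   m->l3_len = sizeof(struct ipv4_hdr);
 *   m->l4_len = sizeof(struct tcp_hdr);
 *   m->tso_segsz = 1460;
 *
 * The first such packet on a queue builds a context descriptor; later
 * packets with the same offload layout reuse it via what_advctx_update().
 */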
909
910 /*********************************************************************
911  *
912  *  RX functions
913  *
914  **********************************************************************/
915
916 #define IXGBE_PACKET_TYPE_ETHER                         0X00
917 #define IXGBE_PACKET_TYPE_IPV4                          0X01
918 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
919 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
920 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
921 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
922 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
923 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
924 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
925 #define IXGBE_PACKET_TYPE_IPV6                          0X04
926 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
927 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
928 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
929 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
930 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
931 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
932 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
933 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
934 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
935 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
936 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
937 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
938 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
939 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
940 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
941 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
942 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
943 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
944 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
945 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
946 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
947 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
948 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
949
950 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
951 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
952 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
953 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
954 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
955 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
956 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
957 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
958 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
959 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
960 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
961 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
962 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
963 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
964 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
965 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
966 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
967 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
968 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
969 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
970 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
971 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
972 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
973
974 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
975 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
976 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
977 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
978 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
979 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
980 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
981 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
982 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
983 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
984 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
985 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
986 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
987 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
988 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
989 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
990 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
991 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
992 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
993 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
994 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
995 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
996 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
997
998 #define IXGBE_PACKET_TYPE_MAX               0X80
999 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
1000 #define IXGBE_PACKET_TYPE_SHIFT             0X04
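/*
 * Illustrative note: the values above mirror the layout of the RX
 * descriptor's packet-type field, e.g. IXGBE_PACKET_TYPE_IPV4_TCP (0x11)
 * is the IPv4 bit (0x01) combined with the TCP bit (0x10), which is why
 * the lookup tables below can be indexed directly with the masked
 * pkt_info value.
 */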
1001
1002 /* @note: fix ixgbe_dev_supported_ptypes_get() if anything changes here. */
1003 static inline uint32_t
1004 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1005 {
1006         /**
1007          * Use two different tables, one for normal packets and one for
1008          * tunnel packets, to save space.
1009          */
1010         static const uint32_t
1011                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1012                 [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1013                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1014                         RTE_PTYPE_L3_IPV4,
1015                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1016                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1017                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1018                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1019                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1020                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1021                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1022                         RTE_PTYPE_L3_IPV4_EXT,
1023                 [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1024                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1025                 [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1026                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1027                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1028                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1029                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1030                         RTE_PTYPE_L3_IPV6,
1031                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1032                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1033                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1034                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1035                 [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1036                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1037                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1038                         RTE_PTYPE_L3_IPV6_EXT,
1039                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1040                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1041                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1042                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1043                 [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1044                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1045                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1046                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1047                         RTE_PTYPE_INNER_L3_IPV6,
1048                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1049                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1050                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1051                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1052                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1053                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1054                 [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1055                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1056                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1057                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1058                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1059                         RTE_PTYPE_INNER_L3_IPV6,
1060                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1061                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1062                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1063                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1064                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1065                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1066                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1067                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1068                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1069                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1070                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1071                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1072                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1073                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1074                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1075                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1076                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1077                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1078                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1079                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1080                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1081                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1082                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1083                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1084                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1085                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1086                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1087                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1088                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1089                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1090                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1091                         RTE_PTYPE_L2_ETHER |
1092                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1093                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1094         };
1095
1096         static const uint32_t
1097                 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1098                 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1099                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1100                         RTE_PTYPE_INNER_L2_ETHER,
1101                 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1102                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1103                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1104                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1105                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1106                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1107                 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1108                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1109                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1110                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1111                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1112                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1113                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1114                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1115                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1116                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1117                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1118                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1119                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1120                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1121                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1122                         RTE_PTYPE_INNER_L4_TCP,
1123                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1124                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1125                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1126                         RTE_PTYPE_INNER_L4_TCP,
1127                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1128                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1129                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1130                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1131                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1132                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1133                         RTE_PTYPE_INNER_L4_TCP,
1134                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1135                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1136                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1137                         RTE_PTYPE_INNER_L3_IPV4,
1138                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1139                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1140                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1141                         RTE_PTYPE_INNER_L4_UDP,
1142                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1143                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1144                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1145                         RTE_PTYPE_INNER_L4_UDP,
1146                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1147                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1148                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1149                         RTE_PTYPE_INNER_L4_SCTP,
1150                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1151                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1152                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1153                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1154                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1155                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1156                         RTE_PTYPE_INNER_L4_UDP,
1157                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1158                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1159                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1160                         RTE_PTYPE_INNER_L4_SCTP,
1161                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1162                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1163                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1164                         RTE_PTYPE_INNER_L3_IPV4,
1165                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1166                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1167                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1168                         RTE_PTYPE_INNER_L4_SCTP,
1169                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1170                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1171                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1172                         RTE_PTYPE_INNER_L4_SCTP,
1173                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1174                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1175                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1176                         RTE_PTYPE_INNER_L4_TCP,
1177                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1178                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1179                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1180                         RTE_PTYPE_INNER_L4_UDP,
1181
1182                 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1183                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1184                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1185                 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1186                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1187                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1188                         RTE_PTYPE_INNER_L3_IPV4,
1189                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1190                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1191                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1192                         RTE_PTYPE_INNER_L3_IPV4_EXT,
1193                 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1194                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1195                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1196                         RTE_PTYPE_INNER_L3_IPV6,
1197                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1198                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1199                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1200                         RTE_PTYPE_INNER_L3_IPV4,
1201                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1202                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1203                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1204                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1205                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1206                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1207                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1208                         RTE_PTYPE_INNER_L3_IPV4,
1209                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1210                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1211                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1212                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1213                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1214                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1215                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1216                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1217                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1218                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1219                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1220                         RTE_PTYPE_INNER_L3_IPV4,
1221                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1222                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1223                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1224                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1225                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1226                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1227                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1228                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1229                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1230                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1231                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1232                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1233                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1234                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1235                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1236                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1237                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1238                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1239                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1240                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1241                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1242                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1243                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1244                         RTE_PTYPE_INNER_L3_IPV4,
1245                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1246                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1247                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1248                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1249                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1250                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1251                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1252                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1253                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1254                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1255                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1256                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1257                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1258                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1259                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1260                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1261                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1262                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1263                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1264                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1265                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1266                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1267                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1268                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1269                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1270                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1271                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1272                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1273         };
1274
1275         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1276                 return RTE_PTYPE_UNKNOWN;
1277
1278         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1279
1280         /* For tunnel packet */
1281         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1282                 /* Remove the tunnel bit to save space. */
1283                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1284                 return ptype_table_tn[pkt_info];
1285         }
1286
1287         /**
1288          * For x550, if the packet is not a tunnel packet,
1289          * the tunnel type bits should be set to 0.
1290          * Reuse the 82599 mask.
1291          */
1292         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1293
1294         return ptype_table[pkt_info];
1295 }
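
/*
 * Illustrative sketch (not part of the driver): one way an application might
 * interpret the packet_type values built from the tables above, using only
 * the generic RTE_PTYPE_* masks from rte_mbuf.h. The helper name is
 * hypothetical.
 */
static inline int
example_is_tunneled_tcp(const struct rte_mbuf *m)
{
        uint32_t ptype = m->packet_type;

        /* Tunnel-related bits are set for the NVGRE/VXLAN entries above. */
        if (!RTE_ETH_IS_TUNNEL_PKT(ptype))
                return 0;

        /* The inner L4 protocol is encoded in the INNER_L4 field. */
        return (ptype & RTE_PTYPE_INNER_L4_MASK) == RTE_PTYPE_INNER_L4_TCP;
}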
1296
1297 static inline uint64_t
1298 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1299 {
1300         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1301                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1302                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1303                 PKT_RX_RSS_HASH, 0, 0, 0,
1304                 0, 0, 0,  PKT_RX_FDIR,
1305         };
1306 #ifdef RTE_LIBRTE_IEEE1588
1307         static uint64_t ip_pkt_etqf_map[8] = {
1308                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1309                 0, 0, 0, 0,
1310         };
1311
1312         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1313                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1314                                 ip_rss_types_map[pkt_info & 0XF];
1315         else
1316                 return ip_rss_types_map[pkt_info & 0XF];
1317 #else
1318         return ip_rss_types_map[pkt_info & 0XF];
1319 #endif
1320 }
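
/*
 * Illustrative sketch (not part of the driver): how an application could
 * consume the flags produced above. The helper name is hypothetical.
 */
static inline uint32_t
example_rx_hash(const struct rte_mbuf *m)
{
        if (m->ol_flags & PKT_RX_RSS_HASH)
                return m->hash.rss;               /* RSS hash from the NIC */
        else if (m->ol_flags & PKT_RX_FDIR)
                return m->hash.fdir.hash;         /* Flow Director hash */
        return 0;                                 /* no hardware hash */
}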
1321
1322 static inline uint64_t
1323 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1324 {
1325         uint64_t pkt_flags;
1326
1327         /*
1328          * Check only whether a VLAN tag is present.
1329          * Do not check whether the L3/L4 Rx checksum was computed by the
1330          * NIC; that is indicated by the rte_eth_rxmode.hw_ip_checksum flag.
1331          */
1332         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1333
1334 #ifdef RTE_LIBRTE_IEEE1588
1335         if (rx_status & IXGBE_RXD_STAT_TMST)
1336                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1337 #endif
1338         return pkt_flags;
1339 }
1340
1341 static inline uint64_t
1342 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1343 {
1344         uint64_t pkt_flags;
1345
1346         /*
1347          * Bit 31: IPE, IPv4 checksum error
1348          * Bit 30: L4I, L4 integrity error
1349          */
1350         static uint64_t error_to_pkt_flags_map[4] = {
1351                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1352                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1353                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1354                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1355         };
1356         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1357                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1358
1359         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1360             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1361                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1362         }
1363
1364         return pkt_flags;
1365 }
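
/*
 * Illustrative sketch (not part of the driver): an application-side check of
 * the checksum flags derived above. The helper name is hypothetical.
 */
static inline int
example_rx_cksum_ok(const struct rte_mbuf *m)
{
        /* Both the IP and L4 checksums must have been validated as good. */
        return (m->ol_flags &
                (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD)) == 0;
}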
1366
1367 /*
1368  * LOOK_AHEAD defines how many descriptor statuses to check beyond the
1369  * current descriptor.
1370  * It must be a #define (compile-time constant) for optimal performance.
1371  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1372  * function only works with LOOK_AHEAD=8.
1373  */
1374 #define LOOK_AHEAD 8
1375 #if (LOOK_AHEAD != 8)
1376 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1377 #endif
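
/*
 * Worked example (editor's illustration): if only the first 5 descriptors of
 * a group of 8 have their DD bit set, the scan below counts nb_dd = 5,
 * harvests those 5 packets and then stops, leaving the remaining descriptors
 * for the next invocation.
 */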
1378 static inline int
1379 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1380 {
1381         volatile union ixgbe_adv_rx_desc *rxdp;
1382         struct ixgbe_rx_entry *rxep;
1383         struct rte_mbuf *mb;
1384         uint16_t pkt_len;
1385         uint64_t pkt_flags;
1386         int nb_dd;
1387         uint32_t s[LOOK_AHEAD];
1388         uint32_t pkt_info[LOOK_AHEAD];
1389         int i, j, nb_rx = 0;
1390         uint32_t status;
1391         uint64_t vlan_flags = rxq->vlan_flags;
1392
1393         /* get references to current descriptor and S/W ring entry */
1394         rxdp = &rxq->rx_ring[rxq->rx_tail];
1395         rxep = &rxq->sw_ring[rxq->rx_tail];
1396
1397         status = rxdp->wb.upper.status_error;
1398         /* check to make sure there is at least 1 packet to receive */
1399         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1400                 return 0;
1401
1402         /*
1403          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1404          * reference packets that are ready to be received.
1405          */
1406         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1407              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1408                 /* Read desc statuses backwards to avoid race condition */
1409                 for (j = 0; j < LOOK_AHEAD; j++)
1410                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1411
1412                 rte_smp_rmb();
1413
1414                 /* Count contiguous descriptors whose DD status bit is set */
1415                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1416                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1417                         ;
1418
1419                 for (j = 0; j < nb_dd; j++)
1420                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1421                                                        lo_dword.data);
1422
1423                 nb_rx += nb_dd;
1424
1425                 /* Translate descriptor info to mbuf format */
1426                 for (j = 0; j < nb_dd; ++j) {
1427                         mb = rxep[j].mbuf;
1428                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1429                                   rxq->crc_len;
1430                         mb->data_len = pkt_len;
1431                         mb->pkt_len = pkt_len;
1432                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1433
1434                         /* convert descriptor fields to rte mbuf flags */
1435                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1436                                 vlan_flags);
1437                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1438                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1439                                         ((uint16_t)pkt_info[j]);
1440                         mb->ol_flags = pkt_flags;
1441                         mb->packet_type =
1442                                 ixgbe_rxd_pkt_info_to_pkt_type
1443                                         (pkt_info[j], rxq->pkt_type_mask);
1444
1445                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1446                                 mb->hash.rss = rte_le_to_cpu_32(
1447                                     rxdp[j].wb.lower.hi_dword.rss);
1448                         else if (pkt_flags & PKT_RX_FDIR) {
1449                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1450                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1451                                     IXGBE_ATR_HASH_MASK;
1452                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1453                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1454                         }
1455                 }
1456
1457                 /* Move mbuf pointers from the S/W ring to the stage */
1458                 for (j = 0; j < LOOK_AHEAD; ++j) {
1459                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1460                 }
1461
1462                 /* stop scanning if not every descriptor in this group was done */
1463                 if (nb_dd != LOOK_AHEAD)
1464                         break;
1465         }
1466
1467         /* clear software ring entries so we can cleanup correctly */
1468         for (i = 0; i < nb_rx; ++i) {
1469                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1470         }
1471
1472
1473         return nb_rx;
1474 }
1475
1476 static inline int
1477 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1478 {
1479         volatile union ixgbe_adv_rx_desc *rxdp;
1480         struct ixgbe_rx_entry *rxep;
1481         struct rte_mbuf *mb;
1482         uint16_t alloc_idx;
1483         __le64 dma_addr;
1484         int diag, i;
1485
1486         /* allocate buffers in bulk directly into the S/W ring */
1487         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1488         rxep = &rxq->sw_ring[alloc_idx];
1489         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1490                                     rxq->rx_free_thresh);
1491         if (unlikely(diag != 0))
1492                 return -ENOMEM;
1493
1494         rxdp = &rxq->rx_ring[alloc_idx];
1495         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1496                 /* populate the static rte mbuf fields */
1497                 mb = rxep[i].mbuf;
1498                 if (reset_mbuf) {
1499                         mb->next = NULL;
1500                         mb->nb_segs = 1;
1501                         mb->port = rxq->port_id;
1502                 }
1503
1504                 rte_mbuf_refcnt_set(mb, 1);
1505                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1506
1507                 /* populate the descriptors */
1508                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1509                 rxdp[i].read.hdr_addr = 0;
1510                 rxdp[i].read.pkt_addr = dma_addr;
1511         }
1512
1513         /* update state of internal queue structure */
1514         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1515         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1516                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1517
1518         /* no errors */
1519         return 0;
1520 }
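
/*
 * Worked example (editor's illustration), assuming rx_free_trigger starts at
 * rx_free_thresh - 1 when the queue is reset: with nb_rx_desc = 128 and
 * rx_free_thresh = 32, alloc_idx is 0 on the first refill (descriptors 0-31
 * are replenished) and rx_free_trigger advances 31 -> 63 -> 95 -> 127; after
 * descriptors 96-127 are refilled the trigger would reach 159 >= 128, so it
 * wraps back to 31.
 */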
1521
1522 static inline uint16_t
1523 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1524                          uint16_t nb_pkts)
1525 {
1526         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1527         int i;
1528
1529         /* how many packets are ready to return? */
1530         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1531
1532         /* copy mbuf pointers to the application's packet list */
1533         for (i = 0; i < nb_pkts; ++i)
1534                 rx_pkts[i] = stage[i];
1535
1536         /* update internal queue state */
1537         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1538         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1539
1540         return nb_pkts;
1541 }
1542
1543 static inline uint16_t
1544 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1545              uint16_t nb_pkts)
1546 {
1547         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1548         uint16_t nb_rx = 0;
1549
1550         /* Any previously recv'd pkts will be returned from the Rx stage */
1551         if (rxq->rx_nb_avail)
1552                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1553
1554         /* Scan the H/W ring for packets to receive */
1555         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1556
1557         /* update internal queue state */
1558         rxq->rx_next_avail = 0;
1559         rxq->rx_nb_avail = nb_rx;
1560         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1561
1562         /* if required, allocate new buffers to replenish descriptors */
1563         if (rxq->rx_tail > rxq->rx_free_trigger) {
1564                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1565
1566                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1567                         int i, j;
1568
1569                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1570                                    "queue_id=%u", (unsigned) rxq->port_id,
1571                                    (unsigned) rxq->queue_id);
1572
1573                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1574                                 rxq->rx_free_thresh;
1575
1576                         /*
1577                          * Need to rewind any previous receives if we cannot
1578                          * allocate new buffers to replenish the old ones.
1579                          */
1580                         rxq->rx_nb_avail = 0;
1581                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1582                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1583                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1584
1585                         return 0;
1586                 }
1587
1588                 /* update tail pointer */
1589                 rte_wmb();
1590                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
1591         }
1592
1593         if (rxq->rx_tail >= rxq->nb_rx_desc)
1594                 rxq->rx_tail = 0;
1595
1596         /* received any packets this loop? */
1597         if (rxq->rx_nb_avail)
1598                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1599
1600         return 0;
1601 }
1602
1603 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1604 uint16_t
1605 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1606                            uint16_t nb_pkts)
1607 {
1608         uint16_t nb_rx;
1609
1610         if (unlikely(nb_pkts == 0))
1611                 return 0;
1612
1613         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1614                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1615
1616         /* request is relatively large, chunk it up */
1617         nb_rx = 0;
1618         while (nb_pkts) {
1619                 uint16_t ret, n;
1620
1621                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1622                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1623                 nb_rx = (uint16_t)(nb_rx + ret);
1624                 nb_pkts = (uint16_t)(nb_pkts - ret);
1625                 if (ret < n)
1626                         break;
1627         }
1628
1629         return nb_rx;
1630 }
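
/*
 * Illustrative usage sketch (not part of the driver): whichever of the
 * receive handlers above the PMD selects, applications reach it through
 * rte_eth_rx_burst(). The helper name and the burst size of 32 are arbitrary
 * examples; real code would process the packets instead of freeing them.
 */
static inline void
example_rx_poll(uint8_t port_id, uint16_t queue_id)
{
        struct rte_mbuf *pkts[32];
        uint16_t i, nb;

        nb = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
        for (i = 0; i < nb; i++)
                rte_pktmbuf_free(pkts[i]);
}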
1631
1632 uint16_t
1633 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1634                 uint16_t nb_pkts)
1635 {
1636         struct ixgbe_rx_queue *rxq;
1637         volatile union ixgbe_adv_rx_desc *rx_ring;
1638         volatile union ixgbe_adv_rx_desc *rxdp;
1639         struct ixgbe_rx_entry *sw_ring;
1640         struct ixgbe_rx_entry *rxe;
1641         struct rte_mbuf *rxm;
1642         struct rte_mbuf *nmb;
1643         union ixgbe_adv_rx_desc rxd;
1644         uint64_t dma_addr;
1645         uint32_t staterr;
1646         uint32_t pkt_info;
1647         uint16_t pkt_len;
1648         uint16_t rx_id;
1649         uint16_t nb_rx;
1650         uint16_t nb_hold;
1651         uint64_t pkt_flags;
1652         uint64_t vlan_flags;
1653
1654         nb_rx = 0;
1655         nb_hold = 0;
1656         rxq = rx_queue;
1657         rx_id = rxq->rx_tail;
1658         rx_ring = rxq->rx_ring;
1659         sw_ring = rxq->sw_ring;
1660         vlan_flags = rxq->vlan_flags;
1661         while (nb_rx < nb_pkts) {
1662                 /*
1663                  * The order of operations here is important as the DD status
1664                  * bit must not be read after any other descriptor fields.
1665                  * rx_ring and rxdp are pointing to volatile data so the order
1666                  * of accesses cannot be reordered by the compiler. If they were
1667                  * not volatile, they could be reordered which could lead to
1668                  * using invalid descriptor fields when read from rxd.
1669                  */
1670                 rxdp = &rx_ring[rx_id];
1671                 staterr = rxdp->wb.upper.status_error;
1672                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1673                         break;
1674                 rxd = *rxdp;
1675
1676                 /*
1677                  * End of packet.
1678                  *
1679                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1680                  * is likely to be invalid and to be dropped by the various
1681                  * validation checks performed by the network stack.
1682                  *
1683                  * Allocate a new mbuf to replenish the RX ring descriptor.
1684                  * If the allocation fails:
1685                  *    - arrange for that RX descriptor to be the first one
1686                  *      being parsed the next time the receive function is
1687                  *      invoked [on the same queue].
1688                  *
1689                  *    - Stop parsing the RX ring and return immediately.
1690                  *
1691                  * This policy does not drop the packet received in the RX
1692                  * descriptor for which the allocation of a new mbuf failed.
1693                  * Thus, it allows that packet to be retrieved later, once
1694                  * mbufs have been freed in the meantime.
1695                  * As a side effect, holding RX descriptors instead of
1696                  * systematically giving them back to the NIC may lead to
1697                  * RX ring exhaustion situations.
1698                  * However, the NIC can gracefully prevent such situations
1699                  * from happening by sending "back-pressure" flow control
1700                  * frames to its peer(s).
1701                  */
1702                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1703                            "ext_err_stat=0x%08x pkt_len=%u",
1704                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1705                            (unsigned) rx_id, (unsigned) staterr,
1706                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1707
1708                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1709                 if (nmb == NULL) {
1710                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1711                                    "queue_id=%u", (unsigned) rxq->port_id,
1712                                    (unsigned) rxq->queue_id);
1713                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1714                         break;
1715                 }
1716
1717                 nb_hold++;
1718                 rxe = &sw_ring[rx_id];
1719                 rx_id++;
1720                 if (rx_id == rxq->nb_rx_desc)
1721                         rx_id = 0;
1722
1723                 /* Prefetch next mbuf while processing current one. */
1724                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1725
1726                 /*
1727                  * When next RX descriptor is on a cache-line boundary,
1728                  * prefetch the next 4 RX descriptors and the next 8 pointers
1729                  * to mbufs.
1730                  */
1731                 if ((rx_id & 0x3) == 0) {
1732                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1733                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1734                 }
1735
1736                 rxm = rxe->mbuf;
1737                 rxe->mbuf = nmb;
1738                 dma_addr =
1739                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1740                 rxdp->read.hdr_addr = 0;
1741                 rxdp->read.pkt_addr = dma_addr;
1742
1743                 /*
1744                  * Initialize the returned mbuf.
1745                  * 1) setup generic mbuf fields:
1746                  *    - number of segments,
1747                  *    - next segment,
1748                  *    - packet length,
1749                  *    - RX port identifier.
1750                  * 2) integrate hardware offload data, if any:
1751                  *    - RSS flag & hash,
1752                  *    - IP checksum flag,
1753                  *    - VLAN TCI, if any,
1754                  *    - error flags.
1755                  */
1756                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1757                                       rxq->crc_len);
1758                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1759                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1760                 rxm->nb_segs = 1;
1761                 rxm->next = NULL;
1762                 rxm->pkt_len = pkt_len;
1763                 rxm->data_len = pkt_len;
1764                 rxm->port = rxq->port_id;
1765
1766                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1767                 /* Only valid if PKT_RX_VLAN_PKT is set in pkt_flags */
1768                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1769
1770                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1771                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1772                 pkt_flags = pkt_flags |
1773                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1774                 rxm->ol_flags = pkt_flags;
1775                 rxm->packet_type =
1776                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1777                                                        rxq->pkt_type_mask);
1778
1779                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1780                         rxm->hash.rss = rte_le_to_cpu_32(
1781                                                 rxd.wb.lower.hi_dword.rss);
1782                 else if (pkt_flags & PKT_RX_FDIR) {
1783                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1784                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1785                                         IXGBE_ATR_HASH_MASK;
1786                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1787                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1788                 }
1789                 /*
1790                  * Store the mbuf address into the next entry of the array
1791                  * of returned packets.
1792                  */
1793                 rx_pkts[nb_rx++] = rxm;
1794         }
1795         rxq->rx_tail = rx_id;
1796
1797         /*
1798          * If the number of free RX descriptors is greater than the RX free
1799          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1800          * register.
1801          * Update the RDT with the value of the last processed RX descriptor
1802          * minus 1, to guarantee that the RDT register is never equal to the
1803          * RDH register, which creates a "full" ring situation from the
1804          * hardware point of view...
1805          */
1806         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1807         if (nb_hold > rxq->rx_free_thresh) {
1808                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1809                            "nb_hold=%u nb_rx=%u",
1810                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1811                            (unsigned) rx_id, (unsigned) nb_hold,
1812                            (unsigned) nb_rx);
1813                 rx_id = (uint16_t) ((rx_id == 0) ?
1814                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1815                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1816                 nb_hold = 0;
1817         }
1818         rxq->nb_rx_hold = nb_hold;
1819         return nb_rx;
1820 }
1821
1822 /**
1823  * Detect an RSC descriptor.
1824  */
1825 static inline uint32_t
1826 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1827 {
1828         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1829                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1830 }
1831
1832 /**
1833  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1834  *
1835  * Fill the following info in the HEAD buffer of the Rx cluster:
1836  *    - RX port identifier
1837  *    - hardware offload data, if any:
1838  *      - RSS flag & hash
1839  *      - IP checksum flag
1840  *      - VLAN TCI, if any
1841  *      - error flags
1842  * @head HEAD of the packet cluster
1843  * @desc HW descriptor to get data from
1844  * @rxq Pointer to the Rx queue
1845  */
1846 static inline void
1847 ixgbe_fill_cluster_head_buf(
1848         struct rte_mbuf *head,
1849         union ixgbe_adv_rx_desc *desc,
1850         struct ixgbe_rx_queue *rxq,
1851         uint32_t staterr)
1852 {
1853         uint32_t pkt_info;
1854         uint64_t pkt_flags;
1855
1856         head->port = rxq->port_id;
1857
1858         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1859          * set in the pkt_flags field.
1860          */
1861         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1862         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1863         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1864         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1865         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1866         head->ol_flags = pkt_flags;
1867         head->packet_type =
1868                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1869
1870         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1871                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1872         else if (pkt_flags & PKT_RX_FDIR) {
1873                 head->hash.fdir.hash =
1874                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1875                                                           & IXGBE_ATR_HASH_MASK;
1876                 head->hash.fdir.id =
1877                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1878         }
1879 }
1880
1881 /**
1882  * ixgbe_recv_pkts_lro - receive handler for the scattered and LRO cases.
1883  *
1884  * @rx_queue Rx queue handle
1885  * @rx_pkts table of received packets
1886  * @nb_pkts size of rx_pkts table
1887  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1888  *
1889  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1890  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1891  *
1892  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1893  * 1) When non-EOP RSC completion arrives:
1894  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1895  *       segment's data length.
1896  *    b) Set the "next" pointer of the current segment to point to the segment
1897  *       at the NEXTP index.
1898  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1899  *       in the sw_rsc_ring.
1900  * 2) When EOP arrives we just update the cluster's total length and offload
1901  *    flags and deliver the cluster up to the upper layers. In our case - put it
1902  *    in the rx_pkts table.
1903  *
1904  * Returns the number of received packets/clusters (according to the "bulk
1905  * receive" interface).
1906  */
1907 static inline uint16_t
1908 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1909                     bool bulk_alloc)
1910 {
1911         struct ixgbe_rx_queue *rxq = rx_queue;
1912         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1913         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1914         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1915         uint16_t rx_id = rxq->rx_tail;
1916         uint16_t nb_rx = 0;
1917         uint16_t nb_hold = rxq->nb_rx_hold;
1918         uint16_t prev_id = rxq->rx_tail;
1919
1920         while (nb_rx < nb_pkts) {
1921                 bool eop;
1922                 struct ixgbe_rx_entry *rxe;
1923                 struct ixgbe_scattered_rx_entry *sc_entry;
1924                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1925                 struct ixgbe_rx_entry *next_rxe = NULL;
1926                 struct rte_mbuf *first_seg;
1927                 struct rte_mbuf *rxm;
1928                 struct rte_mbuf *nmb;
1929                 union ixgbe_adv_rx_desc rxd;
1930                 uint16_t data_len;
1931                 uint16_t next_id;
1932                 volatile union ixgbe_adv_rx_desc *rxdp;
1933                 uint32_t staterr;
1934
1935 next_desc:
1936                 /*
1937                  * The code in this whole file uses the volatile pointer to
1938                  * ensure the read ordering of the status and the rest of the
1939                  * descriptor fields (on the compiler level only!!!). This is so
1940          * UGLY - why not just use the compiler barrier instead? DPDK
1941                  * even has the rte_compiler_barrier() for that.
1942                  *
1943                  * But most importantly this is just wrong because this doesn't
1944                  * ensure memory ordering in a general case at all. For
1945                  * instance, DPDK is supposed to work on Power CPUs where
1946                  * compiler barrier may just not be enough!
1947                  *
1948                  * I tried to write only this function properly to have a
1949                  * starting point (as a part of an LRO/RSC series) but the
1950                  * compiler cursed at me when I tried to cast away the
1951                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
1952                  * keeping it the way it is for now.
1953                  *
1954                  * The code in this file is broken in so many other places and
1955                  * will just not work on a big endian CPU anyway therefore the
1956                  * lines below will have to be revisited together with the rest
1957                  * of the ixgbe PMD.
1958                  *
1959                  * TODO:
1960                  *    - Get rid of "volatile" crap and let the compiler do its
1961                  *      job.
1962                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
1963                  *      memory ordering below.
1964                  */
1965                 rxdp = &rx_ring[rx_id];
1966                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
1967
1968                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
1969                         break;
1970
1971                 rxd = *rxdp;
1972
1973                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1974                                   "staterr=0x%x data_len=%u",
1975                            rxq->port_id, rxq->queue_id, rx_id, staterr,
1976                            rte_le_to_cpu_16(rxd.wb.upper.length));
1977
1978                 if (!bulk_alloc) {
1979                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1980                         if (nmb == NULL) {
1981                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1982                                                   "port_id=%u queue_id=%u",
1983                                            rxq->port_id, rxq->queue_id);
1984
1985                                 rte_eth_devices[rxq->port_id].data->
1986                                                         rx_mbuf_alloc_failed++;
1987                                 break;
1988                         }
1989                 } else if (nb_hold > rxq->rx_free_thresh) {
1990                         uint16_t next_rdt = rxq->rx_free_trigger;
1991
1992                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
1993                                 rte_wmb();
1994                                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
1995                                                     next_rdt);
1996                                 nb_hold -= rxq->rx_free_thresh;
1997                         } else {
1998                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1999                                                   "port_id=%u queue_id=%u",
2000                                            rxq->port_id, rxq->queue_id);
2001
2002                                 rte_eth_devices[rxq->port_id].data->
2003                                                         rx_mbuf_alloc_failed++;
2004                                 break;
2005                         }
2006                 }
2007
2008                 nb_hold++;
2009                 rxe = &sw_ring[rx_id];
2010                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2011
2012                 next_id = rx_id + 1;
2013                 if (next_id == rxq->nb_rx_desc)
2014                         next_id = 0;
2015
2016                 /* Prefetch next mbuf while processing current one. */
2017                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2018
2019                 /*
2020                  * When next RX descriptor is on a cache-line boundary,
2021                  * prefetch the next 4 RX descriptors and the next 4 pointers
2022                  * to mbufs.
2023                  */
2024                 if ((next_id & 0x3) == 0) {
2025                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2026                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2027                 }
2028
2029                 rxm = rxe->mbuf;
2030
2031                 if (!bulk_alloc) {
2032                         __le64 dma =
2033                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2034                         /*
2035                          * Update RX descriptor with the physical address of the
2036                          * new data buffer of the new allocated mbuf.
2037                          */
2038                         rxe->mbuf = nmb;
2039
2040                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2041                         rxdp->read.hdr_addr = 0;
2042                         rxdp->read.pkt_addr = dma;
2043                 } else
2044                         rxe->mbuf = NULL;
2045
2046                 /*
2047                  * Set data length & data buffer address of mbuf.
2048                  */
2049                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2050                 rxm->data_len = data_len;
2051
2052                 if (!eop) {
2053                         uint16_t nextp_id;
2054                         /*
2055                          * Get next descriptor index:
2056                          *  - For RSC it's in the NEXTP field.
2057                          *  - For a scattered packet - it's just a following
2058                          *    descriptor.
2059                          */
2060                         if (ixgbe_rsc_count(&rxd))
2061                                 nextp_id =
2062                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2063                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2064                         else
2065                                 nextp_id = next_id;
2066
2067                         next_sc_entry = &sw_sc_ring[nextp_id];
2068                         next_rxe = &sw_ring[nextp_id];
2069                         rte_ixgbe_prefetch(next_rxe);
2070                 }
2071
2072                 sc_entry = &sw_sc_ring[rx_id];
2073                 first_seg = sc_entry->fbuf;
2074                 sc_entry->fbuf = NULL;
2075
2076                 /*
2077                  * If this is the first buffer of the received packet,
2078                  * set the pointer to the first mbuf of the packet and
2079                  * initialize its context.
2080                  * Otherwise, update the total length and the number of segments
2081                  * of the current scattered packet, and update the pointer to
2082                  * the last mbuf of the current packet.
2083                  */
2084                 if (first_seg == NULL) {
2085                         first_seg = rxm;
2086                         first_seg->pkt_len = data_len;
2087                         first_seg->nb_segs = 1;
2088                 } else {
2089                         first_seg->pkt_len += data_len;
2090                         first_seg->nb_segs++;
2091                 }
2092
2093                 prev_id = rx_id;
2094                 rx_id = next_id;
2095
2096                 /*
2097                  * If this is not the last buffer of the received packet, update
2098                  * the pointer to the first mbuf at the NEXTP entry in the
2099                  * sw_sc_ring and continue to parse the RX ring.
2100                  */
2101                 if (!eop && next_rxe) {
2102                         rxm->next = next_rxe->mbuf;
2103                         next_sc_entry->fbuf = first_seg;
2104                         goto next_desc;
2105                 }
2106
2107                 /*
2108                  * This is the last buffer of the received packet - return
2109                  * the current cluster to the user.
2110                  */
2111                 rxm->next = NULL;
2112
2113                 /* Initialize the first mbuf of the returned packet */
2114                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2115
2116                 /*
2117                  * Deal with the case when HW CRC stripping is disabled.
2118                  * That can't happen when LRO is enabled, but it can still
2119                  * happen in scattered RX mode.
2120                  */
2121                 first_seg->pkt_len -= rxq->crc_len;
2122                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2123                         struct rte_mbuf *lp;
2124
2125                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2126                                 ;
2127
2128                         first_seg->nb_segs--;
2129                         lp->data_len -= rxq->crc_len - rxm->data_len;
2130                         lp->next = NULL;
2131                         rte_pktmbuf_free_seg(rxm);
2132                 } else
2133                         rxm->data_len -= rxq->crc_len;
2134
2135                 /* Prefetch data of first segment, if configured to do so. */
2136                 rte_packet_prefetch((char *)first_seg->buf_addr +
2137                         first_seg->data_off);
2138
2139                 /*
2140                  * Store the mbuf address into the next entry of the array
2141                  * of returned packets.
2142                  */
2143                 rx_pkts[nb_rx++] = first_seg;
2144         }
2145
2146         /*
2147          * Record index of the next RX descriptor to probe.
2148          */
2149         rxq->rx_tail = rx_id;
2150
2151         /*
2152          * If the number of free RX descriptors is greater than the RX free
2153          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2154          * register.
2155          * Update the RDT with the value of the last processed RX descriptor
2156          * minus 1, to guarantee that the RDT register is never equal to the
2157          * RDH register, which creates a "full" ring situation from the
2158          * hardware point of view...
2159          */
2160         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2161                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2162                            "nb_hold=%u nb_rx=%u",
2163                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2164
2165                 rte_wmb();
2166                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
2167                 nb_hold = 0;
2168         }
2169
2170         rxq->nb_rx_hold = nb_hold;
2171         return nb_rx;
2172 }
2173
2174 uint16_t
2175 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2176                                  uint16_t nb_pkts)
2177 {
2178         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2179 }
2180
2181 uint16_t
2182 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2183                                uint16_t nb_pkts)
2184 {
2185         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2186 }
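
/*
 * Illustrative configuration sketch (not part of the driver): RSC/LRO is
 * requested by the application at configure time, after which the PMD picks
 * one of the LRO receive handlers above. The helper name is hypothetical,
 * error handling is omitted and a single Rx/Tx queue pair is assumed.
 */
static inline int
example_enable_lro(uint8_t port_id)
{
        struct rte_eth_conf conf;

        memset(&conf, 0, sizeof(conf));
        conf.rxmode.enable_lro = 1;     /* ask the PMD for RSC/LRO */

        return rte_eth_dev_configure(port_id, 1, 1, &conf);
}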
2187
2188 /*********************************************************************
2189  *
2190  *  Queue management functions
2191  *
2192  **********************************************************************/
2193
2194 static void __attribute__((cold))
2195 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2196 {
2197         unsigned i;
2198
2199         if (txq->sw_ring != NULL) {
2200                 for (i = 0; i < txq->nb_tx_desc; i++) {
2201                         if (txq->sw_ring[i].mbuf != NULL) {
2202                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2203                                 txq->sw_ring[i].mbuf = NULL;
2204                         }
2205                 }
2206         }
2207 }
2208
2209 static void __attribute__((cold))
2210 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2211 {
2212         if (txq != NULL &&
2213             txq->sw_ring != NULL)
2214                 rte_free(txq->sw_ring);
2215 }
2216
2217 static void __attribute__((cold))
2218 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2219 {
2220         if (txq != NULL && txq->ops != NULL) {
2221                 txq->ops->release_mbufs(txq);
2222                 txq->ops->free_swring(txq);
2223                 rte_free(txq);
2224         }
2225 }
2226
2227 void __attribute__((cold))
2228 ixgbe_dev_tx_queue_release(void *txq)
2229 {
2230         ixgbe_tx_queue_release(txq);
2231 }
2232
2233 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2234 static void __attribute__((cold))
2235 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2236 {
2237         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2238         struct ixgbe_tx_entry *txe = txq->sw_ring;
2239         uint16_t prev, i;
2240
2241         /* Zero out HW ring memory */
2242         for (i = 0; i < txq->nb_tx_desc; i++) {
2243                 txq->tx_ring[i] = zeroed_desc;
2244         }
2245
2246         /* Initialize SW ring entries */
2247         prev = (uint16_t) (txq->nb_tx_desc - 1);
2248         for (i = 0; i < txq->nb_tx_desc; i++) {
2249                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2250
2251                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2252                 txe[i].mbuf = NULL;
2253                 txe[i].last_id = i;
2254                 txe[prev].next_id = i;
2255                 prev = i;
2256         }
2257
2258         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2259         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2260
2261         txq->tx_tail = 0;
2262         txq->nb_tx_used = 0;
2263         /*
2264          * Always allow 1 descriptor to be un-allocated to avoid
2265          * a H/W race condition
2266          */
2267         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2268         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2269         txq->ctx_curr = 0;
2270         memset((void *)&txq->ctx_cache, 0,
2271                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2272 }
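
/*
 * Worked example (editor's illustration): with nb_tx_desc = 4 the loop above
 * links the SW ring into a circle, i.e. txe[3].next_id = 0, txe[0].next_id = 1,
 * txe[1].next_id = 2 and txe[2].next_id = 3, while every descriptor starts
 * with its DD bit set so that it is initially considered "done".
 */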
2273
2274 static const struct ixgbe_txq_ops def_txq_ops = {
2275         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2276         .free_swring = ixgbe_tx_free_swring,
2277         .reset = ixgbe_reset_tx_queue,
2278 };
2279
2280 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2281  * the queue parameters. Used in tx_queue_setup by the primary process and then
2282  * in dev_init by a secondary process when attaching to an existing ethdev.
2283  */
2284 void __attribute__((cold))
2285 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2286 {
2287         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2288         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2289                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2290                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2291 #ifdef RTE_IXGBE_INC_VECTOR
2292                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2293                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2294                                         ixgbe_txq_vec_setup(txq) == 0)) {
2295                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2296                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2297                 } else
2298 #endif
2299                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2300         } else {
2301                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2302                 PMD_INIT_LOG(DEBUG,
2303                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2304                                 (unsigned long)txq->txq_flags,
2305                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2306                 PMD_INIT_LOG(DEBUG,
2307                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2308                                 (unsigned long)txq->tx_rs_thresh,
2309                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2310                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2311         }
2312 }
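
/*
 * Illustrative sketch (not part of the driver): a Tx queue configuration
 * intended to satisfy the "simple" path condition checked above (no
 * multi-segment packets, no offloads, a sufficiently large tx_rs_thresh).
 * The helper name, ring size and threshold values are example assumptions.
 */
static inline int
example_setup_simple_txq(uint8_t port_id, uint16_t queue_id)
{
        struct rte_eth_txconf txconf;

        memset(&txconf, 0, sizeof(txconf));
        txconf.txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS |
                           ETH_TXQ_FLAGS_NOOFFLOADS;
        txconf.tx_rs_thresh = 32;       /* assumed to meet the burst bound */
        txconf.tx_free_thresh = 32;     /* WTHRESH stays 0 via memset() */

        return rte_eth_tx_queue_setup(port_id, queue_id, 512,
                                      rte_eth_dev_socket_id(port_id),
                                      &txconf);
}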
2313
2314 int __attribute__((cold))
2315 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2316                          uint16_t queue_idx,
2317                          uint16_t nb_desc,
2318                          unsigned int socket_id,
2319                          const struct rte_eth_txconf *tx_conf)
2320 {
2321         const struct rte_memzone *tz;
2322         struct ixgbe_tx_queue *txq;
2323         struct ixgbe_hw     *hw;
2324         uint16_t tx_rs_thresh, tx_free_thresh;
2325
2326         PMD_INIT_FUNC_TRACE();
2327         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2328
2329         /*
2330          * Validate number of transmit descriptors.
2331          * It must not exceed the hardware maximum and must be a multiple
2332          * of IXGBE_TXD_ALIGN.
2333          */
2334         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2335                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2336                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2337                 return -EINVAL;
2338         }
2339
2340         /*
2341          * The following two parameters control the setting of the RS bit on
2342          * transmit descriptors.
2343          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2344          * descriptors have been used.
2345          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2346          * descriptors are used or if the number of descriptors required
2347          * to transmit a packet is greater than the number of free TX
2348          * descriptors.
2349          * The following constraints must be satisfied:
2350          *  tx_rs_thresh must be greater than 0.
2351          *  tx_rs_thresh must be less than the size of the ring minus 2.
2352          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2353          *  tx_rs_thresh must be a divisor of the ring size.
2354          *  tx_free_thresh must be greater than 0.
2355          *  tx_free_thresh must be less than the size of the ring minus 3.
2356          * One descriptor in the TX ring is used as a sentinel to avoid a
2357          * H/W race condition, hence the maximum threshold constraints.
2358          * When set to zero use default values.
2359          * When set to zero use default values.
          */
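
        /*
         * Worked example (editor's illustration): with nb_desc = 512,
         * tx_rs_thresh = 32 and tx_free_thresh = 32 every check below passes:
         * 32 > 0, 32 < 510, 32 <= 32, 512 % 32 == 0 and 32 < 509; WTHRESH
         * must then be 0 because tx_rs_thresh > 1.
         */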
2360         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2361                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2362         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2363                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2364         if (tx_rs_thresh >= (nb_desc - 2)) {
2365                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2366                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2367                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2368                         (int)dev->data->port_id, (int)queue_idx);
2369                 return -(EINVAL);
2370         }
2371         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2372                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2373                         "(tx_rs_thresh=%u port=%d queue=%d)",
2374                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2375                         (int)dev->data->port_id, (int)queue_idx);
2376                 return -(EINVAL);
2377         }
2378         if (tx_free_thresh >= (nb_desc - 3)) {
2379                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2380                              "number of TX descriptors minus 3. "
2381                              "(tx_free_thresh=%u "
2382                              "port=%d queue=%d)",
2383                              (unsigned int)tx_free_thresh,
2384                              (int)dev->data->port_id, (int)queue_idx);
2385                 return -(EINVAL);
2386         }
2387         if (tx_rs_thresh > tx_free_thresh) {
2388                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2389                              "tx_free_thresh. (tx_free_thresh=%u "
2390                              "tx_rs_thresh=%u port=%d queue=%d)",
2391                              (unsigned int)tx_free_thresh,
2392                              (unsigned int)tx_rs_thresh,
2393                              (int)dev->data->port_id,
2394                              (int)queue_idx);
2395                 return -(EINVAL);
2396         }
2397         if ((nb_desc % tx_rs_thresh) != 0) {
2398                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2399                              "number of TX descriptors. (tx_rs_thresh=%u "
2400                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2401                              (int)dev->data->port_id, (int)queue_idx);
2402                 return -(EINVAL);
2403         }
2404
2405         /*
2406          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2407          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2408          * by the NIC and all descriptors are written back after the NIC
2409          * accumulates WTHRESH descriptors.
2410          */
2411         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2412                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2413                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2414                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2415                              (int)dev->data->port_id, (int)queue_idx);
2416                 return -(EINVAL);
2417         }
2418
2419         /* Free memory prior to re-allocation if needed... */
2420         if (dev->data->tx_queues[queue_idx] != NULL) {
2421                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2422                 dev->data->tx_queues[queue_idx] = NULL;
2423         }
2424
2425         /* First allocate the tx queue data structure */
2426         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2427                                  RTE_CACHE_LINE_SIZE, socket_id);
2428         if (txq == NULL)
2429                 return -ENOMEM;
2430
2431         /*
2432          * Allocate TX ring hardware descriptors. A memzone large enough to
2433          * handle the maximum ring size is allocated in order to allow for
2434          * resizing in later calls to the queue setup function.
2435          */
2436         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2437                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2438                         IXGBE_ALIGN, socket_id);
2439         if (tz == NULL) {
2440                 ixgbe_tx_queue_release(txq);
2441                 return -ENOMEM;
2442         }
2443
2444         txq->nb_tx_desc = nb_desc;
2445         txq->tx_rs_thresh = tx_rs_thresh;
2446         txq->tx_free_thresh = tx_free_thresh;
2447         txq->pthresh = tx_conf->tx_thresh.pthresh;
2448         txq->hthresh = tx_conf->tx_thresh.hthresh;
2449         txq->wthresh = tx_conf->tx_thresh.wthresh;
2450         txq->queue_id = queue_idx;
2451         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2452                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2453         txq->port_id = dev->data->port_id;
2454         txq->txq_flags = tx_conf->txq_flags;
2455         txq->ops = &def_txq_ops;
2456         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2457
2458         /*
2459          * Set the tail register to VFTDT when a virtual function is detected.
2460          */
2461         if (hw->mac.type == ixgbe_mac_82599_vf ||
2462             hw->mac.type == ixgbe_mac_X540_vf ||
2463             hw->mac.type == ixgbe_mac_X550_vf ||
2464             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2465             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2466                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2467         else
2468                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2469
2470         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2471         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2472
2473         /* Allocate software ring */
2474         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2475                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2476                                 RTE_CACHE_LINE_SIZE, socket_id);
2477         if (txq->sw_ring == NULL) {
2478                 ixgbe_tx_queue_release(txq);
2479                 return -ENOMEM;
2480         }
2481         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2482                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2483
2484         /* set up vector or scalar TX function as appropriate */
2485         ixgbe_set_tx_function(dev, txq);
2486
2487         txq->ops->reset(txq);
2488
2489         dev->data->tx_queues[queue_idx] = txq;
2490
2491
2492         return 0;
2493 }
2494
2495 /**
2496  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2497  *
2498  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2499  * in the sw_rsc_ring is not set to NULL but rather points to the next
2500  * mbuf of this RSC aggregation (that has not been completed yet and still
2501  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2502  * just free the first "nb_segs" segments of the cluster explicitly by calling
2503  * rte_pktmbuf_free_seg() on each of them.
2504  *
2505  * @m scattered cluster head
2506  */
2507 static void __attribute__((cold))
2508 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2509 {
2510         uint8_t i, nb_segs = m->nb_segs;
2511         struct rte_mbuf *next_seg;
2512
2513         for (i = 0; i < nb_segs; i++) {
2514                 next_seg = m->next;
2515                 rte_pktmbuf_free_seg(m);
2516                 m = next_seg;
2517         }
2518 }
2519
2520 static void __attribute__((cold))
2521 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2522 {
2523         unsigned i;
2524
2525 #ifdef RTE_IXGBE_INC_VECTOR
2526         /* SSE Vector driver has a different way of releasing mbufs. */
2527         if (rxq->rx_using_sse) {
2528                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2529                 return;
2530         }
2531 #endif
2532
2533         if (rxq->sw_ring != NULL) {
2534                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2535                         if (rxq->sw_ring[i].mbuf != NULL) {
2536                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2537                                 rxq->sw_ring[i].mbuf = NULL;
2538                         }
2539                 }
2540                 if (rxq->rx_nb_avail) {
2541                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2542                                 struct rte_mbuf *mb;
2543
2544                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2545                                 rte_pktmbuf_free_seg(mb);
2546                         }
2547                         rxq->rx_nb_avail = 0;
2548                 }
2549         }
2550
2551         if (rxq->sw_sc_ring)
2552                 for (i = 0; i < rxq->nb_rx_desc; i++)
2553                         if (rxq->sw_sc_ring[i].fbuf) {
2554                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2555                                 rxq->sw_sc_ring[i].fbuf = NULL;
2556                         }
2557 }
2558
2559 static void __attribute__((cold))
2560 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2561 {
2562         if (rxq != NULL) {
2563                 ixgbe_rx_queue_release_mbufs(rxq);
2564                 rte_free(rxq->sw_ring);
2565                 rte_free(rxq->sw_sc_ring);
2566                 rte_free(rxq);
2567         }
2568 }
2569
2570 void __attribute__((cold))
2571 ixgbe_dev_rx_queue_release(void *rxq)
2572 {
2573         ixgbe_rx_queue_release(rxq);
2574 }
2575
2576 /*
2577  * Check if Rx Burst Bulk Alloc function can be used.
2578  * Return
2579  *        0: the preconditions are satisfied and the bulk allocation function
2580  *           can be used.
2581  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2582  *           function must be used.
2583  */
2584 static inline int __attribute__((cold))
2585 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2586 {
2587         int ret = 0;
2588
2589         /*
2590          * Make sure the following pre-conditions are satisfied:
2591          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2592          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2593          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2594          *   rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST)
2595          * Scattered packets are not supported.  This should be checked
2596          * outside of this function.
2597          */
2598         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2599                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2600                              "rxq->rx_free_thresh=%d, "
2601                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2602                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2603                 ret = -EINVAL;
2604         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2605                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2606                              "rxq->rx_free_thresh=%d, "
2607                              "rxq->nb_rx_desc=%d",
2608                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2609                 ret = -EINVAL;
2610         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2611                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2612                              "rxq->nb_rx_desc=%d, "
2613                              "rxq->rx_free_thresh=%d",
2614                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2615                 ret = -EINVAL;
2616         } else if (!(rxq->nb_rx_desc <
2617                (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
2618                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2619                              "rxq->nb_rx_desc=%d, "
2620                              "IXGBE_MAX_RING_DESC=%d, "
2621                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2622                              rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
2623                              RTE_PMD_IXGBE_RX_MAX_BURST);
2624                 ret = -EINVAL;
2625         }
2626
2627         return ret;
2628 }
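
/*
 * Illustrative sketch, not part of the driver: a queue configuration that
 * satisfies all of the bulk alloc preconditions checked above, assuming
 * RTE_PMD_IXGBE_RX_MAX_BURST = 32 and IXGBE_MAX_RING_DESC = 4096
 * (port_id and mb_pool are placeholders supplied by the application):
 *
 *     struct rte_eth_rxconf rxconf = { .rx_free_thresh = 32 };
 *
 *     rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *                            &rxconf, mb_pool);
 *
 * With nb_rx_desc = 512: 32 >= 32, 32 < 512, 512 % 32 == 0 and 512 < 4064.
 */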
2629
2630 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2631 static void __attribute__((cold))
2632 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2633 {
2634         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2635         unsigned i;
2636         uint16_t len = rxq->nb_rx_desc;
2637
2638         /*
2639          * By default, the Rx queue setup function allocates enough memory for
2640          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2641          * extra memory at the end of the descriptor ring to be zeroed out. A
2642          * pre-condition for using the Rx burst bulk alloc function is that the
2643          * number of descriptors is less than or equal to
2644          * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2645          * constraints here to see if we need to zero out memory after the end
2646          * of the H/W descriptor ring.
2647          */
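        /*
         * Illustrative arithmetic, not part of the driver logic: with
         * nb_rx_desc = 512 and RTE_PMD_IXGBE_RX_MAX_BURST = 32, bulk alloc
         * being allowed makes len = 544, so 32 extra descriptors past the
         * end of the ring get zeroed for the look-ahead reads below.
         */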
2648         if (adapter->rx_bulk_alloc_allowed)
2649                 /* zero out extra memory */
2650                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2651
2652         /*
2653          * Zero out HW ring memory. Zero out extra memory at the end of
2654          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2655          * reads extra memory as zeros.
2656          */
2657         for (i = 0; i < len; i++) {
2658                 rxq->rx_ring[i] = zeroed_desc;
2659         }
2660
2661         /*
2662          * initialize extra software ring entries. Space for these extra
2663          * entries is always allocated
2664          */
2665         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2666         for (i = rxq->nb_rx_desc; i < len; ++i) {
2667                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2668         }
2669
2670         rxq->rx_nb_avail = 0;
2671         rxq->rx_next_avail = 0;
2672         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2673         rxq->rx_tail = 0;
2674         rxq->nb_rx_hold = 0;
2675         rxq->pkt_first_seg = NULL;
2676         rxq->pkt_last_seg = NULL;
2677
2678 #ifdef RTE_IXGBE_INC_VECTOR
2679         rxq->rxrearm_start = 0;
2680         rxq->rxrearm_nb = 0;
2681 #endif
2682 }
2683
2684 int __attribute__((cold))
2685 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2686                          uint16_t queue_idx,
2687                          uint16_t nb_desc,
2688                          unsigned int socket_id,
2689                          const struct rte_eth_rxconf *rx_conf,
2690                          struct rte_mempool *mp)
2691 {
2692         const struct rte_memzone *rz;
2693         struct ixgbe_rx_queue *rxq;
2694         struct ixgbe_hw     *hw;
2695         uint16_t len;
2696         struct ixgbe_adapter *adapter =
2697                 (struct ixgbe_adapter *)dev->data->dev_private;
2698
2699         PMD_INIT_FUNC_TRACE();
2700         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2701
2702         /*
2703          * Validate the number of receive descriptors.
2704          * It must lie between the hardware minimum and maximum, and
2705          * must be a multiple of IXGBE_RXD_ALIGN.
2706          */
2707         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2708                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2709                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2710                 return -EINVAL;
2711         }
2712
2713         /* Free memory prior to re-allocation if needed... */
2714         if (dev->data->rx_queues[queue_idx] != NULL) {
2715                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2716                 dev->data->rx_queues[queue_idx] = NULL;
2717         }
2718
2719         /* First allocate the rx queue data structure */
2720         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2721                                  RTE_CACHE_LINE_SIZE, socket_id);
2722         if (rxq == NULL)
2723                 return -ENOMEM;
2724         rxq->mb_pool = mp;
2725         rxq->nb_rx_desc = nb_desc;
2726         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2727         rxq->queue_id = queue_idx;
2728         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2729                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2730         rxq->port_id = dev->data->port_id;
2731         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2732                                                         0 : ETHER_CRC_LEN);
2733         rxq->drop_en = rx_conf->rx_drop_en;
2734         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2735
2736         /*
2737          * The packet type field in the RX descriptor differs between NICs.
2738          * Some bits are used by x550 but are reserved on other NICs,
2739          * so set a different mask per NIC type.
2740          */
2741         if (hw->mac.type == ixgbe_mac_X550 ||
2742             hw->mac.type == ixgbe_mac_X550EM_x ||
2743             hw->mac.type == ixgbe_mac_X550EM_a ||
2744             hw->mac.type == ixgbe_mac_X550_vf ||
2745             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2746             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2747                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2748         else
2749                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2750
2751         /*
2752          * Allocate RX ring hardware descriptors. A memzone large enough to
2753          * handle the maximum ring size is allocated in order to allow for
2754          * resizing in later calls to the queue setup function.
2755          */
2756         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2757                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2758         if (rz == NULL) {
2759                 ixgbe_rx_queue_release(rxq);
2760                 return -ENOMEM;
2761         }
2762
2763         /*
2764          * Zero init all the descriptors in the ring.
2765          */
2766         memset(rz->addr, 0, RX_RING_SZ);
2767
2768         /*
2769          * Set the tail/head registers to VFRDT/VFRDH when a virtual function is detected.
2770          */
2771         if (hw->mac.type == ixgbe_mac_82599_vf ||
2772             hw->mac.type == ixgbe_mac_X540_vf ||
2773             hw->mac.type == ixgbe_mac_X550_vf ||
2774             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2775             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2776                 rxq->rdt_reg_addr =
2777                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2778                 rxq->rdh_reg_addr =
2779                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2780         } else {
2781                 rxq->rdt_reg_addr =
2782                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2783                 rxq->rdh_reg_addr =
2784                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2785         }
2786
2787         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2788         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2789
2790         /*
2791          * Certain constraints must be met in order to use the bulk buffer
2792          * allocation Rx burst function. If any Rx queue doesn't meet them,
2793          * the feature should be disabled for the whole port.
2794          */
2795         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2796                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2797                                     "preconditions - canceling the feature for "
2798                                     "the whole port[%d]",
2799                              rxq->queue_id, rxq->port_id);
2800                 adapter->rx_bulk_alloc_allowed = false;
2801         }
2802
2803         /*
2804          * Allocate software ring. Allow for space at the end of the
2805          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2806          * function does not access an invalid memory region.
2807          */
2808         len = nb_desc;
2809         if (adapter->rx_bulk_alloc_allowed)
2810                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2811
2812         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2813                                           sizeof(struct ixgbe_rx_entry) * len,
2814                                           RTE_CACHE_LINE_SIZE, socket_id);
2815         if (!rxq->sw_ring) {
2816                 ixgbe_rx_queue_release(rxq);
2817                 return -ENOMEM;
2818         }
2819
2820         /*
2821          * Always allocate even if it's not going to be needed in order to
2822          * simplify the code.
2823          *
2824          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2825          * be requested in ixgbe_dev_rx_init(), which is called later from
2826          * dev_start() flow.
2827          */
2828         rxq->sw_sc_ring =
2829                 rte_zmalloc_socket("rxq->sw_sc_ring",
2830                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2831                                    RTE_CACHE_LINE_SIZE, socket_id);
2832         if (!rxq->sw_sc_ring) {
2833                 ixgbe_rx_queue_release(rxq);
2834                 return -ENOMEM;
2835         }
2836
2837         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2838                             "dma_addr=0x%"PRIx64,
2839                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2840                      rxq->rx_ring_phys_addr);
2841
2842         if (!rte_is_power_of_2(nb_desc)) {
2843                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2844                                     "preconditions - canceling the feature for "
2845                                     "the whole port[%d]",
2846                              rxq->queue_id, rxq->port_id);
2847                 adapter->rx_vec_allowed = false;
2848         } else
2849                 ixgbe_rxq_vec_setup(rxq);
2850
2851         dev->data->rx_queues[queue_idx] = rxq;
2852
2853         ixgbe_reset_rx_queue(adapter, rxq);
2854
2855         return 0;
2856 }
2857
2858 uint32_t
2859 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2860 {
2861 #define IXGBE_RXQ_SCAN_INTERVAL 4
2862         volatile union ixgbe_adv_rx_desc *rxdp;
2863         struct ixgbe_rx_queue *rxq;
2864         uint32_t desc = 0;
2865
2866         if (rx_queue_id >= dev->data->nb_rx_queues) {
2867                 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2868                 return 0;
2869         }
2870
2871         rxq = dev->data->rx_queues[rx_queue_id];
2872         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2873
2874         while ((desc < rxq->nb_rx_desc) &&
2875                 (rxdp->wb.upper.status_error &
2876                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2877                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2878                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2879                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2880                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2881                                 desc - rxq->nb_rx_desc]);
2882         }
2883
2884         return desc;
2885 }
2886
2887 int
2888 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2889 {
2890         volatile union ixgbe_adv_rx_desc *rxdp;
2891         struct ixgbe_rx_queue *rxq = rx_queue;
2892         uint32_t desc;
2893
2894         if (unlikely(offset >= rxq->nb_rx_desc))
2895                 return 0;
2896         desc = rxq->rx_tail + offset;
2897         if (desc >= rxq->nb_rx_desc)
2898                 desc -= rxq->nb_rx_desc;
2899
2900         rxdp = &rxq->rx_ring[desc];
2901         return !!(rxdp->wb.upper.status_error &
2902                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2903 }
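
/*
 * Wrap-around example for ixgbe_dev_rx_descriptor_done(), illustrative only:
 * with nb_rx_desc = 512, rx_tail = 500 and offset = 20, the descriptor
 * checked is 500 + 20 - 512 = 8, i.e. the ring is treated as circular.
 */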
2904
2905 void __attribute__((cold))
2906 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2907 {
2908         unsigned i;
2909         struct ixgbe_adapter *adapter =
2910                 (struct ixgbe_adapter *)dev->data->dev_private;
2911
2912         PMD_INIT_FUNC_TRACE();
2913
2914         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2915                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2916
2917                 if (txq != NULL) {
2918                         txq->ops->release_mbufs(txq);
2919                         txq->ops->reset(txq);
2920                 }
2921         }
2922
2923         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2924                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2925
2926                 if (rxq != NULL) {
2927                         ixgbe_rx_queue_release_mbufs(rxq);
2928                         ixgbe_reset_rx_queue(adapter, rxq);
2929                 }
2930         }
2931 }
2932
2933 void
2934 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2935 {
2936         unsigned i;
2937
2938         PMD_INIT_FUNC_TRACE();
2939
2940         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2941                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2942                 dev->data->rx_queues[i] = NULL;
2943         }
2944         dev->data->nb_rx_queues = 0;
2945
2946         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2947                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2948                 dev->data->tx_queues[i] = NULL;
2949         }
2950         dev->data->nb_tx_queues = 0;
2951 }
2952
2953 /*********************************************************************
2954  *
2955  *  Device RX/TX init functions
2956  *
2957  **********************************************************************/
2958
2959 /**
2960  * Receive Side Scaling (RSS)
2961  * See section 7.1.2.8 in the following document:
2962  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2963  *
2964  * Principles:
2965  * The source and destination IP addresses of the IP header and the source
2966  * and destination ports of TCP/UDP headers, if any, of received packets are
2967  * hashed against a configurable random key to compute a 32-bit RSS hash result.
2968  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2969  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
2970  * RSS output index which is used as the RX queue index where to store the
2971  * RSS output index, which is used as the RX queue index in which to store the
2972  * The following output is supplied in the RX write-back descriptor:
2973  *     - 32-bit result of the Microsoft RSS hash function,
2974  *     - 4-bit RSS type field.
2975  */
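
/*
 * A minimal sketch of the RETA lookup described above, illustrative only
 * (the real lookup is performed by the hardware on the receive path):
 *
 *     uint32_t rss_hash;        32-bit Microsoft RSS hash result
 *     uint8_t  reta[128];       redirection table as programmed below
 *     uint16_t rx_queue = reta[rss_hash & 0x7F];
 *
 * i.e. the seven LSBs of the hash select one of the 128 RETA entries.
 */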
2976
2977 /*
2978  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2979  * Used as the default key.
2980  */
2981 static uint8_t rss_intel_key[40] = {
2982         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2983         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2984         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2985         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2986         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2987 };
2988
2989 static void
2990 ixgbe_rss_disable(struct rte_eth_dev *dev)
2991 {
2992         struct ixgbe_hw *hw;
2993         uint32_t mrqc;
2994         uint32_t mrqc_reg;
2995
2996         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2997         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2998         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2999         mrqc &= ~IXGBE_MRQC_RSSEN;
3000         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3001 }
3002
3003 static void
3004 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3005 {
3006         uint8_t  *hash_key;
3007         uint32_t mrqc;
3008         uint32_t rss_key;
3009         uint64_t rss_hf;
3010         uint16_t i;
3011         uint32_t mrqc_reg;
3012         uint32_t rssrk_reg;
3013
3014         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3015         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3016
3017         hash_key = rss_conf->rss_key;
3018         if (hash_key != NULL) {
3019                 /* Fill in RSS hash key */
3020                 for (i = 0; i < 10; i++) {
3021                         rss_key  = hash_key[(i * 4)];
3022                         rss_key |= hash_key[(i * 4) + 1] << 8;
3023                         rss_key |= hash_key[(i * 4) + 2] << 16;
3024                         rss_key |= hash_key[(i * 4) + 3] << 24;
3025                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3026                 }
3027         }
3028
3029         /* Set configured hashing protocols in MRQC register */
3030         rss_hf = rss_conf->rss_hf;
3031         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3032         if (rss_hf & ETH_RSS_IPV4)
3033                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3034         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3035                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3036         if (rss_hf & ETH_RSS_IPV6)
3037                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3038         if (rss_hf & ETH_RSS_IPV6_EX)
3039                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3040         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3041                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3042         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3043                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3044         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3045                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3046         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3047                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3048         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3049                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3050         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3051 }
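
/*
 * Key packing example for ixgbe_hw_rss_hash_set(), illustrative only:
 * each RSSRK register holds four consecutive key bytes, least significant
 * byte first.  With the default rss_intel_key above, the first register is
 * written as 0xDA565A6D (key bytes 0x6D, 0x5A, 0x56, 0xDA).
 */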
3052
3053 int
3054 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3055                           struct rte_eth_rss_conf *rss_conf)
3056 {
3057         struct ixgbe_hw *hw;
3058         uint32_t mrqc;
3059         uint64_t rss_hf;
3060         uint32_t mrqc_reg;
3061
3062         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3063
3064         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3065                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3066                         "NIC.");
3067                 return -ENOTSUP;
3068         }
3069         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3070
3071         /*
3072          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3073          *     "RSS enabling cannot be done dynamically while it must be
3074          *      preceded by a software reset"
3075          * Before changing anything, first check that the update RSS operation
3076          * does not attempt to disable RSS, if RSS was enabled at
3077          * initialization time, or does not attempt to enable RSS, if RSS was
3078          * disabled at initialization time.
3079          */
3080         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3081         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3082         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3083                 if (rss_hf != 0) /* Enable RSS */
3084                         return -(EINVAL);
3085                 return 0; /* Nothing to do */
3086         }
3087         /* RSS enabled */
3088         if (rss_hf == 0) /* Disable RSS */
3089                 return -(EINVAL);
3090         ixgbe_hw_rss_hash_set(hw, rss_conf);
3091         return 0;
3092 }
3093
3094 int
3095 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3096                             struct rte_eth_rss_conf *rss_conf)
3097 {
3098         struct ixgbe_hw *hw;
3099         uint8_t *hash_key;
3100         uint32_t mrqc;
3101         uint32_t rss_key;
3102         uint64_t rss_hf;
3103         uint16_t i;
3104         uint32_t mrqc_reg;
3105         uint32_t rssrk_reg;
3106
3107         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3108         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3109         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3110         hash_key = rss_conf->rss_key;
3111         if (hash_key != NULL) {
3112                 /* Return RSS hash key */
3113                 for (i = 0; i < 10; i++) {
3114                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3115                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3116                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3117                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3118                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3119                 }
3120         }
3121
3122         /* Get RSS functions configured in MRQC register */
3123         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3124         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3125                 rss_conf->rss_hf = 0;
3126                 return 0;
3127         }
3128         rss_hf = 0;
3129         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3130                 rss_hf |= ETH_RSS_IPV4;
3131         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3132                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3133         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3134                 rss_hf |= ETH_RSS_IPV6;
3135         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3136                 rss_hf |= ETH_RSS_IPV6_EX;
3137         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3138                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3139         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3140                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3141         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3142                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3143         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3144                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3145         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3146                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3147         rss_conf->rss_hf = rss_hf;
3148         return 0;
3149 }
3150
3151 static void
3152 ixgbe_rss_configure(struct rte_eth_dev *dev)
3153 {
3154         struct rte_eth_rss_conf rss_conf;
3155         struct ixgbe_hw *hw;
3156         uint32_t reta;
3157         uint16_t i;
3158         uint16_t j;
3159         uint16_t sp_reta_size;
3160         uint32_t reta_reg;
3161
3162         PMD_INIT_FUNC_TRACE();
3163         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3164
3165         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3166
3167         /*
3168          * Fill in redirection table
3169          * The byte-swap is needed because NIC registers are in
3170          * little-endian order.
3171          */
3172         reta = 0;
3173         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3174                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3175
3176                 if (j == dev->data->nb_rx_queues)
3177                         j = 0;
3178                 reta = (reta << 8) | j;
3179                 if ((i & 3) == 3)
3180                         IXGBE_WRITE_REG(hw, reta_reg,
3181                                         rte_bswap32(reta));
3182         }
3183
3184         /*
3185          * Configure the RSS key and the RSS protocols used to compute
3186          * the RSS hash of input packets.
3187          */
3188         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3189         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3190                 ixgbe_rss_disable(dev);
3191                 return;
3192         }
3193         if (rss_conf.rss_key == NULL)
3194                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3195         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3196 }
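
/*
 * RETA packing example for ixgbe_rss_configure(), illustrative only:
 * four 8-bit entries are accumulated into each 32-bit register and
 * byte-swapped before the write.  With nb_rx_queues = 4, every group of
 * four entries is 0, 1, 2, 3, so each RETA register is written as
 * 0x03020100 (entry i lands in byte i % 4).
 */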
3197
3198 #define NUM_VFTA_REGISTERS 128
3199 #define NIC_RX_BUFFER_SIZE 0x200
3200 #define X550_RX_BUFFER_SIZE 0x180
3201
3202 static void
3203 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3204 {
3205         struct rte_eth_vmdq_dcb_conf *cfg;
3206         struct ixgbe_hw *hw;
3207         enum rte_eth_nb_pools num_pools;
3208         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3209         uint16_t pbsize;
3210         uint8_t nb_tcs; /* number of traffic classes */
3211         int i;
3212
3213         PMD_INIT_FUNC_TRACE();
3214         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3215         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3216         num_pools = cfg->nb_queue_pools;
3217         /* Check we have a valid number of pools */
3218         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3219                 ixgbe_rss_disable(dev);
3220                 return;
3221         }
3222         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3223         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3224
3225         /*
3226          * RXPBSIZE
3227          * split rx buffer up into sections, each for 1 traffic class
3228          */
3229         switch (hw->mac.type) {
3230         case ixgbe_mac_X550:
3231         case ixgbe_mac_X550EM_x:
3232         case ixgbe_mac_X550EM_a:
3233                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3234                 break;
3235         default:
3236                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3237                 break;
3238         }
3239         for (i = 0; i < nb_tcs; i++) {
3240                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3241
3242                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3243                 /* clear 10 bits. */
3244                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3245                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3246         }
3247         /* zero alloc all unused TCs */
3248         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3249                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3250
3251                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3252                 /* clear 10 bits. */
3253                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3254         }
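        /*
         * Sizing example, illustrative only (the RXPBSIZE field is assumed
         * to be expressed in KB): with 16 pools, i.e. 8 TCs, on an
         * 82599-class NIC, pbsize = 0x200 / 8 = 0x40, so each TC gets
         * 64 KB of the 512 KB Rx packet buffer; on X550 the 0x180 (384 KB)
         * buffer is split the same way, 48 KB per TC.
         */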
3255
3256         /* MRQC: enable vmdq and dcb */
3257         mrqc = (num_pools == ETH_16_POOLS) ?
3258                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3259         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3260
3261         /* PFVTCTL: turn on virtualisation and set the default pool */
3262         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3263         if (cfg->enable_default_pool) {
3264                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3265         } else {
3266                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3267         }
3268
3269         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3270
3271         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3272         queue_mapping = 0;
3273         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3274                 /*
3275                  * mapping is done with 3 bits per priority,
3276                  * so shift by i*3 each time
3277                  */
3278                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3279
3280         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3281
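        /*
         * Mapping example, illustrative only: with
         * cfg->dcb_tc = {0, 0, 1, 1, 2, 2, 3, 3}, the 3-bit fields packed
         * above yield queue_mapping = 0x006D2240, i.e. user priorities 0-1
         * map to TC0, 2-3 to TC1, 4-5 to TC2 and 6-7 to TC3.
         */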
3282         /* RTRPCS: DCB related */
3283         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3284
3285         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3286         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3287         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3288         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3289
3290         /* VFTA - enable all vlan filters */
3291         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3292                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3293         }
3294
3295         /* VFRE: pool enabling for receive - 16 or 32 */
3296         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3297                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3298
3299         /*
3300          * MPSAR - allow pools to read specific mac addresses
3301          * In this case, all pools should be able to read from mac addr 0
3302          */
3303         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3304         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3305
3306         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3307         for (i = 0; i < cfg->nb_pool_maps; i++) {
3308                 /* set vlan id in VF register and set the valid bit */
3309                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3310                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3311                 /*
3312                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3313                  * pools, we only need to use the first half of the register
3314                  * i.e. bits 0-31
3315                  */
3316                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3317         }
3318 }
3319
3320 /**
3321  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3322  * @dev: pointer to eth_dev structure
3323  * @dcb_config: pointer to ixgbe_dcb_config structure
3324  */
3325 static void
3326 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3327                        struct ixgbe_dcb_config *dcb_config)
3328 {
3329         uint32_t reg;
3330         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3331
3332         PMD_INIT_FUNC_TRACE();
3333         if (hw->mac.type != ixgbe_mac_82598EB) {
3334                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3335                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3336                 reg |= IXGBE_RTTDCS_ARBDIS;
3337                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3338
3339                 /* Enable DCB for Tx with 8 TCs */
3340                 if (dcb_config->num_tcs.pg_tcs == 8) {
3341                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3342                 } else {
3343                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3344                 }
3345                 if (dcb_config->vt_mode)
3346                         reg |= IXGBE_MTQC_VT_ENA;
3347                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3348
3349                 /* Enable the Tx desc arbiter */
3350                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3351                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3352                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3353
3354                 /* Enable Security TX Buffer IFG for DCB */
3355                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3356                 reg |= IXGBE_SECTX_DCB;
3357                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3358         }
3359 }
3360
3361 /**
3362  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3363  * @dev: pointer to rte_eth_dev structure
3364  * @dcb_config: pointer to ixgbe_dcb_config structure
3365  */
3366 static void
3367 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3368                         struct ixgbe_dcb_config *dcb_config)
3369 {
3370         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3371                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3372         struct ixgbe_hw *hw =
3373                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3374
3375         PMD_INIT_FUNC_TRACE();
3376         if (hw->mac.type != ixgbe_mac_82598EB)
3377                 /*PF VF Transmit Enable*/
3378                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3379                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3380
3381         /*Configure general DCB TX parameters*/
3382         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3383 }
3384
3385 static void
3386 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3387                         struct ixgbe_dcb_config *dcb_config)
3388 {
3389         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3390                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3391         struct ixgbe_dcb_tc_config *tc;
3392         uint8_t i, j;
3393
3394         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3395         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3396                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3397                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3398         } else {
3399                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3400                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3401         }
3402
3403         /* Initialize User Priority to Traffic Class mapping */
3404         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3405                 tc = &dcb_config->tc_config[j];
3406                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3407         }
3408
3409         /* User Priority to Traffic Class mapping */
3410         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3411                 j = vmdq_rx_conf->dcb_tc[i];
3412                 tc = &dcb_config->tc_config[j];
3413                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3414                                                 (uint8_t)(1 << i);
3415         }
3416 }
3417
3418 static void
3419 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3420                         struct ixgbe_dcb_config *dcb_config)
3421 {
3422         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3423                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3424         struct ixgbe_dcb_tc_config *tc;
3425         uint8_t i, j;
3426
3427         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3428         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3429                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3430                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3431         } else {
3432                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3433                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3434         }
3435
3436         /* Initialize User Priority to Traffic Class mapping */
3437         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3438                 tc = &dcb_config->tc_config[j];
3439                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3440         }
3441
3442         /* User Priority to Traffic Class mapping */
3443         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3444                 j = vmdq_tx_conf->dcb_tc[i];
3445                 tc = &dcb_config->tc_config[j];
3446                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3447                                                 (uint8_t)(1 << i);
3448         }
3449 }
3450
3451 static void
3452 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3453                 struct ixgbe_dcb_config *dcb_config)
3454 {
3455         struct rte_eth_dcb_rx_conf *rx_conf =
3456                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3457         struct ixgbe_dcb_tc_config *tc;
3458         uint8_t i, j;
3459
3460         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3461         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3462
3463         /* Initialize User Priority to Traffic Class mapping */
3464         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3465                 tc = &dcb_config->tc_config[j];
3466                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3467         }
3468
3469         /* User Priority to Traffic Class mapping */
3470         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3471                 j = rx_conf->dcb_tc[i];
3472                 tc = &dcb_config->tc_config[j];
3473                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3474                                                 (uint8_t)(1 << i);
3475         }
3476 }
3477
3478 static void
3479 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3480                 struct ixgbe_dcb_config *dcb_config)
3481 {
3482         struct rte_eth_dcb_tx_conf *tx_conf =
3483                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3484         struct ixgbe_dcb_tc_config *tc;
3485         uint8_t i, j;
3486
3487         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3488         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3489
3490         /* Initialize User Priority to Traffic Class mapping */
3491         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3492                 tc = &dcb_config->tc_config[j];
3493                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3494         }
3495
3496         /* User Priority to Traffic Class mapping */
3497         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3498                 j = tx_conf->dcb_tc[i];
3499                 tc = &dcb_config->tc_config[j];
3500                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3501                                                 (uint8_t)(1 << i);
3502         }
3503 }
3504
3505 /**
3506  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3507  * @dev: pointer to eth_dev structure
3508  * @dcb_config: pointer to ixgbe_dcb_config structure
3509  */
3510 static void
3511 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3512                        struct ixgbe_dcb_config *dcb_config)
3513 {
3514         uint32_t reg;
3515         uint32_t vlanctrl;
3516         uint8_t i;
3517         uint32_t q;
3518         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3519
3520         PMD_INIT_FUNC_TRACE();
3521         /*
3522          * Disable the arbiter before changing parameters
3523          * (always enable recycle mode; WSP)
3524          */
3525         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3526         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3527
3528         if (hw->mac.type != ixgbe_mac_82598EB) {
3529                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3530                 if (dcb_config->num_tcs.pg_tcs == 4) {
3531                         if (dcb_config->vt_mode)
3532                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3533                                         IXGBE_MRQC_VMDQRT4TCEN;
3534                         else {
3535                                 /* whether the mode is DCB or DCB_RSS, just
3536                                  * set MRQE to RSSXTCEN; RSS itself is
3537                                  * controlled by the RSS_FIELD bits
3538                                  */
3539                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3540                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3541                                         IXGBE_MRQC_RTRSS4TCEN;
3542                         }
3543                 }
3544                 if (dcb_config->num_tcs.pg_tcs == 8) {
3545                         if (dcb_config->vt_mode)
3546                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3547                                         IXGBE_MRQC_VMDQRT8TCEN;
3548                         else {
3549                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3550                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3551                                         IXGBE_MRQC_RTRSS8TCEN;
3552                         }
3553                 }
3554
3555                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3556
3557                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3558                         /* Disable drop for all queues in VMDQ mode*/
3559                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3560                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3561                                                 (IXGBE_QDE_WRITE |
3562                                                  (q << IXGBE_QDE_IDX_SHIFT)));
3563                 } else {
3564                         /* Enable drop for all queues in SRIOV mode */
3565                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3566                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3567                                                 (IXGBE_QDE_WRITE |
3568                                                  (q << IXGBE_QDE_IDX_SHIFT) |
3569                                                  IXGBE_QDE_ENABLE));
3570                 }
3571         }
3572
3573         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3574         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3575         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3576         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3577
3578         /* VFTA - enable all vlan filters */
3579         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3580                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3581         }
3582
3583         /*
3584          * Configure Rx packet plane (recycle mode; WSP) and
3585          * enable arbiter
3586          */
3587         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3588         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3589 }
3590
3591 static void
3592 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3593                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3594 {
3595         switch (hw->mac.type) {
3596         case ixgbe_mac_82598EB:
3597                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3598                 break;
3599         case ixgbe_mac_82599EB:
3600         case ixgbe_mac_X540:
3601         case ixgbe_mac_X550:
3602         case ixgbe_mac_X550EM_x:
3603         case ixgbe_mac_X550EM_a:
3604                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3605                                                   tsa, map);
3606                 break;
3607         default:
3608                 break;
3609         }
3610 }
3611
3612 static void
3613 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3614                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3615 {
3616         switch (hw->mac.type) {
3617         case ixgbe_mac_82598EB:
3618                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3619                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3620                 break;
3621         case ixgbe_mac_82599EB:
3622         case ixgbe_mac_X540:
3623         case ixgbe_mac_X550:
3624         case ixgbe_mac_X550EM_x:
3625         case ixgbe_mac_X550EM_a:
3626                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3627                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3628                 break;
3629         default:
3630                 break;
3631         }
3632 }
3633
3634 #define DCB_RX_CONFIG  1
3635 #define DCB_TX_CONFIG  1
3636 #define DCB_TX_PB      1024
3637 /**
3638  * ixgbe_dcb_hw_configure - Enable DCB and configure
3639  * general DCB in VT mode and non-VT mode parameters
3640  * @dev: pointer to rte_eth_dev structure
3641  * @dcb_config: pointer to ixgbe_dcb_config structure
3642  */
3643 static int
3644 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3645                         struct ixgbe_dcb_config *dcb_config)
3646 {
3647         int     ret = 0;
3648         uint8_t i, pfc_en, nb_tcs;
3649         uint16_t pbsize, rx_buffer_size;
3650         uint8_t config_dcb_rx = 0;
3651         uint8_t config_dcb_tx = 0;
3652         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3653         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3654         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3655         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3656         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3657         struct ixgbe_dcb_tc_config *tc;
3658         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3659         struct ixgbe_hw *hw =
3660                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3661
3662         switch (dev->data->dev_conf.rxmode.mq_mode) {
3663         case ETH_MQ_RX_VMDQ_DCB:
3664                 dcb_config->vt_mode = true;
3665                 if (hw->mac.type != ixgbe_mac_82598EB) {
3666                         config_dcb_rx = DCB_RX_CONFIG;
3667                         /*
3668                          * get DCB and VT RX configuration parameters
3669                          * from rte_eth_conf
3670                          */
3671                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3672                         /*Configure general VMDQ and DCB RX parameters*/
3673                         ixgbe_vmdq_dcb_configure(dev);
3674                 }
3675                 break;
3676         case ETH_MQ_RX_DCB:
3677         case ETH_MQ_RX_DCB_RSS:
3678                 dcb_config->vt_mode = false;
3679                 config_dcb_rx = DCB_RX_CONFIG;
3680                 /* Get DCB RX configuration parameters from rte_eth_conf */
3681                 ixgbe_dcb_rx_config(dev, dcb_config);
3682                 /* Configure general DCB RX parameters */
3683                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
3684                 break;
3685         default:
3686                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3687                 break;
3688         }
3689         switch (dev->data->dev_conf.txmode.mq_mode) {
3690         case ETH_MQ_TX_VMDQ_DCB:
3691                 dcb_config->vt_mode = true;
3692                 config_dcb_tx = DCB_TX_CONFIG;
3693                 /* Get DCB and VT TX configuration parameters
3694                  * from rte_eth_conf.
3695                  */
3696                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3697                 /* Configure general VMDQ and DCB TX parameters */
3698                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3699                 break;
3700
3701         case ETH_MQ_TX_DCB:
3702                 dcb_config->vt_mode = false;
3703                 config_dcb_tx = DCB_TX_CONFIG;
3704                 /* Get DCB TX configuration parameters from rte_eth_conf */
3705                 ixgbe_dcb_tx_config(dev, dcb_config);
3706                 /* Configure general DCB TX parameters */
3707                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3708                 break;
3709         default:
3710                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3711                 break;
3712         }
3713
3714         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3715         /* Unpack map */
3716         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3717         if (nb_tcs == ETH_4_TCS) {
3718                 /* Avoid un-configured priority mapping to TC0 */
3719                 uint8_t j = 4;
3720                 uint8_t mask = 0xFF;
3721
3722                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3723                         mask = (uint8_t)(mask & (~(1 << map[i])));
3724                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3725                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3726                                 map[j++] = i;
3727                         mask >>= 1;
3728                 }
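                /*
                 * Illustrative example (assumed input, not read from the
                 * device): if the unpacked map is {0, 1, 2, 3, 0, 0, 0, 0},
                 * the first loop clears bits 0-3 and leaves mask = 0xF0, so
                 * the second loop rewrites user priorities 4-7 to TCs 4-7
                 * instead of leaving them all piled on TC0.
                 */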
3729                 /* Re-configure 4 TCs BW */
3730                 for (i = 0; i < nb_tcs; i++) {
3731                         tc = &dcb_config->tc_config[i];
3732                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3733                                                 (uint8_t)(100 / nb_tcs);
3734                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3735                                                 (uint8_t)(100 / nb_tcs);
3736                 }
3737                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3738                         tc = &dcb_config->tc_config[i];
3739                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3740                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3741                 }
3742         } else {
3743                 /* Re-configure 8 TCs BW */
3744                 for (i = 0; i < nb_tcs; i++) {
3745                         tc = &dcb_config->tc_config[i];
3746                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3747                                 (uint8_t)(100 / nb_tcs + (i & 1));
3748                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3749                                 (uint8_t)(100 / nb_tcs + (i & 1));
3750                 }
3751         }
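        /*
         * Worked example of the split above: with 4 TCs each class gets
         * 100 / 4 = 25% on both paths; with 8 TCs the (i & 1) term
         * alternates 12% and 13%, so the eight shares still sum to 100%.
         */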
3752
3753         switch (hw->mac.type) {
3754         case ixgbe_mac_X550:
3755         case ixgbe_mac_X550EM_x:
3756         case ixgbe_mac_X550EM_a:
3757                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3758                 break;
3759         default:
3760                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3761                 break;
3762         }
3763
3764         if (config_dcb_rx) {
3765                 /* Set RX buffer size */
3766                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3767                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3768
3769                 for (i = 0; i < nb_tcs; i++) {
3770                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3771                 }
3772                 /* Zero the buffer size of all unused TCs */
3773                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3774                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3775                 }
3776         }
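        /*
         * Sizing sketch (using the compile-time defaults, e.g. a 512 KB
         * NIC_RX_BUFFER_SIZE on pre-X550 parts): with 8 TCs each class gets
         * a 64 KB RXPBSIZE slice; when PFC is enabled, the PFC block further
         * down then sets high_water = 48 KB (3/4) and low_water = 16 KB (1/4)
         * of that slice.
         */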
3777         if (config_dcb_tx) {
3778                 /* Only an equally distributed Tx packet buffer
3779                  * strategy is supported.
3780                  */
3781                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3782                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3783
3784                 for (i = 0; i < nb_tcs; i++) {
3785                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3786                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3787                 }
3788                 /* Clear unused TCs, if any, to zero buffer size */
3789                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3790                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3791                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3792                 }
3793         }
3794
3795         /* Calculate traffic class credits */
3796         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3797                                 IXGBE_DCB_TX_CONFIG);
3798         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3799                                 IXGBE_DCB_RX_CONFIG);
3800
3801         if (config_dcb_rx) {
3802                 /* Unpack CEE standard containers */
3803                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3804                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3805                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3806                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3807                 /* Configure PG(ETS) RX */
3808                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3809         }
3810
3811         if (config_dcb_tx) {
3812                 /* Unpack CEE standard containers */
3813                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3814                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3815                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3816                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3817                 /* Configure PG(ETS) TX */
3818                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3819         }
3820
3821         /* Configure queue statistics registers */
3822         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3823
3824         /* Check if the PFC is supported */
3825         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3826                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3827                 for (i = 0; i < nb_tcs; i++) {
3828                         /*
3829                          * If the TC count is 8 and the default high_water
3830                          * is 48, the default low_water is 16.
3831                          */
3832                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3833                         hw->fc.low_water[i] = pbsize / 4;
3834                         /* Enable pfc for this TC */
3835                         tc = &dcb_config->tc_config[i];
3836                         tc->pfc = ixgbe_dcb_pfc_enabled;
3837                 }
3838                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3839                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3840                         pfc_en &= 0x0F;
3841                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3842         }
3843
3844         return ret;
3845 }
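
/*
 * Usage sketch (application side, not part of the PMD): a port is placed in
 * DCB mode purely through rte_eth_conf before rte_eth_dev_configure(), e.g.:
 *
 *     struct rte_eth_conf conf = { 0 };
 *
 *     conf.rxmode.mq_mode = ETH_MQ_RX_DCB;
 *     conf.txmode.mq_mode = ETH_MQ_TX_DCB;
 *     conf.rx_adv_conf.dcb_rx_conf.nb_tcs = ETH_4_TCS;
 *     conf.tx_adv_conf.dcb_tx_conf.nb_tcs = ETH_4_TCS;
 *     conf.dcb_capability_en = ETH_DCB_PFC_SUPPORT;
 *
 * ixgbe_configure_dcb() below then drives ixgbe_dcb_hw_configure() with
 * parameters taken from this configuration.
 */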
3846
3847 /**
3848  * ixgbe_configure_dcb - Configure DCB hardware
3849  * @dev: pointer to rte_eth_dev
3850  */
3851 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3852 {
3853         struct ixgbe_dcb_config *dcb_cfg =
3854                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3855         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3856
3857         PMD_INIT_FUNC_TRACE();
3858
3859         /* Check whether the mq_mode supports DCB */
3860         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3861             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3862             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3863                 return;
3864
3865         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
3866                 return;
3867
3868         /* Configure DCB hardware */
3869         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3870 }
3871
3872 /*
3873  * VMDq is only supported on 10 GbE NICs.
3874  */
3875 static void
3876 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3877 {
3878         struct rte_eth_vmdq_rx_conf *cfg;
3879         struct ixgbe_hw *hw;
3880         enum rte_eth_nb_pools num_pools;
3881         uint32_t mrqc, vt_ctl, vlanctrl;
3882         uint32_t vmolr = 0;
3883         int i;
3884
3885         PMD_INIT_FUNC_TRACE();
3886         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3887         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3888         num_pools = cfg->nb_queue_pools;
3889
3890         ixgbe_rss_disable(dev);
3891
3892         /* MRQC: enable vmdq */
3893         mrqc = IXGBE_MRQC_VMDQEN;
3894         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3895
3896         /* PFVTCTL: turn on virtualisation and set the default pool */
3897         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3898         if (cfg->enable_default_pool)
3899                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3900         else
3901                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3902
3903         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3904
3905         for (i = 0; i < (int)num_pools; i++) {
3906                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3907                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3908         }
3909
3910         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3911         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3912         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3913         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3914
3915         /* VFTA - enable all vlan filters */
3916         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3917                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3918
3919         /* VFRE: pool enabling for receive - 64 */
3920         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3921         if (num_pools == ETH_64_POOLS)
3922                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3923
3924         /*
3925          * MPSAR - allow pools to read specific mac addresses
3926          * In this case, all pools should be able to read from mac addr 0
3927          */
3928         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3929         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3930
3931         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3932         for (i = 0; i < cfg->nb_pool_maps; i++) {
3933                 /* set vlan id in VF register and set the valid bit */
3934                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3935                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3936                 /*
3937                  * Put the allowed pools in the VLVFB register. Pools 0-31
3938                  * are set in the even (low) register, pools 32-63 in the
3939                  * odd (high) one.
3940                  */
3941                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3942                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
3943                                         (cfg->pool_map[i].pools & UINT32_MAX));
3944                 else
3945                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
3946                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
3947
3948         }
3949
3950         /* PFDMA Tx General Switch Control: enable VMDq loopback */
3951         if (cfg->enable_loop_back) {
3952                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3953                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3954                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3955         }
3956
3957         IXGBE_WRITE_FLUSH(hw);
3958 }
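
/*
 * Usage sketch (application side, not part of the PMD): VMDq-only Rx is
 * requested through rte_eth_conf, for example:
 *
 *     struct rte_eth_conf conf = { 0 };
 *
 *     conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
 *     conf.rx_adv_conf.vmdq_rx_conf.nb_queue_pools = ETH_64_POOLS;
 *     conf.rx_adv_conf.vmdq_rx_conf.nb_pool_maps = 1;
 *     conf.rx_adv_conf.vmdq_rx_conf.pool_map[0].vlan_id = 100;  /* example */
 *     conf.rx_adv_conf.vmdq_rx_conf.pool_map[0].pools = 1ULL << 0;
 *
 * ixgbe_dev_mq_rx_configure() routes such a configuration to the function
 * above.
 */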
3959
3960 /*
3961  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3962  * @hw: pointer to hardware structure
3963  */
3964 static void
3965 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3966 {
3967         uint32_t reg;
3968         uint32_t q;
3969
3970         PMD_INIT_FUNC_TRACE();
3971         /* PF VF Transmit Enable */
3972         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3973         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3974
3975         /* Disable the Tx desc arbiter so that MTQC can be changed */
3976         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3977         reg |= IXGBE_RTTDCS_ARBDIS;
3978         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3979
3980         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3981         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3982
3983         /* Disable drop for all queues */
3984         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3985                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3986                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3987
3988         /* Enable the Tx desc arbiter */
3989         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3990         reg &= ~IXGBE_RTTDCS_ARBDIS;
3991         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3992
3993         IXGBE_WRITE_FLUSH(hw);
3994 }
3995
3996 static int __attribute__((cold))
3997 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
3998 {
3999         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4000         uint64_t dma_addr;
4001         unsigned int i;
4002
4003         /* Initialize software ring entries */
4004         for (i = 0; i < rxq->nb_rx_desc; i++) {
4005                 volatile union ixgbe_adv_rx_desc *rxd;
4006                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4007
4008                 if (mbuf == NULL) {
4009                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4010                                      (unsigned) rxq->queue_id);
4011                         return -ENOMEM;
4012                 }
4013
4014                 rte_mbuf_refcnt_set(mbuf, 1);
4015                 mbuf->next = NULL;
4016                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4017                 mbuf->nb_segs = 1;
4018                 mbuf->port = rxq->port_id;
4019
4020                 dma_addr =
4021                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
4022                 rxd = &rxq->rx_ring[i];
4023                 rxd->read.hdr_addr = 0;
4024                 rxd->read.pkt_addr = dma_addr;
4025                 rxe[i].mbuf = mbuf;
4026         }
4027
4028         return 0;
4029 }
4030
4031 static int
4032 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4033 {
4034         struct ixgbe_hw *hw;
4035         uint32_t mrqc;
4036
4037         ixgbe_rss_configure(dev);
4038
4039         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4040
4041         /* MRQC: enable VF RSS */
4042         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4043         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4044         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4045         case ETH_64_POOLS:
4046                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4047                 break;
4048
4049         case ETH_32_POOLS:
4050                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4051                 break;
4052
4053         default:
4054                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4055                 return -EINVAL;
4056         }
4057
4058         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4059
4060         return 0;
4061 }
4062
4063 static int
4064 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4065 {
4066         struct ixgbe_hw *hw =
4067                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4068
4069         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4070         case ETH_64_POOLS:
4071                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4072                         IXGBE_MRQC_VMDQEN);
4073                 break;
4074
4075         case ETH_32_POOLS:
4076                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4077                         IXGBE_MRQC_VMDQRT4TCEN);
4078                 break;
4079
4080         case ETH_16_POOLS:
4081                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4082                         IXGBE_MRQC_VMDQRT8TCEN);
4083                 break;
4084         default:
4085                 PMD_INIT_LOG(ERR,
4086                         "invalid pool number in IOV mode");
4087                 break;
4088         }
4089         return 0;
4090 }
4091
4092 static int
4093 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4094 {
4095         struct ixgbe_hw *hw =
4096                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4097
4098         if (hw->mac.type == ixgbe_mac_82598EB)
4099                 return 0;
4100
4101         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4102                 /*
4103                  * SRIOV inactive scheme
4104                  * any DCB/RSS w/o VMDq multi-queue setting
4105                  */
4106                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4107                 case ETH_MQ_RX_RSS:
4108                 case ETH_MQ_RX_DCB_RSS:
4109                 case ETH_MQ_RX_VMDQ_RSS:
4110                         ixgbe_rss_configure(dev);
4111                         break;
4112
4113                 case ETH_MQ_RX_VMDQ_DCB:
4114                         ixgbe_vmdq_dcb_configure(dev);
4115                         break;
4116
4117                 case ETH_MQ_RX_VMDQ_ONLY:
4118                         ixgbe_vmdq_rx_hw_configure(dev);
4119                         break;
4120
4121                 case ETH_MQ_RX_NONE:
4122                 default:
4123                         /* If mq_mode is none, disable RSS mode. */
4124                         ixgbe_rss_disable(dev);
4125                         break;
4126                 }
4127         } else {
4128                 /* SRIOV active scheme
4129                  * Support RSS together with SRIOV.
4130                  */
4131                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4132                 case ETH_MQ_RX_RSS:
4133                 case ETH_MQ_RX_VMDQ_RSS:
4134                         ixgbe_config_vf_rss(dev);
4135                         break;
4136                 case ETH_MQ_RX_VMDQ_DCB:
4137                 case ETH_MQ_RX_DCB:
4138                 /* In SRIOV, the configuration is the same as VMDq case */
4139                         ixgbe_vmdq_dcb_configure(dev);
4140                         break;
4141                 /* DCB/RSS together with SRIOV is not supported */
4142                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4143                 case ETH_MQ_RX_DCB_RSS:
4144                         PMD_INIT_LOG(ERR,
4145                                 "Could not support DCB/RSS with VMDq & SRIOV");
4146                         return -1;
4147                 default:
4148                         ixgbe_config_vf_default(dev);
4149                         break;
4150                 }
4151         }
4152
4153         return 0;
4154 }
4155
4156 static int
4157 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4158 {
4159         struct ixgbe_hw *hw =
4160                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4161         uint32_t mtqc;
4162         uint32_t rttdcs;
4163
4164         if (hw->mac.type == ixgbe_mac_82598EB)
4165                 return 0;
4166
4167         /* disable arbiter before setting MTQC */
4168         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4169         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4170         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4171
4172         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4173                 /*
4174                  * SRIOV inactive scheme
4175                  * any DCB w/o VMDq multi-queue setting
4176                  */
4177                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4178                         ixgbe_vmdq_tx_hw_configure(hw);
4179                 else {
4180                         mtqc = IXGBE_MTQC_64Q_1PB;
4181                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4182                 }
4183         } else {
4184                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4185
4186                 /*
4187                  * SRIOV active scheme
4188                  * FIXME: add support for DCB together with VMDq & SRIOV
4189                  */
4190                 case ETH_64_POOLS:
4191                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4192                         break;
4193                 case ETH_32_POOLS:
4194                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4195                         break;
4196                 case ETH_16_POOLS:
4197                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4198                                 IXGBE_MTQC_8TC_8TQ;
4199                         break;
4200                 default:
4201                         mtqc = IXGBE_MTQC_64Q_1PB;
4202                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4203                 }
4204                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4205         }
4206
4207         /* re-enable arbiter */
4208         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4209         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4210
4211         return 0;
4212 }
4213
4214 /**
4215  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4216  *
4217  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4218  * spec rev. 3.0 chapter 8.2.3.8.13.
4219  *
4220  * @pool Memory pool of the Rx queue
4221  */
4222 static inline uint32_t
4223 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4224 {
4225         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4226
4227         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4228         uint16_t maxdesc =
4229                 IPV4_MAX_PKT_LEN /
4230                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4231
4232         if (maxdesc >= 16)
4233                 return IXGBE_RSCCTL_MAXDESC_16;
4234         else if (maxdesc >= 8)
4235                 return IXGBE_RSCCTL_MAXDESC_8;
4236         else if (maxdesc >= 4)
4237                 return IXGBE_RSCCTL_MAXDESC_4;
4238         else
4239                 return IXGBE_RSCCTL_MAXDESC_1;
4240 }
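
/*
 * Worked example for the helper above, assuming a common 2 KB mbuf pool
 * (data room of 2048 bytes, RTE_PKTMBUF_HEADROOM of 128 bytes):
 *
 *     maxdesc = 65535 / (2048 - 128) = 34  ->  IXGBE_RSCCTL_MAXDESC_16
 *
 * i.e. at most 16 descriptors may be chained per RSC aggregation, which keeps
 * MAXDESC * SRRCTL.BSIZEPKT below the 64 KB minus one limit quoted above.
 */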
4241
4242 /**
4243  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4244  * interrupt
4245  *
4246  * (Taken from FreeBSD tree)
4247  * (yes this is all very magic and confusing :)
4248  *
4249  * @dev port handle
4250  * @entry the register array entry
4251  * @vector the MSIX vector for this queue
4252  * @type RX/TX/MISC
4253  */
4254 static void
4255 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4256 {
4257         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4258         u32 ivar, index;
4259
4260         vector |= IXGBE_IVAR_ALLOC_VAL;
4261
4262         switch (hw->mac.type) {
4263
4264         case ixgbe_mac_82598EB:
4265                 if (type == -1)
4266                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4267                 else
4268                         entry += (type * 64);
4269                 index = (entry >> 2) & 0x1F;
4270                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4271                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4272                 ivar |= (vector << (8 * (entry & 0x3)));
4273                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4274                 break;
4275
4276         case ixgbe_mac_82599EB:
4277         case ixgbe_mac_X540:
4278                 if (type == -1) { /* MISC IVAR */
4279                         index = (entry & 1) * 8;
4280                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4281                         ivar &= ~(0xFF << index);
4282                         ivar |= (vector << index);
4283                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4284                 } else {        /* RX/TX IVARS */
4285                         index = (16 * (entry & 1)) + (8 * type);
4286                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4287                         ivar &= ~(0xFF << index);
4288                         ivar |= (vector << index);
4289                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4290                 }
4291
4292                 break;
4293
4294         default:
4295                 break;
4296         }
4297 }
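
/*
 * Mapping sketch for the 82599/X540 branch above (hypothetical queue): an Rx
 * queue with entry = 5 and type = 0 lands in IVAR(5 >> 1) = IVAR(2) at bit
 * offset 16 * (5 & 1) + 8 * 0 = 16, i.e. bits 23:16 of that register; the
 * matching Tx entry (type = 1) would occupy the next byte up, bits 31:24.
 */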
4298
4299 void __attribute__((cold))
4300 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4301 {
4302         uint16_t i, rx_using_sse;
4303         struct ixgbe_adapter *adapter =
4304                 (struct ixgbe_adapter *)dev->data->dev_private;
4305
4306         /*
4307          * In order to allow Vector Rx there are a few configuration
4308          * conditions to be met and Rx Bulk Allocation should be allowed.
4309          */
4310         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4311             !adapter->rx_bulk_alloc_allowed) {
4312                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4313                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4314                                     "not enabled",
4315                              dev->data->port_id);
4316
4317                 adapter->rx_vec_allowed = false;
4318         }
4319
4320         /*
4321          * Initialize the appropriate LRO callback.
4322          *
4323          * If all queues satisfy the bulk allocation preconditions
4324          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4325          * Otherwise use a single allocation version.
4326          */
4327         if (dev->data->lro) {
4328                 if (adapter->rx_bulk_alloc_allowed) {
4329                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4330                                            "allocation version");
4331                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4332                 } else {
4333                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4334                                            "allocation version");
4335                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4336                 }
4337         } else if (dev->data->scattered_rx) {
4338                 /*
4339                  * Set the non-LRO scattered callback: there are Vector and
4340                  * single allocation versions.
4341                  */
4342                 if (adapter->rx_vec_allowed) {
4343                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4344                                             "callback (port=%d).",
4345                                      dev->data->port_id);
4346
4347                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4348                 } else if (adapter->rx_bulk_alloc_allowed) {
4349                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4350                                            "allocation callback (port=%d).",
4351                                      dev->data->port_id);
4352                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4353                 } else {
4354                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4355                                             "single allocation) "
4356                                             "Scattered Rx callback "
4357                                             "(port=%d).",
4358                                      dev->data->port_id);
4359
4360                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4361                 }
4362         /*
4363          * Below we set "simple" callbacks according to port/queues parameters.
4364          * If parameters allow we are going to choose between the following
4365          * callbacks:
4366          *    - Vector
4367          *    - Bulk Allocation
4368          *    - Single buffer allocation (the simplest one)
4369          */
4370         } else if (adapter->rx_vec_allowed) {
4371                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4372                                     "burst size no less than %d (port=%d).",
4373                              RTE_IXGBE_DESCS_PER_LOOP,
4374                              dev->data->port_id);
4375
4376                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4377         } else if (adapter->rx_bulk_alloc_allowed) {
4378                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4379                                     "satisfied. Rx Burst Bulk Alloc function "
4380                                     "will be used on port=%d.",
4381                              dev->data->port_id);
4382
4383                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4384         } else {
4385                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4386                                     "satisfied, or Scattered Rx is requested "
4387                                     "(port=%d).",
4388                              dev->data->port_id);
4389
4390                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4391         }
4392
4393         /* Propagate information about RX function choice through all queues. */
4394
4395         rx_using_sse =
4396                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4397                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4398
4399         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4400                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4401
4402                 rxq->rx_using_sse = rx_using_sse;
4403         }
4404 }
4405
4406 /**
4407  * ixgbe_set_rsc - configure RSC related port HW registers
4408  *
4409  * Configures the port's RSC related registers according to chapter 4.6.7.2
4410  * of the 82599 Spec (x540 configuration is virtually the same).
4411  *
4412  * @dev port handle
4413  *
4414  * Returns 0 in case of success or a non-zero error code
4415  */
4416 static int
4417 ixgbe_set_rsc(struct rte_eth_dev *dev)
4418 {
4419         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4420         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4421         struct rte_eth_dev_info dev_info = { 0 };
4422         bool rsc_capable = false;
4423         uint16_t i;
4424         uint32_t rdrxctl;
4425
4426         /* Sanity check */
4427         dev->dev_ops->dev_infos_get(dev, &dev_info);
4428         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4429                 rsc_capable = true;
4430
4431         if (!rsc_capable && rx_conf->enable_lro) {
4432                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4433                                    "support it");
4434                 return -EINVAL;
4435         }
4436
4437         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4438
4439         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4440                 /*
4441                  * According to chapter 4.6.7.2.1 of the Spec Rev.
4442                  * 3.0 RSC configuration requires HW CRC stripping being
4443                  * enabled. If user requested both HW CRC stripping off
4444                  * and RSC on - return an error.
4445                  */
4446                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4447                                     "is disabled");
4448                 return -EINVAL;
4449         }
4450
4451         /* RFCTL configuration  */
4452         if (rsc_capable) {
4453                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4454
4455                 if (rx_conf->enable_lro)
4456                         /*
4457                          * Since NFS packet coalescing is not supported, clear
4458                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4459                          * enabled.
4460                          */
4461                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4462                                    IXGBE_RFCTL_NFSR_DIS);
4463                 else
4464                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4465
4466                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4467         }
4468
4469         /* If LRO hasn't been requested - we are done here. */
4470         if (!rx_conf->enable_lro)
4471                 return 0;
4472
4473         /* Set RDRXCTL.RSCACKC bit */
4474         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4475         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4476         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4477
4478         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4479         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4480                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4481                 uint32_t srrctl =
4482                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4483                 uint32_t rscctl =
4484                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4485                 uint32_t psrtype =
4486                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4487                 uint32_t eitr =
4488                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4489
4490                 /*
4491                  * ixgbe PMD doesn't support header-split at the moment.
4492                  *
4493                  * Following chapter 4.6.7.2.1 of the 82599/x540
4494                  * Spec, if RSC is enabled the SRRCTL[n].BSIZEHEADER
4495                  * should be configured even if header split is not
4496                  * enabled. We configure it to 128 bytes following the
4497                  * recommendation in the spec.
4498                  */
4499                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4500                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4501                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4502
4503                 /*
4504                  * TODO: Consider setting the Receive Descriptor Minimum
4505                  * Threshold Size for an RSC case. This is not an obviously
4506                  * beneficial option, but one worth considering...
4507                  */
4508
4509                 rscctl |= IXGBE_RSCCTL_RSCEN;
4510                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4511                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4512
4513                 /*
4514                  * RSC: Set ITR interval corresponding to 2K ints/s.
4515                  *
4516                  * Full-sized RSC aggregations for a 10Gb/s link will
4517                  * arrive at about a 20K aggregations/s rate.
4518                  *
4519                  * A 2K ints/s rate means only about 10% of the
4520                  * aggregations are closed due to interrupt timer
4521                  * expiration when streaming at wire speed.
4522                  *
4523                  * For a sparse streaming case this setting will yield
4524                  * at most 500us latency for a single RSC aggregation.
4525                  */
4526                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4527                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
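                /*
                 * Arithmetic behind the comment above: a 500 us EITR interval
                 * allows at most 1 / 500 us = 2000 timer-forced closes per
                 * second, i.e. roughly 10% of the ~20000 full-sized
                 * aggregations/s expected at 10 Gb/s line rate.
                 */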
4528
4529                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4530                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4531                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4532                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4533
4534                 /*
4535                  * RSC requires the mapping of the queue to the
4536                  * interrupt vector.
4537                  */
4538                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4539         }
4540
4541         dev->data->lro = 1;
4542
4543         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4544
4545         return 0;
4546 }
4547
4548 /*
4549  * Initializes Receive Unit.
4550  */
4551 int __attribute__((cold))
4552 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4553 {
4554         struct ixgbe_hw     *hw;
4555         struct ixgbe_rx_queue *rxq;
4556         uint64_t bus_addr;
4557         uint32_t rxctrl;
4558         uint32_t fctrl;
4559         uint32_t hlreg0;
4560         uint32_t maxfrs;
4561         uint32_t srrctl;
4562         uint32_t rdrxctl;
4563         uint32_t rxcsum;
4564         uint16_t buf_size;
4565         uint16_t i;
4566         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4567         int rc;
4568
4569         PMD_INIT_FUNC_TRACE();
4570         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4571
4572         /*
4573          * Make sure receives are disabled while setting
4574          * up the RX context (registers, descriptor rings, etc.).
4575          */
4576         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4577         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4578
4579         /* Enable receipt of broadcast frames */
4580         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4581         fctrl |= IXGBE_FCTRL_BAM;
4582         fctrl |= IXGBE_FCTRL_DPF;
4583         fctrl |= IXGBE_FCTRL_PMCF;
4584         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4585
4586         /*
4587          * Configure CRC stripping, if any.
4588          */
4589         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4590         if (rx_conf->hw_strip_crc)
4591                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4592         else
4593                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4594
4595         /*
4596          * Configure jumbo frame support, if any.
4597          */
4598         if (rx_conf->jumbo_frame == 1) {
4599                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4600                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4601                 maxfrs &= 0x0000FFFF;
4602                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4603                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4604         } else
4605                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4606
4607         /*
4608          * If loopback mode is configured for 82599, set LPBK bit.
4609          */
4610         if (hw->mac.type == ixgbe_mac_82599EB &&
4611                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4612                 hlreg0 |= IXGBE_HLREG0_LPBK;
4613         else
4614                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4615
4616         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4617
4618         /* Setup RX queues */
4619         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4620                 rxq = dev->data->rx_queues[i];
4621
4622                 /*
4623                  * Reset crc_len in case it was changed after queue setup by a
4624                  * call to configure.
4625                  */
4626                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4627
4628                 /* Setup the Base and Length of the Rx Descriptor Rings */
4629                 bus_addr = rxq->rx_ring_phys_addr;
4630                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4631                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4632                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4633                                 (uint32_t)(bus_addr >> 32));
4634                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4635                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4636                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4637                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4638
4639                 /* Configure the SRRCTL register */
4640 #ifdef RTE_HEADER_SPLIT_ENABLE
4641                 /*
4642                  * Configure Header Split
4643                  */
4644                 if (rx_conf->header_split) {
4645                         if (hw->mac.type == ixgbe_mac_82599EB) {
4646                                 /* Must setup the PSRTYPE register */
4647                                 uint32_t psrtype;
4648
4649                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4650                                         IXGBE_PSRTYPE_UDPHDR   |
4651                                         IXGBE_PSRTYPE_IPV4HDR  |
4652                                         IXGBE_PSRTYPE_IPV6HDR;
4653                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4654                         }
4655                         srrctl = ((rx_conf->split_hdr_size <<
4656                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4657                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4658                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4659                 } else
4660 #endif
4661                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4662
4663                 /* Set if packets are dropped when no descriptors available */
4664                 if (rxq->drop_en)
4665                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4666
4667                 /*
4668                  * Configure the RX buffer size in the BSIZEPACKET field of
4669                  * the SRRCTL register of the queue.
4670                  * The value is in 1 KB resolution. Valid values can be from
4671                  * 1 KB to 16 KB.
4672                  */
4673                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4674                         RTE_PKTMBUF_HEADROOM);
4675                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4676                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4677
4678                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4679
4680                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4681                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4682
4683                 /* Add dual VLAN tag length to support double VLAN tagging */
4684                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4685                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4686                         dev->data->scattered_rx = 1;
4687         }
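        /*
         * Example of the sizing decision above, assuming a 2 KB mbuf pool
         * (data room 2048, headroom 128): buf_size = 1920 is written as
         * BSIZEPKT = 1920 >> 10 = 1, so the value read back is 1 << 10 =
         * 1024 bytes; any max_rx_pkt_len larger than 1024 - 8 (two VLAN
         * tags) then forces scattered Rx.
         */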
4688
4689         if (rx_conf->enable_scatter)
4690                 dev->data->scattered_rx = 1;
4691
4692         /*
4693          * Device configured with multiple RX queues.
4694          */
4695         ixgbe_dev_mq_rx_configure(dev);
4696
4697         /*
4698          * Setup the Checksum Register.
4699          * Disable Full-Packet Checksum, which is mutually exclusive with RSS.
4700          * Enable IP/L4 checksum computation by hardware if requested to do so.
4701          */
4702         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4703         rxcsum |= IXGBE_RXCSUM_PCSD;
4704         if (rx_conf->hw_ip_checksum)
4705                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4706         else
4707                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4708
4709         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4710
4711         if (hw->mac.type == ixgbe_mac_82599EB ||
4712             hw->mac.type == ixgbe_mac_X540) {
4713                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4714                 if (rx_conf->hw_strip_crc)
4715                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4716                 else
4717                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4718                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4719                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4720         }
4721
4722         rc = ixgbe_set_rsc(dev);
4723         if (rc)
4724                 return rc;
4725
4726         ixgbe_set_rx_function(dev);
4727
4728         return 0;
4729 }
4730
4731 /*
4732  * Initializes Transmit Unit.
4733  */
4734 void __attribute__((cold))
4735 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4736 {
4737         struct ixgbe_hw     *hw;
4738         struct ixgbe_tx_queue *txq;
4739         uint64_t bus_addr;
4740         uint32_t hlreg0;
4741         uint32_t txctrl;
4742         uint16_t i;
4743
4744         PMD_INIT_FUNC_TRACE();
4745         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4746
4747         /* Enable TX CRC (checksum offload requirement) and hw padding
4748          * (TSO requirement)
4749          */
4750         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4751         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4752         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4753
4754         /* Setup the Base and Length of the Tx Descriptor Rings */
4755         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4756                 txq = dev->data->tx_queues[i];
4757
4758                 bus_addr = txq->tx_ring_phys_addr;
4759                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4760                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4761                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4762                                 (uint32_t)(bus_addr >> 32));
4763                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4764                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4765                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4766                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4767                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4768
4769                 /*
4770                  * Disable Tx Head Writeback RO bit, since this hoses
4771                  * bookkeeping if things aren't delivered in order.
4772                  */
4773                 switch (hw->mac.type) {
4774                 case ixgbe_mac_82598EB:
4775                         txctrl = IXGBE_READ_REG(hw,
4776                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4777                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4778                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4779                                         txctrl);
4780                         break;
4781
4782                 case ixgbe_mac_82599EB:
4783                 case ixgbe_mac_X540:
4784                 case ixgbe_mac_X550:
4785                 case ixgbe_mac_X550EM_x:
4786                 case ixgbe_mac_X550EM_a:
4787                 default:
4788                         txctrl = IXGBE_READ_REG(hw,
4789                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4790                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4791                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4792                                         txctrl);
4793                         break;
4794                 }
4795         }
4796
4797         /* Device configured with multiple TX queues. */
4798         ixgbe_dev_mq_tx_configure(dev);
4799 }
4800
4801 /*
4802  * Set up link for 82599 loopback mode Tx->Rx.
4803  */
4804 static inline void __attribute__((cold))
4805 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4806 {
4807         PMD_INIT_FUNC_TRACE();
4808
4809         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4810                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4811                                 IXGBE_SUCCESS) {
4812                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4813                         /* ignore error */
4814                         return;
4815                 }
4816         }
4817
4818         /* Restart link */
4819         IXGBE_WRITE_REG(hw,
4820                         IXGBE_AUTOC,
4821                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4822         ixgbe_reset_pipeline_82599(hw);
4823
4824         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4825         msec_delay(50);
4826 }
4827
4828
4829 /*
4830  * Start Transmit and Receive Units.
4831  */
4832 int __attribute__((cold))
4833 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4834 {
4835         struct ixgbe_hw     *hw;
4836         struct ixgbe_tx_queue *txq;
4837         struct ixgbe_rx_queue *rxq;
4838         uint32_t txdctl;
4839         uint32_t dmatxctl;
4840         uint32_t rxctrl;
4841         uint16_t i;
4842         int ret = 0;
4843
4844         PMD_INIT_FUNC_TRACE();
4845         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4846
4847         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4848                 txq = dev->data->tx_queues[i];
4849                 /* Setup Transmit Threshold Registers */
4850                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4851                 txdctl |= txq->pthresh & 0x7F;
4852                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4853                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4854                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4855         }
4856
4857         if (hw->mac.type != ixgbe_mac_82598EB) {
4858                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4859                 dmatxctl |= IXGBE_DMATXCTL_TE;
4860                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4861         }
4862
4863         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4864                 txq = dev->data->tx_queues[i];
4865                 if (!txq->tx_deferred_start) {
4866                         ret = ixgbe_dev_tx_queue_start(dev, i);
4867                         if (ret < 0)
4868                                 return ret;
4869                 }
4870         }
4871
4872         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4873                 rxq = dev->data->rx_queues[i];
4874                 if (!rxq->rx_deferred_start) {
4875                         ret = ixgbe_dev_rx_queue_start(dev, i);
4876                         if (ret < 0)
4877                                 return ret;
4878                 }
4879         }
4880
4881         /* Enable Receive engine */
4882         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4883         if (hw->mac.type == ixgbe_mac_82598EB)
4884                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4885         rxctrl |= IXGBE_RXCTRL_RXEN;
4886         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4887
4888         /* If loopback mode is enabled for 82599, set up the link accordingly */
4889         if (hw->mac.type == ixgbe_mac_82599EB &&
4890                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4891                 ixgbe_setup_loopback_link_82599(hw);
4892
4893         return 0;
4894 }
4895
4896 /*
4897  * Start Receive Units for specified queue.
4898  */
4899 int __attribute__((cold))
4900 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4901 {
4902         struct ixgbe_hw     *hw;
4903         struct ixgbe_rx_queue *rxq;
4904         uint32_t rxdctl;
4905         int poll_ms;
4906
4907         PMD_INIT_FUNC_TRACE();
4908         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4909
4910         if (rx_queue_id < dev->data->nb_rx_queues) {
4911                 rxq = dev->data->rx_queues[rx_queue_id];
4912
4913                 /* Allocate buffers for descriptor rings */
4914                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4915                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4916                                      rx_queue_id);
4917                         return -1;
4918                 }
4919                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4920                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4921                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4922
4923                 /* Wait until RX Enable ready */
4924                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4925                 do {
4926                         rte_delay_ms(1);
4927                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4928                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4929                 if (!poll_ms)
4930                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4931                                      rx_queue_id);
4932                 rte_wmb();
4933                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4934                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4935                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4936         } else
4937                 return -1;
4938
4939         return 0;
4940 }
4941
4942 /*
4943  * Stop Receive Units for specified queue.
4944  */
4945 int __attribute__((cold))
4946 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4947 {
4948         struct ixgbe_hw     *hw;
4949         struct ixgbe_adapter *adapter =
4950                 (struct ixgbe_adapter *)dev->data->dev_private;
4951         struct ixgbe_rx_queue *rxq;
4952         uint32_t rxdctl;
4953         int poll_ms;
4954
4955         PMD_INIT_FUNC_TRACE();
4956         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4957
4958         if (rx_queue_id < dev->data->nb_rx_queues) {
4959                 rxq = dev->data->rx_queues[rx_queue_id];
4960
4961                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4962                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4963                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4964
4965                 /* Wait until RX Enable bit clear */
4966                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4967                 do {
4968                         rte_delay_ms(1);
4969                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4970                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4971                 if (!poll_ms)
4972                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4973                                      rx_queue_id);
4974
4975                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4976
4977                 ixgbe_rx_queue_release_mbufs(rxq);
4978                 ixgbe_reset_rx_queue(adapter, rxq);
4979                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4980         } else
4981                 return -1;
4982
4983         return 0;
4984 }
4985
4986
4987 /*
4988  * Start Transmit Units for specified queue.
4989  */
4990 int __attribute__((cold))
4991 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4992 {
4993         struct ixgbe_hw     *hw;
4994         struct ixgbe_tx_queue *txq;
4995         uint32_t txdctl;
4996         int poll_ms;
4997
4998         PMD_INIT_FUNC_TRACE();
4999         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5000
5001         if (tx_queue_id < dev->data->nb_tx_queues) {
5002                 txq = dev->data->tx_queues[tx_queue_id];
5003                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5004                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5005                 txdctl |= IXGBE_TXDCTL_ENABLE;
5006                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5007
5008                 /* Wait until TX Enable ready */
5009                 if (hw->mac.type == ixgbe_mac_82599EB) {
5010                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5011                         do {
5012                                 rte_delay_ms(1);
5013                                 txdctl = IXGBE_READ_REG(hw,
5014                                         IXGBE_TXDCTL(txq->reg_idx));
5015                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5016                         if (!poll_ms)
5017                                 PMD_INIT_LOG(ERR, "Could not enable "
5018                                              "Tx Queue %d", tx_queue_id);
5019                 }
5020                 rte_wmb();
5021                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5022                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5023         } else
5024                 return -1;
5025
5026         return 0;
5027 }
5028
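/*
 * Usage note (editorial): the per-queue Tx start/stop handlers back the
 * generic rte_eth_dev_tx_queue_start()/rte_eth_dev_tx_queue_stop() calls.
 * A minimal, illustrative sketch of deferring a Tx queue at setup time
 * and starting it later from the application (ids and sizes are
 * arbitrary):
 *
 *     struct rte_eth_txconf txconf = dev_info.default_txconf;
 *     txconf.tx_deferred_start = 1;
 *     rte_eth_tx_queue_setup(port_id, 1, nb_txd, socket_id, &txconf);
 *     ...
 *     rte_eth_dev_start(port_id);
 *     rte_eth_dev_tx_queue_start(port_id, 1);
 */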
5029 /*
5030  * Stop Transmit Units for specified queue.
5031  */
5032 int __attribute__((cold))
5033 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5034 {
5035         struct ixgbe_hw     *hw;
5036         struct ixgbe_tx_queue *txq;
5037         uint32_t txdctl;
5038         uint32_t txtdh, txtdt;
5039         int poll_ms;
5040
5041         PMD_INIT_FUNC_TRACE();
5042         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5043
5044         if (tx_queue_id >= dev->data->nb_tx_queues)
5045                 return -1;
5046
5047         txq = dev->data->tx_queues[tx_queue_id];
5048
5049         /* Wait until TX queue is empty */
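        /*
         * The ring is considered drained once the head pointer (TDH) has
         * caught up with the tail pointer (TDT), i.e. the hardware has
         * processed every descriptor the driver has posted.
         */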
5050         if (hw->mac.type == ixgbe_mac_82599EB) {
5051                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5052                 do {
5053                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5054                         txtdh = IXGBE_READ_REG(hw,
5055                                                IXGBE_TDH(txq->reg_idx));
5056                         txtdt = IXGBE_READ_REG(hw,
5057                                                IXGBE_TDT(txq->reg_idx));
5058                 } while (--poll_ms && (txtdh != txtdt));
5059                 if (!poll_ms)
5060                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
5061                                      "when stopping.", tx_queue_id);
5062         }
5063
5064         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5065         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5066         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5067
5068         /* Wait until TX Enable bit clear */
5069         if (hw->mac.type == ixgbe_mac_82599EB) {
5070                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5071                 do {
5072                         rte_delay_ms(1);
5073                         txdctl = IXGBE_READ_REG(hw,
5074                                                 IXGBE_TXDCTL(txq->reg_idx));
5075                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5076                 if (!poll_ms)
5077                         PMD_INIT_LOG(ERR, "Could not disable "
5078                                      "Tx Queue %d", tx_queue_id);
5079         }
5080
5081         if (txq->ops != NULL) {
5082                 txq->ops->release_mbufs(txq);
5083                 txq->ops->reset(txq);
5084         }
5085         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5086
5087         return 0;
5088 }
5089
5090 void
5091 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5092         struct rte_eth_rxq_info *qinfo)
5093 {
5094         struct ixgbe_rx_queue *rxq;
5095
5096         rxq = dev->data->rx_queues[queue_id];
5097
5098         qinfo->mp = rxq->mb_pool;
5099         qinfo->scattered_rx = dev->data->scattered_rx;
5100         qinfo->nb_desc = rxq->nb_rx_desc;
5101
5102         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5103         qinfo->conf.rx_drop_en = rxq->drop_en;
5104         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5105 }
5106
5107 void
5108 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5109         struct rte_eth_txq_info *qinfo)
5110 {
5111         struct ixgbe_tx_queue *txq;
5112
5113         txq = dev->data->tx_queues[queue_id];
5114
5115         qinfo->nb_desc = txq->nb_tx_desc;
5116
5117         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5118         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5119         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5120
5121         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5122         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5123         qinfo->conf.txq_flags = txq->txq_flags;
5124         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5125 }
5126
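/*
 * Usage note (editorial): the two helpers above are reached through the
 * generic rte_eth_rx_queue_info_get()/rte_eth_tx_queue_info_get() calls.
 * An illustrative query (ids are arbitrary):
 *
 *     struct rte_eth_rxq_info rxq_info;
 *     if (rte_eth_rx_queue_info_get(port_id, 0, &rxq_info) == 0)
 *             printf("Rx ring size: %u\n", rxq_info.nb_desc);
 */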
5127 /*
5128  * [VF] Initializes Receive Unit.
5129  */
5130 int __attribute__((cold))
5131 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5132 {
5133         struct ixgbe_hw     *hw;
5134         struct ixgbe_rx_queue *rxq;
5135         uint64_t bus_addr;
5136         uint32_t srrctl, psrtype = 0;
5137         uint16_t buf_size;
5138         uint16_t i;
5139         int ret;
5140
5141         PMD_INIT_FUNC_TRACE();
5142         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5143
5144         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5145                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5146                         "it must be a power of 2");
5147                 return -1;
5148         }
5149
5150         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5151                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5152                         "it must be less than or equal to %d",
5153                         hw->mac.max_rx_queues);
5154                 return -1;
5155         }
5156
5157         /*
5158          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5159          * disables VF packet reception if the PF MTU is > 1500.
5160          * This works around an 82599 limitation that forces the PF and all
5161          * VFs to share the same MTU.
5162          * The PF driver re-enables VF packet reception once the VF driver
5163          * issues an IXGBE_VF_SET_LPE request.
5164          * In the meantime the VF device cannot be used, even if the VF driver
5165          * and the guest VM network stack are ready to accept packets of a
5166          * size up to the PF MTU.
5167          * As a workaround for this PF behaviour, force the call to
5168          * ixgbevf_rlpml_set_vf even when jumbo frames are not used, so that
5169          * VF packet reception works in all cases.
5170          */
5171         ixgbevf_rlpml_set_vf(hw,
5172                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5173
5174         /* Setup RX queues */
5175         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5176                 rxq = dev->data->rx_queues[i];
5177
5178                 /* Allocate buffers for descriptor rings */
5179                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5180                 if (ret)
5181                         return ret;
5182
5183                 /* Setup the Base and Length of the Rx Descriptor Rings */
5184                 bus_addr = rxq->rx_ring_phys_addr;
5185
5186                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5187                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5188                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5189                                 (uint32_t)(bus_addr >> 32));
5190                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5191                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5192                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5193                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5194
5195
5196                 /* Configure the SRRCTL register */
5197 #ifdef RTE_HEADER_SPLIT_ENABLE
5198                 /*
5199                  * Configure Header Split
5200                  */
5201                 if (dev->data->dev_conf.rxmode.header_split) {
5202                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5203                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5204                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5205                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5206                 } else
5207 #endif
5208                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5209
5210                 /* Enable packet drop when no receive descriptors are available, if requested */
5211                 if (rxq->drop_en)
5212                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5213
5214                 /*
5215                  * Configure the RX buffer size in the BSIZEPACKET field of
5216                  * the SRRCTL register of the queue.
5217                  * The value is in 1 KB resolution. Valid values can be from
5218                  * 1 KB to 16 KB.
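                 *
                 * Worked example (editorial): assuming a mempool created
                 * with the common defaults of a 2176-byte data room and a
                 * 128-byte headroom, buf_size = 2176 - 128 = 2048 and
                 * 2048 >> 10 yields a BSIZEPACKET value of 2, i.e. 2 KB
                 * receive buffers.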
5219                  */
5220                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5221                         RTE_PKTMBUF_HEADROOM);
5222                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5223                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5224
5225                 /*
5226                  * VF modification to write virtual function SRRCTL register
5227                  */
5228                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5229
5230                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5231                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5232
5233                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5234                     /* account for two VLAN tags (dual VLAN) in the frame size */
5235                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5236                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5237                         if (!dev->data->scattered_rx)
5238                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5239                         dev->data->scattered_rx = 1;
5240                 }
5241         }
5242
5243 #ifdef RTE_HEADER_SPLIT_ENABLE
5244         if (dev->data->dev_conf.rxmode.header_split)
5245                 /* Must setup the PSRTYPE register */
5246                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5247                         IXGBE_PSRTYPE_UDPHDR   |
5248                         IXGBE_PSRTYPE_IPV4HDR  |
5249                         IXGBE_PSRTYPE_IPV6HDR;
5250 #endif
5251
5252         /* Set RQPL (RSS queues per pool) according to the number of Rx queues */
5253         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5254                 IXGBE_PSRTYPE_RQPL_SHIFT;
5255         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5256
5257         ixgbe_set_rx_function(dev);
5258
5259         return 0;
5260 }
5261
5262 /*
5263  * [VF] Initializes Transmit Unit.
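 *
 * For each Tx queue this programs the descriptor ring base address and
 * length, zeroes the head and tail pointers and clears the relaxed
 * ordering bit in the per-queue VFDCA_TXCTRL register (see the comment
 * in the loop below).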
5264  */
5265 void __attribute__((cold))
5266 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5267 {
5268         struct ixgbe_hw     *hw;
5269         struct ixgbe_tx_queue *txq;
5270         uint64_t bus_addr;
5271         uint32_t txctrl;
5272         uint16_t i;
5273
5274         PMD_INIT_FUNC_TRACE();
5275         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5276
5277         /* Setup the Base and Length of the Tx Descriptor Rings */
5278         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5279                 txq = dev->data->tx_queues[i];
5280                 bus_addr = txq->tx_ring_phys_addr;
5281                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5282                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5283                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5284                                 (uint32_t)(bus_addr >> 32));
5285                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5286                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5287                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5288                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5289                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5290
5291                 /*
5292                  * Disable the Tx Head Writeback relaxed-ordering (RO) bit,
5293                  * since out-of-order delivery breaks the completion bookkeeping.
5294                  */
5295                 txctrl = IXGBE_READ_REG(hw,
5296                                 IXGBE_VFDCA_TXCTRL(i));
5297                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5298                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5299                                 txctrl);
5300         }
5301 }
5302
5303 /*
5304  * [VF] Start Transmit and Receive Units.
5305  */
5306 void __attribute__((cold))
5307 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5308 {
5309         struct ixgbe_hw     *hw;
5310         struct ixgbe_tx_queue *txq;
5311         struct ixgbe_rx_queue *rxq;
5312         uint32_t txdctl;
5313         uint32_t rxdctl;
5314         uint16_t i;
5315         int poll_ms;
5316
5317         PMD_INIT_FUNC_TRACE();
5318         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5319
5320         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5321                 txq = dev->data->tx_queues[i];
5322                 /* Setup Transmit Threshold Registers */
5323                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5324                 txdctl |= txq->pthresh & 0x7F;
5325                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5326                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5327                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5328         }
5329
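        /*
         * Second pass: enable each Tx queue and poll until the hardware
         * reports TXDCTL.ENABLE as set.
         */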
5330         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5331
5332                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5333                 txdctl |= IXGBE_TXDCTL_ENABLE;
5334                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5335
5336                 poll_ms = 10;
5337                 /* Wait until TX Enable ready */
5338                 do {
5339                         rte_delay_ms(1);
5340                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5341                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5342                 if (!poll_ms)
5343                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5344         }
5345         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5346
5347                 rxq = dev->data->rx_queues[i];
5348
5349                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5350                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5351                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5352
5353                 /* Wait until RX Enable ready */
5354                 poll_ms = 10;
5355                 do {
5356                         rte_delay_ms(1);
5357                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5358                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5359                 if (!poll_ms)
5360                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5361                 rte_wmb();
5362                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5363
5364         }
5365 }
5366
5367 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
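/*
 * When the vector driver is compiled in, the strong definitions provided
 * by the architecture-specific ixgbe_rxtx_vec_*.c files override these
 * weak symbols; the stubs below simply report the vector path as
 * unavailable.
 */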
5368 int __attribute__((weak))
5369 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5370 {
5371         return -1;
5372 }
5373
5374 uint16_t __attribute__((weak))
5375 ixgbe_recv_pkts_vec(
5376         void __rte_unused *rx_queue,
5377         struct rte_mbuf __rte_unused **rx_pkts,
5378         uint16_t __rte_unused nb_pkts)
5379 {
5380         return 0;
5381 }
5382
5383 uint16_t __attribute__((weak))
5384 ixgbe_recv_scattered_pkts_vec(
5385         void __rte_unused *rx_queue,
5386         struct rte_mbuf __rte_unused **rx_pkts,
5387         uint16_t __rte_unused nb_pkts)
5388 {
5389         return 0;
5390 }
5391
5392 int __attribute__((weak))
5393 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5394 {
5395         return -1;
5396 }