New upstream version 17.11.5
[deb_dpdk.git] / drivers / net / ixgbe / ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73 #include <rte_net.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 #ifdef RTE_LIBRTE_IEEE1588
84 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
85 #else
86 #define IXGBE_TX_IEEE1588_TMST 0
87 #endif
88 /* Bit mask indicating which bits are required to build the TX context */
89 #define IXGBE_TX_OFFLOAD_MASK (                  \
90                 PKT_TX_OUTER_IPV6 |              \
91                 PKT_TX_OUTER_IPV4 |              \
92                 PKT_TX_IPV6 |                    \
93                 PKT_TX_IPV4 |                    \
94                 PKT_TX_VLAN_PKT |                \
95                 PKT_TX_IP_CKSUM |                \
96                 PKT_TX_L4_MASK |                 \
97                 PKT_TX_TCP_SEG |                 \
98                 PKT_TX_MACSEC |                  \
99                 PKT_TX_OUTER_IP_CKSUM |          \
100                 PKT_TX_SEC_OFFLOAD |     \
101                 IXGBE_TX_IEEE1588_TMST)
102
103 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
104                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
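/*
 * Any offload flag in IXGBE_TX_OFFLOAD_NOTSUP_MASK is one this PMD cannot
 * handle; ixgbe_prep_pkts() below rejects packets carrying such flags and
 * sets rte_errno accordingly.
 */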
105
106 #if 1
107 #define RTE_PMD_USE_PREFETCH
108 #endif
109
110 #ifdef RTE_PMD_USE_PREFETCH
111 /*
112  * Prefetch a cache line into all cache levels.
113  */
114 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
115 #else
116 #define rte_ixgbe_prefetch(p)   do {} while (0)
117 #endif
118
119 #ifdef RTE_IXGBE_INC_VECTOR
120 uint16_t ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
121                                     uint16_t nb_pkts);
122 #endif
123
124 /*********************************************************************
125  *
126  *  TX functions
127  *
128  **********************************************************************/
129
130 /*
131  * Check for descriptors with their DD bit set and free mbufs.
132  * Return the total number of buffers freed.
133  */
134 static __rte_always_inline int
135 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
136 {
137         struct ixgbe_tx_entry *txep;
138         uint32_t status;
139         int i, nb_free = 0;
140         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
141
142         /* check DD bit on threshold descriptor */
143         status = txq->tx_ring[txq->tx_next_dd].wb.status;
144         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
145                 return 0;
146
147         /*
148          * first buffer to free from S/W ring is at index
149          * tx_next_dd - (tx_rs_thresh-1)
150          */
151         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
152
153         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
154                 /* free buffers one at a time */
155                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
156                 txep->mbuf = NULL;
157
158                 if (unlikely(m == NULL))
159                         continue;
160
161                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
162                     (nb_free > 0 && m->pool != free[0]->pool)) {
163                         rte_mempool_put_bulk(free[0]->pool,
164                                              (void **)free, nb_free);
165                         nb_free = 0;
166                 }
167
168                 free[nb_free++] = m;
169         }
170
171         if (nb_free > 0)
172                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
173
174         /* buffers were freed, update counters */
175         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
176         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
177         if (txq->tx_next_dd >= txq->nb_tx_desc)
178                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
179
180         return txq->tx_rs_thresh;
181 }
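/*
 * For illustration, assuming the default tx_rs_thresh of 32 (see
 * DEFAULT_TX_RS_THRESH below) and a 512-entry ring: once the descriptor at
 * tx_next_dd reports DD, the 32 mbufs at sw_ring[tx_next_dd - 31 ..
 * tx_next_dd] are returned to their mempool(s) in bulk, nb_tx_free grows by
 * 32, and tx_next_dd advances by 32, wrapping back to 31 after the last
 * descriptor of the ring.
 */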
182
183 /* Populate 4 descriptors with data from 4 mbufs */
184 static inline void
185 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
186 {
187         uint64_t buf_dma_addr;
188         uint32_t pkt_len;
189         int i;
190
191         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
192                 buf_dma_addr = rte_mbuf_data_iova(*pkts);
193                 pkt_len = (*pkts)->data_len;
194
195                 /* write data to descriptor */
196                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
197
198                 txdp->read.cmd_type_len =
199                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
200
201                 txdp->read.olinfo_status =
202                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
203
204                 rte_prefetch0(&(*pkts)->pool);
205         }
206 }
207
208 /* Populate 1 descriptor with data from 1 mbuf */
209 static inline void
210 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
211 {
212         uint64_t buf_dma_addr;
213         uint32_t pkt_len;
214
215         buf_dma_addr = rte_mbuf_data_iova(*pkts);
216         pkt_len = (*pkts)->data_len;
217
218         /* write data to descriptor */
219         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
220         txdp->read.cmd_type_len =
221                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
222         txdp->read.olinfo_status =
223                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
224         rte_prefetch0(&(*pkts)->pool);
225 }
226
227 /*
228  * Fill H/W descriptor ring with mbuf data.
229  * Copy mbuf pointers to the S/W ring.
230  */
231 static inline void
232 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
233                       uint16_t nb_pkts)
234 {
235         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
236         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
237         const int N_PER_LOOP = 4;
238         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
239         int mainpart, leftover;
240         int i, j;
241
242         /*
243          * Process most of the packets in chunks of N pkts.  Any
244          * leftover packets will get processed one at a time.
245          */
246         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
247         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
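        /*
         * Example: nb_pkts == 11 gives mainpart == 8 (two tx4() calls)
         * and leftover == 3 (three tx1() calls).
         */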
248         for (i = 0; i < mainpart; i += N_PER_LOOP) {
249                 /* Copy N mbuf pointers to the S/W ring */
250                 for (j = 0; j < N_PER_LOOP; ++j) {
251                         (txep + i + j)->mbuf = *(pkts + i + j);
252                 }
253                 tx4(txdp + i, pkts + i);
254         }
255
256         if (unlikely(leftover > 0)) {
257                 for (i = 0; i < leftover; ++i) {
258                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
259                         tx1(txdp + mainpart + i, pkts + mainpart + i);
260                 }
261         }
262 }
263
264 static inline uint16_t
265 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
266              uint16_t nb_pkts)
267 {
268         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
269         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
270         uint16_t n = 0;
271
272         /*
273          * Begin scanning the H/W ring for done descriptors when the
274          * number of available descriptors drops below tx_free_thresh.  For
275          * each done descriptor, free the associated buffer.
276          */
277         if (txq->nb_tx_free < txq->tx_free_thresh)
278                 ixgbe_tx_free_bufs(txq);
279
280         /* Only use descriptors that are available */
281         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
282         if (unlikely(nb_pkts == 0))
283                 return 0;
284
285         /* Use exactly nb_pkts descriptors */
286         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
287
288         /*
289          * At this point, we know there are enough descriptors in the
290          * ring to transmit all the packets.  This assumes that each
291          * mbuf contains a single segment, and that no new offloads
292          * are expected, which would require a new context descriptor.
293          */
294
295         /*
296          * See if we're going to wrap-around. If so, handle the top
297          * of the descriptor ring first, then do the bottom.  If not,
298          * the processing looks just like the "bottom" part anyway...
299          */
300         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
301                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
302                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
303
304                 /*
305                  * We know that the last descriptor in the ring will need to
306                  * have its RS bit set because tx_rs_thresh has to be
307                  * a divisor of the ring size
308                  */
309                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
310                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
311                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
312
313                 txq->tx_tail = 0;
314         }
315
316         /* Fill H/W descriptor ring with mbuf data */
317         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
318         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
319
320         /*
321          * Determine if RS bit should be set
322          * This is what we actually want:
323          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
324          * but instead of subtracting 1 and doing >=, we can just do
325          * greater than without subtracting.
326          */
327         if (txq->tx_tail > txq->tx_next_rs) {
328                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
329                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
330                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
331                                                 txq->tx_rs_thresh);
332                 if (txq->tx_next_rs >= txq->nb_tx_desc)
333                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
334         }
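        /*
         * For illustration, assuming tx_rs_thresh == 32: the RS bit lands on
         * descriptors 31, 63, 95, ... so the hardware writes back completion
         * status only once per 32 descriptors.
         */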
335
336         /*
337          * Check for wrap-around. This would only happen if we used
338          * up to the last descriptor in the ring, no more, no less.
339          */
340         if (txq->tx_tail >= txq->nb_tx_desc)
341                 txq->tx_tail = 0;
342
343         /* update tail pointer */
344         rte_wmb();
345         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
346
347         return nb_pkts;
348 }
349
350 uint16_t
351 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
352                        uint16_t nb_pkts)
353 {
354         uint16_t nb_tx;
355
356         /* If the whole burst fits within TX_MAX_BURST pkts, transmit it directly */
357         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
358                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
359
360         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
361         nb_tx = 0;
362         while (nb_pkts) {
363                 uint16_t ret, n;
364
365                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
366                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
367                 nb_tx = (uint16_t)(nb_tx + ret);
368                 nb_pkts = (uint16_t)(nb_pkts - ret);
369                 if (ret < n)
370                         break;
371         }
372
373         return nb_tx;
374 }
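/*
 * A minimal usage sketch, assuming the queue was configured so that this
 * simple TX path was selected (single-segment mbufs, no offload flags):
 *
 *     uint16_t sent = rte_eth_tx_burst(port_id, queue_id, mbufs, n);
 *     // mbufs[0 .. sent-1] now belong to the driver; the remainder must be
 *     // retried or freed by the application.
 */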
375
376 #ifdef RTE_IXGBE_INC_VECTOR
377 static uint16_t
378 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
379                     uint16_t nb_pkts)
380 {
381         uint16_t nb_tx = 0;
382         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
383
384         while (nb_pkts) {
385                 uint16_t ret, num;
386
387                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
388                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
389                                                  num);
390                 nb_tx += ret;
391                 nb_pkts -= ret;
392                 if (ret < num)
393                         break;
394         }
395
396         return nb_tx;
397 }
398 #endif
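/*
 * Note: packets are handed to ixgbe_xmit_fixed_burst_vec() in chunks of at
 * most tx_rs_thresh per call (the RTE_MIN above), and the loop stops early
 * if a chunk is only partially transmitted.
 */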
399
400 static inline void
401 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
402                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
403                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
404                 __rte_unused uint64_t *mdata)
405 {
406         uint32_t type_tucmd_mlhl;
407         uint32_t mss_l4len_idx = 0;
408         uint32_t ctx_idx;
409         uint32_t vlan_macip_lens;
410         union ixgbe_tx_offload tx_offload_mask;
411         uint32_t seqnum_seed = 0;
412
413         ctx_idx = txq->ctx_curr;
414         tx_offload_mask.data[0] = 0;
415         tx_offload_mask.data[1] = 0;
416         type_tucmd_mlhl = 0;
417
418         /* Specify which HW CTX to upload. */
419         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
420
421         if (ol_flags & PKT_TX_VLAN_PKT) {
422                 tx_offload_mask.vlan_tci |= ~0;
423         }
424
425         /* check if TCP segmentation is required for this packet */
426         if (ol_flags & PKT_TX_TCP_SEG) {
427                 /* implies IP cksum in IPv4 */
428                 if (ol_flags & PKT_TX_IP_CKSUM)
429                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
430                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
431                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
432                 else
433                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
434                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
435                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
436
437                 tx_offload_mask.l2_len |= ~0;
438                 tx_offload_mask.l3_len |= ~0;
439                 tx_offload_mask.l4_len |= ~0;
440                 tx_offload_mask.tso_segsz |= ~0;
441                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
442                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
443         } else { /* no TSO, check if hardware checksum is needed */
444                 if (ol_flags & PKT_TX_IP_CKSUM) {
445                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
446                         tx_offload_mask.l2_len |= ~0;
447                         tx_offload_mask.l3_len |= ~0;
448                 }
449
450                 switch (ol_flags & PKT_TX_L4_MASK) {
451                 case PKT_TX_UDP_CKSUM:
452                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
453                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
454                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
455                         tx_offload_mask.l2_len |= ~0;
456                         tx_offload_mask.l3_len |= ~0;
457                         break;
458                 case PKT_TX_TCP_CKSUM:
459                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
460                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
461                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
462                         tx_offload_mask.l2_len |= ~0;
463                         tx_offload_mask.l3_len |= ~0;
464                         break;
465                 case PKT_TX_SCTP_CKSUM:
466                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
467                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
468                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
469                         tx_offload_mask.l2_len |= ~0;
470                         tx_offload_mask.l3_len |= ~0;
471                         break;
472                 default:
473                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
474                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
475                         break;
476                 }
477         }
478
479         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
480                 tx_offload_mask.outer_l2_len |= ~0;
481                 tx_offload_mask.outer_l3_len |= ~0;
482                 tx_offload_mask.l2_len |= ~0;
483                 seqnum_seed |= tx_offload.outer_l3_len
484                                << IXGBE_ADVTXD_OUTER_IPLEN;
485                 seqnum_seed |= tx_offload.l2_len
486                                << IXGBE_ADVTXD_TUNNEL_LEN;
487         }
488 #ifdef RTE_LIBRTE_SECURITY
489         if (ol_flags & PKT_TX_SEC_OFFLOAD) {
490                 union ixgbe_crypto_tx_desc_md *md =
491                                 (union ixgbe_crypto_tx_desc_md *)mdata;
492                 seqnum_seed |=
493                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
494                 type_tucmd_mlhl |= md->enc ?
495                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
496                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
497                 type_tucmd_mlhl |=
498                         (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
499                 tx_offload_mask.sa_idx |= ~0;
500                 tx_offload_mask.sec_pad_len |= ~0;
501         }
502 #endif
503
504         txq->ctx_cache[ctx_idx].flags = ol_flags;
505         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
506                 tx_offload_mask.data[0] & tx_offload.data[0];
507         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
508                 tx_offload_mask.data[1] & tx_offload.data[1];
509         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
510
511         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
512         vlan_macip_lens = tx_offload.l3_len;
513         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
514                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
515                                     IXGBE_ADVTXD_MACLEN_SHIFT);
516         else
517                 vlan_macip_lens |= (tx_offload.l2_len <<
518                                     IXGBE_ADVTXD_MACLEN_SHIFT);
519         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
520         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
521         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
522         ctx_txd->seqnum_seed     = seqnum_seed;
523 }
524
525 /*
526  * Check which hardware context can be used. Use the existing match
527  * or create a new context descriptor.
528  */
529 static inline uint32_t
530 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
531                    union ixgbe_tx_offload tx_offload)
532 {
533         /* If it matches the currently used context */
534         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
535                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
536                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
537                      & tx_offload.data[0])) &&
538                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
539                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
540                      & tx_offload.data[1]))))
541                 return txq->ctx_curr;
542
543         /* Otherwise, check whether it matches the other cached context */
544         txq->ctx_curr ^= 1;
545         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
546                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
547                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
548                      & tx_offload.data[0])) &&
549                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
550                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
551                      & tx_offload.data[1]))))
552                 return txq->ctx_curr;
553
554         /* Mismatch: neither cached context matches, a new one must be built */
555         return IXGBE_CTX_NUM;
556 }
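/*
 * The queue caches two offload contexts; ctx_curr toggles between them via
 * the XOR above. IXGBE_CTX_NUM is returned only when neither cached entry
 * matches, signalling the caller to write a fresh context descriptor.
 */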
557
558 static inline uint32_t
559 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
560 {
561         uint32_t tmp = 0;
562
563         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
564                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
565         if (ol_flags & PKT_TX_IP_CKSUM)
566                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
567         if (ol_flags & PKT_TX_TCP_SEG)
568                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
569         return tmp;
570 }
571
572 static inline uint32_t
573 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
574 {
575         uint32_t cmdtype = 0;
576
577         if (ol_flags & PKT_TX_VLAN_PKT)
578                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
579         if (ol_flags & PKT_TX_TCP_SEG)
580                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
581         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
582                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
583         if (ol_flags & PKT_TX_MACSEC)
584                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
585         return cmdtype;
586 }
587
588 /* Default RS bit threshold values */
589 #ifndef DEFAULT_TX_RS_THRESH
590 #define DEFAULT_TX_RS_THRESH   32
591 #endif
592 #ifndef DEFAULT_TX_FREE_THRESH
593 #define DEFAULT_TX_FREE_THRESH 32
594 #endif
595
596 /* Reset transmit descriptors after they have been used */
597 static inline int
598 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
599 {
600         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
601         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
602         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
603         uint16_t nb_tx_desc = txq->nb_tx_desc;
604         uint16_t desc_to_clean_to;
605         uint16_t nb_tx_to_clean;
606         uint32_t status;
607
608         /* Determine the last descriptor needing to be cleaned */
609         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
610         if (desc_to_clean_to >= nb_tx_desc)
611                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
612
613         /* Check to make sure the last descriptor to clean is done */
614         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
615         status = txr[desc_to_clean_to].wb.status;
616         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
617                 PMD_TX_FREE_LOG(DEBUG,
618                                 "TX descriptor %4u is not done "
619                                 "(port=%d queue=%d)",
620                                 desc_to_clean_to,
621                                 txq->port_id, txq->queue_id);
622                 /* Failed to clean any descriptors, better luck next time */
623                 return -(1);
624         }
625
626         /* Figure out how many descriptors will be cleaned */
627         if (last_desc_cleaned > desc_to_clean_to)
628                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
629                                                         desc_to_clean_to);
630         else
631                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
632                                                 last_desc_cleaned);
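        /*
         * Example, assuming nb_tx_desc == 512 and tx_rs_thresh == 32: with
         * last_desc_cleaned == 495 and desc_to_clean_to == 15 (wrapped),
         * nb_tx_to_clean == (512 - 495) + 15 == 32.
         */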
633
634         PMD_TX_FREE_LOG(DEBUG,
635                         "Cleaning %4u TX descriptors: %4u to %4u "
636                         "(port=%d queue=%d)",
637                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
638                         txq->port_id, txq->queue_id);
639
640         /*
641          * The last descriptor to clean is done, so that means all the
642          * descriptors from the last descriptor that was cleaned
643          * up to the last descriptor with the RS bit set
644          * are done. Only reset the threshold descriptor.
645          */
646         txr[desc_to_clean_to].wb.status = 0;
647
648         /* Update the txq to reflect the last descriptor that was cleaned */
649         txq->last_desc_cleaned = desc_to_clean_to;
650         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
651
652         /* No Error */
653         return 0;
654 }
655
656 uint16_t
657 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
658                 uint16_t nb_pkts)
659 {
660         struct ixgbe_tx_queue *txq;
661         struct ixgbe_tx_entry *sw_ring;
662         struct ixgbe_tx_entry *txe, *txn;
663         volatile union ixgbe_adv_tx_desc *txr;
664         volatile union ixgbe_adv_tx_desc *txd, *txp;
665         struct rte_mbuf     *tx_pkt;
666         struct rte_mbuf     *m_seg;
667         uint64_t buf_dma_addr;
668         uint32_t olinfo_status;
669         uint32_t cmd_type_len;
670         uint32_t pkt_len;
671         uint16_t slen;
672         uint64_t ol_flags;
673         uint16_t tx_id;
674         uint16_t tx_last;
675         uint16_t nb_tx;
676         uint16_t nb_used;
677         uint64_t tx_ol_req;
678         uint32_t ctx = 0;
679         uint32_t new_ctx;
680         union ixgbe_tx_offload tx_offload;
681 #ifdef RTE_LIBRTE_SECURITY
682         uint8_t use_ipsec;
683 #endif
684
685         tx_offload.data[0] = 0;
686         tx_offload.data[1] = 0;
687         txq = tx_queue;
688         sw_ring = txq->sw_ring;
689         txr     = txq->tx_ring;
690         tx_id   = txq->tx_tail;
691         txe = &sw_ring[tx_id];
692         txp = NULL;
693
694         /* Determine if the descriptor ring needs to be cleaned. */
695         if (txq->nb_tx_free < txq->tx_free_thresh)
696                 ixgbe_xmit_cleanup(txq);
697
698         rte_prefetch0(&txe->mbuf->pool);
699
700         /* TX loop */
701         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
702                 new_ctx = 0;
703                 tx_pkt = *tx_pkts++;
704                 pkt_len = tx_pkt->pkt_len;
705
706                 /*
707                  * Determine how many (if any) context descriptors
708                  * are needed for offload functionality.
709                  */
710                 ol_flags = tx_pkt->ol_flags;
711 #ifdef RTE_LIBRTE_SECURITY
712                 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
713 #endif
714
715                 /* If hardware offload required */
716                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
717                 if (tx_ol_req) {
718                         tx_offload.l2_len = tx_pkt->l2_len;
719                         tx_offload.l3_len = tx_pkt->l3_len;
720                         tx_offload.l4_len = tx_pkt->l4_len;
721                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
722                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
723                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
724                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
725 #ifdef RTE_LIBRTE_SECURITY
726                         if (use_ipsec) {
727                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
728                                         (union ixgbe_crypto_tx_desc_md *)
729                                                         &tx_pkt->udata64;
730                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
731                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
732                         }
733 #endif
734
735                         /* Decide whether a new context must be built or an existing one reused. */
736                         ctx = what_advctx_update(txq, tx_ol_req,
737                                 tx_offload);
738                         /* Only allocate a context descriptor if required */
739                         new_ctx = (ctx == IXGBE_CTX_NUM);
740                         ctx = txq->ctx_curr;
741                 }
742
743                 /*
744                  * Keep track of how many descriptors are used in this loop.
745                  * This is always the number of segments plus the number of
746                  * context descriptors required to transmit the packet.
747                  */
748                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
749
750                 if (txp != NULL &&
751                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
752                         /* set RS on the previous packet in the burst */
753                         txp->read.cmd_type_len |=
754                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
755
756                 /*
757                  * The number of descriptors that must be allocated for a
758                  * packet is the number of segments of that packet, plus 1
759                  * Context Descriptor for the hardware offload, if any.
760                  * Determine the last TX descriptor to allocate in the TX ring
761                  * for the packet, starting from the current position (tx_id)
762                  * in the ring.
763                  */
764                 tx_last = (uint16_t) (tx_id + nb_used - 1);
765
766                 /* Circular ring */
767                 if (tx_last >= txq->nb_tx_desc)
768                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
769
770                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
771                            " tx_first=%u tx_last=%u",
772                            (unsigned) txq->port_id,
773                            (unsigned) txq->queue_id,
774                            (unsigned) pkt_len,
775                            (unsigned) tx_id,
776                            (unsigned) tx_last);
777
778                 /*
779                  * Make sure there are enough TX descriptors available to
780                  * transmit the entire packet.
781                  * nb_used better be less than or equal to txq->tx_rs_thresh
782                  */
783                 if (nb_used > txq->nb_tx_free) {
784                         PMD_TX_FREE_LOG(DEBUG,
785                                         "Not enough free TX descriptors "
786                                         "nb_used=%4u nb_free=%4u "
787                                         "(port=%d queue=%d)",
788                                         nb_used, txq->nb_tx_free,
789                                         txq->port_id, txq->queue_id);
790
791                         if (ixgbe_xmit_cleanup(txq) != 0) {
792                                 /* Could not clean any descriptors */
793                                 if (nb_tx == 0)
794                                         return 0;
795                                 goto end_of_tx;
796                         }
797
798                         /* nb_used better be <= txq->tx_rs_thresh */
799                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
800                                 PMD_TX_FREE_LOG(DEBUG,
801                                         "The number of descriptors needed to "
802                                         "transmit the packet exceeds the "
803                                         "RS bit threshold. This will impact "
804                                         "performance. "
805                                         "nb_used=%4u nb_free=%4u "
806                                         "tx_rs_thresh=%4u. "
807                                         "(port=%d queue=%d)",
808                                         nb_used, txq->nb_tx_free,
809                                         txq->tx_rs_thresh,
810                                         txq->port_id, txq->queue_id);
811                                 /*
812                                  * Loop here until there are enough TX
813                                  * descriptors or until the ring cannot be
814                                  * cleaned.
815                                  */
816                                 while (nb_used > txq->nb_tx_free) {
817                                         if (ixgbe_xmit_cleanup(txq) != 0) {
818                                                 /*
819                                                  * Could not clean any
820                                                  * descriptors
821                                                  */
822                                                 if (nb_tx == 0)
823                                                         return 0;
824                                                 goto end_of_tx;
825                                         }
826                                 }
827                         }
828                 }
829
830                 /*
831                  * By now there are enough free TX descriptors to transmit
832                  * the packet.
833                  */
834
835                 /*
836                  * Set common flags of all TX Data Descriptors.
837                  *
838                  * The following bits must be set in all Data Descriptors:
839                  *   - IXGBE_ADVTXD_DTYP_DATA
840                  *   - IXGBE_ADVTXD_DCMD_DEXT
841                  *
842                  * The following bits must be set in the first Data Descriptor
843                  * and are ignored in the other ones:
844                  *   - IXGBE_ADVTXD_DCMD_IFCS
845                  *   - IXGBE_ADVTXD_MAC_1588
846                  *   - IXGBE_ADVTXD_DCMD_VLE
847                  *
848                  * The following bits must only be set in the last Data
849                  * Descriptor:
850                  *   - IXGBE_TXD_CMD_EOP
851                  *
852                  * The following bits can be set in any Data Descriptor, but
853                  * are only set in the last Data Descriptor:
854                  *   - IXGBE_TXD_CMD_RS
855                  */
856                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
857                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
858
859 #ifdef RTE_LIBRTE_IEEE1588
860                 if (ol_flags & PKT_TX_IEEE1588_TMST)
861                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
862 #endif
863
864                 olinfo_status = 0;
865                 if (tx_ol_req) {
866
867                         if (ol_flags & PKT_TX_TCP_SEG) {
868                                 /* when TSO is on, the paylen in the descriptor is
869                                  * not the packet length but the TCP payload length */
870                                 pkt_len -= (tx_offload.l2_len +
871                                         tx_offload.l3_len + tx_offload.l4_len);
872                         }
873
874                         /*
875                          * Setup the TX Advanced Context Descriptor if required
876                          */
877                         if (new_ctx) {
878                                 volatile struct ixgbe_adv_tx_context_desc *
879                                     ctx_txd;
880
881                                 ctx_txd = (volatile struct
882                                     ixgbe_adv_tx_context_desc *)
883                                     &txr[tx_id];
884
885                                 txn = &sw_ring[txe->next_id];
886                                 rte_prefetch0(&txn->mbuf->pool);
887
888                                 if (txe->mbuf != NULL) {
889                                         rte_pktmbuf_free_seg(txe->mbuf);
890                                         txe->mbuf = NULL;
891                                 }
892
893                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
894                                         tx_offload, &tx_pkt->udata64);
895
896                                 txe->last_id = tx_last;
897                                 tx_id = txe->next_id;
898                                 txe = txn;
899                         }
900
901                         /*
902                          * Set up the TX Advanced Data Descriptor.
903                          * This path is taken whether a new context
904                          * descriptor was built or an existing one is reused.
905                          */
906                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
907                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
908                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
909                 }
910
911                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
912 #ifdef RTE_LIBRTE_SECURITY
913                 if (use_ipsec)
914                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
915 #endif
916
917                 m_seg = tx_pkt;
918                 do {
919                         txd = &txr[tx_id];
920                         txn = &sw_ring[txe->next_id];
921                         rte_prefetch0(&txn->mbuf->pool);
922
923                         if (txe->mbuf != NULL)
924                                 rte_pktmbuf_free_seg(txe->mbuf);
925                         txe->mbuf = m_seg;
926
927                         /*
928                          * Set up Transmit Data Descriptor.
929                          */
930                         slen = m_seg->data_len;
931                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
932                         txd->read.buffer_addr =
933                                 rte_cpu_to_le_64(buf_dma_addr);
934                         txd->read.cmd_type_len =
935                                 rte_cpu_to_le_32(cmd_type_len | slen);
936                         txd->read.olinfo_status =
937                                 rte_cpu_to_le_32(olinfo_status);
938                         txe->last_id = tx_last;
939                         tx_id = txe->next_id;
940                         txe = txn;
941                         m_seg = m_seg->next;
942                 } while (m_seg != NULL);
943
944                 /*
945                  * The last packet data descriptor needs End Of Packet (EOP)
946                  */
947                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
948                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
949                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
950
951                 /* Set RS bit only on threshold packets' last descriptor */
952                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
953                         PMD_TX_FREE_LOG(DEBUG,
954                                         "Setting RS bit on TXD id="
955                                         "%4u (port=%d queue=%d)",
956                                         tx_last, txq->port_id, txq->queue_id);
957
958                         cmd_type_len |= IXGBE_TXD_CMD_RS;
959
960                         /* Update txq RS bit counters */
961                         txq->nb_tx_used = 0;
962                         txp = NULL;
963                 } else
964                         txp = txd;
965
966                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
967         }
968
969 end_of_tx:
970         /* set RS on last packet in the burst */
971         if (txp != NULL)
972                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
973
974         rte_wmb();
975
976         /*
977          * Set the Transmit Descriptor Tail (TDT)
978          */
979         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
980                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
981                    (unsigned) tx_id, (unsigned) nb_tx);
982         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
983         txq->tx_tail = tx_id;
984
985         return nb_tx;
986 }
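/*
 * A minimal sketch of preparing an mbuf for this full-featured TX path,
 * assuming an IPv4/TCP packet with IP checksum offload and TSO requested by
 * the application (standard rte_mbuf offload fields):
 *
 *     m->l2_len = sizeof(struct ether_hdr);
 *     m->l3_len = sizeof(struct ipv4_hdr);
 *     m->l4_len = sizeof(struct tcp_hdr);
 *     m->tso_segsz = 1448;
 *     m->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM | PKT_TX_TCP_SEG;
 *
 * ixgbe_xmit_pkts() then builds one context descriptor (reused while these
 * offloads stay the same) plus one data descriptor per segment.
 */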
987
988 /*********************************************************************
989  *
990  *  TX prep functions
991  *
992  **********************************************************************/
993 uint16_t
994 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
995 {
996         int i, ret;
997         uint64_t ol_flags;
998         struct rte_mbuf *m;
999         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
1000
1001         for (i = 0; i < nb_pkts; i++) {
1002                 m = tx_pkts[i];
1003                 ol_flags = m->ol_flags;
1004
1005                 /**
1006                  * Check that the packet meets the requirement on the number of segments
1007                  *
1008                  * NOTE: for ixgbe the limit is always (40 - WTHRESH) for both TSO
1009                  *       and non-TSO packets
1010                  */
1011
1012                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
1013                         rte_errno = -EINVAL;
1014                         return i;
1015                 }
1016
1017                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
1018                         rte_errno = -ENOTSUP;
1019                         return i;
1020                 }
1021
1022 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1023                 ret = rte_validate_tx_offload(m);
1024                 if (ret != 0) {
1025                         rte_errno = ret;
1026                         return i;
1027                 }
1028 #endif
1029                 ret = rte_net_intel_cksum_prepare(m);
1030                 if (ret != 0) {
1031                         rte_errno = ret;
1032                         return i;
1033                 }
1034         }
1035
1036         return i;
1037 }
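/*
 * Typical usage, as a sketch: applications requesting TSO or checksum
 * offloads call rte_eth_tx_prepare() on the burst before
 * rte_eth_tx_burst(); the return value is the number of packets that passed
 * the checks above, and rte_errno indicates why the first failing packet
 * was rejected.
 */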
1038
1039 /*********************************************************************
1040  *
1041  *  RX functions
1042  *
1043  **********************************************************************/
1044
1045 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1046 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1047 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1048 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1049 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1050 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1051 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1052 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1053 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1054 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1055 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1056 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1057 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1058 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1059 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1060 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1061 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1062 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1063 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1064 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1065 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1066 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1067 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1068 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1069 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1070 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1071 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1072 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1073 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1074 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1075 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1076 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1077 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1078
1079 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1080 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1081 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1082 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1083 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1084 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1085 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1086 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1087 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1088 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1089 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1090 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1091 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1092 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1093 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1094 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1095 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1096 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1097 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1098 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1099 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1100 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1101 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1102
1103 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1104 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1105 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1106 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1107 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1108 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1109 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1110 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1111 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1112 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1113 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1114 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1115 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1116 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1117 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1118 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1119 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1120 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1121 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1122 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1123 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1124 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1125 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1126
1127 /**
1128  * Use two different tables, one for normal packets and one for tunnel
1129  * packets, to save space.
1130  */
1131 const uint32_t
1132         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1133         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1134         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1135                 RTE_PTYPE_L3_IPV4,
1136         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1137                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1138         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1139                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1140         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1141                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1142         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1143                 RTE_PTYPE_L3_IPV4_EXT,
1144         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1145                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1146         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1147                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1148         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1149                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1150         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1151                 RTE_PTYPE_L3_IPV6,
1152         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1153                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1154         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1155                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1156         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1157                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1158         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1159                 RTE_PTYPE_L3_IPV6_EXT,
1160         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1161                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1162         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1163                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1164         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1165                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1166         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1167                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1168                 RTE_PTYPE_INNER_L3_IPV6,
1169         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1170                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1171                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1172         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1173                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1174                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1175         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1176                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1177                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1178         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1179                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1180                 RTE_PTYPE_INNER_L3_IPV6,
1181         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1182                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1183                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1184         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1185                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1186                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1187         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1188                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1189                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1190         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1191                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1192                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1193         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1194                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1195                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1196         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1197                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1198                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1199         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1200                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1201                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1202         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1203                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1204                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1205         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1206                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1207                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1208         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1209                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1210                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1211         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1212                 RTE_PTYPE_L2_ETHER |
1213                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1214                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1215 };
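/*
 * The RX routines later in this file translate the packet-type bits of the
 * RX descriptor into an index into ptype_table (plain packets) or
 * ptype_table_tn (tunnelled packets) to fill mbuf->packet_type.
 */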
1216
1217 const uint32_t
1218         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1219         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1220                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1221                 RTE_PTYPE_INNER_L2_ETHER,
1222         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1223                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1224                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1225         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1226                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1227                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1228         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1229                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1230                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1231         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1232                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1233                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1234         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1235                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1236                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1237         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1238                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1239                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1240         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1241                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1242                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1243                 RTE_PTYPE_INNER_L4_TCP,
1244         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1245                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1246                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1247                 RTE_PTYPE_INNER_L4_TCP,
1248         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1249                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1250                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1251         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1252                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1253                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1254                 RTE_PTYPE_INNER_L4_TCP,
1255         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1256                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1257                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1258                 RTE_PTYPE_INNER_L3_IPV4,
1259         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1260                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1261                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1262                 RTE_PTYPE_INNER_L4_UDP,
1263         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1264                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1265                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1266                 RTE_PTYPE_INNER_L4_UDP,
1267         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1268                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1269                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1270                 RTE_PTYPE_INNER_L4_SCTP,
1271         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1272                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1273                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1274         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1275                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1276                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1277                 RTE_PTYPE_INNER_L4_UDP,
1278         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1279                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1280                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1281                 RTE_PTYPE_INNER_L4_SCTP,
1282         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1283                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1284                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1285                 RTE_PTYPE_INNER_L3_IPV4,
1286         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1287                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1288                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1289                 RTE_PTYPE_INNER_L4_SCTP,
1290         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1291                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1292                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1293                 RTE_PTYPE_INNER_L4_SCTP,
1294         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1295                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1296                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1297                 RTE_PTYPE_INNER_L4_TCP,
1298         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1299                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1300                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1301                 RTE_PTYPE_INNER_L4_UDP,
1302
1303         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1304                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1305                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1306         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1307                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1308                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1309                 RTE_PTYPE_INNER_L3_IPV4,
1310         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1311                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1312                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1313                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1314         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1315                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1316                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1317                 RTE_PTYPE_INNER_L3_IPV6,
1318         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1319                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1320                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1321                 RTE_PTYPE_INNER_L3_IPV4,
1322         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1323                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1324                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1325                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1326         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1327                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1328                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1329                 RTE_PTYPE_INNER_L3_IPV4,
1330         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1331                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1332                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1333                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1334         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1335                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1336                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1337                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1338         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1339                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1340                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1341                 RTE_PTYPE_INNER_L3_IPV4,
1342         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1343                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1344                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1345                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1346         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1347                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1348                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1349                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1350         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1351                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1352                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1353                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1354         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1355                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1356                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1357                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1358         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1359                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1360                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1361                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1362         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1363                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1364                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1365                 RTE_PTYPE_INNER_L3_IPV4,
1366         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1367                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1368                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1369                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1370         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1371                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1372                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1373                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1374         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1375                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1376                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1377                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1378         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1379                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1380                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1381                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1382         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1383                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1384                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1385                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1386         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1387                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1388                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1389                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1390         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1391                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1392                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1393                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1394 };
1395
1396 /* @note: update ixgbe_dev_supported_ptypes_get() if anything changes here. */
1397 static inline uint32_t
1398 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1399 {
1400
1401         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1402                 return RTE_PTYPE_UNKNOWN;
1403
1404         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1405
1406         /* For tunnel packet */
1407         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1408                 /* Remove the tunnel bit to save space. */
1409                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1410                 return ptype_table_tn[pkt_info];
1411         }
1412
1413         /**
1414          * For x550, if the packet is not tunneled, the tunnel
1415          * type bits are already zero, so the 82599 mask can be
1416          * reused here.
1417          */
1418         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1419
1420         return ptype_table[pkt_info];
1421 }
1422
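/*
 * Editor's note (illustrative usage, not part of the upstream driver): the
 * receive paths in this file resolve a descriptor's packet type with the
 * function above by passing the raw lower dword of the write-back
 * descriptor together with the per-queue type mask, e.g.:
 *
 *     uint32_t pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
 *     uint32_t ptype = ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
 *                                                     rxq->pkt_type_mask);
 *
 * The tunnel bit selects ptype_table_tn[]; otherwise the 82599-style mask
 * indexes ptype_table[].
 */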
1423 static inline uint64_t
1424 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1425 {
1426         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1427                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1428                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1429                 PKT_RX_RSS_HASH, 0, 0, 0,
1430                 0, 0, 0,  PKT_RX_FDIR,
1431         };
1432 #ifdef RTE_LIBRTE_IEEE1588
1433         static uint64_t ip_pkt_etqf_map[8] = {
1434                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1435                 0, 0, 0, 0,
1436         };
1437
1438         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1439                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1440                                 ip_rss_types_map[pkt_info & 0XF];
1441         else
1442                 return ip_rss_types_map[pkt_info & 0XF];
1443 #else
1444         return ip_rss_types_map[pkt_info & 0XF];
1445 #endif
1446 }
1447
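/*
 * Editor's note (illustrative values, derived from the ip_rss_types_map
 * table below): the low nibble of pkt_info is the RSS-type field reported
 * by the NIC, so in a build without RTE_LIBRTE_IEEE1588 the function below
 * yields, for example:
 *
 *     ixgbe_rxd_pkt_info_to_pkt_flags(0x01) -> PKT_RX_RSS_HASH
 *     ixgbe_rxd_pkt_info_to_pkt_flags(0x0f) -> PKT_RX_FDIR
 *     ixgbe_rxd_pkt_info_to_pkt_flags(0x00) -> 0 (no RSS/FDIR flag)
 */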
1448 static inline uint64_t
1449 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1450 {
1451         uint64_t pkt_flags;
1452
1453         /*
1454          * Check only whether a VLAN tag is present.
1455          * Whether the NIC computed the L3/L4 Rx checksums is not checked
1456          * here; that is controlled by the rte_eth_rxmode.hw_ip_checksum flag.
1457          */
1458         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1459
1460 #ifdef RTE_LIBRTE_IEEE1588
1461         if (rx_status & IXGBE_RXD_STAT_TMST)
1462                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1463 #endif
1464         return pkt_flags;
1465 }
1466
1467 static inline uint64_t
1468 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1469 {
1470         uint64_t pkt_flags;
1471
1472         /*
1473          * Bit 31: IPE, IPv4 checksum error
1474          * Bit 30: L4I, L4 integrity error
1475          */
1476         static uint64_t error_to_pkt_flags_map[4] = {
1477                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1478                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1479                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1480                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1481         };
1482         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1483                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1484
1485         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1486             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1487                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1488         }
1489
1490 #ifdef RTE_LIBRTE_SECURITY
1491         if (rx_status & IXGBE_RXD_STAT_SECP) {
1492                 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1493                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1494                         pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1495         }
1496 #endif
1497
1498         return pkt_flags;
1499 }
1500
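/*
 * Editor's note (illustrative, per the bit layout noted in the function
 * above): after the shift, bit 0 of the two-bit index carries the L4 error
 * and bit 1 the IPv4 header checksum error. A descriptor reporting only an
 * IPv4 checksum error therefore selects index 2, i.e.
 * PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD.
 */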
1501 /*
1502  * LOOK_AHEAD defines how many desc statuses to check beyond the
1503  * current descriptor.
1504  * It must be a compile-time constant (a #define) for optimal performance.
1505  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1506  * function only works with LOOK_AHEAD=8.
1507  */
1508 #define LOOK_AHEAD 8
1509 #if (LOOK_AHEAD != 8)
1510 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1511 #endif
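/*
 * Editor's note (worked example; the burst size of 32 for
 * RTE_PMD_IXGBE_RX_MAX_BURST is an assumption about the header value):
 * ixgbe_rx_scan_hw_ring() below walks the ring in groups of LOOK_AHEAD (8)
 * descriptors, at most RTE_PMD_IXGBE_RX_MAX_BURST per call. If only 13
 * consecutive descriptors have their DD bit set, the first group yields 8
 * packets, the second group stops at nb_dd == 5, and the scan returns
 * nb_rx == 13.
 */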
1512 static inline int
1513 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1514 {
1515         volatile union ixgbe_adv_rx_desc *rxdp;
1516         struct ixgbe_rx_entry *rxep;
1517         struct rte_mbuf *mb;
1518         uint16_t pkt_len;
1519         uint64_t pkt_flags;
1520         int nb_dd;
1521         uint32_t s[LOOK_AHEAD];
1522         uint32_t pkt_info[LOOK_AHEAD];
1523         int i, j, nb_rx = 0;
1524         uint32_t status;
1525         uint64_t vlan_flags = rxq->vlan_flags;
1526
1527         /* get references to current descriptor and S/W ring entry */
1528         rxdp = &rxq->rx_ring[rxq->rx_tail];
1529         rxep = &rxq->sw_ring[rxq->rx_tail];
1530
1531         status = rxdp->wb.upper.status_error;
1532         /* check to make sure there is at least 1 packet to receive */
1533         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1534                 return 0;
1535
1536         /*
1537          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1538          * reference packets that are ready to be received.
1539          */
1540         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1541              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1542                 /* Read desc statuses; rte_smp_rmb() below orders them before the other fields */
1543                 for (j = 0; j < LOOK_AHEAD; j++)
1544                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1545
1546                 rte_smp_rmb();
1547
1548                 /* Compute how many status bits were set */
1549                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1550                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1551                         ;
1552
1553                 for (j = 0; j < nb_dd; j++)
1554                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1555                                                        lo_dword.data);
1556
1557                 nb_rx += nb_dd;
1558
1559                 /* Translate descriptor info to mbuf format */
1560                 for (j = 0; j < nb_dd; ++j) {
1561                         mb = rxep[j].mbuf;
1562                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1563                                   rxq->crc_len;
1564                         mb->data_len = pkt_len;
1565                         mb->pkt_len = pkt_len;
1566                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1567
1568                         /* convert descriptor fields to rte mbuf flags */
1569                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1570                                 vlan_flags);
1571                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1572                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1573                                         ((uint16_t)pkt_info[j]);
1574                         mb->ol_flags = pkt_flags;
1575                         mb->packet_type =
1576                                 ixgbe_rxd_pkt_info_to_pkt_type
1577                                         (pkt_info[j], rxq->pkt_type_mask);
1578
1579                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1580                                 mb->hash.rss = rte_le_to_cpu_32(
1581                                     rxdp[j].wb.lower.hi_dword.rss);
1582                         else if (pkt_flags & PKT_RX_FDIR) {
1583                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1584                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1585                                     IXGBE_ATR_HASH_MASK;
1586                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1587                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1588                         }
1589                 }
1590
1591                 /* Move mbuf pointers from the S/W ring to the stage */
1592                 for (j = 0; j < LOOK_AHEAD; ++j) {
1593                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1594                 }
1595
1596                 /* stop scanning if fewer than LOOK_AHEAD descriptors in this group were done */
1597                 if (nb_dd != LOOK_AHEAD)
1598                         break;
1599         }
1600
1601         /* clear software ring entries so we can cleanup correctly */
1602         for (i = 0; i < nb_rx; ++i) {
1603                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1604         }
1605
1606
1607         return nb_rx;
1608 }
1609
1610 static inline int
1611 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1612 {
1613         volatile union ixgbe_adv_rx_desc *rxdp;
1614         struct ixgbe_rx_entry *rxep;
1615         struct rte_mbuf *mb;
1616         uint16_t alloc_idx;
1617         __le64 dma_addr;
1618         int diag, i;
1619
1620         /* allocate buffers in bulk directly into the S/W ring */
1621         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1622         rxep = &rxq->sw_ring[alloc_idx];
1623         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1624                                     rxq->rx_free_thresh);
1625         if (unlikely(diag != 0))
1626                 return -ENOMEM;
1627
1628         rxdp = &rxq->rx_ring[alloc_idx];
1629         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1630                 /* populate the static rte mbuf fields */
1631                 mb = rxep[i].mbuf;
1632                 if (reset_mbuf) {
1633                         mb->port = rxq->port_id;
1634                 }
1635
1636                 rte_mbuf_refcnt_set(mb, 1);
1637                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1638
1639                 /* populate the descriptors */
1640                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1641                 rxdp[i].read.hdr_addr = 0;
1642                 rxdp[i].read.pkt_addr = dma_addr;
1643         }
1644
1645         /* update state of internal queue structure */
1646         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1647         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1648                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1649
1650         /* no errors */
1651         return 0;
1652 }
1653
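/*
 * Editor's note (illustrative numbers, assuming rx_free_trigger starts at
 * rx_free_thresh - 1 as set by the queue reset code elsewhere in this file):
 * with nb_rx_desc = 128 and rx_free_thresh = 32, ixgbe_rx_alloc_bufs() above
 * first refills descriptors 0..31 (alloc_idx = 31 - 31 = 0), then
 * rx_free_trigger advances 31 -> 63 -> 95 -> 127 and wraps back to 31 once
 * it would move past the end of the ring.
 */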
1654 static inline uint16_t
1655 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1656                          uint16_t nb_pkts)
1657 {
1658         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1659         int i;
1660
1661         /* how many packets are ready to return? */
1662         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1663
1664         /* copy mbuf pointers to the application's packet list */
1665         for (i = 0; i < nb_pkts; ++i)
1666                 rx_pkts[i] = stage[i];
1667
1668         /* update internal queue state */
1669         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1670         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1671
1672         return nb_pkts;
1673 }
1674
1675 static inline uint16_t
1676 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1677              uint16_t nb_pkts)
1678 {
1679         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1680         uint16_t nb_rx = 0;
1681
1682         /* Any previously recv'd pkts will be returned from the Rx stage */
1683         if (rxq->rx_nb_avail)
1684                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1685
1686         /* Scan the H/W ring for packets to receive */
1687         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1688
1689         /* update internal queue state */
1690         rxq->rx_next_avail = 0;
1691         rxq->rx_nb_avail = nb_rx;
1692         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1693
1694         /* if required, allocate new buffers to replenish descriptors */
1695         if (rxq->rx_tail > rxq->rx_free_trigger) {
1696                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1697
1698                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1699                         int i, j;
1700
1701                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1702                                    "queue_id=%u", (unsigned) rxq->port_id,
1703                                    (unsigned) rxq->queue_id);
1704
1705                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1706                                 rxq->rx_free_thresh;
1707
1708                         /*
1709                          * Need to rewind any previous receives if we cannot
1710                          * allocate new buffers to replenish the old ones.
1711                          */
1712                         rxq->rx_nb_avail = 0;
1713                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1714                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1715                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1716
1717                         return 0;
1718                 }
1719
1720                 /* update tail pointer */
1721                 rte_wmb();
1722                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1723                                             cur_free_trigger);
1724         }
1725
1726         if (rxq->rx_tail >= rxq->nb_rx_desc)
1727                 rxq->rx_tail = 0;
1728
1729         /* received any packets this loop? */
1730         if (rxq->rx_nb_avail)
1731                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1732
1733         return 0;
1734 }
1735
1736 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1737 uint16_t
1738 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1739                            uint16_t nb_pkts)
1740 {
1741         uint16_t nb_rx;
1742
1743         if (unlikely(nb_pkts == 0))
1744                 return 0;
1745
1746         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1747                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1748
1749         /* request is relatively large, chunk it up */
1750         nb_rx = 0;
1751         while (nb_pkts) {
1752                 uint16_t ret, n;
1753
1754                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1755                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1756                 nb_rx = (uint16_t)(nb_rx + ret);
1757                 nb_pkts = (uint16_t)(nb_pkts - ret);
1758                 if (ret < n)
1759                         break;
1760         }
1761
1762         return nb_rx;
1763 }
1764
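/*
 * Editor's note (worked example; the value 32 for RTE_PMD_IXGBE_RX_MAX_BURST
 * is an assumption about the header value): ixgbe_recv_pkts_bulk_alloc()
 * above forwards a request for 100 packets to rx_recv_pkts() as chunks of
 * 32, 32, 32 and 4. The loop stops early as soon as a chunk returns fewer
 * packets than requested, i.e. the ring has been drained.
 */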
1765 uint16_t
1766 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1767                 uint16_t nb_pkts)
1768 {
1769         struct ixgbe_rx_queue *rxq;
1770         volatile union ixgbe_adv_rx_desc *rx_ring;
1771         volatile union ixgbe_adv_rx_desc *rxdp;
1772         struct ixgbe_rx_entry *sw_ring;
1773         struct ixgbe_rx_entry *rxe;
1774         struct rte_mbuf *rxm;
1775         struct rte_mbuf *nmb;
1776         union ixgbe_adv_rx_desc rxd;
1777         uint64_t dma_addr;
1778         uint32_t staterr;
1779         uint32_t pkt_info;
1780         uint16_t pkt_len;
1781         uint16_t rx_id;
1782         uint16_t nb_rx;
1783         uint16_t nb_hold;
1784         uint64_t pkt_flags;
1785         uint64_t vlan_flags;
1786
1787         nb_rx = 0;
1788         nb_hold = 0;
1789         rxq = rx_queue;
1790         rx_id = rxq->rx_tail;
1791         rx_ring = rxq->rx_ring;
1792         sw_ring = rxq->sw_ring;
1793         vlan_flags = rxq->vlan_flags;
1794         while (nb_rx < nb_pkts) {
1795                 /*
1796                  * The order of operations here is important as the DD status
1797                  * bit must not be read after any other descriptor fields.
1798                  * rx_ring and rxdp are pointing to volatile data so the order
1799                  * of accesses cannot be reordered by the compiler. If they were
1800                  * not volatile, they could be reordered which could lead to
1801                  * using invalid descriptor fields when read from rxd.
1802                  */
1803                 rxdp = &rx_ring[rx_id];
1804                 staterr = rxdp->wb.upper.status_error;
1805                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1806                         break;
1807                 rxd = *rxdp;
1808
1809                 /*
1810                  * End of packet.
1811                  *
1812                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1813                  * is likely to be invalid and to be dropped by the various
1814                  * validation checks performed by the network stack.
1815                  *
1816                  * Allocate a new mbuf to replenish the RX ring descriptor.
1817                  * If the allocation fails:
1818                  *    - arrange for that RX descriptor to be the first one
1819                  *      being parsed the next time the receive function is
1820                  *      invoked [on the same queue].
1821                  *
1822                  *    - Stop parsing the RX ring and return immediately.
1823                  *
1824                  * This policy does not drop the packet received in the RX
1825                  * descriptor for which the allocation of a new mbuf failed.
1826                  * Thus, that packet can still be retrieved later, once
1827                  * mbufs have been freed in the meantime.
1828                  * As a side effect, holding RX descriptors instead of
1829                  * systematically giving them back to the NIC may lead to
1830                  * RX ring exhaustion situations.
1831                  * However, the NIC can gracefully prevent such situations
1832                  * from happening by sending specific "back-pressure" flow control
1833                  * frames to its peer(s).
1834                  */
1835                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1836                            "ext_err_stat=0x%08x pkt_len=%u",
1837                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1838                            (unsigned) rx_id, (unsigned) staterr,
1839                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1840
1841                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1842                 if (nmb == NULL) {
1843                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1844                                    "queue_id=%u", (unsigned) rxq->port_id,
1845                                    (unsigned) rxq->queue_id);
1846                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1847                         break;
1848                 }
1849
1850                 nb_hold++;
1851                 rxe = &sw_ring[rx_id];
1852                 rx_id++;
1853                 if (rx_id == rxq->nb_rx_desc)
1854                         rx_id = 0;
1855
1856                 /* Prefetch next mbuf while processing current one. */
1857                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1858
1859                 /*
1860                  * When next RX descriptor is on a cache-line boundary,
1861                  * prefetch the next 4 RX descriptors and the next 8 pointers
1862                  * to mbufs.
1863                  */
1864                 if ((rx_id & 0x3) == 0) {
1865                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1866                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1867                 }
1868
1869                 rxm = rxe->mbuf;
1870                 rxe->mbuf = nmb;
1871                 dma_addr =
1872                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1873                 rxdp->read.hdr_addr = 0;
1874                 rxdp->read.pkt_addr = dma_addr;
1875
1876                 /*
1877                  * Initialize the returned mbuf.
1878                  * 1) setup generic mbuf fields:
1879                  *    - number of segments,
1880                  *    - next segment,
1881                  *    - packet length,
1882                  *    - RX port identifier.
1883                  * 2) integrate hardware offload data, if any:
1884                  *    - RSS flag & hash,
1885                  *    - IP checksum flag,
1886                  *    - VLAN TCI, if any,
1887                  *    - error flags.
1888                  */
1889                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1890                                       rxq->crc_len);
1891                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1892                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1893                 rxm->nb_segs = 1;
1894                 rxm->next = NULL;
1895                 rxm->pkt_len = pkt_len;
1896                 rxm->data_len = pkt_len;
1897                 rxm->port = rxq->port_id;
1898
1899                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1900                 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1901                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1902
1903                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1904                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1905                 pkt_flags = pkt_flags |
1906                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1907                 rxm->ol_flags = pkt_flags;
1908                 rxm->packet_type =
1909                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1910                                                        rxq->pkt_type_mask);
1911
1912                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1913                         rxm->hash.rss = rte_le_to_cpu_32(
1914                                                 rxd.wb.lower.hi_dword.rss);
1915                 else if (pkt_flags & PKT_RX_FDIR) {
1916                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1917                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1918                                         IXGBE_ATR_HASH_MASK;
1919                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1920                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1921                 }
1922                 /*
1923                  * Store the mbuf address into the next entry of the array
1924                  * of returned packets.
1925                  */
1926                 rx_pkts[nb_rx++] = rxm;
1927         }
1928         rxq->rx_tail = rx_id;
1929
1930         /*
1931          * If the number of free RX descriptors is greater than the RX free
1932          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1933          * register.
1934          * Update the RDT with the value of the last processed RX descriptor
1935          * minus 1, to guarantee that the RDT register is never equal to the
1936          * RDH register, which creates a "full" ring situation from the
1937          * hardware point of view...
1938          */
1939         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1940         if (nb_hold > rxq->rx_free_thresh) {
1941                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1942                            "nb_hold=%u nb_rx=%u",
1943                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1944                            (unsigned) rx_id, (unsigned) nb_hold,
1945                            (unsigned) nb_rx);
1946                 rx_id = (uint16_t) ((rx_id == 0) ?
1947                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1948                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1949                 nb_hold = 0;
1950         }
1951         rxq->nb_rx_hold = nb_hold;
1952         return nb_rx;
1953 }
1954
1955 /**
1956  * Detect an RSC descriptor.
1957  */
1958 static inline uint32_t
1959 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1960 {
1961         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1962                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1963 }
1964
1965 /**
1966  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1967  *
1968  * Fill the following info in the HEAD buffer of the Rx cluster:
1969  *    - RX port identifier
1970  *    - hardware offload data, if any:
1971  *      - RSS flag & hash
1972  *      - IP checksum flag
1973  *      - VLAN TCI, if any
1974  *      - error flags
1975  * @head HEAD of the packet cluster
1976  * @desc HW descriptor to get data from
1977  * @rxq Pointer to the Rx queue
1978  */
1979 static inline void
1980 ixgbe_fill_cluster_head_buf(
1981         struct rte_mbuf *head,
1982         union ixgbe_adv_rx_desc *desc,
1983         struct ixgbe_rx_queue *rxq,
1984         uint32_t staterr)
1985 {
1986         uint32_t pkt_info;
1987         uint64_t pkt_flags;
1988
1989         head->port = rxq->port_id;
1990
1991         /* The vlan_tci field is only valid when PKT_RX_VLAN is
1992          * set in the pkt_flags field.
1993          */
1994         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1995         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1996         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1997         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1998         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1999         head->ol_flags = pkt_flags;
2000         head->packet_type =
2001                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
2002
2003         if (likely(pkt_flags & PKT_RX_RSS_HASH))
2004                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
2005         else if (pkt_flags & PKT_RX_FDIR) {
2006                 head->hash.fdir.hash =
2007                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
2008                                                           & IXGBE_ATR_HASH_MASK;
2009                 head->hash.fdir.id =
2010                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
2011         }
2012 }
2013
2014 /**
2015  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
2016  *
2017  * @rx_queue Rx queue handle
2018  * @rx_pkts table of received packets
2019  * @nb_pkts size of rx_pkts table
2020  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
2021  *
2022  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
2023  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
2024  *
2025  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
2026  * 1) When non-EOP RSC completion arrives:
2027  *    a) Update the HEAD of the current RSC aggregation cluster with the new
2028  *       segment's data length.
2029  *    b) Set the "next" pointer of the current segment to point to the segment
2030  *       at the NEXTP index.
2031  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2032  *       in the sw_rsc_ring.
2033  * 2) When EOP arrives we just update the cluster's total length and offload
2034  *    flags and deliver the cluster up to the upper layers. In our case - put it
2035  *    in the rx_pkts table.
2036  *
2037  * Returns the number of received packets/clusters (according to the "bulk
2038  * receive" interface).
2039  */
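/*
 * Editor's sketch (illustrative only; mbuf_i, len_j and NEXTP() are
 * shorthand, not identifiers from the code): for a three-segment RSC
 * cluster the per-descriptor work in the function below is roughly
 *
 *     desc i (first, !EOP):  first_seg = mbuf_i, nb_segs = 1,
 *                            mbuf_i->next = sw_ring[NEXTP(i)].mbuf,
 *                            sw_sc_ring[NEXTP(i)].fbuf = first_seg
 *     desc j (middle, !EOP): pkt_len += len_j, nb_segs++,
 *                            mbuf_j->next = sw_ring[NEXTP(j)].mbuf,
 *                            sw_sc_ring[NEXTP(j)].fbuf = first_seg
 *     desc k (EOP):          pkt_len += len_k, nb_segs++,
 *                            ixgbe_fill_cluster_head_buf(first_seg, ...),
 *                            rx_pkts[nb_rx++] = first_seg
 *
 * so the partially assembled cluster "travels" through sw_sc_ring until the
 * EOP descriptor completes it.
 */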
2040 static inline uint16_t
2041 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2042                     bool bulk_alloc)
2043 {
2044         struct ixgbe_rx_queue *rxq = rx_queue;
2045         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2046         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2047         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2048         uint16_t rx_id = rxq->rx_tail;
2049         uint16_t nb_rx = 0;
2050         uint16_t nb_hold = rxq->nb_rx_hold;
2051         uint16_t prev_id = rxq->rx_tail;
2052
2053         while (nb_rx < nb_pkts) {
2054                 bool eop;
2055                 struct ixgbe_rx_entry *rxe;
2056                 struct ixgbe_scattered_rx_entry *sc_entry;
2057                 struct ixgbe_scattered_rx_entry *next_sc_entry;
2058                 struct ixgbe_rx_entry *next_rxe = NULL;
2059                 struct rte_mbuf *first_seg;
2060                 struct rte_mbuf *rxm;
2061                 struct rte_mbuf *nmb;
2062                 union ixgbe_adv_rx_desc rxd;
2063                 uint16_t data_len;
2064                 uint16_t next_id;
2065                 volatile union ixgbe_adv_rx_desc *rxdp;
2066                 uint32_t staterr;
2067
2068 next_desc:
2069                 /*
2070                  * The code in this whole file uses the volatile pointer to
2071                  * ensure the read ordering of the status and the rest of the
2072                  * descriptor fields (on the compiler level only!!!). This is so
2073                  * UGLY - why not just use the compiler barrier instead? DPDK
2074                  * even has the rte_compiler_barrier() for that.
2075                  *
2076                  * But most importantly this is just wrong because this doesn't
2077                  * ensure memory ordering in a general case at all. For
2078                  * instance, DPDK is supposed to work on Power CPUs where
2079                  * compiler barrier may just not be enough!
2080                  *
2081                  * I tried to write only this function properly to have a
2082                  * starting point (as a part of an LRO/RSC series) but the
2083                  * compiler cursed at me when I tried to cast away the
2084                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2085                  * keeping it the way it is for now.
2086                  *
2087                  * The code in this file is broken in so many other places and
2088                  * will just not work on a big endian CPU anyway therefore the
2089                  * lines below will have to be revisited together with the rest
2090                  * of the ixgbe PMD.
2091                  *
2092                  * TODO:
2093                  *    - Get rid of "volatile" crap and let the compiler do its
2094                  *      job.
2095                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2096                  *      memory ordering below.
2097                  */
2098                 rxdp = &rx_ring[rx_id];
2099                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2100
2101                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2102                         break;
2103
2104                 rxd = *rxdp;
2105
2106                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2107                                   "staterr=0x%x data_len=%u",
2108                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2109                            rte_le_to_cpu_16(rxd.wb.upper.length));
2110
2111                 if (!bulk_alloc) {
2112                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2113                         if (nmb == NULL) {
2114                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2115                                                   "port_id=%u queue_id=%u",
2116                                            rxq->port_id, rxq->queue_id);
2117
2118                                 rte_eth_devices[rxq->port_id].data->
2119                                                         rx_mbuf_alloc_failed++;
2120                                 break;
2121                         }
2122                 } else if (nb_hold > rxq->rx_free_thresh) {
2123                         uint16_t next_rdt = rxq->rx_free_trigger;
2124
2125                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2126                                 rte_wmb();
2127                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2128                                                             next_rdt);
2129                                 nb_hold -= rxq->rx_free_thresh;
2130                         } else {
2131                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2132                                                   "port_id=%u queue_id=%u",
2133                                            rxq->port_id, rxq->queue_id);
2134
2135                                 rte_eth_devices[rxq->port_id].data->
2136                                                         rx_mbuf_alloc_failed++;
2137                                 break;
2138                         }
2139                 }
2140
2141                 nb_hold++;
2142                 rxe = &sw_ring[rx_id];
2143                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2144
2145                 next_id = rx_id + 1;
2146                 if (next_id == rxq->nb_rx_desc)
2147                         next_id = 0;
2148
2149                 /* Prefetch next mbuf while processing current one. */
2150                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2151
2152                 /*
2153                  * When next RX descriptor is on a cache-line boundary,
2154                  * prefetch the next 4 RX descriptors and the next 8 pointers
2155                  * to mbufs.
2156                  */
2157                 if ((next_id & 0x3) == 0) {
2158                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2159                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2160                 }
2161
2162                 rxm = rxe->mbuf;
2163
2164                 if (!bulk_alloc) {
2165                         __le64 dma =
2166                           rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2167                         /*
2168                          * Update RX descriptor with the physical address of the
2169                          * new data buffer of the new allocated mbuf.
2170                          */
2171                         rxe->mbuf = nmb;
2172
2173                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2174                         rxdp->read.hdr_addr = 0;
2175                         rxdp->read.pkt_addr = dma;
2176                 } else
2177                         rxe->mbuf = NULL;
2178
2179                 /*
2180                  * Set data length & data buffer address of mbuf.
2181                  */
2182                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2183                 rxm->data_len = data_len;
2184
2185                 if (!eop) {
2186                         uint16_t nextp_id;
2187                         /*
2188                          * Get next descriptor index:
2189                          *  - For RSC it's in the NEXTP field.
2190                          *  - For a scattered packet - it's just a following
2191                          *    descriptor.
2192                          */
2193                         if (ixgbe_rsc_count(&rxd))
2194                                 nextp_id =
2195                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2196                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2197                         else
2198                                 nextp_id = next_id;
2199
2200                         next_sc_entry = &sw_sc_ring[nextp_id];
2201                         next_rxe = &sw_ring[nextp_id];
2202                         rte_ixgbe_prefetch(next_rxe);
2203                 }
2204
2205                 sc_entry = &sw_sc_ring[rx_id];
2206                 first_seg = sc_entry->fbuf;
2207                 sc_entry->fbuf = NULL;
2208
2209                 /*
2210                  * If this is the first buffer of the received packet,
2211                  * set the pointer to the first mbuf of the packet and
2212                  * initialize its context.
2213                  * Otherwise, update the total length and the number of segments
2214                  * of the current scattered packet, and update the pointer to
2215                  * the last mbuf of the current packet.
2216                  */
2217                 if (first_seg == NULL) {
2218                         first_seg = rxm;
2219                         first_seg->pkt_len = data_len;
2220                         first_seg->nb_segs = 1;
2221                 } else {
2222                         first_seg->pkt_len += data_len;
2223                         first_seg->nb_segs++;
2224                 }
2225
2226                 prev_id = rx_id;
2227                 rx_id = next_id;
2228
2229                 /*
2230                  * If this is not the last buffer of the received packet, update
2231                  * the pointer to the first mbuf at the NEXTP entry in the
2232                  * sw_sc_ring and continue to parse the RX ring.
2233                  */
2234                 if (!eop && next_rxe) {
2235                         rxm->next = next_rxe->mbuf;
2236                         next_sc_entry->fbuf = first_seg;
2237                         goto next_desc;
2238                 }
2239
2240                 /* Initialize the first mbuf of the returned packet */
2241                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2242
2243                 /*
2244                  * Deal with the case when HW CRC stripping is disabled.
2245                  * That cannot happen when LRO is enabled, but it can still
2246                  * happen in scattered RX mode.
2247                  */
2248                 first_seg->pkt_len -= rxq->crc_len;
2249                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2250                         struct rte_mbuf *lp;
2251
2252                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2253                                 ;
2254
2255                         first_seg->nb_segs--;
2256                         lp->data_len -= rxq->crc_len - rxm->data_len;
2257                         lp->next = NULL;
2258                         rte_pktmbuf_free_seg(rxm);
2259                 } else
2260                         rxm->data_len -= rxq->crc_len;
2261
2262                 /* Prefetch data of first segment, if configured to do so. */
2263                 rte_packet_prefetch((char *)first_seg->buf_addr +
2264                         first_seg->data_off);
2265
2266                 /*
2267                  * Store the mbuf address into the next entry of the array
2268                  * of returned packets.
2269                  */
2270                 rx_pkts[nb_rx++] = first_seg;
2271         }
2272
2273         /*
2274          * Record index of the next RX descriptor to probe.
2275          */
2276         rxq->rx_tail = rx_id;
2277
2278         /*
2279          * If the number of free RX descriptors is greater than the RX free
2280          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2281          * register.
2282          * Update the RDT with the value of the last processed RX descriptor
2283          * minus 1, to guarantee that the RDT register is never equal to the
2284          * RDH register, which creates a "full" ring situation from the
2285          * hardware point of view...
2286          */
2287         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2288                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2289                            "nb_hold=%u nb_rx=%u",
2290                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2291
2292                 rte_wmb();
2293                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2294                 nb_hold = 0;
2295         }
2296
2297         rxq->nb_rx_hold = nb_hold;
2298         return nb_rx;
2299 }
2300
2301 uint16_t
2302 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2303                                  uint16_t nb_pkts)
2304 {
2305         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2306 }
2307
2308 uint16_t
2309 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2310                                uint16_t nb_pkts)
2311 {
2312         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2313 }
2314
2315 /*********************************************************************
2316  *
2317  *  Queue management functions
2318  *
2319  **********************************************************************/
2320
2321 static void __attribute__((cold))
2322 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2323 {
2324         unsigned i;
2325
2326         if (txq->sw_ring != NULL) {
2327                 for (i = 0; i < txq->nb_tx_desc; i++) {
2328                         if (txq->sw_ring[i].mbuf != NULL) {
2329                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2330                                 txq->sw_ring[i].mbuf = NULL;
2331                         }
2332                 }
2333         }
2334 }
2335
2336 static void __attribute__((cold))
2337 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2338 {
2339         if (txq != NULL &&
2340             txq->sw_ring != NULL)
2341                 rte_free(txq->sw_ring);
2342 }
2343
2344 static void __attribute__((cold))
2345 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2346 {
2347         if (txq != NULL && txq->ops != NULL) {
2348                 txq->ops->release_mbufs(txq);
2349                 txq->ops->free_swring(txq);
2350                 rte_free(txq);
2351         }
2352 }
2353
2354 void __attribute__((cold))
2355 ixgbe_dev_tx_queue_release(void *txq)
2356 {
2357         ixgbe_tx_queue_release(txq);
2358 }
2359
2360 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2361 static void __attribute__((cold))
2362 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2363 {
2364         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2365         struct ixgbe_tx_entry *txe = txq->sw_ring;
2366         uint16_t prev, i;
2367
2368         /* Zero out HW ring memory */
2369         for (i = 0; i < txq->nb_tx_desc; i++) {
2370                 txq->tx_ring[i] = zeroed_desc;
2371         }
2372
2373         /* Initialize SW ring entries */
2374         prev = (uint16_t) (txq->nb_tx_desc - 1);
2375         for (i = 0; i < txq->nb_tx_desc; i++) {
2376                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2377
2378                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2379                 txe[i].mbuf = NULL;
2380                 txe[i].last_id = i;
2381                 txe[prev].next_id = i;
2382                 prev = i;
2383         }
2384
2385         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2386         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2387
2388         txq->tx_tail = 0;
2389         txq->nb_tx_used = 0;
2390         /*
2391          * Always allow 1 descriptor to be un-allocated to avoid
2392          * a H/W race condition
2393          */
2394         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2395         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2396         txq->ctx_curr = 0;
2397         memset((void *)&txq->ctx_cache, 0,
2398                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2399 }
2400
2401 static const struct ixgbe_txq_ops def_txq_ops = {
2402         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2403         .free_swring = ixgbe_tx_free_swring,
2404         .reset = ixgbe_reset_tx_queue,
2405 };
2406
2407 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2408  * the queue parameters. Used in tx_queue_setup by the primary process and
2409  * then in dev_init by a secondary process when attaching to an existing ethdev.
2410  */
2411 void __attribute__((cold))
2412 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2413 {
2414         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2415         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) &&
2416 #ifdef RTE_LIBRTE_SECURITY
2417                         !(txq->using_ipsec) &&
2418 #endif
2419                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2420                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2421                 dev->tx_pkt_prepare = NULL;
2422 #ifdef RTE_IXGBE_INC_VECTOR
2423                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2424                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2425                                         ixgbe_txq_vec_setup(txq) == 0)) {
2426                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2427                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2428                 } else
2429 #endif
2430                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2431         } else {
2432                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2433                 PMD_INIT_LOG(DEBUG,
2434                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2435                                 (unsigned long)txq->txq_flags,
2436                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2437                 PMD_INIT_LOG(DEBUG,
2438                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2439                                 (unsigned long)txq->tx_rs_thresh,
2440                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2441                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2442                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2443         }
2444 }
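
/*
 * Illustrative sketch, for documentation only (never called by the driver):
 * the selection above boils down to the predicate below.  The thresholds are
 * passed in as parameters instead of using the RTE_PMD_IXGBE_TX_MAX_BURST and
 * RTE_IXGBE_TX_MAX_FREE_BUF_SZ macros so the sketch stays self-contained, and
 * the per-queue ixgbe_txq_vec_setup() check is left out.  Return value:
 * 0 = full-featured path, 1 = simple scalar path, 2 = vector path candidate.
 */
static inline int __attribute__((unused))
sketch_select_tx_path(uint32_t txq_flags, uint32_t simple_flags,
                      uint16_t tx_rs_thresh, uint16_t max_burst,
                      uint16_t max_free_buf_sz, int using_ipsec)
{
        if ((txq_flags & simple_flags) != simple_flags ||
            using_ipsec || tx_rs_thresh < max_burst)
                return 0; /* offloads or multi-segment support needed */
        if (tx_rs_thresh <= max_free_buf_sz)
                return 2; /* free threshold small enough for the vector path */
        return 1;
}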
2445
2446 int __attribute__((cold))
2447 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2448                          uint16_t queue_idx,
2449                          uint16_t nb_desc,
2450                          unsigned int socket_id,
2451                          const struct rte_eth_txconf *tx_conf)
2452 {
2453         const struct rte_memzone *tz;
2454         struct ixgbe_tx_queue *txq;
2455         struct ixgbe_hw     *hw;
2456         uint16_t tx_rs_thresh, tx_free_thresh;
2457
2458         PMD_INIT_FUNC_TRACE();
2459         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2460
2461         /*
2462          * Validate number of transmit descriptors.
2463          * It must not exceed hardware maximum, and must be multiple
2464          * of IXGBE_ALIGN.
2465          */
2466         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2467                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2468                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2469                 return -EINVAL;
2470         }
2471
2472         /*
2473          * The following two parameters control the setting of the RS bit on
2474          * transmit descriptors.
2475          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2476          * descriptors have been used.
2477          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2478          * descriptors are used or if the number of descriptors required
2479          * to transmit a packet is greater than the number of free TX
2480          * descriptors.
2481          * The following constraints must be satisfied:
2482          *  tx_rs_thresh must be greater than 0.
2483          *  tx_rs_thresh must be less than the size of the ring minus 2.
2484          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2485          *  tx_rs_thresh must be a divisor of the ring size.
2486          *  tx_free_thresh must be greater than 0.
2487          *  tx_free_thresh must be less than the size of the ring minus 3.
2488          * One descriptor in the TX ring is used as a sentinel to avoid a
2489          * H/W race condition, hence the maximum threshold constraints.
2490          * When set to zero use default values.
2491          */
2492         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2493                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2494         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2495                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2496         if (tx_rs_thresh >= (nb_desc - 2)) {
2497                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2498                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2499                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2500                         (int)dev->data->port_id, (int)queue_idx);
2501                 return -(EINVAL);
2502         }
2503         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2504                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2505                         "(tx_rs_thresh=%u port=%d queue=%d)",
2506                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2507                         (int)dev->data->port_id, (int)queue_idx);
2508                 return -(EINVAL);
2509         }
2510         if (tx_free_thresh >= (nb_desc - 3)) {
2511                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2512                              "number of TX descriptors minus 3. "
2513                              "(tx_free_thresh=%u "
2514                              "port=%d queue=%d)",
2515                              (unsigned int)tx_free_thresh,
2516                              (int)dev->data->port_id, (int)queue_idx);
2517                 return -(EINVAL);
2518         }
2519         if (tx_rs_thresh > tx_free_thresh) {
2520                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2521                              "tx_free_thresh. (tx_free_thresh=%u "
2522                              "tx_rs_thresh=%u port=%d queue=%d)",
2523                              (unsigned int)tx_free_thresh,
2524                              (unsigned int)tx_rs_thresh,
2525                              (int)dev->data->port_id,
2526                              (int)queue_idx);
2527                 return -(EINVAL);
2528         }
2529         if ((nb_desc % tx_rs_thresh) != 0) {
2530                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2531                              "number of TX descriptors. (tx_rs_thresh=%u "
2532                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2533                              (int)dev->data->port_id, (int)queue_idx);
2534                 return -(EINVAL);
2535         }
2536
2537         /*
2538          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2539          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2540          * by the NIC and all descriptors are written back after the NIC
2541          * accumulates WTHRESH descriptors.
2542          */
2543         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2544                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2545                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2546                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2547                              (int)dev->data->port_id, (int)queue_idx);
2548                 return -(EINVAL);
2549         }
2550
2551         /* Free memory prior to re-allocation if needed... */
2552         if (dev->data->tx_queues[queue_idx] != NULL) {
2553                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2554                 dev->data->tx_queues[queue_idx] = NULL;
2555         }
2556
2557         /* First allocate the tx queue data structure */
2558         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2559                                  RTE_CACHE_LINE_SIZE, socket_id);
2560         if (txq == NULL)
2561                 return -ENOMEM;
2562
2563         /*
2564          * Allocate TX ring hardware descriptors. A memzone large enough to
2565          * handle the maximum ring size is allocated in order to allow for
2566          * resizing in later calls to the queue setup function.
2567          */
2568         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2569                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2570                         IXGBE_ALIGN, socket_id);
2571         if (tz == NULL) {
2572                 ixgbe_tx_queue_release(txq);
2573                 return -ENOMEM;
2574         }
2575
2576         txq->nb_tx_desc = nb_desc;
2577         txq->tx_rs_thresh = tx_rs_thresh;
2578         txq->tx_free_thresh = tx_free_thresh;
2579         txq->pthresh = tx_conf->tx_thresh.pthresh;
2580         txq->hthresh = tx_conf->tx_thresh.hthresh;
2581         txq->wthresh = tx_conf->tx_thresh.wthresh;
2582         txq->queue_id = queue_idx;
2583         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2584                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2585         txq->port_id = dev->data->port_id;
2586         txq->txq_flags = tx_conf->txq_flags;
2587         txq->ops = &def_txq_ops;
2588         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2589 #ifdef RTE_LIBRTE_SECURITY
2590         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2591                         DEV_TX_OFFLOAD_SECURITY);
2592 #endif
2593
2594         /*
2595          * Use the VFTDT register as the Tx tail pointer when a VF MAC type is detected
2596          */
2597         if (hw->mac.type == ixgbe_mac_82599_vf ||
2598             hw->mac.type == ixgbe_mac_X540_vf ||
2599             hw->mac.type == ixgbe_mac_X550_vf ||
2600             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2601             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2602                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2603         else
2604                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2605
2606         txq->tx_ring_phys_addr = tz->iova;
2607         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2608
2609         /* Allocate software ring */
2610         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2611                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2612                                 RTE_CACHE_LINE_SIZE, socket_id);
2613         if (txq->sw_ring == NULL) {
2614                 ixgbe_tx_queue_release(txq);
2615                 return -ENOMEM;
2616         }
2617         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2618                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2619
2620         /* set up vector or scalar TX function as appropriate */
2621         ixgbe_set_tx_function(dev, txq);
2622
2623         txq->ops->reset(txq);
2624
2625         dev->data->tx_queues[queue_idx] = txq;
2626
2627
2628         return 0;
2629 }
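
/*
 * Illustrative sketch, for documentation only (never called by the driver):
 * the tx_rs_thresh/tx_free_thresh rules listed in the comment inside
 * ixgbe_dev_tx_queue_setup() condensed into one standalone predicate.  The
 * cap against DEFAULT_TX_RS_THRESH is omitted for brevity; the name
 * sketch_check_tx_thresh() is hypothetical.  Returns 0 when the combination
 * is acceptable, -1 otherwise.
 */
static inline int __attribute__((unused))
sketch_check_tx_thresh(uint16_t nb_desc, uint16_t tx_rs_thresh,
                       uint16_t tx_free_thresh, uint8_t wthresh)
{
        if (tx_rs_thresh == 0 || tx_rs_thresh >= (uint16_t)(nb_desc - 2))
                return -1; /* must leave the H/W sentinel descriptor free */
        if (tx_free_thresh == 0 || tx_free_thresh >= (uint16_t)(nb_desc - 3))
                return -1;
        if (tx_rs_thresh > tx_free_thresh)
                return -1; /* RS threshold may not exceed the free threshold */
        if ((nb_desc % tx_rs_thresh) != 0)
                return -1; /* RS threshold must divide the ring size */
        if (tx_rs_thresh > 1 && wthresh != 0)
                return -1; /* non-zero WTHRESH makes the NIC ignore the RS bit */
        return 0;
}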
2630
2631 /**
2632  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2633  *
2634  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2635  * in the sw_rsc_ring is not set to NULL but rather points to the next
2636  * mbuf of this RSC aggregation (that has not been completed yet and still
2637  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2638  * just free the first "nb_segs" segments of the cluster explicitly by
2639  * calling rte_pktmbuf_free_seg() on each of them.
2640  *
2641  * @m scattered cluster head
2642  */
2643 static void __attribute__((cold))
2644 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2645 {
2646         uint16_t i, nb_segs = m->nb_segs;
2647         struct rte_mbuf *next_seg;
2648
2649         for (i = 0; i < nb_segs; i++) {
2650                 next_seg = m->next;
2651                 rte_pktmbuf_free_seg(m);
2652                 m = next_seg;
2653         }
2654 }
2655
2656 static void __attribute__((cold))
2657 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2658 {
2659         unsigned i;
2660
2661 #ifdef RTE_IXGBE_INC_VECTOR
2662         /* SSE Vector driver has a different way of releasing mbufs. */
2663         if (rxq->rx_using_sse) {
2664                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2665                 return;
2666         }
2667 #endif
2668
2669         if (rxq->sw_ring != NULL) {
2670                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2671                         if (rxq->sw_ring[i].mbuf != NULL) {
2672                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2673                                 rxq->sw_ring[i].mbuf = NULL;
2674                         }
2675                 }
2676                 if (rxq->rx_nb_avail) {
2677                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2678                                 struct rte_mbuf *mb;
2679
2680                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2681                                 rte_pktmbuf_free_seg(mb);
2682                         }
2683                         rxq->rx_nb_avail = 0;
2684                 }
2685         }
2686
2687         if (rxq->sw_sc_ring)
2688                 for (i = 0; i < rxq->nb_rx_desc; i++)
2689                         if (rxq->sw_sc_ring[i].fbuf) {
2690                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2691                                 rxq->sw_sc_ring[i].fbuf = NULL;
2692                         }
2693 }
2694
2695 static void __attribute__((cold))
2696 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2697 {
2698         if (rxq != NULL) {
2699                 ixgbe_rx_queue_release_mbufs(rxq);
2700                 rte_free(rxq->sw_ring);
2701                 rte_free(rxq->sw_sc_ring);
2702                 rte_free(rxq);
2703         }
2704 }
2705
2706 void __attribute__((cold))
2707 ixgbe_dev_rx_queue_release(void *rxq)
2708 {
2709         ixgbe_rx_queue_release(rxq);
2710 }
2711
2712 /*
2713  * Check if Rx Burst Bulk Alloc function can be used.
2714  * Return
2715  *        0: the preconditions are satisfied and the bulk allocation function
2716  *           can be used.
2717  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2718  *           function must be used.
2719  */
2720 static inline int __attribute__((cold))
2721 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2722 {
2723         int ret = 0;
2724
2725         /*
2726          * Make sure the following pre-conditions are satisfied:
2727          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2728          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2729          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2730          * Scattered packets are not supported.  This should be checked
2731          * outside of this function.
2732          */
2733         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2734                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2735                              "rxq->rx_free_thresh=%d, "
2736                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2737                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2738                 ret = -EINVAL;
2739         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2740                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2741                              "rxq->rx_free_thresh=%d, "
2742                              "rxq->nb_rx_desc=%d",
2743                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2744                 ret = -EINVAL;
2745         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2746                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2747                              "rxq->nb_rx_desc=%d, "
2748                              "rxq->rx_free_thresh=%d",
2749                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2750                 ret = -EINVAL;
2751         }
2752
2753         return ret;
2754 }
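
/*
 * Illustrative sketch, for documentation only (never called by the driver):
 * the three preconditions above as one predicate.  "max_burst" stands in for
 * RTE_PMD_IXGBE_RX_MAX_BURST (32 in this release).  For example, with
 * nb_rx_desc = 512 a rx_free_thresh of 64 passes, while 48 fails the divisor
 * test because 512 % 48 != 0.
 */
static inline int __attribute__((unused))
sketch_rx_bulk_alloc_ok(uint16_t nb_rx_desc, uint16_t rx_free_thresh,
                        uint16_t max_burst)
{
        return rx_free_thresh >= max_burst &&
               rx_free_thresh < nb_rx_desc &&
               (nb_rx_desc % rx_free_thresh) == 0;
}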
2755
2756 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2757 static void __attribute__((cold))
2758 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2759 {
2760         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2761         unsigned i;
2762         uint16_t len = rxq->nb_rx_desc;
2763
2764         /*
2765          * By default, the Rx queue setup function allocates enough memory for
2766          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2767          * extra memory at the end of the descriptor ring to be zeroed out.
2768          */
2769         if (adapter->rx_bulk_alloc_allowed)
2770                 /* zero out extra memory */
2771                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2772
2773         /*
2774          * Zero out HW ring memory. Zero out extra memory at the end of
2775          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2776          * reads extra memory as zeros.
2777          */
2778         for (i = 0; i < len; i++) {
2779                 rxq->rx_ring[i] = zeroed_desc;
2780         }
2781
2782         /*
2783          * Initialize extra software ring entries. Space for these extra
2784          * entries is always allocated.
2785          */
2786         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2787         for (i = rxq->nb_rx_desc; i < len; ++i) {
2788                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2789         }
2790
2791         rxq->rx_nb_avail = 0;
2792         rxq->rx_next_avail = 0;
2793         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2794         rxq->rx_tail = 0;
2795         rxq->nb_rx_hold = 0;
2796         rxq->pkt_first_seg = NULL;
2797         rxq->pkt_last_seg = NULL;
2798
2799 #ifdef RTE_IXGBE_INC_VECTOR
2800         rxq->rxrearm_start = 0;
2801         rxq->rxrearm_nb = 0;
2802 #endif
2803 }
2804
2805 int __attribute__((cold))
2806 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2807                          uint16_t queue_idx,
2808                          uint16_t nb_desc,
2809                          unsigned int socket_id,
2810                          const struct rte_eth_rxconf *rx_conf,
2811                          struct rte_mempool *mp)
2812 {
2813         const struct rte_memzone *rz;
2814         struct ixgbe_rx_queue *rxq;
2815         struct ixgbe_hw     *hw;
2816         uint16_t len;
2817         struct ixgbe_adapter *adapter =
2818                 (struct ixgbe_adapter *)dev->data->dev_private;
2819
2820         PMD_INIT_FUNC_TRACE();
2821         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2822
2823         /*
2824          * Validate number of receive descriptors.
2825          * It must not exceed hardware maximum, and must be multiple
2826          * of IXGBE_ALIGN.
2827          */
2828         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2829                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2830                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2831                 return -EINVAL;
2832         }
2833
2834         /* Free memory prior to re-allocation if needed... */
2835         if (dev->data->rx_queues[queue_idx] != NULL) {
2836                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2837                 dev->data->rx_queues[queue_idx] = NULL;
2838         }
2839
2840         /* First allocate the rx queue data structure */
2841         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2842                                  RTE_CACHE_LINE_SIZE, socket_id);
2843         if (rxq == NULL)
2844                 return -ENOMEM;
2845         rxq->mb_pool = mp;
2846         rxq->nb_rx_desc = nb_desc;
2847         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2848         rxq->queue_id = queue_idx;
2849         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2850                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2851         rxq->port_id = dev->data->port_id;
2852         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2853                                                         0 : ETHER_CRC_LEN);
2854         rxq->drop_en = rx_conf->rx_drop_en;
2855         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2856
2857         /*
2858          * The packet type in RX descriptor is different for different NICs.
2859          * Some bits are used for X550 but reserved for other NICs.
2860          * So set different masks for different NICs.
2861          */
2862         if (hw->mac.type == ixgbe_mac_X550 ||
2863             hw->mac.type == ixgbe_mac_X550EM_x ||
2864             hw->mac.type == ixgbe_mac_X550EM_a ||
2865             hw->mac.type == ixgbe_mac_X550_vf ||
2866             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2867             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2868                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2869         else
2870                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2871
2872         /*
2873          * Allocate RX ring hardware descriptors. A memzone large enough to
2874          * handle the maximum ring size is allocated in order to allow for
2875          * resizing in later calls to the queue setup function.
2876          */
2877         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2878                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2879         if (rz == NULL) {
2880                 ixgbe_rx_queue_release(rxq);
2881                 return -ENOMEM;
2882         }
2883
2884         /*
2885          * Zero init all the descriptors in the ring.
2886          */
2887         memset(rz->addr, 0, RX_RING_SZ);
2888
2889         /*
2890          * Use the VFRDT/VFRDH registers when running on a virtual function
2891          */
2892         if (hw->mac.type == ixgbe_mac_82599_vf ||
2893             hw->mac.type == ixgbe_mac_X540_vf ||
2894             hw->mac.type == ixgbe_mac_X550_vf ||
2895             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2896             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2897                 rxq->rdt_reg_addr =
2898                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2899                 rxq->rdh_reg_addr =
2900                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2901         } else {
2902                 rxq->rdt_reg_addr =
2903                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2904                 rxq->rdh_reg_addr =
2905                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2906         }
2907
2908         rxq->rx_ring_phys_addr = rz->iova;
2909         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2910
2911         /*
2912          * Certain constraints must be met in order to use the bulk buffer
2913          * allocation Rx burst function. If any Rx queue doesn't meet them,
2914          * the feature should be disabled for the whole port.
2915          */
2916         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2917                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2918                                     "preconditions - canceling the feature for "
2919                                     "the whole port[%d]",
2920                              rxq->queue_id, rxq->port_id);
2921                 adapter->rx_bulk_alloc_allowed = false;
2922         }
2923
2924         /*
2925          * Allocate software ring. Allow for space at the end of the
2926          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2927          * function does not access an invalid memory region.
2928          */
2929         len = nb_desc;
2930         if (adapter->rx_bulk_alloc_allowed)
2931                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2932
2933         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2934                                           sizeof(struct ixgbe_rx_entry) * len,
2935                                           RTE_CACHE_LINE_SIZE, socket_id);
2936         if (!rxq->sw_ring) {
2937                 ixgbe_rx_queue_release(rxq);
2938                 return -ENOMEM;
2939         }
2940
2941         /*
2942          * Always allocate even if it's not going to be needed in order to
2943          * simplify the code.
2944          *
2945          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2946          * be requested in ixgbe_dev_rx_init(), which is called later from
2947          * dev_start() flow.
2948          */
2949         rxq->sw_sc_ring =
2950                 rte_zmalloc_socket("rxq->sw_sc_ring",
2951                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2952                                    RTE_CACHE_LINE_SIZE, socket_id);
2953         if (!rxq->sw_sc_ring) {
2954                 ixgbe_rx_queue_release(rxq);
2955                 return -ENOMEM;
2956         }
2957
2958         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2959                             "dma_addr=0x%"PRIx64,
2960                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2961                      rxq->rx_ring_phys_addr);
2962
2963         if (!rte_is_power_of_2(nb_desc)) {
2964                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2965                                     "preconditions - canceling the feature for "
2966                                     "the whole port[%d]",
2967                              rxq->queue_id, rxq->port_id);
2968                 adapter->rx_vec_allowed = false;
2969         } else
2970                 ixgbe_rxq_vec_setup(rxq);
2971
2972         dev->data->rx_queues[queue_idx] = rxq;
2973
2974         ixgbe_reset_rx_queue(adapter, rxq);
2975
2976         return 0;
2977 }
2978
2979 uint32_t
2980 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2981 {
2982 #define IXGBE_RXQ_SCAN_INTERVAL 4
2983         volatile union ixgbe_adv_rx_desc *rxdp;
2984         struct ixgbe_rx_queue *rxq;
2985         uint32_t desc = 0;
2986
2987         rxq = dev->data->rx_queues[rx_queue_id];
2988         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2989
2990         while ((desc < rxq->nb_rx_desc) &&
2991                 (rxdp->wb.upper.status_error &
2992                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2993                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2994                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2995                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2996                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2997                                 desc - rxq->nb_rx_desc]);
2998         }
2999
3000         return desc;
3001 }
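
/*
 * Illustrative sketch, for documentation only (never called by the driver):
 * the count above samples every IXGBE_RXQ_SCAN_INTERVAL-th descriptor
 * starting at the ring tail and wraps around, so the result is an estimate in
 * multiples of the scan interval.  The same walk over a plain array of DD
 * flags (one byte per descriptor, non-zero when the NIC wrote it back):
 */
static inline uint32_t __attribute__((unused))
sketch_count_done_descs(const uint8_t *dd_flags, uint16_t nb_desc,
                        uint16_t tail, uint16_t scan_interval)
{
        uint32_t count = 0;
        uint16_t idx = tail;

        while (count < nb_desc && dd_flags[idx]) {
                count += scan_interval;
                idx = (uint16_t)((idx + scan_interval) % nb_desc);
        }
        return count;
}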
3002
3003 int
3004 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
3005 {
3006         volatile union ixgbe_adv_rx_desc *rxdp;
3007         struct ixgbe_rx_queue *rxq = rx_queue;
3008         uint32_t desc;
3009
3010         if (unlikely(offset >= rxq->nb_rx_desc))
3011                 return 0;
3012         desc = rxq->rx_tail + offset;
3013         if (desc >= rxq->nb_rx_desc)
3014                 desc -= rxq->nb_rx_desc;
3015
3016         rxdp = &rxq->rx_ring[desc];
3017         return !!(rxdp->wb.upper.status_error &
3018                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3019 }
3020
3021 int
3022 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3023 {
3024         struct ixgbe_rx_queue *rxq = rx_queue;
3025         volatile uint32_t *status;
3026         uint32_t nb_hold, desc;
3027
3028         if (unlikely(offset >= rxq->nb_rx_desc))
3029                 return -EINVAL;
3030
3031 #ifdef RTE_IXGBE_INC_VECTOR
3032         if (rxq->rx_using_sse)
3033                 nb_hold = rxq->rxrearm_nb;
3034         else
3035 #endif
3036                 nb_hold = rxq->nb_rx_hold;
3037         if (offset >= rxq->nb_rx_desc - nb_hold)
3038                 return RTE_ETH_RX_DESC_UNAVAIL;
3039
3040         desc = rxq->rx_tail + offset;
3041         if (desc >= rxq->nb_rx_desc)
3042                 desc -= rxq->nb_rx_desc;
3043
3044         status = &rxq->rx_ring[desc].wb.upper.status_error;
3045         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3046                 return RTE_ETH_RX_DESC_DONE;
3047
3048         return RTE_ETH_RX_DESC_AVAIL;
3049 }
3050
3051 int
3052 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3053 {
3054         struct ixgbe_tx_queue *txq = tx_queue;
3055         volatile uint32_t *status;
3056         uint32_t desc;
3057
3058         if (unlikely(offset >= txq->nb_tx_desc))
3059                 return -EINVAL;
3060
3061         desc = txq->tx_tail + offset;
3062         /* go to next desc that has the RS bit */
3063         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3064                 txq->tx_rs_thresh;
3065         if (desc >= txq->nb_tx_desc) {
3066                 desc -= txq->nb_tx_desc;
3067                 if (desc >= txq->nb_tx_desc)
3068                         desc -= txq->nb_tx_desc;
3069         }
3070
3071         status = &txq->tx_ring[desc].wb.status;
3072         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3073                 return RTE_ETH_TX_DESC_DONE;
3074
3075         return RTE_ETH_TX_DESC_FULL;
3076 }
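
/*
 * Illustrative sketch, for documentation only (never called by the driver):
 * ixgbe_dev_tx_descriptor_status() cannot poll the requested descriptor
 * directly because only every tx_rs_thresh-th descriptor carries the RS bit
 * and therefore receives a DD write-back.  The index that is actually checked
 * is the offset rounded up to the next RS boundary and wrapped into the ring,
 * as modelled below (sketch_tx_status_desc() is a hypothetical name).
 */
static inline uint16_t __attribute__((unused))
sketch_tx_status_desc(uint16_t tx_tail, uint16_t offset,
                      uint16_t tx_rs_thresh, uint16_t nb_tx_desc)
{
        uint32_t desc = (uint32_t)tx_tail + offset;

        /* round up to the next multiple of tx_rs_thresh */
        desc = ((desc + tx_rs_thresh - 1) / tx_rs_thresh) * tx_rs_thresh;
        /* the rounded index may overshoot the ring end by up to two laps */
        while (desc >= nb_tx_desc)
                desc -= nb_tx_desc;
        return (uint16_t)desc;
}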
3077
3078 void __attribute__((cold))
3079 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3080 {
3081         unsigned i;
3082         struct ixgbe_adapter *adapter =
3083                 (struct ixgbe_adapter *)dev->data->dev_private;
3084
3085         PMD_INIT_FUNC_TRACE();
3086
3087         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3088                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3089
3090                 if (txq != NULL) {
3091                         txq->ops->release_mbufs(txq);
3092                         txq->ops->reset(txq);
3093                 }
3094         }
3095
3096         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3097                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3098
3099                 if (rxq != NULL) {
3100                         ixgbe_rx_queue_release_mbufs(rxq);
3101                         ixgbe_reset_rx_queue(adapter, rxq);
3102                 }
3103         }
3104 }
3105
3106 void
3107 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3108 {
3109         unsigned i;
3110
3111         PMD_INIT_FUNC_TRACE();
3112
3113         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3114                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3115                 dev->data->rx_queues[i] = NULL;
3116         }
3117         dev->data->nb_rx_queues = 0;
3118
3119         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3120                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3121                 dev->data->tx_queues[i] = NULL;
3122         }
3123         dev->data->nb_tx_queues = 0;
3124 }
3125
3126 /*********************************************************************
3127  *
3128  *  Device RX/TX init functions
3129  *
3130  **********************************************************************/
3131
3132 /**
3133  * Receive Side Scaling (RSS)
3134  * See section 7.1.2.8 in the following document:
3135  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3136  *
3137  * Principles:
3138  * The source and destination IP addresses of the IP header and the source
3139  * and destination ports of TCP/UDP headers, if any, of received packets are
3140  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3141  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3142  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
3143  * RSS output index which is used as the RX queue index where to store the
3144  * received packets.
3145  * The following output is supplied in the RX write-back descriptor:
3146  *     - 32-bit result of the Microsoft RSS hash function,
3147  *     - 4-bit RSS type field.
3148  */
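
/*
 * Illustrative sketch, for documentation only (never called by the driver):
 * the RETA lookup described above, done in software.  The 7 LSBs of the
 * 32-bit RSS hash select one of the 128 redirection-table entries, and that
 * entry supplies the Rx queue index.
 */
static inline uint8_t __attribute__((unused))
sketch_rss_queue_from_hash(uint32_t rss_hash, const uint8_t reta[128])
{
        return reta[rss_hash & 0x7F]; /* 7 LSBs index the 128-entry table */
}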
3149
3150 /*
3151  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3152  * Used as the default key.
3153  */
3154 static uint8_t rss_intel_key[40] = {
3155         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3156         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3157         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3158         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3159         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3160 };
3161
3162 static void
3163 ixgbe_rss_disable(struct rte_eth_dev *dev)
3164 {
3165         struct ixgbe_hw *hw;
3166         uint32_t mrqc;
3167         uint32_t mrqc_reg;
3168
3169         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3170         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3171         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3172         mrqc &= ~IXGBE_MRQC_RSSEN;
3173         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3174 }
3175
3176 static void
3177 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3178 {
3179         uint8_t  *hash_key;
3180         uint32_t mrqc;
3181         uint32_t rss_key;
3182         uint64_t rss_hf;
3183         uint16_t i;
3184         uint32_t mrqc_reg;
3185         uint32_t rssrk_reg;
3186
3187         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3188         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3189
3190         hash_key = rss_conf->rss_key;
3191         if (hash_key != NULL) {
3192                 /* Fill in RSS hash key */
3193                 for (i = 0; i < 10; i++) {
3194                         rss_key  = hash_key[(i * 4)];
3195                         rss_key |= hash_key[(i * 4) + 1] << 8;
3196                         rss_key |= hash_key[(i * 4) + 2] << 16;
3197                         rss_key |= hash_key[(i * 4) + 3] << 24;
3198                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3199                 }
3200         }
3201
3202         /* Set configured hashing protocols in MRQC register */
3203         rss_hf = rss_conf->rss_hf;
3204         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3205         if (rss_hf & ETH_RSS_IPV4)
3206                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3207         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3208                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3209         if (rss_hf & ETH_RSS_IPV6)
3210                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3211         if (rss_hf & ETH_RSS_IPV6_EX)
3212                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3213         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3214                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3215         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3216                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3217         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3218                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3219         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3220                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3221         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3222                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3223         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3224 }
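
/*
 * Illustrative sketch, for documentation only (never called by the driver):
 * the key-programming loop above packs the 40-byte RSS key into ten 32-bit
 * RSSRK register values, least-significant byte first.  The same packing
 * without any register access (sketch_pack_rss_key() is a hypothetical name):
 */
static inline void __attribute__((unused))
sketch_pack_rss_key(const uint8_t key[40], uint32_t rssrk[10])
{
        uint16_t i;

        for (i = 0; i < 10; i++)
                rssrk[i] = (uint32_t)key[i * 4] |
                           ((uint32_t)key[i * 4 + 1] << 8) |
                           ((uint32_t)key[i * 4 + 2] << 16) |
                           ((uint32_t)key[i * 4 + 3] << 24);
}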
3225
3226 int
3227 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3228                           struct rte_eth_rss_conf *rss_conf)
3229 {
3230         struct ixgbe_hw *hw;
3231         uint32_t mrqc;
3232         uint64_t rss_hf;
3233         uint32_t mrqc_reg;
3234
3235         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3236
3237         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3238                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3239                         "NIC.");
3240                 return -ENOTSUP;
3241         }
3242         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3243
3244         /*
3245          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3246          *     "RSS enabling cannot be done dynamically while it must be
3247          *      preceded by a software reset"
3248          * Before changing anything, first check that the update RSS operation
3249          * does not attempt to disable RSS, if RSS was enabled at
3250          * initialization time, or does not attempt to enable RSS, if RSS was
3251          * disabled at initialization time.
3252          */
3253         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3254         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3255         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3256                 if (rss_hf != 0) /* Enable RSS */
3257                         return -(EINVAL);
3258                 return 0; /* Nothing to do */
3259         }
3260         /* RSS enabled */
3261         if (rss_hf == 0) /* Disable RSS */
3262                 return -(EINVAL);
3263         ixgbe_hw_rss_hash_set(hw, rss_conf);
3264         return 0;
3265 }
3266
3267 int
3268 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3269                             struct rte_eth_rss_conf *rss_conf)
3270 {
3271         struct ixgbe_hw *hw;
3272         uint8_t *hash_key;
3273         uint32_t mrqc;
3274         uint32_t rss_key;
3275         uint64_t rss_hf;
3276         uint16_t i;
3277         uint32_t mrqc_reg;
3278         uint32_t rssrk_reg;
3279
3280         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3281         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3282         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3283         hash_key = rss_conf->rss_key;
3284         if (hash_key != NULL) {
3285                 /* Return RSS hash key */
3286                 for (i = 0; i < 10; i++) {
3287                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3288                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3289                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3290                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3291                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3292                 }
3293         }
3294
3295         /* Get RSS functions configured in MRQC register */
3296         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3297         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3298                 rss_conf->rss_hf = 0;
3299                 return 0;
3300         }
3301         rss_hf = 0;
3302         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3303                 rss_hf |= ETH_RSS_IPV4;
3304         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3305                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3306         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3307                 rss_hf |= ETH_RSS_IPV6;
3308         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3309                 rss_hf |= ETH_RSS_IPV6_EX;
3310         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3311                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3312         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3313                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3314         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3315                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3316         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3317                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3318         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3319                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3320         rss_conf->rss_hf = rss_hf;
3321         return 0;
3322 }
3323
3324 static void
3325 ixgbe_rss_configure(struct rte_eth_dev *dev)
3326 {
3327         struct rte_eth_rss_conf rss_conf;
3328         struct ixgbe_hw *hw;
3329         uint32_t reta;
3330         uint16_t i;
3331         uint16_t j;
3332         uint16_t sp_reta_size;
3333         uint32_t reta_reg;
3334
3335         PMD_INIT_FUNC_TRACE();
3336         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3337
3338         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3339
3340         /*
3341          * Fill in redirection table
3342          * The byte-swap is needed because NIC registers are in
3343          * little-endian order.
3344          */
3345         reta = 0;
3346         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3347                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3348
3349                 if (j == dev->data->nb_rx_queues)
3350                         j = 0;
3351                 reta = (reta << 8) | j;
3352                 if ((i & 3) == 3)
3353                         IXGBE_WRITE_REG(hw, reta_reg,
3354                                         rte_bswap32(reta));
3355         }
3356
3357         /*
3358          * Configure the RSS key and the RSS protocols used to compute
3359          * the RSS hash of input packets.
3360          */
3361         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3362         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3363                 ixgbe_rss_disable(dev);
3364                 return;
3365         }
3366         if (rss_conf.rss_key == NULL)
3367                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3368         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3369 }
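
/*
 * Illustrative sketch, for documentation only (never called by the driver):
 * the RETA fill above assigns Rx queues round-robin and packs four 8-bit
 * entries into each 32-bit register, with a byte swap because the registers
 * are little-endian.  The equivalent computation on plain arrays; reta_size
 * is 128 on most parts and larger on X550, and sketch_fill_reta() is a
 * hypothetical name.
 */
static inline void __attribute__((unused))
sketch_fill_reta(uint32_t *reta_regs, uint16_t reta_size, uint16_t nb_rx_queues)
{
        uint16_t i, j = 0;

        for (i = 0; i < (uint16_t)((reta_size + 3) / 4); i++)
                reta_regs[i] = 0;
        for (i = 0; i < reta_size; i++, j++) {
                if (j == nb_rx_queues)
                        j = 0;
                /* entry i lands in byte i%4 of register i/4 after the swap */
                reta_regs[i / 4] |= (uint32_t)j << (8 * (i % 4));
        }
}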
3370
3371 #define NUM_VFTA_REGISTERS 128
3372 #define NIC_RX_BUFFER_SIZE 0x200
3373 #define X550_RX_BUFFER_SIZE 0x180
3374
3375 static void
3376 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3377 {
3378         struct rte_eth_vmdq_dcb_conf *cfg;
3379         struct ixgbe_hw *hw;
3380         enum rte_eth_nb_pools num_pools;
3381         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3382         uint16_t pbsize;
3383         uint8_t nb_tcs; /* number of traffic classes */
3384         int i;
3385
3386         PMD_INIT_FUNC_TRACE();
3387         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3388         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3389         num_pools = cfg->nb_queue_pools;
3390         /* Check we have a valid number of pools */
3391         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3392                 ixgbe_rss_disable(dev);
3393                 return;
3394         }
3395         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3396         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3397
3398         /*
3399          * RXPBSIZE
3400          * split rx buffer up into sections, each for 1 traffic class
3401          */
3402         switch (hw->mac.type) {
3403         case ixgbe_mac_X550:
3404         case ixgbe_mac_X550EM_x:
3405         case ixgbe_mac_X550EM_a:
3406                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3407                 break;
3408         default:
3409                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3410                 break;
3411         }
3412         for (i = 0; i < nb_tcs; i++) {
3413                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3414
3415                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3416                 /* clear 10 bits. */
3417                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3418                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3419         }
3420         /* zero alloc all unused TCs */
3421         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3422                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3423
3424                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3425                 /* clear 10 bits. */
3426                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3427         }
3428
3429         /* MRQC: enable vmdq and dcb */
3430         mrqc = (num_pools == ETH_16_POOLS) ?
3431                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3432         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3433
3434         /* PFVTCTL: turn on virtualisation and set the default pool */
3435         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3436         if (cfg->enable_default_pool) {
3437                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3438         } else {
3439                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3440         }
3441
3442         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3443
3444         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3445         queue_mapping = 0;
3446         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3447                 /*
3448                  * mapping is done with 3 bits per priority,
3449                  * so shift by i*3 each time
3450                  */
3451                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3452
3453         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3454
3455         /* RTRPCS: DCB related */
3456         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3457
3458         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3459         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3460         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3461         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3462
3463         /* VFTA - enable all vlan filters */
3464         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3465                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3466         }
3467
3468         /* VFRE: pool enabling for receive - 16 or 32 */
3469         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3470                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3471
3472         /*
3473          * MPSAR - allow pools to read specific mac addresses
3474          * In this case, all pools should be able to read from mac addr 0
3475          */
3476         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3477         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3478
3479         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3480         for (i = 0; i < cfg->nb_pool_maps; i++) {
3481                 /* set vlan id in VF register and set the valid bit */
3482                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3483                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3484                 /*
3485                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3486                  * pools, we only need to use the first half of the register
3487                  * i.e. bits 0-31
3488                  */
3489                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3490         }
3491 }
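
/*
 * Illustrative sketch, for documentation only (never called by the driver):
 * the RTRUP2TC programming above encodes the user-priority to traffic-class
 * map with 3 bits per priority, priority 0 in the lowest bits.  The same
 * packing over a plain 8-entry dcb_tc[] array (sketch_pack_up2tc() is a
 * hypothetical name):
 */
static inline uint32_t __attribute__((unused))
sketch_pack_up2tc(const uint8_t dcb_tc[8])
{
        uint32_t queue_mapping = 0;
        int i;

        for (i = 0; i < 8; i++)
                queue_mapping |= (uint32_t)(dcb_tc[i] & 0x07) << (i * 3);
        return queue_mapping;
}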
3492
3493 /**
3494  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3495  * @dev: pointer to eth_dev structure
3496  * @dcb_config: pointer to ixgbe_dcb_config structure
3497  */
3498 static void
3499 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3500                        struct ixgbe_dcb_config *dcb_config)
3501 {
3502         uint32_t reg;
3503         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3504
3505         PMD_INIT_FUNC_TRACE();
3506         if (hw->mac.type != ixgbe_mac_82598EB) {
3507                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3508                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3509                 reg |= IXGBE_RTTDCS_ARBDIS;
3510                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3511
3512                 /* Enable DCB for Tx with 8 TCs */
3513                 if (dcb_config->num_tcs.pg_tcs == 8) {
3514                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3515                 } else {
3516                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3517                 }
3518                 if (dcb_config->vt_mode)
3519                         reg |= IXGBE_MTQC_VT_ENA;
3520                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3521
3522                 /* Enable the Tx desc arbiter */
3523                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3524                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3525                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3526
3527                 /* Enable Security TX Buffer IFG for DCB */
3528                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3529                 reg |= IXGBE_SECTX_DCB;
3530                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3531         }
3532 }
3533
3534 /**
3535  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3536  * @dev: pointer to rte_eth_dev structure
3537  * @dcb_config: pointer to ixgbe_dcb_config structure
3538  */
3539 static void
3540 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3541                         struct ixgbe_dcb_config *dcb_config)
3542 {
3543         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3544                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3545         struct ixgbe_hw *hw =
3546                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3547
3548         PMD_INIT_FUNC_TRACE();
3549         if (hw->mac.type != ixgbe_mac_82598EB)
3550                 /* PF VF Transmit Enable */
3551                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3552                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3553
3554         /* Configure general DCB TX parameters */
3555         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3556 }
3557
3558 static void
3559 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3560                         struct ixgbe_dcb_config *dcb_config)
3561 {
3562         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3563                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3564         struct ixgbe_dcb_tc_config *tc;
3565         uint8_t i, j;
3566
3567         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3568         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3569                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3570                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3571         } else {
3572                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3573                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3574         }
3575
3576         /* Initialize User Priority to Traffic Class mapping */
3577         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3578                 tc = &dcb_config->tc_config[j];
3579                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3580         }
3581
3582         /* User Priority to Traffic Class mapping */
3583         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3584                 j = vmdq_rx_conf->dcb_tc[i];
3585                 tc = &dcb_config->tc_config[j];
3586                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3587                                                 (uint8_t)(1 << i);
3588         }
3589 }
3590
3591 static void
3592 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3593                         struct ixgbe_dcb_config *dcb_config)
3594 {
3595         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3596                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3597         struct ixgbe_dcb_tc_config *tc;
3598         uint8_t i, j;
3599
3600         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3601         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3602                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3603                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3604         } else {
3605                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3606                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3607         }
3608
3609         /* Initialize User Priority to Traffic Class mapping */
3610         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3611                 tc = &dcb_config->tc_config[j];
3612                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3613         }
3614
3615         /* User Priority to Traffic Class mapping */
3616         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3617                 j = vmdq_tx_conf->dcb_tc[i];
3618                 tc = &dcb_config->tc_config[j];
3619                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3620                                                 (uint8_t)(1 << i);
3621         }
3622 }
3623
3624 static void
3625 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3626                 struct ixgbe_dcb_config *dcb_config)
3627 {
3628         struct rte_eth_dcb_rx_conf *rx_conf =
3629                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3630         struct ixgbe_dcb_tc_config *tc;
3631         uint8_t i, j;
3632
3633         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3634         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3635
3636         /* Initialize User Priority to Traffic Class mapping */
3637         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3638                 tc = &dcb_config->tc_config[j];
3639                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3640         }
3641
3642         /* User Priority to Traffic Class mapping */
3643         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3644                 j = rx_conf->dcb_tc[i];
3645                 tc = &dcb_config->tc_config[j];
3646                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3647                                                 (uint8_t)(1 << i);
3648         }
3649 }
3650
3651 static void
3652 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3653                 struct ixgbe_dcb_config *dcb_config)
3654 {
3655         struct rte_eth_dcb_tx_conf *tx_conf =
3656                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3657         struct ixgbe_dcb_tc_config *tc;
3658         uint8_t i, j;
3659
3660         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3661         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3662
3663         /* Initialize User Priority to Traffic Class mapping */
3664         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3665                 tc = &dcb_config->tc_config[j];
3666                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3667         }
3668
3669         /* User Priority to Traffic Class mapping */
3670         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3671                 j = tx_conf->dcb_tc[i];
3672                 tc = &dcb_config->tc_config[j];
3673                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3674                                                 (uint8_t)(1 << i);
3675         }
3676 }
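
/*
 * Illustrative sketch, for documentation only (never called by the driver):
 * the Rx and Tx DCB config helpers above all perform the same conversion:
 * each traffic class receives a bitmap of the user priorities mapped onto it
 * (bit i set when priority i belongs to that TC).  A standalone version over
 * plain arrays, assuming every dcb_tc[] value is a valid TC index below 8:
 */
static inline void __attribute__((unused))
sketch_build_up_to_tc_bitmaps(const uint8_t dcb_tc[8],
                              uint8_t up_to_tc_bitmap[8])
{
        uint8_t i;

        for (i = 0; i < 8; i++)
                up_to_tc_bitmap[i] = 0; /* reset all TC bitmaps first */
        for (i = 0; i < 8; i++)
                up_to_tc_bitmap[dcb_tc[i]] |= (uint8_t)(1 << i);
}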
3677
3678 /**
3679  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3680  * @dev: pointer to eth_dev structure
3681  * @dcb_config: pointer to ixgbe_dcb_config structure
3682  */
3683 static void
3684 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3685                        struct ixgbe_dcb_config *dcb_config)
3686 {
3687         uint32_t reg;
3688         uint32_t vlanctrl;
3689         uint8_t i;
3690         uint32_t q;
3691         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3692
3693         PMD_INIT_FUNC_TRACE();
3694         /*
3695          * Disable the arbiter before changing parameters
3696          * (always enable recycle mode; WSP)
3697          */
3698         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3699         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3700
3701         if (hw->mac.type != ixgbe_mac_82598EB) {
3702                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3703                 if (dcb_config->num_tcs.pg_tcs == 4) {
3704                         if (dcb_config->vt_mode)
3705                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3706                                         IXGBE_MRQC_VMDQRT4TCEN;
3707                         else {
3708                                 /* Whether the mode is DCB or DCB_RSS, just
3709                                  * set MRQE to RSSXTCEN; RSS is controlled
3710                                  * by RSS_FIELD.
3711                                  */
3712                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3713                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3714                                         IXGBE_MRQC_RTRSS4TCEN;
3715                         }
3716                 }
3717                 if (dcb_config->num_tcs.pg_tcs == 8) {
3718                         if (dcb_config->vt_mode)
3719                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3720                                         IXGBE_MRQC_VMDQRT8TCEN;
3721                         else {
3722                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3723                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3724                                         IXGBE_MRQC_RTRSS8TCEN;
3725                         }
3726                 }
3727
3728                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3729
3730                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3731                         /* Disable drop for all queues in VMDQ mode*/
3732                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3733                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3734                                                 (IXGBE_QDE_WRITE |
3735                                                  (q << IXGBE_QDE_IDX_SHIFT)));
3736                 } else {
3737                         /* Enable drop for all queues in SRIOV mode */
3738                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3739                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3740                                                 (IXGBE_QDE_WRITE |
3741                                                  (q << IXGBE_QDE_IDX_SHIFT) |
3742                                                  IXGBE_QDE_ENABLE));
3743                 }
3744         }
3745
3746         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3747         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3748         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3749         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3750
3751         /* VFTA - enable all vlan filters */
3752         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3753                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3754         }
3755
3756         /*
3757          * Configure Rx packet plane (recycle mode; WSP) and
3758          * enable arbiter
3759          */
3760         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3761         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3762 }
3763
3764 static void
3765 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3766                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3767 {
3768         switch (hw->mac.type) {
3769         case ixgbe_mac_82598EB:
3770                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3771                 break;
3772         case ixgbe_mac_82599EB:
3773         case ixgbe_mac_X540:
3774         case ixgbe_mac_X550:
3775         case ixgbe_mac_X550EM_x:
3776         case ixgbe_mac_X550EM_a:
3777                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3778                                                   tsa, map);
3779                 break;
3780         default:
3781                 break;
3782         }
3783 }
3784
3785 static void
3786 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3787                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3788 {
3789         switch (hw->mac.type) {
3790         case ixgbe_mac_82598EB:
3791                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3792                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3793                 break;
3794         case ixgbe_mac_82599EB:
3795         case ixgbe_mac_X540:
3796         case ixgbe_mac_X550:
3797         case ixgbe_mac_X550EM_x:
3798         case ixgbe_mac_X550EM_a:
3799                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3800                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3801                 break;
3802         default:
3803                 break;
3804         }
3805 }
3806
3807 #define DCB_RX_CONFIG  1
3808 #define DCB_TX_CONFIG  1
3809 #define DCB_TX_PB      1024
3810 /**
3811  * ixgbe_dcb_hw_configure - Enable DCB and configure
3812  * general DCB parameters, in both VT mode and non-VT mode
3813  * @dev: pointer to rte_eth_dev structure
3814  * @dcb_config: pointer to ixgbe_dcb_config structure
3815  */
3816 static int
3817 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3818                         struct ixgbe_dcb_config *dcb_config)
3819 {
3820         int     ret = 0;
3821         uint8_t i, pfc_en, nb_tcs;
3822         uint16_t pbsize, rx_buffer_size;
3823         uint8_t config_dcb_rx = 0;
3824         uint8_t config_dcb_tx = 0;
3825         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3826         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3827         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3828         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3829         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3830         struct ixgbe_dcb_tc_config *tc;
3831         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3832         struct ixgbe_hw *hw =
3833                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3834         struct ixgbe_bw_conf *bw_conf =
3835                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
3836
3837         switch (dev->data->dev_conf.rxmode.mq_mode) {
3838         case ETH_MQ_RX_VMDQ_DCB:
3839                 dcb_config->vt_mode = true;
3840                 if (hw->mac.type != ixgbe_mac_82598EB) {
3841                         config_dcb_rx = DCB_RX_CONFIG;
3842                         /*
3843                          * Get DCB and VT RX configuration parameters
3844                          * from rte_eth_conf.
3845                          */
3846                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3847                         /* Configure general VMDQ and DCB RX parameters */
3848                         ixgbe_vmdq_dcb_configure(dev);
3849                 }
3850                 break;
3851         case ETH_MQ_RX_DCB:
3852         case ETH_MQ_RX_DCB_RSS:
3853                 dcb_config->vt_mode = false;
3854                 config_dcb_rx = DCB_RX_CONFIG;
3855                 /* Get DCB RX configuration parameters from rte_eth_conf */
3856                 ixgbe_dcb_rx_config(dev, dcb_config);
3857                 /* Configure general DCB RX parameters */
3858                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
3859                 break;
3860         default:
3861                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3862                 break;
3863         }
3864         switch (dev->data->dev_conf.txmode.mq_mode) {
3865         case ETH_MQ_TX_VMDQ_DCB:
3866                 dcb_config->vt_mode = true;
3867                 config_dcb_tx = DCB_TX_CONFIG;
3868                 /* get DCB and VT TX configuration parameters
3869                  * from rte_eth_conf
3870                  */
3871                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3872                 /* Configure general VMDQ and DCB TX parameters */
3873                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3874                 break;
3875
3876         case ETH_MQ_TX_DCB:
3877                 dcb_config->vt_mode = false;
3878                 config_dcb_tx = DCB_TX_CONFIG;
3879                 /* Get DCB TX configuration parameters from rte_eth_conf */
3880                 ixgbe_dcb_tx_config(dev, dcb_config);
3881                 /* Configure general DCB TX parameters */
3882                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3883                 break;
3884         default:
3885                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3886                 break;
3887         }
3888
3889         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3890         /* Unpack map */
3891         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3892         if (nb_tcs == ETH_4_TCS) {
3893                 /* Avoid un-configured priority mapping to TC0 */
3894                 uint8_t j = 4;
3895                 uint8_t mask = 0xFF;
3896
3897                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3898                         mask = (uint8_t)(mask & (~(1 << map[i])));
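                /* Assign the remaining user priorities (4..7) to traffic
                 * classes not already used by priorities 0..3, so that
                 * unconfigured priorities do not all fall back to TC0.
                 */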
3899                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3900                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3901                                 map[j++] = i;
3902                         mask >>= 1;
3903                 }
3904                 /* Re-configure 4 TCs BW */
3905                 for (i = 0; i < nb_tcs; i++) {
3906                         tc = &dcb_config->tc_config[i];
3907                         if (bw_conf->tc_num != nb_tcs)
3908                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3909                                         (uint8_t)(100 / nb_tcs);
3910                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3911                                                 (uint8_t)(100 / nb_tcs);
3912                 }
3913                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3914                         tc = &dcb_config->tc_config[i];
3915                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3916                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3917                 }
3918         } else {
3919                 /* Re-configure 8 TCs BW */
3920                 for (i = 0; i < nb_tcs; i++) {
3921                         tc = &dcb_config->tc_config[i];
3922                         if (bw_conf->tc_num != nb_tcs)
3923                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3924                                         (uint8_t)(100 / nb_tcs + (i & 1));
3925                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3926                                 (uint8_t)(100 / nb_tcs + (i & 1));
3927                 }
3928         }
3929
3930         switch (hw->mac.type) {
3931         case ixgbe_mac_X550:
3932         case ixgbe_mac_X550EM_x:
3933         case ixgbe_mac_X550EM_a:
3934                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3935                 break;
3936         default:
3937                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3938                 break;
3939         }
3940
3941         if (config_dcb_rx) {
3942                 /* Set RX buffer size */
3943                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3944                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3945
3946                 for (i = 0; i < nb_tcs; i++) {
3947                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3948                 }
3949                 /* zero alloc all unused TCs */
3950                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3951                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3952                 }
3953         }
3954         if (config_dcb_tx) {
3955                 /* Only an equally distributed Tx packet
3956                  * buffer strategy is supported.
3957                  */
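                /* TXPBTHRESH is expressed in KB, hence the divide by
                 * DCB_TX_PB; leave room for one maximum-sized packet
                 * (IXGBE_TXPKT_SIZE_MAX, also in KB).
                 */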
3958                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3959                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3960
3961                 for (i = 0; i < nb_tcs; i++) {
3962                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3963                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3964                 }
3965                 /* Clear unused TCs, if any, to zero buffer size*/
3966                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3967                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3968                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3969                 }
3970         }
3971
3972         /* Calculate traffic class credits */
3973         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3974                                 IXGBE_DCB_TX_CONFIG);
3975         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3976                                 IXGBE_DCB_RX_CONFIG);
3977
3978         if (config_dcb_rx) {
3979                 /* Unpack CEE standard containers */
3980                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3981                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3982                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3983                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3984                 /* Configure PG(ETS) RX */
3985                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3986         }
3987
3988         if (config_dcb_tx) {
3989                 /* Unpack CEE standard containers */
3990                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3991                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3992                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3993                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3994                 /* Configure PG(ETS) TX */
3995                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3996         }
3997
3998         /* Configure queue statistics registers */
3999         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4000
4001         /* Check if the PFC is supported */
4002         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
4003                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4004                 for (i = 0; i < nb_tcs; i++) {
4005                         /*
4006                          * If the TC count is 8, the default high_water is 48
4007                          * and the default low_water is 16.
4008                          */
4009                         hw->fc.high_water[i] = (pbsize * 3) / 4;
4010                         hw->fc.low_water[i] = pbsize / 4;
4011                         /* Enable pfc for this TC */
4012                         tc = &dcb_config->tc_config[i];
4013                         tc->pfc = ixgbe_dcb_pfc_enabled;
4014                 }
4015                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
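                /* With only 4 TCs, just the lower four PFC enable bits are valid */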
4016                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
4017                         pfc_en &= 0x0F;
4018                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4019         }
4020
4021         return ret;
4022 }
4023
4024 /**
4025  * ixgbe_configure_dcb - Configure DCB hardware
4026  * @dev: pointer to rte_eth_dev
4027  */
4028 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4029 {
4030         struct ixgbe_dcb_config *dcb_cfg =
4031                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4032         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4033
4034         PMD_INIT_FUNC_TRACE();
4035
4036         /* Check that the mq_mode supports DCB */
4037         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4038             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4039             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4040                 return;
4041
4042         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4043                 return;
4044
4045         /* Configure DCB hardware */
4046         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4047 }
4048
4049 /*
4050  * VMDq is only supported on 10 GbE NICs.
4051  */
4052 static void
4053 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4054 {
4055         struct rte_eth_vmdq_rx_conf *cfg;
4056         struct ixgbe_hw *hw;
4057         enum rte_eth_nb_pools num_pools;
4058         uint32_t mrqc, vt_ctl, vlanctrl;
4059         uint32_t vmolr = 0;
4060         int i;
4061
4062         PMD_INIT_FUNC_TRACE();
4063         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4064         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4065         num_pools = cfg->nb_queue_pools;
4066
4067         ixgbe_rss_disable(dev);
4068
4069         /* MRQC: enable vmdq */
4070         mrqc = IXGBE_MRQC_VMDQEN;
4071         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4072
4073         /* PFVTCTL: turn on virtualisation and set the default pool */
4074         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
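        /* Either point unmatched packets at the configured default pool or
         * disable the default pool entirely (DIS_DEFPL).
         */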
4075         if (cfg->enable_default_pool)
4076                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4077         else
4078                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4079
4080         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4081
4082         for (i = 0; i < (int)num_pools; i++) {
4083                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4084                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4085         }
4086
4087         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4088         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4089         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4090         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4091
4092         /* VFTA - enable all vlan filters */
4093         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4094                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4095
4096         /* VFRE: pool enabling for receive - 64 */
4097         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4098         if (num_pools == ETH_64_POOLS)
4099                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4100
4101         /*
4102          * MPSAR - allow pools to read specific mac addresses
4103          * In this case, all pools should be able to read from mac addr 0
4104          */
4105         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4106         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4107
4108         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4109         for (i = 0; i < cfg->nb_pool_maps; i++) {
4110                 /* set vlan id in VF register and set the valid bit */
4111                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4112                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4113                 /*
4114                  * Put the allowed pools in VFB reg. As we only have 16 or 64
4115                  * pools, we only need to use the first half of the register
4116                  * i.e. bits 0-31
4117                  */
4118                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4119                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4120                                         (cfg->pool_map[i].pools & UINT32_MAX));
4121                 else
4122                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4123                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4124
4125         }
4126
4127         /* PFDMA Tx General Switch Control: enable VMDq loopback */
4128         if (cfg->enable_loop_back) {
4129                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4130                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4131                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4132         }
4133
4134         IXGBE_WRITE_FLUSH(hw);
4135 }
4136
4137 /*
4138  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4139  * @hw: pointer to hardware structure
4140  */
4141 static void
4142 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4143 {
4144         uint32_t reg;
4145         uint32_t q;
4146
4147         PMD_INIT_FUNC_TRACE();
4148         /* PF/VF Transmit Enable */
4149         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4150         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4151
4152         /* Disable the Tx desc arbiter so that MTQC can be changed */
4153         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4154         reg |= IXGBE_RTTDCS_ARBDIS;
4155         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4156
4157         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4158         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4159
4160         /* Disable drop for all queues */
4161         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4162                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4163                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4164
4165         /* Enable the Tx desc arbiter */
4166         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4167         reg &= ~IXGBE_RTTDCS_ARBDIS;
4168         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4169
4170         IXGBE_WRITE_FLUSH(hw);
4171 }
4172
4173 static int __attribute__((cold))
4174 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4175 {
4176         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4177         uint64_t dma_addr;
4178         unsigned int i;
4179
4180         /* Initialize software ring entries */
4181         for (i = 0; i < rxq->nb_rx_desc; i++) {
4182                 volatile union ixgbe_adv_rx_desc *rxd;
4183                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4184
4185                 if (mbuf == NULL) {
4186                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4187                                      (unsigned) rxq->queue_id);
4188                         return -ENOMEM;
4189                 }
4190
4191                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4192                 mbuf->port = rxq->port_id;
4193
4194                 dma_addr =
4195                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4196                 rxd = &rxq->rx_ring[i];
4197                 rxd->read.hdr_addr = 0;
4198                 rxd->read.pkt_addr = dma_addr;
4199                 rxe[i].mbuf = mbuf;
4200         }
4201
4202         return 0;
4203 }
4204
4205 static int
4206 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4207 {
4208         struct ixgbe_hw *hw;
4209         uint32_t mrqc;
4210
4211         ixgbe_rss_configure(dev);
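        /* The RSS key and redirection table are programmed first; MRQC is
         * then switched below to the VMDq+RSS mode matching the number of
         * active SR-IOV pools.
         */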
4212
4213         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4214
4215         /* MRQC: enable VF RSS */
4216         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4217         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4218         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4219         case ETH_64_POOLS:
4220                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4221                 break;
4222
4223         case ETH_32_POOLS:
4224                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4225                 break;
4226
4227         default:
4228                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4229                 return -EINVAL;
4230         }
4231
4232         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4233
4234         return 0;
4235 }
4236
4237 static int
4238 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4239 {
4240         struct ixgbe_hw *hw =
4241                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4242
4243         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4244         case ETH_64_POOLS:
4245                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4246                         IXGBE_MRQC_VMDQEN);
4247                 break;
4248
4249         case ETH_32_POOLS:
4250                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4251                         IXGBE_MRQC_VMDQRT4TCEN);
4252                 break;
4253
4254         case ETH_16_POOLS:
4255                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4256                         IXGBE_MRQC_VMDQRT8TCEN);
4257                 break;
4258         default:
4259                 PMD_INIT_LOG(ERR,
4260                         "invalid pool number in IOV mode");
4261                 break;
4262         }
4263         return 0;
4264 }
4265
4266 static int
4267 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4268 {
4269         struct ixgbe_hw *hw =
4270                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4271
4272         if (hw->mac.type == ixgbe_mac_82598EB)
4273                 return 0;
4274
4275         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4276                 /*
4277                  * SRIOV inactive scheme
4278                  * any DCB/RSS w/o VMDq multi-queue setting
4279                  */
4280                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4281                 case ETH_MQ_RX_RSS:
4282                 case ETH_MQ_RX_DCB_RSS:
4283                 case ETH_MQ_RX_VMDQ_RSS:
4284                         ixgbe_rss_configure(dev);
4285                         break;
4286
4287                 case ETH_MQ_RX_VMDQ_DCB:
4288                         ixgbe_vmdq_dcb_configure(dev);
4289                         break;
4290
4291                 case ETH_MQ_RX_VMDQ_ONLY:
4292                         ixgbe_vmdq_rx_hw_configure(dev);
4293                         break;
4294
4295                 case ETH_MQ_RX_NONE:
4296                 default:
4297                         /* If mq_mode is none, disable RSS. */
4298                         ixgbe_rss_disable(dev);
4299                         break;
4300                 }
4301         } else {
4302                 /* SRIOV active scheme
4303                  * Support RSS together with SRIOV.
4304                  */
4305                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4306                 case ETH_MQ_RX_RSS:
4307                 case ETH_MQ_RX_VMDQ_RSS:
4308                         ixgbe_config_vf_rss(dev);
4309                         break;
4310                 case ETH_MQ_RX_VMDQ_DCB:
4311                 case ETH_MQ_RX_DCB:
4312                 /* In SRIOV, the configuration is the same as the VMDq case */
4313                         ixgbe_vmdq_dcb_configure(dev);
4314                         break;
4315                 /* DCB/RSS together with SRIOV is not supported */
4316                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4317                 case ETH_MQ_RX_DCB_RSS:
4318                         PMD_INIT_LOG(ERR,
4319                                 "DCB/RSS is not supported with VMDq & SRIOV");
4320                         return -1;
4321                 default:
4322                         ixgbe_config_vf_default(dev);
4323                         break;
4324                 }
4325         }
4326
4327         return 0;
4328 }
4329
4330 static int
4331 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4332 {
4333         struct ixgbe_hw *hw =
4334                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4335         uint32_t mtqc;
4336         uint32_t rttdcs;
4337
4338         if (hw->mac.type == ixgbe_mac_82598EB)
4339                 return 0;
4340
4341         /* disable arbiter before setting MTQC */
4342         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4343         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4344         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4345
4346         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4347                 /*
4348                  * SRIOV inactive scheme
4349                  * any DCB w/o VMDq multi-queue setting
4350                  */
4351                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4352                         ixgbe_vmdq_tx_hw_configure(hw);
4353                 else {
4354                         mtqc = IXGBE_MTQC_64Q_1PB;
4355                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4356                 }
4357         } else {
4358                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4359
4360                 /*
4361                  * SRIOV active scheme
4362                  * FIXME: add support for DCB together with VMDq & SRIOV
4363                  */
4364                 case ETH_64_POOLS:
4365                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4366                         break;
4367                 case ETH_32_POOLS:
4368                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4369                         break;
4370                 case ETH_16_POOLS:
4371                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4372                                 IXGBE_MTQC_8TC_8TQ;
4373                         break;
4374                 default:
4375                         mtqc = IXGBE_MTQC_64Q_1PB;
4376                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4377                 }
4378                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4379         }
4380
4381         /* re-enable arbiter */
4382         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4383         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4384
4385         return 0;
4386 }
4387
4388 /**
4389  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4390  *
4391  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4392  * spec rev. 3.0 chapter 8.2.3.8.13.
4393  *
4394  * @pool Memory pool of the Rx queue
4395  */
4396 static inline uint32_t
4397 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4398 {
4399         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4400
4401         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4402         uint16_t maxdesc =
4403                 IPV4_MAX_PKT_LEN /
4404                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
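        /* maxdesc: how many Rx buffers from this pool fit within the
         * maximum RSC aggregation size; round it down to one of the
         * discrete MAXDESC values the hardware supports.
         */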
4405
4406         if (maxdesc >= 16)
4407                 return IXGBE_RSCCTL_MAXDESC_16;
4408         else if (maxdesc >= 8)
4409                 return IXGBE_RSCCTL_MAXDESC_8;
4410         else if (maxdesc >= 4)
4411                 return IXGBE_RSCCTL_MAXDESC_4;
4412         else
4413                 return IXGBE_RSCCTL_MAXDESC_1;
4414 }
4415
4416 /**
4417  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4418  * interrupt
4419  *
4420  * (Taken from FreeBSD tree)
4421  * (yes this is all very magic and confusing :)
4422  *
4423  * @dev port handle
4424  * @entry the register array entry
4425  * @vector the MSIX vector for this queue
4426  * @type RX/TX/MISC
4427  */
4428 static void
4429 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4430 {
4431         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4432         u32 ivar, index;
4433
4434         vector |= IXGBE_IVAR_ALLOC_VAL;
4435
4436         switch (hw->mac.type) {
4437
4438         case ixgbe_mac_82598EB:
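                /* 82598 maps causes as 64 Rx entries followed by 64 Tx
                 * entries, four 8-bit entries per IVAR register.
                 */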
4439                 if (type == -1)
4440                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4441                 else
4442                         entry += (type * 64);
4443                 index = (entry >> 2) & 0x1F;
4444                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4445                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4446                 ivar |= (vector << (8 * (entry & 0x3)));
4447                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4448                 break;
4449
4450         case ixgbe_mac_82599EB:
4451         case ixgbe_mac_X540:
4452                 if (type == -1) { /* MISC IVAR */
4453                         index = (entry & 1) * 8;
4454                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4455                         ivar &= ~(0xFF << index);
4456                         ivar |= (vector << index);
4457                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4458                 } else {        /* RX/TX IVARS */
4459                         index = (16 * (entry & 1)) + (8 * type);
4460                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4461                         ivar &= ~(0xFF << index);
4462                         ivar |= (vector << index);
4463                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4464                 }
4465
4466                 break;
4467
4468         default:
4469                 break;
4470         }
4471 }
4472
4473 void __attribute__((cold))
4474 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4475 {
4476         uint16_t i, rx_using_sse;
4477         struct ixgbe_adapter *adapter =
4478                 (struct ixgbe_adapter *)dev->data->dev_private;
4479
4480         /*
4481          * In order to allow Vector Rx, a few configuration conditions
4482          * must be met and Rx Bulk Allocation must be allowed.
4483          */
4484         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4485             !adapter->rx_bulk_alloc_allowed) {
4486                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4487                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4488                                     "not enabled",
4489                              dev->data->port_id);
4490
4491                 adapter->rx_vec_allowed = false;
4492         }
4493
4494         /*
4495          * Initialize the appropriate LRO callback.
4496          *
4497          * If all queues satisfy the bulk allocation preconditions
4498          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4499          * Otherwise use a single allocation version.
4500          */
4501         if (dev->data->lro) {
4502                 if (adapter->rx_bulk_alloc_allowed) {
4503                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4504                                            "allocation version");
4505                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4506                 } else {
4507                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4508                                            "allocation version");
4509                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4510                 }
4511         } else if (dev->data->scattered_rx) {
4512                 /*
4513                  * Set the non-LRO scattered callback: there are Vector and
4514                  * single allocation versions.
4515                  */
4516                 if (adapter->rx_vec_allowed) {
4517                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4518                                             "callback (port=%d).",
4519                                      dev->data->port_id);
4520
4521                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4522                 } else if (adapter->rx_bulk_alloc_allowed) {
4523                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4524                                            "allocation callback (port=%d).",
4525                                      dev->data->port_id);
4526                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4527                 } else {
4528                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4529                                             "single allocation) "
4530                                             "Scattered Rx callback "
4531                                             "(port=%d).",
4532                                      dev->data->port_id);
4533
4534                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4535                 }
4536         /*
4537          * Below we set "simple" callbacks according to port/queues parameters.
4538          * If parameters allow we are going to choose between the following
4539          * callbacks:
4540          *    - Vector
4541          *    - Bulk Allocation
4542          *    - Single buffer allocation (the simplest one)
4543          */
4544         } else if (adapter->rx_vec_allowed) {
4545                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4546                                     "burst size no less than %d (port=%d).",
4547                              RTE_IXGBE_DESCS_PER_LOOP,
4548                              dev->data->port_id);
4549
4550                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4551         } else if (adapter->rx_bulk_alloc_allowed) {
4552                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4553                                     "satisfied. Rx Burst Bulk Alloc function "
4554                                     "will be used on port=%d.",
4555                              dev->data->port_id);
4556
4557                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4558         } else {
4559                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4560                                     "satisfied, or Scattered Rx is requested "
4561                                     "(port=%d).",
4562                              dev->data->port_id);
4563
4564                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4565         }
4566
4567         /* Propagate information about RX function choice through all queues. */
4568
4569         rx_using_sse =
4570                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4571                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4572
4573         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4574                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4575
4576                 rxq->rx_using_sse = rx_using_sse;
4577 #ifdef RTE_LIBRTE_SECURITY
4578                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4579                                 DEV_RX_OFFLOAD_SECURITY);
4580 #endif
4581         }
4582 }
4583
4584 /**
4585  * ixgbe_set_rsc - configure RSC related port HW registers
4586  *
4587  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4588  * of 82599 Spec (x540 configuration is virtually the same).
4589  *
4590  * @dev port handle
4591  *
4592  * Returns 0 in case of success or a non-zero error code
4593  */
4594 static int
4595 ixgbe_set_rsc(struct rte_eth_dev *dev)
4596 {
4597         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4598         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4599         struct rte_eth_dev_info dev_info = { 0 };
4600         bool rsc_capable = false;
4601         uint16_t i;
4602         uint32_t rdrxctl;
4603         uint32_t rfctl;
4604
4605         /* Sanity check */
4606         dev->dev_ops->dev_infos_get(dev, &dev_info);
4607         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4608                 rsc_capable = true;
4609
4610         if (!rsc_capable && rx_conf->enable_lro) {
4611                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4612                                    "support it");
4613                 return -EINVAL;
4614         }
4615
4616         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4617
4618         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4619                 /*
4620                  * According to chapter 4.6.7.2.1 of the Spec Rev.
4621                  * 3.0 RSC configuration requires HW CRC stripping being
4622                  * enabled. If user requested both HW CRC stripping off
4623                  * and RSC on - return an error.
4624                  */
4625                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4626                                     "is disabled");
4627                 return -EINVAL;
4628         }
4629
4630         /* RFCTL configuration  */
4631         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4632         if ((rsc_capable) && (rx_conf->enable_lro))
4633                 /*
4634                  * Since NFS packet coalescing is not supported, clear
4635                  * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4636                  * enabled.
4637                  */
4638                 rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4639                            IXGBE_RFCTL_NFSR_DIS);
4640         else
4641                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4642         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4643
4644         /* If LRO hasn't been requested - we are done here. */
4645         if (!rx_conf->enable_lro)
4646                 return 0;
4647
4648         /* Set RDRXCTL.RSCACKC bit */
4649         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4650         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4651         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4652
4653         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4654         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4655                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4656                 uint32_t srrctl =
4657                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4658                 uint32_t rscctl =
4659                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4660                 uint32_t psrtype =
4661                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4662                 uint32_t eitr =
4663                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4664
4665                 /*
4666                  * ixgbe PMD doesn't support header-split at the moment.
4667                  *
4668                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4669                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4670                  * should be configured even if header split is not
4671                  * enabled. We will configure it to 128 bytes following the
4672                  * recommendation in the spec.
4673                  */
4674                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4675                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4676                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4677
4678                 /*
4679                  * TODO: Consider setting the Receive Descriptor Minimum
4680                  * Threshold Size for an RSC case. This is not an obviously
4681                  * beneficial option, but one worth considering...
4682                  */
4683
4684                 rscctl |= IXGBE_RSCCTL_RSCEN;
4685                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4686                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4687
4688                 /*
4689                  * RSC: Set ITR interval corresponding to 2K ints/s.
4690                  *
4691                  * Full-sized RSC aggregations for a 10Gb/s link will
4692                  * arrive at about 20K aggregation/s rate.
4693                  *
4694                  * A 2K ints/s rate will cause only 10% of the
4695                  * aggregations to be closed due to the interrupt timer
4696                  * expiration for a streaming at wire-speed case.
4697                  *
4698                  * For a sparse streaming case this setting will yield
4699                  * at most 500us latency for a single RSC aggregation.
4700                  */
4701                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
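                /* Note: IXGBE_EITR_CNT_WDIS should keep this write from
                 * also clearing the internal interrupt counter.
                 */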
4702                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4703
4704                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4705                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4706                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4707                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4708
4709                 /*
4710                  * RSC requires the mapping of the queue to the
4711                  * interrupt vector.
4712                  */
4713                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4714         }
4715
4716         dev->data->lro = 1;
4717
4718         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4719
4720         return 0;
4721 }
4722
4723 /*
4724  * Initializes Receive Unit.
4725  */
4726 int __attribute__((cold))
4727 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4728 {
4729         struct ixgbe_hw     *hw;
4730         struct ixgbe_rx_queue *rxq;
4731         uint64_t bus_addr;
4732         uint32_t rxctrl;
4733         uint32_t fctrl;
4734         uint32_t hlreg0;
4735         uint32_t maxfrs;
4736         uint32_t srrctl;
4737         uint32_t rdrxctl;
4738         uint32_t rxcsum;
4739         uint16_t buf_size;
4740         uint16_t i;
4741         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4742         int rc;
4743
4744         PMD_INIT_FUNC_TRACE();
4745         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4746
4747         /*
4748          * Make sure receives are disabled while setting
4749          * up the RX context (registers, descriptor rings, etc.).
4750          */
4751         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4752         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4753
4754         /* Enable receipt of broadcast frames */
4755         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4756         fctrl |= IXGBE_FCTRL_BAM;
4757         fctrl |= IXGBE_FCTRL_DPF;
4758         fctrl |= IXGBE_FCTRL_PMCF;
4759         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4760
4761         /*
4762          * Configure CRC stripping, if any.
4763          */
4764         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4765         if (rx_conf->hw_strip_crc)
4766                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4767         else
4768                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4769
4770         /*
4771          * Configure jumbo frame support, if any.
4772          */
4773         if (rx_conf->jumbo_frame == 1) {
4774                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4775                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4776                 maxfrs &= 0x0000FFFF;
4777                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4778                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4779         } else
4780                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4781
4782         /*
4783          * If loopback mode is configured for 82599, set LPBK bit.
4784          */
4785         if (hw->mac.type == ixgbe_mac_82599EB &&
4786                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4787                 hlreg0 |= IXGBE_HLREG0_LPBK;
4788         else
4789                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4790
4791         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4792
4793         /* Setup RX queues */
4794         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4795                 rxq = dev->data->rx_queues[i];
4796
4797                 /*
4798                  * Reset crc_len in case it was changed after queue setup by a
4799                  * call to configure.
4800                  */
4801                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4802
4803                 /* Setup the Base and Length of the Rx Descriptor Rings */
4804                 bus_addr = rxq->rx_ring_phys_addr;
4805                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4806                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4807                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4808                                 (uint32_t)(bus_addr >> 32));
4809                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4810                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4811                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4812                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4813
4814                 /* Configure the SRRCTL register */
4815 #ifdef RTE_HEADER_SPLIT_ENABLE
4816                 /*
4817                  * Configure Header Split
4818                  */
4819                 if (rx_conf->header_split) {
4820                         if (hw->mac.type == ixgbe_mac_82599EB) {
4821                                 /* Must setup the PSRTYPE register */
4822                                 uint32_t psrtype;
4823
4824                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4825                                         IXGBE_PSRTYPE_UDPHDR   |
4826                                         IXGBE_PSRTYPE_IPV4HDR  |
4827                                         IXGBE_PSRTYPE_IPV6HDR;
4828                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4829                         }
4830                         srrctl = ((rx_conf->split_hdr_size <<
4831                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4832                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4833                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4834                 } else
4835 #endif
4836                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4837
4838                 /* Set if packets are dropped when no descriptors available */
4839                 if (rxq->drop_en)
4840                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4841
4842                 /*
4843                  * Configure the RX buffer size in the BSIZEPACKET field of
4844                  * the SRRCTL register of the queue.
4845                  * The value is in 1 KB resolution. Valid values can be from
4846                  * 1 KB to 16 KB.
4847                  */
4848                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4849                         RTE_PKTMBUF_HEADROOM);
4850                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4851                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4852
4853                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4854
4855                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4856                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4857
4858                 /* Add the length of two VLAN tags to support dual VLAN */
4859                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4860                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4861                         dev->data->scattered_rx = 1;
4862         }
4863
4864         if (rx_conf->enable_scatter)
4865                 dev->data->scattered_rx = 1;
4866
4867         /*
4868          * Device configured with multiple RX queues.
4869          */
4870         ixgbe_dev_mq_rx_configure(dev);
4871
4872         /*
4873          * Setup the Checksum Register.
4874          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4875          * Enable IP/L4 checksum computation by hardware if requested to do so.
4876          */
4877         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4878         rxcsum |= IXGBE_RXCSUM_PCSD;
4879         if (rx_conf->hw_ip_checksum)
4880                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4881         else
4882                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4883
4884         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4885
4886         if (hw->mac.type == ixgbe_mac_82599EB ||
4887             hw->mac.type == ixgbe_mac_X540) {
4888                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4889                 if (rx_conf->hw_strip_crc)
4890                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4891                 else
4892                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4893                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4894                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4895         }
4896
4897         rc = ixgbe_set_rsc(dev);
4898         if (rc)
4899                 return rc;
4900
4901         ixgbe_set_rx_function(dev);
4902
4903         return 0;
4904 }
4905
4906 /*
4907  * Initializes Transmit Unit.
4908  */
4909 void __attribute__((cold))
4910 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4911 {
4912         struct ixgbe_hw     *hw;
4913         struct ixgbe_tx_queue *txq;
4914         uint64_t bus_addr;
4915         uint32_t hlreg0;
4916         uint32_t txctrl;
4917         uint16_t i;
4918
4919         PMD_INIT_FUNC_TRACE();
4920         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4921
4922         /* Enable TX CRC (checksum offload requirement) and hw padding
4923          * (TSO requirement)
4924          */
4925         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4926         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4927         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4928
4929         /* Setup the Base and Length of the Tx Descriptor Rings */
4930         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4931                 txq = dev->data->tx_queues[i];
4932
4933                 bus_addr = txq->tx_ring_phys_addr;
4934                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4935                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4936                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4937                                 (uint32_t)(bus_addr >> 32));
4938                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4939                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4940                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4941                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4942                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4943
4944                 /*
4945                  * Disable Tx Head Writeback RO bit, since this hoses
4946                  * bookkeeping if things aren't delivered in order.
4947                  */
4948                 switch (hw->mac.type) {
4949                 case ixgbe_mac_82598EB:
4950                         txctrl = IXGBE_READ_REG(hw,
4951                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4952                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4953                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4954                                         txctrl);
4955                         break;
4956
4957                 case ixgbe_mac_82599EB:
4958                 case ixgbe_mac_X540:
4959                 case ixgbe_mac_X550:
4960                 case ixgbe_mac_X550EM_x:
4961                 case ixgbe_mac_X550EM_a:
4962                 default:
4963                         txctrl = IXGBE_READ_REG(hw,
4964                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4965                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4966                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4967                                         txctrl);
4968                         break;
4969                 }
4970         }
4971
4972         /* Device configured with multiple TX queues. */
4973         ixgbe_dev_mq_tx_configure(dev);
4974 }
4975
4976 /*
4977  * Set up link for 82599 loopback mode Tx->Rx.
4978  */
4979 static inline void __attribute__((cold))
4980 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4981 {
4982         PMD_INIT_FUNC_TRACE();
4983
4984         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4985                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4986                                 IXGBE_SUCCESS) {
4987                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4988                         /* ignore error */
4989                         return;
4990                 }
4991         }
4992
4993         /* Restart link */
4994         IXGBE_WRITE_REG(hw,
4995                         IXGBE_AUTOC,
4996                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4997         ixgbe_reset_pipeline_82599(hw);
4998
4999         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5000         msec_delay(50);
5001 }
5002
5003
5004 /*
5005  * Start Transmit and Receive Units.
5006  */
5007 int __attribute__((cold))
5008 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5009 {
5010         struct ixgbe_hw     *hw;
5011         struct ixgbe_tx_queue *txq;
5012         struct ixgbe_rx_queue *rxq;
5013         uint32_t txdctl;
5014         uint32_t dmatxctl;
5015         uint32_t rxctrl;
5016         uint16_t i;
5017         int ret = 0;
5018
5019         PMD_INIT_FUNC_TRACE();
5020         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5021
5022         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5023                 txq = dev->data->tx_queues[i];
5024                 /* Setup Transmit Threshold Registers */
5025                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
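                /* PTHRESH lives in bits [6:0], HTHRESH in [14:8] and
                 * WTHRESH in [22:16] of TXDCTL.
                 */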
5026                 txdctl |= txq->pthresh & 0x7F;
5027                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5028                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5029                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5030         }
5031
5032         if (hw->mac.type != ixgbe_mac_82598EB) {
5033                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5034                 dmatxctl |= IXGBE_DMATXCTL_TE;
5035                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5036         }
5037
5038         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5039                 txq = dev->data->tx_queues[i];
5040                 if (!txq->tx_deferred_start) {
5041                         ret = ixgbe_dev_tx_queue_start(dev, i);
5042                         if (ret < 0)
5043                                 return ret;
5044                 }
5045         }
5046
5047         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5048                 rxq = dev->data->rx_queues[i];
5049                 if (!rxq->rx_deferred_start) {
5050                         ret = ixgbe_dev_rx_queue_start(dev, i);
5051                         if (ret < 0)
5052                                 return ret;
5053                 }
5054         }
5055
5056         /* Enable Receive engine */
5057         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5058         if (hw->mac.type == ixgbe_mac_82598EB)
5059                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5060         rxctrl |= IXGBE_RXCTRL_RXEN;
5061         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5062
5063         /* If loopback mode is enabled for 82599, set up the link accordingly */
5064         if (hw->mac.type == ixgbe_mac_82599EB &&
5065                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
5066                 ixgbe_setup_loopback_link_82599(hw);
5067
5068 #ifdef RTE_LIBRTE_SECURITY
5069         if ((dev->data->dev_conf.rxmode.offloads &
5070                         DEV_RX_OFFLOAD_SECURITY) ||
5071                 (dev->data->dev_conf.txmode.offloads &
5072                         DEV_TX_OFFLOAD_SECURITY)) {
5073                 ret = ixgbe_crypto_enable_ipsec(dev);
5074                 if (ret != 0) {
5075                         PMD_DRV_LOG(ERR,
5076                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5077                                     ret);
5078                         return ret;
5079                 }
5080         }
5081 #endif
5082
5083         return 0;
5084 }
5085
5086 /*
5087  * Start Receive Units for specified queue.
5088  */
5089 int __attribute__((cold))
5090 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5091 {
5092         struct ixgbe_hw     *hw;
5093         struct ixgbe_rx_queue *rxq;
5094         uint32_t rxdctl;
5095         int poll_ms;
5096
5097         PMD_INIT_FUNC_TRACE();
5098         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5099
5100         if (rx_queue_id < dev->data->nb_rx_queues) {
5101                 rxq = dev->data->rx_queues[rx_queue_id];
5102
5103                 /* Allocate buffers for descriptor rings */
5104                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5105                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5106                                      rx_queue_id);
5107                         return -1;
5108                 }
5109                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5110                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5111                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5112
5113                 /* Wait until RX Enable ready */
5114                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5115                 do {
5116                         rte_delay_ms(1);
5117                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5118                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5119                 if (!poll_ms)
5120                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
5121                                      rx_queue_id);
5122                 rte_wmb();
5123                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5124                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5125                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5126         } else
5127                 return -1;
5128
5129         return 0;
5130 }
5131
5132 /*
5133  * Stop Receive Units for specified queue.
5134  */
5135 int __attribute__((cold))
5136 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5137 {
5138         struct ixgbe_hw     *hw;
5139         struct ixgbe_adapter *adapter =
5140                 (struct ixgbe_adapter *)dev->data->dev_private;
5141         struct ixgbe_rx_queue *rxq;
5142         uint32_t rxdctl;
5143         int poll_ms;
5144
5145         PMD_INIT_FUNC_TRACE();
5146         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5147
5148         if (rx_queue_id < dev->data->nb_rx_queues) {
5149                 rxq = dev->data->rx_queues[rx_queue_id];
5150
5151                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5152                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5153                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5154
5155                 /* Wait until RX Enable bit clear */
5156                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5157                 do {
5158                         rte_delay_ms(1);
5159                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5160                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5161                 if (!poll_ms)
5162                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
5163                                      rx_queue_id);
5164
5165                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
5166
5167                 ixgbe_rx_queue_release_mbufs(rxq);
5168                 ixgbe_reset_rx_queue(adapter, rxq);
5169                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5170         } else
5171                 return -1;
5172
5173         return 0;
5174 }
5175
5176
5177 /*
5178  * Start Transmit Units for specified queue.
5179  */
5180 int __attribute__((cold))
5181 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5182 {
5183         struct ixgbe_hw     *hw;
5184         struct ixgbe_tx_queue *txq;
5185         uint32_t txdctl;
5186         int poll_ms;
5187
5188         PMD_INIT_FUNC_TRACE();
5189         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5190
5191         if (tx_queue_id < dev->data->nb_tx_queues) {
5192                 txq = dev->data->tx_queues[tx_queue_id];
5193                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5194                 txdctl |= IXGBE_TXDCTL_ENABLE;
5195                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5196
5197                 /* Wait until TX Enable ready */
5198                 if (hw->mac.type == ixgbe_mac_82599EB) {
5199                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5200                         do {
5201                                 rte_delay_ms(1);
5202                                 txdctl = IXGBE_READ_REG(hw,
5203                                         IXGBE_TXDCTL(txq->reg_idx));
5204                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5205                         if (!poll_ms)
5206                                 PMD_INIT_LOG(ERR, "Could not enable "
5207                                              "Tx Queue %d", tx_queue_id);
5208                 }
5209                 rte_wmb();
5210                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5211                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5212                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5213         } else
5214                 return -1;
5215
5216         return 0;
5217 }
5218
5219 /*
5220  * Stop Transmit Units for specified queue.
5221  */
5222 int __attribute__((cold))
5223 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5224 {
5225         struct ixgbe_hw     *hw;
5226         struct ixgbe_tx_queue *txq;
5227         uint32_t txdctl;
5228         uint32_t txtdh, txtdt;
5229         int poll_ms;
5230
5231         PMD_INIT_FUNC_TRACE();
5232         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5233
5234         if (tx_queue_id >= dev->data->nb_tx_queues)
5235                 return -1;
5236
5237         txq = dev->data->tx_queues[tx_queue_id];
5238
5239         /* Wait until TX queue is empty */
5240         if (hw->mac.type == ixgbe_mac_82599EB) {
5241                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5242                 do {
5243                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5244                         txtdh = IXGBE_READ_REG(hw,
5245                                                IXGBE_TDH(txq->reg_idx));
5246                         txtdt = IXGBE_READ_REG(hw,
5247                                                IXGBE_TDT(txq->reg_idx));
5248                 } while (--poll_ms && (txtdh != txtdt));
5249                 if (!poll_ms)
5250                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
5251                                      "when stopping.", tx_queue_id);
5252         }
5253
5254         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5255         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5256         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5257
5258         /* Wait until TX Enable bit clear */
5259         if (hw->mac.type == ixgbe_mac_82599EB) {
5260                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5261                 do {
5262                         rte_delay_ms(1);
5263                         txdctl = IXGBE_READ_REG(hw,
5264                                                 IXGBE_TXDCTL(txq->reg_idx));
5265                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5266                 if (!poll_ms)
5267                         PMD_INIT_LOG(ERR, "Could not disable "
5268                                      "Tx Queue %d", tx_queue_id);
5269         }
5270
5271         if (txq->ops != NULL) {
5272                 txq->ops->release_mbufs(txq);
5273                 txq->ops->reset(txq);
5274         }
5275         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5276
5277         return 0;
5278 }
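
/*
 * Usage sketch (illustrative, not part of the driver): the four per-queue
 * start/stop callbacks above are reached through the generic ethdev API.
 * Port 0 and queue 2 are assumed values for this example.
 *
 *	if (rte_eth_dev_rx_queue_stop(0, 2) != 0)
 *		printf("failed to stop Rx queue 2\n");
 *	// ... queue 2 no longer receives; the other queues keep running ...
 *	if (rte_eth_dev_rx_queue_start(0, 2) != 0)
 *		printf("failed to restart Rx queue 2\n");
 *
 * The Tx side works the same way through rte_eth_dev_tx_queue_start() and
 * rte_eth_dev_tx_queue_stop().
 */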
5279
5280 void
5281 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5282         struct rte_eth_rxq_info *qinfo)
5283 {
5284         struct ixgbe_rx_queue *rxq;
5285
5286         rxq = dev->data->rx_queues[queue_id];
5287
5288         qinfo->mp = rxq->mb_pool;
5289         qinfo->scattered_rx = dev->data->scattered_rx;
5290         qinfo->nb_desc = rxq->nb_rx_desc;
5291
5292         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5293         qinfo->conf.rx_drop_en = rxq->drop_en;
5294         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5295 }
5296
5297 void
5298 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5299         struct rte_eth_txq_info *qinfo)
5300 {
5301         struct ixgbe_tx_queue *txq;
5302
5303         txq = dev->data->tx_queues[queue_id];
5304
5305         qinfo->nb_desc = txq->nb_tx_desc;
5306
5307         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5308         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5309         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5310
5311         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5312         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5313         qinfo->conf.txq_flags = txq->txq_flags;
5314         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5315 }
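
/*
 * Usage sketch (illustrative): the two helpers above back the public
 * rte_eth_rx_queue_info_get()/rte_eth_tx_queue_info_get() calls. Port 0 and
 * queue 0 are assumed values for this example.
 *
 *	struct rte_eth_rxq_info rx_qinfo;
 *	struct rte_eth_txq_info tx_qinfo;
 *
 *	if (rte_eth_rx_queue_info_get(0, 0, &rx_qinfo) == 0)
 *		printf("rxq0: %d descriptors, drop_en=%d\n",
 *		       rx_qinfo.nb_desc, rx_qinfo.conf.rx_drop_en);
 *	if (rte_eth_tx_queue_info_get(0, 0, &tx_qinfo) == 0)
 *		printf("txq0: tx_rs_thresh=%d\n", tx_qinfo.conf.tx_rs_thresh);
 */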
5316
5317 /*
5318  * [VF] Initializes Receive Unit.
5319  */
5320 int __attribute__((cold))
5321 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5322 {
5323         struct ixgbe_hw     *hw;
5324         struct ixgbe_rx_queue *rxq;
5325         uint64_t bus_addr;
5326         uint32_t srrctl, psrtype = 0;
5327         uint16_t buf_size;
5328         uint16_t i;
5329         int ret;
5330
5331         PMD_INIT_FUNC_TRACE();
5332         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5333
5334         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5335                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
5336                         "it should be power of 2");
5337                 return -1;
5338         }
5339
5340         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5341                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
5342                         "it should be equal to or less than %d",
5343                         hw->mac.max_rx_queues);
5344                 return -1;
5345         }
5346
5347         /*
5348          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5349          * disables VF packet reception if the PF MTU is > 1500.
5350          * This works around an 82599 limitation that forces the PF and all
5351          * VFs to share the same MTU.
5352          * The PF driver re-enables VF packet reception only when the VF
5353          * driver issues an IXGBE_VF_SET_LPE request.
5354          * Until then, the VF device cannot be used, even if the VF driver
5355          * and the guest VM network stack are ready to accept packets up to
5356          * the PF MTU.
5357          * As a work-around to this PF behaviour, force the call to
5358          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5359          * VF packet reception works in all cases.
5360          */
5361         ixgbevf_rlpml_set_vf(hw,
5362                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5363
5364         /* Setup RX queues */
5365         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5366                 rxq = dev->data->rx_queues[i];
5367
5368                 /* Allocate buffers for descriptor rings */
5369                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5370                 if (ret)
5371                         return ret;
5372
5373                 /* Setup the Base and Length of the Rx Descriptor Rings */
5374                 bus_addr = rxq->rx_ring_phys_addr;
5375
5376                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5377                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5378                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5379                                 (uint32_t)(bus_addr >> 32));
5380                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5381                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5382                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5383                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5384
5385
5386                 /* Configure the SRRCTL register */
5387 #ifdef RTE_HEADER_SPLIT_ENABLE
5388                 /*
5389                  * Configure Header Split
5390                  */
5391                 if (dev->data->dev_conf.rxmode.header_split) {
5392                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5393                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5394                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5395                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5396                 } else
5397 #endif
5398                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5399
5400                 /* Configure whether packets are dropped when no descriptors are available */
5401                 if (rxq->drop_en)
5402                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5403
5404                 /*
5405                  * Configure the RX buffer size in the BSIZEPACKET field of
5406                  * the SRRCTL register of the queue.
5407                  * The value is in 1 KB resolution. Valid values can be from
5408                  * 1 KB to 16 KB.
5409                  */
5410                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5411                         RTE_PKTMBUF_HEADROOM);
5412                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5413                            IXGBE_SRRCTL_BSIZEPKT_MASK);
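                /*
                 * For example, with the default RTE_MBUF_DEFAULT_BUF_SIZE
                 * mempool element (2048 bytes of data room plus the default
                 * 128-byte RTE_PKTMBUF_HEADROOM), buf_size is 2048 and
                 * BSIZEPACKET is programmed to 2048 >> 10 = 2, i.e. 2 KB
                 * receive buffers.
                 */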
5414
5415                 /*
5416                  * VF path: write the queue's SRRCTL through the VF register space
5417                  */
5418                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5419
5420                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5421                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5422
5423                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5424                     /* Account for two VLAN tags (QinQ) in the frame size check */
5425                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5426                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5427                         if (!dev->data->scattered_rx)
5428                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5429                         dev->data->scattered_rx = 1;
5430                 }
5431         }
5432
5433 #ifdef RTE_HEADER_SPLIT_ENABLE
5434         if (dev->data->dev_conf.rxmode.header_split)
5435                 /* Must setup the PSRTYPE register */
5436                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5437                         IXGBE_PSRTYPE_UDPHDR   |
5438                         IXGBE_PSRTYPE_IPV4HDR  |
5439                         IXGBE_PSRTYPE_IPV6HDR;
5440 #endif
5441
5442         /* Set RQPL for VF RSS according to the number of Rx queues */
5443         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5444                 IXGBE_PSRTYPE_RQPL_SHIFT;
5445         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5446
5447         ixgbe_set_rx_function(dev);
5448
5449         return 0;
5450 }
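
/*
 * Configuration sketch (illustrative): a VF port set up so that the checks
 * at the top of ixgbevf_dev_rx_init() pass. Port 0 and the 4/4 queue counts
 * are assumptions made for this example only.
 *
 *	struct rte_eth_conf port_conf = { 0 };
 *
 *	port_conf.rxmode.max_rx_pkt_len = ETHER_MAX_LEN;
 *	// 4 Rx queues: a power of 2 and within the VF's max_rx_queues limit
 *	rte_eth_dev_configure(0, 4, 4, &port_conf);
 */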
5451
5452 /*
5453  * [VF] Initializes Transmit Unit.
5454  */
5455 void __attribute__((cold))
5456 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5457 {
5458         struct ixgbe_hw     *hw;
5459         struct ixgbe_tx_queue *txq;
5460         uint64_t bus_addr;
5461         uint32_t txctrl;
5462         uint16_t i;
5463
5464         PMD_INIT_FUNC_TRACE();
5465         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5466
5467         /* Setup the Base and Length of the Tx Descriptor Rings */
5468         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5469                 txq = dev->data->tx_queues[i];
5470                 bus_addr = txq->tx_ring_phys_addr;
5471                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5472                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5473                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5474                                 (uint32_t)(bus_addr >> 32));
5475                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5476                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5477                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5478                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5479                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5480
5481                 /*
5482                  * Disable relaxed ordering (RO) for Tx descriptor write-back,
5483                  * since out-of-order write-backs break the driver's bookkeeping.
5484                  */
5485                 txctrl = IXGBE_READ_REG(hw,
5486                                 IXGBE_VFDCA_TXCTRL(i));
5487                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5488                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5489                                 txctrl);
5490         }
5491 }
5492
5493 /*
5494  * [VF] Start Transmit and Receive Units.
5495  */
5496 void __attribute__((cold))
5497 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5498 {
5499         struct ixgbe_hw     *hw;
5500         struct ixgbe_tx_queue *txq;
5501         struct ixgbe_rx_queue *rxq;
5502         uint32_t txdctl;
5503         uint32_t rxdctl;
5504         uint16_t i;
5505         int poll_ms;
5506
5507         PMD_INIT_FUNC_TRACE();
5508         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5509
5510         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5511                 txq = dev->data->tx_queues[i];
5512                 /* Setup Transmit Threshold Registers */
5513                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5514                 txdctl |= txq->pthresh & 0x7F;
5515                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5516                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5517                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5518         }
5519
5520         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5521
5522                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5523                 txdctl |= IXGBE_TXDCTL_ENABLE;
5524                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5525
5526                 poll_ms = 10;
5527                 /* Wait until TX Enable ready */
5528                 do {
5529                         rte_delay_ms(1);
5530                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5531                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5532                 if (!poll_ms)
5533                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5534         }
5535         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5536
5537                 rxq = dev->data->rx_queues[i];
5538
5539                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5540                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5541                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5542
5543                 /* Wait until RX Enable ready */
5544                 poll_ms = 10;
5545                 do {
5546                         rte_delay_ms(1);
5547                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5548                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5549                 if (!poll_ms)
5550                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5551                 rte_wmb();
5552                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5553
5554         }
5555 }
5556
5557 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
5558 int __attribute__((weak))
5559 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5560 {
5561         return -1;
5562 }
5563
5564 uint16_t __attribute__((weak))
5565 ixgbe_recv_pkts_vec(
5566         void __rte_unused *rx_queue,
5567         struct rte_mbuf __rte_unused **rx_pkts,
5568         uint16_t __rte_unused nb_pkts)
5569 {
5570         return 0;
5571 }
5572
5573 uint16_t __attribute__((weak))
5574 ixgbe_recv_scattered_pkts_vec(
5575         void __rte_unused *rx_queue,
5576         struct rte_mbuf __rte_unused **rx_pkts,
5577         uint16_t __rte_unused nb_pkts)
5578 {
5579         return 0;
5580 }
5581
5582 int __attribute__((weak))
5583 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5584 {
5585         return -1;
5586 }
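
/*
 * When CONFIG_RTE_IXGBE_INC_VECTOR is enabled, strong definitions of the
 * functions above are provided by the vector Rx implementations (e.g.
 * ixgbe_rxtx_vec_sse.c or ixgbe_rxtx_vec_neon.c) and replace these weak
 * stubs at link time. A minimal sketch of the pattern, with hypothetical
 * file and function names:
 *
 *	// fallback.c: weak fallback, used only if no other definition exists
 *	int __attribute__((weak)) do_work(void) { return -1; }
 *
 *	// optimized.c: strong definition, wins whenever it is linked in
 *	int do_work(void) { return 0; }
 */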