1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4
5 #include <sys/queue.h>
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
22 #include <rte_pci.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
26 #include <rte_eal.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
33 #include <rte_mbuf.h>
34 #include <rte_ether.h>
35 #include <rte_ethdev_driver.h>
36 #include <rte_prefetch.h>
37 #include <rte_ip.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_net.h>
44
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
47
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
50
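/*
 * Tx offload flags this PMD supports (VLAN insertion, L4 checksum and TSO).
 * Any flag outside this set is rejected by vmxnet3_prep_pkts() via
 * VMXNET3_TX_OFFLOAD_NOTSUP_MASK below; PKT_TX_SCTP_CKSUM is rejected
 * explicitly even though it falls under PKT_TX_L4_MASK.
 */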
51 #define VMXNET3_TX_OFFLOAD_MASK ( \
52                 PKT_TX_VLAN_PKT | \
53                 PKT_TX_L4_MASK |  \
54                 PKT_TX_TCP_SEG)
55
56 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK  \
57         (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
58
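/*
 * Producer-index ("doorbell") registers for the two Rx command rings,
 * indexed by ring id: ring 0 -> VMXNET3_REG_RXPROD, ring 1 -> VMXNET3_REG_RXPROD2.
 */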
59 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
60
61 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
62 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
63 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
64 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
65 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
66 #endif
67
68 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
69 static void
70 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
71 {
72         uint32_t avail = 0;
73
74         if (rxq == NULL)
75                 return;
76
77         PMD_RX_LOG(DEBUG,
78                    "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
79                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
80         PMD_RX_LOG(DEBUG,
81                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
82                    (unsigned long)rxq->cmd_ring[0].basePA,
83                    (unsigned long)rxq->cmd_ring[1].basePA,
84                    (unsigned long)rxq->comp_ring.basePA);
85
86         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
87         PMD_RX_LOG(DEBUG,
88                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
89                    (uint32_t)rxq->cmd_ring[0].size, avail,
90                    rxq->comp_ring.next2proc,
91                    rxq->cmd_ring[0].size - avail);
92
93         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
94         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
95                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
96                    rxq->cmd_ring[1].size - avail);
97
98 }
99
100 static void
101 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
102 {
103         uint32_t avail = 0;
104
105         if (txq == NULL)
106                 return;
107
108         PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
109                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
110         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
111                    (unsigned long)txq->cmd_ring.basePA,
112                    (unsigned long)txq->comp_ring.basePA,
113                    (unsigned long)txq->data_ring.basePA);
114
115         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
116         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
117                    (uint32_t)txq->cmd_ring.size, avail,
118                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
119 }
120 #endif
121
122 static void
123 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
124 {
125         while (ring->next2comp != ring->next2fill) {
126                 /* No need to worry about desc ownership, device is quiesced by now. */
127                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
128
129                 if (buf_info->m) {
130                         rte_pktmbuf_free(buf_info->m);
131                         buf_info->m = NULL;
132                         buf_info->bufPA = 0;
133                         buf_info->len = 0;
134                 }
135                 vmxnet3_cmd_ring_adv_next2comp(ring);
136         }
137 }
138
139 static void
140 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
141 {
142         uint32_t i;
143
144         for (i = 0; i < ring->size; i++) {
145                 /* No need to worry about desc ownership, device is quiesced by now. */
146                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
147
148                 if (buf_info->m) {
149                         rte_pktmbuf_free_seg(buf_info->m);
150                         buf_info->m = NULL;
151                         buf_info->bufPA = 0;
152                         buf_info->len = 0;
153                 }
154                 vmxnet3_cmd_ring_adv_next2comp(ring);
155         }
156 }
157
158 static void
159 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
160 {
161         rte_free(ring->buf_info);
162         ring->buf_info = NULL;
163 }
164
165 void
166 vmxnet3_dev_tx_queue_release(void *txq)
167 {
168         vmxnet3_tx_queue_t *tq = txq;
169
170         if (tq != NULL) {
171                 /* Release mbufs */
172                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
173                 /* Release the cmd_ring */
174                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
175                 /* Release the memzone */
176                 rte_memzone_free(tq->mz);
177                 /* Release the queue */
178                 rte_free(tq);
179         }
180 }
181
182 void
183 vmxnet3_dev_rx_queue_release(void *rxq)
184 {
185         int i;
186         vmxnet3_rx_queue_t *rq = rxq;
187
188         if (rq != NULL) {
189                 /* Release mbufs */
190                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
191                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
192
193                 /* Release both the cmd_rings */
194                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
195                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
196
197                 /* Release the memzone */
198                 rte_memzone_free(rq->mz);
199
200                 /* Release the queue */
201                 rte_free(rq);
202         }
203 }
204
205 static void
206 vmxnet3_dev_tx_queue_reset(void *txq)
207 {
208         vmxnet3_tx_queue_t *tq = txq;
209         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
210         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
211         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
212         int size;
213
214         if (tq != NULL) {
215                 /* Release the cmd_ring mbufs */
216                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
217         }
218
219         /* Tx vmxnet rings structure initialization */
220         ring->next2fill = 0;
221         ring->next2comp = 0;
222         ring->gen = VMXNET3_INIT_GEN;
223         comp_ring->next2proc = 0;
224         comp_ring->gen = VMXNET3_INIT_GEN;
225
226         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
227         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
228         size += tq->txdata_desc_size * data_ring->size;
229
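        /*
         * The command ring, completion ring and data ring were carved out of a
         * single contiguous memzone (see vmxnet3_dev_tx_queue_setup()), so one
         * memset starting at ring->base clears all three.
         */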
230         memset(ring->base, 0, size);
231 }
232
233 static void
234 vmxnet3_dev_rx_queue_reset(void *rxq)
235 {
236         int i;
237         vmxnet3_rx_queue_t *rq = rxq;
238         struct vmxnet3_hw *hw = rq->hw;
239         struct vmxnet3_cmd_ring *ring0, *ring1;
240         struct vmxnet3_comp_ring *comp_ring;
241         struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
242         int size;
243
244         /* Release both the cmd_rings mbufs */
245         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
246                 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
247
248         ring0 = &rq->cmd_ring[0];
249         ring1 = &rq->cmd_ring[1];
250         comp_ring = &rq->comp_ring;
251
252         /* Rx vmxnet rings structure initialization */
253         ring0->next2fill = 0;
254         ring1->next2fill = 0;
255         ring0->next2comp = 0;
256         ring1->next2comp = 0;
257         ring0->gen = VMXNET3_INIT_GEN;
258         ring1->gen = VMXNET3_INIT_GEN;
259         comp_ring->next2proc = 0;
260         comp_ring->gen = VMXNET3_INIT_GEN;
261
262         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
263         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
264         if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
265                 size += rq->data_desc_size * data_ring->size;
266
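        /*
         * As on the Tx side, both command rings, the completion ring and (on
         * v3 devices) the Rx data ring share one memzone, so a single memset
         * from ring0->base wipes them all.
         */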
267         memset(ring0->base, 0, size);
268 }
269
270 void
271 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
272 {
273         unsigned i;
274
275         PMD_INIT_FUNC_TRACE();
276
277         for (i = 0; i < dev->data->nb_tx_queues; i++) {
278                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
279
280                 if (txq != NULL) {
281                         txq->stopped = TRUE;
282                         vmxnet3_dev_tx_queue_reset(txq);
283                 }
284         }
285
286         for (i = 0; i < dev->data->nb_rx_queues; i++) {
287                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
288
289                 if (rxq != NULL) {
290                         rxq->stopped = TRUE;
291                         vmxnet3_dev_rx_queue_reset(rxq);
292                 }
293         }
294 }
295
296 static int
297 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
298 {
299         int completed = 0;
300         struct rte_mbuf *mbuf;
301
302         /* Release cmd_ring descriptor and free mbuf */
303         RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
304
305         mbuf = txq->cmd_ring.buf_info[eop_idx].m;
306         if (mbuf == NULL)
307                 rte_panic("EOP desc does not point to a valid mbuf");
308         rte_pktmbuf_free(mbuf);
309
310         txq->cmd_ring.buf_info[eop_idx].m = NULL;
311
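        /*
         * The device posts one completion per packet (on its EOP descriptor),
         * so walk next2comp forward to eop_idx to reclaim every command-ring
         * descriptor the packet occupied.
         */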
312         while (txq->cmd_ring.next2comp != eop_idx) {
313                 /* no out-of-order completion */
314                 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
315                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
316                 completed++;
317         }
318
319         /* Mark the txd for which tcd was generated as completed */
320         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
321
322         return completed + 1;
323 }
324
325 static void
326 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
327 {
328         int completed = 0;
329         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
330         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
331                 (comp_ring->base + comp_ring->next2proc);
332
333         while (tcd->gen == comp_ring->gen) {
334                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
335
336                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
337                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
338                                                     comp_ring->next2proc);
339         }
340
341         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
342 }
343
344 uint16_t
345 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
346         uint16_t nb_pkts)
347 {
348         int32_t ret;
349         uint32_t i;
350         uint64_t ol_flags;
351         struct rte_mbuf *m;
352
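        /*
         * Tx prepare stage: validate each mbuf against the device limits and
         * fix up checksum fields in software (rte_net_intel_cksum_prepare())
         * before the burst is handed to vmxnet3_xmit_pkts().  On failure the
         * index of the offending packet is returned with rte_errno set.
         */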
353         for (i = 0; i != nb_pkts; i++) {
354                 m = tx_pkts[i];
355                 ol_flags = m->ol_flags;
356
357                 /* Non-TSO packet cannot occupy more than
358                  * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
359                  */
360                 if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
361                                 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
362                         rte_errno = EINVAL;
363                         return i;
364                 }
365
366                 /* check that only supported TX offloads are requested. */
367                 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
368                                 (ol_flags & PKT_TX_L4_MASK) ==
369                                 PKT_TX_SCTP_CKSUM) {
370                         rte_errno = ENOTSUP;
371                         return i;
372                 }
373
374 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
375                 ret = rte_validate_tx_offload(m);
376                 if (ret != 0) {
377                         rte_errno = -ret;
378                         return i;
379                 }
380 #endif
381                 ret = rte_net_intel_cksum_prepare(m);
382                 if (ret != 0) {
383                         rte_errno = -ret;
384                         return i;
385                 }
386         }
387
388         return i;
389 }
390
391 uint16_t
392 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
393                   uint16_t nb_pkts)
394 {
395         uint16_t nb_tx;
396         vmxnet3_tx_queue_t *txq = tx_queue;
397         struct vmxnet3_hw *hw = txq->hw;
398         Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
399         uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
400
401         if (unlikely(txq->stopped)) {
402                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
403                 return 0;
404         }
405
406         /* Free up the comp_descriptors aggressively */
407         vmxnet3_tq_tx_complete(txq);
408
409         nb_tx = 0;
410         while (nb_tx < nb_pkts) {
411                 Vmxnet3_GenericDesc *gdesc;
412                 vmxnet3_buf_info_t *tbi;
413                 uint32_t first2fill, avail, dw2;
414                 struct rte_mbuf *txm = tx_pkts[nb_tx];
415                 struct rte_mbuf *m_seg = txm;
416                 int copy_size = 0;
417                 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
418                 /* # of descriptors needed for a packet. */
419                 unsigned count = txm->nb_segs;
420
421                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
422                 if (count > avail) {
423                         /* Is command ring full? */
424                         if (unlikely(avail == 0)) {
425                                 PMD_TX_LOG(DEBUG, "No free ring descriptors");
426                                 txq->stats.tx_ring_full++;
427                                 txq->stats.drop_total += (nb_pkts - nb_tx);
428                                 break;
429                         }
430
431                         /* Command ring is not full but cannot handle the
432                          * multi-segmented packet. Let's try the next packet
433                          * in this case.
434                          */
435                         PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
436                                    "(avail %d needed %d)", avail, count);
437                         txq->stats.drop_total++;
438                         if (tso)
439                                 txq->stats.drop_tso++;
440                         rte_pktmbuf_free(txm);
441                         nb_tx++;
442                         continue;
443                 }
444
445                 /* Drop non-TSO packet that is excessively fragmented */
446                 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
447                         PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
448                                    "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
449                         txq->stats.drop_too_many_segs++;
450                         txq->stats.drop_total++;
451                         rte_pktmbuf_free(txm);
452                         nb_tx++;
453                         continue;
454                 }
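                /*
                 * Single-segment packets that fit in a Tx data-ring slot are
                 * copied into the data ring; the descriptor filled below then
                 * points at that slot (see the copy_size handling) instead of
                 * at the mbuf.
                 */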
455
456                 if (txm->nb_segs == 1 &&
457                     rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
458                         struct Vmxnet3_TxDataDesc *tdd;
459
460                         /* Skip empty packets */
461                         if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
462                                 txq->stats.drop_total++;
463                                 rte_pktmbuf_free(txm);
464                                 nb_tx++;
465                                 continue;
466                         }
467
468                         tdd = (struct Vmxnet3_TxDataDesc *)
469                                 ((uint8 *)txq->data_ring.base +
470                                  txq->cmd_ring.next2fill *
471                                  txq->txdata_desc_size);
472                         copy_size = rte_pktmbuf_pkt_len(txm);
473                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
474                 }
475
476                 /* use the previous gen bit for the SOP desc */
477                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
478                 first2fill = txq->cmd_ring.next2fill;
479                 do {
480                         /* Remember the transmit buffer for cleanup */
481                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
482
483                         /* NB: the following assumes that VMXNET3 maximum
484                          * transmit buffer size (16K) is greater than
485                          * maximum size of mbuf segment size.
486                          */
487                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
488
489                         /* Skip empty segments */
490                         if (unlikely(m_seg->data_len == 0))
491                                 continue;
492
493                         if (copy_size) {
494                                 uint64 offset =
495                                         (uint64)txq->cmd_ring.next2fill *
496                                                         txq->txdata_desc_size;
497                                 gdesc->txd.addr =
498                                         rte_cpu_to_le_64(txq->data_ring.basePA +
499                                                          offset);
500                         } else {
501                                 gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
502                         }
503
504                         gdesc->dword[2] = dw2 | m_seg->data_len;
505                         gdesc->dword[3] = 0;
506
507                         /* move to the next2fill descriptor */
508                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
509
510                         /* use the right gen for non-SOP desc */
511                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
512                 } while ((m_seg = m_seg->next) != NULL);
513
514                 /* set the last buf_info for the pkt */
515                 tbi->m = txm;
516                 /* Update the EOP descriptor */
517                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
518
519                 /* Add VLAN tag if present */
520                 gdesc = txq->cmd_ring.base + first2fill;
521                 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
522                         gdesc->txd.ti = 1;
523                         gdesc->txd.tci = txm->vlan_tci;
524                 }
525
526                 if (tso) {
527                         uint16_t mss = txm->tso_segsz;
528
529                         RTE_ASSERT(mss > 0);
530
531                         gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
532                         gdesc->txd.om = VMXNET3_OM_TSO;
533                         gdesc->txd.msscof = mss;
534
535                         deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
536                 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
537                         gdesc->txd.om = VMXNET3_OM_CSUM;
538                         gdesc->txd.hlen = txm->l2_len + txm->l3_len;
539
540                         switch (txm->ol_flags & PKT_TX_L4_MASK) {
541                         case PKT_TX_TCP_CKSUM:
542                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
543                                 break;
544                         case PKT_TX_UDP_CKSUM:
545                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
546                                 break;
547                         default:
548                                 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
549                                            txm->ol_flags & PKT_TX_L4_MASK);
550                                 abort();
551                         }
552                         deferred++;
553                 } else {
554                         gdesc->txd.hlen = 0;
555                         gdesc->txd.om = VMXNET3_OM_NONE;
556                         gdesc->txd.msscof = 0;
557                         deferred++;
558                 }
559
560                 /* Flip the GEN bit on the SOP descriptor last, behind a compiler
                     * barrier, so the device never sees a partially written packet. */
561                 rte_compiler_barrier();
562                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
563
564                 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
565                 nb_tx++;
566         }
567
568         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
569
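        /*
         * txNumDeferred counts work queued since the last doorbell (TSO
         * packets count one unit per segment).  Only ring the TXPROD doorbell
         * once the txThreshold advertised in the shared queue control area is
         * crossed, batching notifications to the vSwitch.
         */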
570         if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
571                 txq_ctrl->txNumDeferred = 0;
572                 /* Notify vSwitch that packets are available. */
573                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
574                                        txq->cmd_ring.next2fill);
575         }
576
577         return nb_tx;
578 }
579
580 static inline void
581 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
582                    struct rte_mbuf *mbuf)
583 {
584         uint32_t val;
585         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
586         struct Vmxnet3_RxDesc *rxd =
587                 (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
588         vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
589
590         if (ring_id == 0) {
591                 /* Usually: One HEAD type buf per packet
592                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
593                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
594                  */
595
596                 /* We use single packet buffer so all heads here */
597                 val = VMXNET3_RXD_BTYPE_HEAD;
598         } else {
599                 /* All BODY type buffers for 2nd ring */
600                 val = VMXNET3_RXD_BTYPE_BODY;
601         }
602
603         /*
604          * Load the mbuf pointer into buf_info[next2fill]; the buf_info entry
605          * plays the same role as the cookie in a virtio virtqueue.
606          */
607         buf_info->m = mbuf;
608         buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
609         buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
610
611         /* Load Rx Descriptor with the buffer's GPA */
612         rxd->addr = buf_info->bufPA;
613
614         /* After this point rxd->addr MUST not be NULL */
615         rxd->btype = val;
616         rxd->len = buf_info->len;
617         /* Flip gen bit at the end to change ownership */
618         rxd->gen = ring->gen;
619
620         vmxnet3_cmd_ring_adv_next2fill(ring);
621 }
622 /*
623  *  Allocates mbufs and posts Rx descriptors with the buffer details
624  *  so that the device can receive packets into those buffers.
625  *  Ring layout:
626  *      Of the two rings, the 1st ring holds buffers of type 0 (HEAD) and
627  *      type 1 (BODY). bufs_per_pkt is set such that in the non-LRO case all
628  *      the buffers required by a frame fit in the 1st ring (1st buf of
629  *      type 0, the rest of type 1). The 2nd ring holds type 1 buffers only
630  *      and is used mostly for LRO.
631  */
632 static int
633 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
634 {
635         int err = 0;
636         uint32_t i = 0;
637         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
638
639         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
640                 struct rte_mbuf *mbuf;
641
642                 /* Allocate blank mbuf for the current Rx Descriptor */
643                 mbuf = rte_mbuf_raw_alloc(rxq->mp);
644                 if (unlikely(mbuf == NULL)) {
645                         PMD_RX_LOG(ERR, "Error allocating mbuf");
646                         rxq->stats.rx_buf_alloc_failure++;
647                         err = ENOMEM;
648                         break;
649                 }
650
651                 vmxnet3_renew_desc(rxq, ring_id, mbuf);
652                 i++;
653         }
654
655         /* Return error only if no buffers are posted at present */
656         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
657                 return -err;
658         else
659                 return i;
660 }
661
662 /* MSS not provided by vmxnet3, guess one with available information */
663 static uint16_t
664 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
665                 struct rte_mbuf *rxm)
666 {
667         uint32_t hlen, slen;
668         struct ipv4_hdr *ipv4_hdr;
669         struct ipv6_hdr *ipv6_hdr;
670         struct tcp_hdr *tcp_hdr;
671         char *ptr;
672
673         RTE_ASSERT(rcd->tcp);
674
675         ptr = rte_pktmbuf_mtod(rxm, char *);
676         slen = rte_pktmbuf_data_len(rxm);
677         hlen = sizeof(struct ether_hdr);
678
679         if (rcd->v4) {
680                 if (unlikely(slen < hlen + sizeof(struct ipv4_hdr)))
681                         return hw->mtu - sizeof(struct ipv4_hdr)
682                                         - sizeof(struct tcp_hdr);
683
684                 ipv4_hdr = (struct ipv4_hdr *)(ptr + hlen);
685                 hlen += (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
686                                 IPV4_IHL_MULTIPLIER;
687         } else if (rcd->v6) {
688                 if (unlikely(slen < hlen + sizeof(struct ipv6_hdr)))
689                         return hw->mtu - sizeof(struct ipv6_hdr) -
690                                         sizeof(struct tcp_hdr);
691
692                 ipv6_hdr = (struct ipv6_hdr *)(ptr + hlen);
693                 hlen += sizeof(struct ipv6_hdr);
694                 if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
695                         int frag;
696
697                         rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
698                                         &hlen, &frag);
699                 }
700         }
701
702         if (unlikely(slen < hlen + sizeof(struct tcp_hdr)))
703                 return hw->mtu - hlen - sizeof(struct tcp_hdr) +
704                                 sizeof(struct ether_hdr);
705
706         tcp_hdr = (struct tcp_hdr *)(ptr + hlen);
707         hlen += (tcp_hdr->data_off & 0xf0) >> 2;
708
709         if (rxm->udata64 > 1)
710                 return (rte_pktmbuf_pkt_len(rxm) - hlen +
711                                 rxm->udata64 - 1) / rxm->udata64;
712         else
713                 return hw->mtu - hlen + sizeof(struct ether_hdr);
714 }
715
716 /* Receive side checksum and other offloads */
717 static inline void
718 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
719                 struct rte_mbuf *rxm, const uint8_t sop)
720 {
721         uint64_t ol_flags = rxm->ol_flags;
722         uint32_t packet_type = rxm->packet_type;
723
724         /* Offloads set in sop */
725         if (sop) {
726                 /* Set packet type */
727                 packet_type |= RTE_PTYPE_L2_ETHER;
728
729                 /* Check large packet receive */
730                 if (VMXNET3_VERSION_GE_2(hw) &&
731                     rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
732                         const Vmxnet3_RxCompDescExt *rcde =
733                                         (const Vmxnet3_RxCompDescExt *)rcd;
734
735                         rxm->tso_segsz = rcde->mss;
736                         rxm->udata64 = rcde->segCnt;
737                         ol_flags |= PKT_RX_LRO;
738                 }
739         } else { /* Offloads set in eop */
740                 /* Check for RSS */
741                 if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
742                         ol_flags |= PKT_RX_RSS_HASH;
743                         rxm->hash.rss = rcd->rssHash;
744                 }
745
746                 /* Check for hardware stripped VLAN tag */
747                 if (rcd->ts) {
748                         ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
749                         rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
750                 }
751
752                 /* Check packet type, checksum errors, etc. */
753                 if (rcd->cnc) {
754                         ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
755                 } else {
756                         if (rcd->v4) {
757                                 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
758
759                                 if (rcd->ipc)
760                                         ol_flags |= PKT_RX_IP_CKSUM_GOOD;
761                                 else
762                                         ol_flags |= PKT_RX_IP_CKSUM_BAD;
763
764                                 if (rcd->tuc) {
765                                         ol_flags |= PKT_RX_L4_CKSUM_GOOD;
766                                         if (rcd->tcp)
767                                                 packet_type |= RTE_PTYPE_L4_TCP;
768                                         else
769                                                 packet_type |= RTE_PTYPE_L4_UDP;
770                                 } else {
771                                         if (rcd->tcp) {
772                                                 packet_type |= RTE_PTYPE_L4_TCP;
773                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
774                                         } else if (rcd->udp) {
775                                                 packet_type |= RTE_PTYPE_L4_UDP;
776                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
777                                         }
778                                 }
779                         } else if (rcd->v6) {
780                                 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
781
782                                 if (rcd->tuc) {
783                                         ol_flags |= PKT_RX_L4_CKSUM_GOOD;
784                                         if (rcd->tcp)
785                                                 packet_type |= RTE_PTYPE_L4_TCP;
786                                         else
787                                                 packet_type |= RTE_PTYPE_L4_UDP;
788                                 } else {
789                                         if (rcd->tcp) {
790                                                 packet_type |= RTE_PTYPE_L4_TCP;
791                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
792                                         } else if (rcd->udp) {
793                                                 packet_type |= RTE_PTYPE_L4_UDP;
794                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
795                                         }
796                                 }
797                         } else {
798                                 packet_type |= RTE_PTYPE_UNKNOWN;
799                         }
800
801                         /* Old variants of vmxnet3 do not provide MSS */
802                         if ((ol_flags & PKT_RX_LRO) && rxm->tso_segsz == 0)
803                                 rxm->tso_segsz = vmxnet3_guess_mss(hw,
804                                                 rcd, rxm);
805                 }
806         }
807
808         rxm->ol_flags = ol_flags;
809         rxm->packet_type = packet_type;
810 }
811
812 /*
813  * Process the Rx Completion Ring of given vmxnet3_rx_queue
814  * for nb_pkts burst and return the number of packets received
815  */
816 uint16_t
817 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
818 {
819         uint16_t nb_rx;
820         uint32_t nb_rxd, idx;
821         uint8_t ring_idx;
822         vmxnet3_rx_queue_t *rxq;
823         Vmxnet3_RxCompDesc *rcd;
824         vmxnet3_buf_info_t *rbi;
825         Vmxnet3_RxDesc *rxd;
826         struct rte_mbuf *rxm = NULL;
827         struct vmxnet3_hw *hw;
828
829         nb_rx = 0;
830         ring_idx = 0;
831         nb_rxd = 0;
832         idx = 0;
833
834         rxq = rx_queue;
835         hw = rxq->hw;
836
837         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
838
839         if (unlikely(rxq->stopped)) {
840                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
841                 return 0;
842         }
843
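        /*
         * Process completion descriptors while their generation bit matches
         * ours.  A replacement mbuf is allocated up front so the command-ring
         * slot can be refilled as soon as the received mbuf is detached from it.
         */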
844         while (rcd->gen == rxq->comp_ring.gen) {
845                 struct rte_mbuf *newm;
846
847                 if (nb_rx >= nb_pkts)
848                         break;
849
850                 newm = rte_mbuf_raw_alloc(rxq->mp);
851                 if (unlikely(newm == NULL)) {
852                         PMD_RX_LOG(ERR, "Error allocating mbuf");
853                         rxq->stats.rx_buf_alloc_failure++;
854                         break;
855                 }
856
857                 idx = rcd->rxdIdx;
858                 ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
859                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
860                 RTE_SET_USED(rxd); /* used only for assert when enabled */
861                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
862
863                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
864
865                 RTE_ASSERT(rcd->len <= rxd->len);
866                 RTE_ASSERT(rbi->m);
867
868                 /* Get the packet buffer pointer from buf_info */
869                 rxm = rbi->m;
870
871                 /* Clear descriptor associated buf_info to be reused */
872                 rbi->m = NULL;
873                 rbi->bufPA = 0;
874
875                 /* Record the command-ring index at which this packet arrived */
876                 rxq->cmd_ring[ring_idx].next2comp = idx;
877
878                 /* For RCD with EOP set, check if there is frame error */
879                 if (unlikely(rcd->eop && rcd->err)) {
880                         rxq->stats.drop_total++;
881                         rxq->stats.drop_err++;
882
883                         if (!rcd->fcs) {
884                                 rxq->stats.drop_fcs++;
885                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
886                         }
887                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
888                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
889                                          rxq->comp_ring.base), rcd->rxdIdx);
890                         rte_pktmbuf_free_seg(rxm);
891                         if (rxq->start_seg) {
892                                 struct rte_mbuf *start = rxq->start_seg;
893
894                                 rxq->start_seg = NULL;
895                                 rte_pktmbuf_free(start);
896                         }
897                         goto rcd_done;
898                 }
899
900                 /* Initialize newly received packet buffer */
901                 rxm->port = rxq->port_id;
902                 rxm->nb_segs = 1;
903                 rxm->next = NULL;
904                 rxm->pkt_len = (uint16_t)rcd->len;
905                 rxm->data_len = (uint16_t)rcd->len;
906                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
907                 rxm->ol_flags = 0;
908                 rxm->vlan_tci = 0;
909                 rxm->packet_type = 0;
910
911                 /*
912                  * If this is the first buffer of the received packet,
913                  * set the pointer to the first mbuf of the packet
914                  * Otherwise, update the total length and the number of segments
915                  * of the current scattered packet, and update the pointer to
916                  * the last mbuf of the current packet.
917                  */
918                 if (rcd->sop) {
919                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
920
921                         if (unlikely(rcd->len == 0)) {
922                                 RTE_ASSERT(rcd->eop);
923
924                                 PMD_RX_LOG(DEBUG,
925                                            "Rx buf was skipped. rxring[%d][%d])",
926                                            ring_idx, idx);
927                                 rte_pktmbuf_free_seg(rxm);
928                                 goto rcd_done;
929                         }
930
931                         if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
932                                 uint8_t *rdd = rxq->data_ring.base +
933                                         idx * rxq->data_desc_size;
934
935                                 RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
936                                 rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
937                                            rdd, rcd->len);
938                         }
939
940                         rxq->start_seg = rxm;
941                         rxq->last_seg = rxm;
942                         vmxnet3_rx_offload(hw, rcd, rxm, 1);
943                 } else {
944                         struct rte_mbuf *start = rxq->start_seg;
945
946                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
947
948                         if (rxm->data_len) {
949                                 start->pkt_len += rxm->data_len;
950                                 start->nb_segs++;
951
952                                 rxq->last_seg->next = rxm;
953                                 rxq->last_seg = rxm;
954                         } else {
955                                 rte_pktmbuf_free_seg(rxm);
956                         }
957                 }
958
959                 if (rcd->eop) {
960                         struct rte_mbuf *start = rxq->start_seg;
961
962                         vmxnet3_rx_offload(hw, rcd, start, 0);
963                         rx_pkts[nb_rx++] = start;
964                         rxq->start_seg = NULL;
965                 }
966
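                /*
                 * rcd_done: recycle the command-ring slot with the fresh mbuf
                 * and, if the device requested it (updateRxProd), publish the
                 * new producer index through the RXPROD doorbell.
                 */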
967 rcd_done:
968                 rxq->cmd_ring[ring_idx].next2comp = idx;
969                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
970                                           rxq->cmd_ring[ring_idx].size);
971
972                 /* It's time to renew descriptors */
973                 vmxnet3_renew_desc(rxq, ring_idx, newm);
974                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
975                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
976                                                rxq->cmd_ring[ring_idx].next2fill);
977                 }
978
979                 /* Advance to the next descriptor in comp_ring */
980                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
981
982                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
983                 nb_rxd++;
984                 if (nb_rxd > rxq->cmd_ring[0].size) {
985                         PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
986                                    " relinquish control.");
987                         break;
988                 }
989         }
990
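        /*
         * If nothing was received, still try to replenish any ring that has
         * free slots (e.g. after earlier mbuf allocation failures) so the
         * device does not stall waiting for Rx buffers.
         */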
991         if (unlikely(nb_rxd == 0)) {
992                 uint32_t avail;
993                 for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
994                         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
995                         if (unlikely(avail > 0)) {
996                                 /* try to alloc new buf and renew descriptors */
997                                 vmxnet3_post_rx_bufs(rxq, ring_idx);
998                         }
999                 }
1000                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1001                         for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1002                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1003                                                        rxq->cmd_ring[ring_idx].next2fill);
1004                         }
1005                 }
1006         }
1007
1008         return nb_rx;
1009 }
1010
1011 int
1012 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1013                            uint16_t queue_idx,
1014                            uint16_t nb_desc,
1015                            unsigned int socket_id,
1016                            const struct rte_eth_txconf *tx_conf __rte_unused)
1017 {
1018         struct vmxnet3_hw *hw = dev->data->dev_private;
1019         const struct rte_memzone *mz;
1020         struct vmxnet3_tx_queue *txq;
1021         struct vmxnet3_cmd_ring *ring;
1022         struct vmxnet3_comp_ring *comp_ring;
1023         struct vmxnet3_data_ring *data_ring;
1024         int size;
1025
1026         PMD_INIT_FUNC_TRACE();
1027
1028         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1029                           RTE_CACHE_LINE_SIZE);
1030         if (txq == NULL) {
1031                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1032                 return -ENOMEM;
1033         }
1034
1035         txq->queue_id = queue_idx;
1036         txq->port_id = dev->data->port_id;
1037         txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1038         txq->hw = hw;
1039         txq->qid = queue_idx;
1040         txq->stopped = TRUE;
1041         txq->txdata_desc_size = hw->txdata_desc_size;
1042
1043         ring = &txq->cmd_ring;
1044         comp_ring = &txq->comp_ring;
1045         data_ring = &txq->data_ring;
1046
1047         /* Tx vmxnet ring length should be between 512-4096 */
1048         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1049                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1050                              VMXNET3_DEF_TX_RING_SIZE);
1051                 return -EINVAL;
1052         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1053                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1054                              VMXNET3_TX_RING_MAX_SIZE);
1055                 return -EINVAL;
1056         } else {
1057                 ring->size = nb_desc;
1058                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
1059         }
1060         comp_ring->size = data_ring->size = ring->size;
1061
1062         /* Tx vmxnet rings structure initialization */
1063         ring->next2fill = 0;
1064         ring->next2comp = 0;
1065         ring->gen = VMXNET3_INIT_GEN;
1066         comp_ring->next2proc = 0;
1067         comp_ring->gen = VMXNET3_INIT_GEN;
1068
1069         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1070         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1071         size += txq->txdata_desc_size * data_ring->size;
1072
1073         mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1074                                       VMXNET3_RING_BA_ALIGN, socket_id);
1075         if (mz == NULL) {
1076                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1077                 return -ENOMEM;
1078         }
1079         txq->mz = mz;
1080         memset(mz->addr, 0, mz->len);
1081
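        /*
         * Lay out the three rings back to back inside the single memzone:
         * Tx descriptors, then Tx completion descriptors, then the data ring.
         * The basePA offsets below mirror this layout.
         */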
1082         /* cmd_ring initialization */
1083         ring->base = mz->addr;
1084         ring->basePA = mz->iova;
1085
1086         /* comp_ring initialization */
1087         comp_ring->base = ring->base + ring->size;
1088         comp_ring->basePA = ring->basePA +
1089                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
1090
1091         /* data_ring initialization */
1092         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1093         data_ring->basePA = comp_ring->basePA +
1094                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1095
1096         /* cmd_ring0 buf_info allocation */
1097         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1098                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1099         if (ring->buf_info == NULL) {
1100                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1101                 return -ENOMEM;
1102         }
1103
1104         /* Update the data portion with txq */
1105         dev->data->tx_queues[queue_idx] = txq;
1106
1107         return 0;
1108 }
1109
1110 int
1111 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1112                            uint16_t queue_idx,
1113                            uint16_t nb_desc,
1114                            unsigned int socket_id,
1115                            __rte_unused const struct rte_eth_rxconf *rx_conf,
1116                            struct rte_mempool *mp)
1117 {
1118         const struct rte_memzone *mz;
1119         struct vmxnet3_rx_queue *rxq;
1120         struct vmxnet3_hw *hw = dev->data->dev_private;
1121         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1122         struct vmxnet3_comp_ring *comp_ring;
1123         struct vmxnet3_rx_data_ring *data_ring;
1124         int size;
1125         uint8_t i;
1126         char mem_name[32];
1127
1128         PMD_INIT_FUNC_TRACE();
1129
1130         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1131                           RTE_CACHE_LINE_SIZE);
1132         if (rxq == NULL) {
1133                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1134                 return -ENOMEM;
1135         }
1136
1137         rxq->mp = mp;
1138         rxq->queue_id = queue_idx;
1139         rxq->port_id = dev->data->port_id;
1140         rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1141         rxq->hw = hw;
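        /*
         * The device addresses every hardware ring by a global queue id:
         * command ring 0 uses queue_idx, ring 1 is offset by num_rx_queues and
         * the Rx data ring by 2 * num_rx_queues.  The completion path maps a
         * descriptor's rqID back to a ring via vmxnet3_get_ring_idx().
         */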
1142         rxq->qid1 = queue_idx;
1143         rxq->qid2 = queue_idx + hw->num_rx_queues;
1144         rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1145         rxq->data_desc_size = hw->rxdata_desc_size;
1146         rxq->stopped = TRUE;
1147
1148         ring0 = &rxq->cmd_ring[0];
1149         ring1 = &rxq->cmd_ring[1];
1150         comp_ring = &rxq->comp_ring;
1151         data_ring = &rxq->data_ring;
1152
1153         /* Rx vmxnet ring length should be between 256 and 4096 */
1154         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1155                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1156                 return -EINVAL;
1157         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1158                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1159                 return -EINVAL;
1160         } else {
1161                 ring0->size = nb_desc;
1162                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1163                 ring1->size = ring0->size;
1164         }
1165
1166         comp_ring->size = ring0->size + ring1->size;
1167         data_ring->size = ring0->size;
1168
1169         /* Rx vmxnet rings structure initialization */
1170         ring0->next2fill = 0;
1171         ring1->next2fill = 0;
1172         ring0->next2comp = 0;
1173         ring1->next2comp = 0;
1174         ring0->gen = VMXNET3_INIT_GEN;
1175         ring1->gen = VMXNET3_INIT_GEN;
1176         comp_ring->next2proc = 0;
1177         comp_ring->gen = VMXNET3_INIT_GEN;
1178
1179         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1180         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1181         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1182                 size += rxq->data_desc_size * data_ring->size;
1183
1184         mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1185                                       VMXNET3_RING_BA_ALIGN, socket_id);
1186         if (mz == NULL) {
1187                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1188                 return -ENOMEM;
1189         }
1190         rxq->mz = mz;
1191         memset(mz->addr, 0, mz->len);
1192
1193         /* cmd_ring0 initialization */
1194         ring0->base = mz->addr;
1195         ring0->basePA = mz->iova;
1196
1197         /* cmd_ring1 initialization */
1198         ring1->base = ring0->base + ring0->size;
1199         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1200
1201         /* comp_ring initialization */
1202         comp_ring->base = ring1->base + ring1->size;
1203         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1204                 ring1->size;
1205
1206         /* data_ring initialization */
1207         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1208                 data_ring->base =
1209                         (uint8_t *)(comp_ring->base + comp_ring->size);
1210                 data_ring->basePA = comp_ring->basePA +
1211                         sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1212         }
1213
1214         /* cmd_ring0-cmd_ring1 buf_info allocation */
1215         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1216
1217                 ring = &rxq->cmd_ring[i];
1218                 ring->rid = i;
1219                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1220
1221                 ring->buf_info = rte_zmalloc(mem_name,
1222                                              ring->size * sizeof(vmxnet3_buf_info_t),
1223                                              RTE_CACHE_LINE_SIZE);
1224                 if (ring->buf_info == NULL) {
1225                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1226                         return -ENOMEM;
1227                 }
1228         }
1229
1230         /* Update the data portion with rxq */
1231         dev->data->rx_queues[queue_idx] = rxq;
1232
1233         return 0;
1234 }
1235
1236 /*
1237  * Initializes Receive Unit
1238  * Load mbufs in rx queue in advance
1239  */
1240 int
1241 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1242 {
1243         struct vmxnet3_hw *hw = dev->data->dev_private;
1244
1245         int i, ret;
1246         uint8_t j;
1247
1248         PMD_INIT_FUNC_TRACE();
1249
1250         for (i = 0; i < hw->num_rx_queues; i++) {
1251                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1252
1253                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1254                         /* vmxnet3_post_rx_bufs() fills all free descriptors in the ring */
1255                         ret = vmxnet3_post_rx_bufs(rxq, j);
1256                         if (ret <= 0) {
1257                                 PMD_INIT_LOG(ERR,
1258                                              "ERROR: Posting Rxq: %d buffers ring: %d",
1259                                              i, j);
1260                                 return -ret;
1261                         }
1262                         /*
1263                          * Update the device with next2fill so it knows which
1264                          * descriptors now hold fresh mbufs for incoming packets.
1265                          */
1266                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1267                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1268                                                        rxq->cmd_ring[j].next2fill);
1269                         }
1270                 }
1271                 rxq->stopped = FALSE;
1272                 rxq->start_seg = NULL;
1273         }
1274
1275         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1276                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1277
1278                 txq->stopped = FALSE;
1279         }
1280
1281         return 0;
1282 }
1283
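/*
 * Default 40-byte Toeplitz RSS key (the widely used "Intel" key, as the name
 * suggests), applied when the application does not supply its own key.
 */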
1284 static uint8_t rss_intel_key[40] = {
1285         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1286         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1287         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1288         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1289         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1290 };
1291
1292 /*
1293  * Configure RSS feature
1294  */
1295 int
1296 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1297 {
1298         struct vmxnet3_hw *hw = dev->data->dev_private;
1299         struct VMXNET3_RSSConf *dev_rss_conf;
1300         struct rte_eth_rss_conf *port_rss_conf;
1301         uint64_t rss_hf;
1302         uint8_t i, j;
1303
1304         PMD_INIT_FUNC_TRACE();
1305
1306         dev_rss_conf = hw->rss_conf;
1307         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1308
1309         /* loading hashFunc */
1310         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1311         /* loading hashKeySize */
1312         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1313         /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1314         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1315
1316         if (port_rss_conf->rss_key == NULL) {
1317                 /* Default hash key */
1318                 port_rss_conf->rss_key = rss_intel_key;
1319         }
1320
1321         /* loading hashKey */
1322         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1323                dev_rss_conf->hashKeySize);
1324
1325         /* loading indTable */
1326         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1327                 if (j == dev->data->nb_rx_queues)
1328                         j = 0;
1329                 dev_rss_conf->indTable[i] = j;
1330         }
1331
1332         /* loading hashType */
1333         dev_rss_conf->hashType = 0;
1334         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1335         if (rss_hf & ETH_RSS_IPV4)
1336                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1337         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1338                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1339         if (rss_hf & ETH_RSS_IPV6)
1340                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1341         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1342                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1343
1344         return VMXNET3_SUCCESS;
1345 }