deb_dpdk.git: drivers/net/vmxnet3/vmxnet3_rxtx.c (imported upstream version 17.05.2)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_eal.h>
56 #include <rte_per_lcore.h>
57 #include <rte_lcore.h>
58 #include <rte_atomic.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_mempool.h>
61 #include <rte_malloc.h>
62 #include <rte_mbuf.h>
63 #include <rte_ether.h>
64 #include <rte_ethdev.h>
65 #include <rte_prefetch.h>
66 #include <rte_ip.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_net.h>
73
74 #include "base/vmxnet3_defs.h"
75 #include "vmxnet3_ring.h"
76
77 #include "vmxnet3_logs.h"
78 #include "vmxnet3_ethdev.h"
79
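/* Tx offload flags supported by this PMD; vmxnet3_prep_pkts() rejects
 * any flag outside this set.
 */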
80 #define VMXNET3_TX_OFFLOAD_MASK ( \
81                 PKT_TX_VLAN_PKT | \
82                 PKT_TX_L4_MASK |  \
83                 PKT_TX_TCP_SEG)
84
85 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK  \
86         (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
87
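/* BAR0 registers used to publish the Rx producer index, one per Rx
 * command ring.
 */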
88 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
89
90 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
91 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
92 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
93 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
94 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
95 #endif
96
97 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
98 static void
99 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
100 {
101         uint32_t avail = 0;
102
103         if (rxq == NULL)
104                 return;
105
106         PMD_RX_LOG(DEBUG,
107                    "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
108                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
109         PMD_RX_LOG(DEBUG,
110                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
111                    (unsigned long)rxq->cmd_ring[0].basePA,
112                    (unsigned long)rxq->cmd_ring[1].basePA,
113                    (unsigned long)rxq->comp_ring.basePA);
114
115         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
116         PMD_RX_LOG(DEBUG,
117                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
118                    (uint32_t)rxq->cmd_ring[0].size, avail,
119                    rxq->comp_ring.next2proc,
120                    rxq->cmd_ring[0].size - avail);
121
122         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
123         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
124                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
125                    rxq->cmd_ring[1].size - avail);
126
127 }
128
129 static void
130 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
131 {
132         uint32_t avail = 0;
133
134         if (txq == NULL)
135                 return;
136
137         PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
138                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
139         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
140                    (unsigned long)txq->cmd_ring.basePA,
141                    (unsigned long)txq->comp_ring.basePA,
142                    (unsigned long)txq->data_ring.basePA);
143
144         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
145         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
146                    (uint32_t)txq->cmd_ring.size, avail,
147                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
148 }
149 #endif
150
151 static void
152 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
153 {
154         while (ring->next2comp != ring->next2fill) {
155                 /* No need to worry about desc ownership, device is quiesced by now. */
156                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
157
158                 if (buf_info->m) {
159                         rte_pktmbuf_free(buf_info->m);
160                         buf_info->m = NULL;
161                         buf_info->bufPA = 0;
162                         buf_info->len = 0;
163                 }
164                 vmxnet3_cmd_ring_adv_next2comp(ring);
165         }
166 }
167
168 static void
169 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
170 {
171         uint32_t i;
172
173         for (i = 0; i < ring->size; i++) {
174                 /* No need to worry about desc ownership, device is quiesced by now. */
175                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
176
177                 if (buf_info->m) {
178                         rte_pktmbuf_free_seg(buf_info->m);
179                         buf_info->m = NULL;
180                         buf_info->bufPA = 0;
181                         buf_info->len = 0;
182                 }
183                 vmxnet3_cmd_ring_adv_next2comp(ring);
184         }
185 }
186
187 static void
188 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
189 {
190         rte_free(ring->buf_info);
191         ring->buf_info = NULL;
192 }
193
194 void
195 vmxnet3_dev_tx_queue_release(void *txq)
196 {
197         vmxnet3_tx_queue_t *tq = txq;
198
199         if (tq != NULL) {
200                 /* Release mbufs */
201                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
202                 /* Release the cmd_ring */
203                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
204                 /* Release the memzone */
205                 rte_memzone_free(tq->mz);
206         }
207 }
208
209 void
210 vmxnet3_dev_rx_queue_release(void *rxq)
211 {
212         int i;
213         vmxnet3_rx_queue_t *rq = rxq;
214
215         if (rq != NULL) {
216                 /* Release mbufs */
217                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
218                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
219
220                 /* Release both the cmd_rings */
221                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
222                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
223
224                 /* Release the memzone */
225                 rte_memzone_free(rq->mz);
226         }
227 }
228
229 static void
230 vmxnet3_dev_tx_queue_reset(void *txq)
231 {
232         vmxnet3_tx_queue_t *tq = txq;
233         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
234         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
235         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
236         int size;
237
238         if (tq != NULL) {
239                 /* Release the cmd_ring mbufs */
240                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
241         }
242
243         /* Tx vmxnet rings structure initialization */
244         ring->next2fill = 0;
245         ring->next2comp = 0;
246         ring->gen = VMXNET3_INIT_GEN;
247         comp_ring->next2proc = 0;
248         comp_ring->gen = VMXNET3_INIT_GEN;
249
250         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
251         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
252         size += tq->txdata_desc_size * data_ring->size;
253
254         memset(ring->base, 0, size);
255 }
256
257 static void
258 vmxnet3_dev_rx_queue_reset(void *rxq)
259 {
260         int i;
261         vmxnet3_rx_queue_t *rq = rxq;
262         struct vmxnet3_hw *hw = rq->hw;
263         struct vmxnet3_cmd_ring *ring0, *ring1;
264         struct vmxnet3_comp_ring *comp_ring;
265         struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
266         int size;
267
268         if (rq != NULL) {
269                 /* Release both the cmd_rings mbufs */
270                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
271                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
272         }
273
274         ring0 = &rq->cmd_ring[0];
275         ring1 = &rq->cmd_ring[1];
276         comp_ring = &rq->comp_ring;
277
278         /* Rx vmxnet rings structure initialization */
279         ring0->next2fill = 0;
280         ring1->next2fill = 0;
281         ring0->next2comp = 0;
282         ring1->next2comp = 0;
283         ring0->gen = VMXNET3_INIT_GEN;
284         ring1->gen = VMXNET3_INIT_GEN;
285         comp_ring->next2proc = 0;
286         comp_ring->gen = VMXNET3_INIT_GEN;
287
288         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
289         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
290         if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
291                 size += rq->data_desc_size * data_ring->size;
292
293         memset(ring0->base, 0, size);
294 }
295
296 void
297 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
298 {
299         unsigned i;
300
301         PMD_INIT_FUNC_TRACE();
302
303         for (i = 0; i < dev->data->nb_tx_queues; i++) {
304                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
305
306                 if (txq != NULL) {
307                         txq->stopped = TRUE;
308                         vmxnet3_dev_tx_queue_reset(txq);
309                 }
310         }
311
312         for (i = 0; i < dev->data->nb_rx_queues; i++) {
313                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
314
315                 if (rxq != NULL) {
316                         rxq->stopped = TRUE;
317                         vmxnet3_dev_rx_queue_reset(rxq);
318                 }
319         }
320 }
321
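/* Handle one Tx completion: free the mbuf attached to the EOP descriptor
 * and retire every command descriptor belonging to that packet. Returns
 * the number of command descriptors completed.
 */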
322 static int
323 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
324 {
325         int completed = 0;
326         struct rte_mbuf *mbuf;
327
328         /* Release cmd_ring descriptor and free mbuf */
329         RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
330
331         mbuf = txq->cmd_ring.buf_info[eop_idx].m;
332         if (mbuf == NULL)
333                 rte_panic("EOP desc does not point to a valid mbuf");
334         rte_pktmbuf_free(mbuf);
335
336         txq->cmd_ring.buf_info[eop_idx].m = NULL;
337
338         while (txq->cmd_ring.next2comp != eop_idx) {
339                 /* no out-of-order completion */
340                 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
341                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
342                 completed++;
343         }
344
345         /* Mark the txd for which tcd was generated as completed */
346         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
347
348         return completed + 1;
349 }
350
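/* Drain the Tx completion ring: while the gen bit shows a descriptor is
 * owned by the driver, release the corresponding packet and its command
 * descriptors via vmxnet3_unmap_pkt().
 */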
351 static void
352 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
353 {
354         int completed = 0;
355         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
356         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
357                 (comp_ring->base + comp_ring->next2proc);
358
359         while (tcd->gen == comp_ring->gen) {
360                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
361
362                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
363                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
364                                                     comp_ring->next2proc);
365         }
366
367         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
368 }
369
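/* tx_pkt_prepare callback: validate segment counts and offload flags and
 * apply the pseudo-header checksum fixups expected by the device before
 * the packets are handed to vmxnet3_xmit_pkts().
 */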
370 uint16_t
371 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
372         uint16_t nb_pkts)
373 {
374         int32_t ret;
375         uint32_t i;
376         uint64_t ol_flags;
377         struct rte_mbuf *m;
378
379         for (i = 0; i != nb_pkts; i++) {
380                 m = tx_pkts[i];
381                 ol_flags = m->ol_flags;
382
383                 /* Non-TSO packet cannot occupy more than
384                  * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
385                  */
386                 if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
387                                 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
388                         rte_errno = EINVAL; /* rte_errno takes a positive errno value */
389                         return i;
390                 }
391
392                 /* check that only supported TX offloads are requested. */
393                 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
394                                 (ol_flags & PKT_TX_L4_MASK) ==
395                                 PKT_TX_SCTP_CKSUM) {
396                         rte_errno = ENOTSUP;
397                         return i;
398                 }
399
400 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
401                 ret = rte_validate_tx_offload(m);
402                 if (ret != 0) {
403                         rte_errno = -ret; /* ret is a negative errno */
404                         return i;
405                 }
406 #endif
407                 ret = rte_net_intel_cksum_prepare(m);
408                 if (ret != 0) {
409                         rte_errno = -ret;
410                         return i;
411                 }
412         }
413
414         return i;
415 }
416
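/* Burst transmit: reclaim completed descriptors first, then map each mbuf
 * chain onto command descriptors (small single-segment frames are copied
 * through the Tx data ring), fill VLAN/checksum/TSO fields on the SOP
 * descriptor and flip its gen bit last to hand the packet to the device.
 */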
417 uint16_t
418 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
419                   uint16_t nb_pkts)
420 {
421         uint16_t nb_tx;
422         vmxnet3_tx_queue_t *txq = tx_queue;
423         struct vmxnet3_hw *hw = txq->hw;
424         Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
425         uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
426
427         if (unlikely(txq->stopped)) {
428                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
429                 return 0;
430         }
431
432         /* Free up the comp_descriptors aggressively */
433         vmxnet3_tq_tx_complete(txq);
434
435         nb_tx = 0;
436         while (nb_tx < nb_pkts) {
437                 Vmxnet3_GenericDesc *gdesc;
438                 vmxnet3_buf_info_t *tbi;
439                 uint32_t first2fill, avail, dw2;
440                 struct rte_mbuf *txm = tx_pkts[nb_tx];
441                 struct rte_mbuf *m_seg = txm;
442                 int copy_size = 0;
443                 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
444                 /* # of descriptors needed for a packet. */
445                 unsigned count = txm->nb_segs;
446
447                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
448                 if (count > avail) {
449                         /* Is command ring full? */
450                         if (unlikely(avail == 0)) {
451                                 PMD_TX_LOG(DEBUG, "No free ring descriptors");
452                                 txq->stats.tx_ring_full++;
453                                 txq->stats.drop_total += (nb_pkts - nb_tx);
454                                 break;
455                         }
456
457                         /* Command ring is not full but cannot handle the
458                          * multi-segmented packet. Let's try the next packet
459                          * in this case.
460                          */
461                         PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
462                                    "(avail %d needed %d)", avail, count);
463                         txq->stats.drop_total++;
464                         if (tso)
465                                 txq->stats.drop_tso++;
466                         rte_pktmbuf_free(txm);
467                         nb_tx++;
468                         continue;
469                 }
470
471                 /* Drop non-TSO packet that is excessively fragmented */
472                 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
473                         PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
474                                    "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
475                         txq->stats.drop_too_many_segs++;
476                         txq->stats.drop_total++;
477                         rte_pktmbuf_free(txm);
478                         nb_tx++;
479                         continue;
480                 }
481
482                 if (txm->nb_segs == 1 &&
483                     rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
484                         struct Vmxnet3_TxDataDesc *tdd;
485
486                         tdd = (struct Vmxnet3_TxDataDesc *)
487                                 ((uint8 *)txq->data_ring.base +
488                                  txq->cmd_ring.next2fill *
489                                  txq->txdata_desc_size);
490                         copy_size = rte_pktmbuf_pkt_len(txm);
491                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
492                 }
493
494                 /* use the previous gen bit for the SOP desc */
495                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
496                 first2fill = txq->cmd_ring.next2fill;
497                 do {
498                         /* Remember the transmit buffer for cleanup */
499                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
500
501                         /* NB: the following assumes that VMXNET3 maximum
502                          * transmit buffer size (16K) is greater than
503                          * maximum size of mbuf segment size.
504                          */
505                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
506                         if (copy_size) {
507                                 uint64 offset = txq->cmd_ring.next2fill *
508                                                 txq->txdata_desc_size;
509                                 gdesc->txd.addr =
510                                         rte_cpu_to_le_64(txq->data_ring.basePA +
511                                                          offset);
512                         } else {
513                                 gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);
514                         }
515
516                         gdesc->dword[2] = dw2 | m_seg->data_len;
517                         gdesc->dword[3] = 0;
518
519                         /* move to the next2fill descriptor */
520                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
521
522                         /* use the right gen for non-SOP desc */
523                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
524                 } while ((m_seg = m_seg->next) != NULL);
525
526                 /* set the last buf_info for the pkt */
527                 tbi->m = txm;
528                 /* Update the EOP descriptor */
529                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
530
531                 /* Add VLAN tag if present */
532                 gdesc = txq->cmd_ring.base + first2fill;
533                 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
534                         gdesc->txd.ti = 1;
535                         gdesc->txd.tci = txm->vlan_tci;
536                 }
537
538                 if (tso) {
539                         uint16_t mss = txm->tso_segsz;
540
541                         RTE_ASSERT(mss > 0);
542
543                         gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
544                         gdesc->txd.om = VMXNET3_OM_TSO;
545                         gdesc->txd.msscof = mss;
546
547                         deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
548                 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
549                         gdesc->txd.om = VMXNET3_OM_CSUM;
550                         gdesc->txd.hlen = txm->l2_len + txm->l3_len;
551
552                         switch (txm->ol_flags & PKT_TX_L4_MASK) {
553                         case PKT_TX_TCP_CKSUM:
554                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
555                                 break;
556                         case PKT_TX_UDP_CKSUM:
557                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
558                                 break;
559                         default:
560                                 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
561                                            txm->ol_flags & PKT_TX_L4_MASK);
562                                 abort();
563                         }
564                         deferred++;
565                 } else {
566                         gdesc->txd.hlen = 0;
567                         gdesc->txd.om = VMXNET3_OM_NONE;
568                         gdesc->txd.msscof = 0;
569                         deferred++;
570                 }
571
572                 /* flip the GEN bit on the SOP */
573                 rte_compiler_barrier();
574                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
575
576                 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
577                 nb_tx++;
578         }
579
580         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
581
582         if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
583                 txq_ctrl->txNumDeferred = 0;
584                 /* Notify vSwitch that packets are available. */
585                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
586                                        txq->cmd_ring.next2fill);
587         }
588
589         return nb_tx;
590 }
591
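/* Refill one Rx descriptor with a fresh mbuf; ring 0 takes HEAD buffers,
 * ring 1 takes BODY buffers, and writing the gen bit returns the
 * descriptor to the device.
 */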
592 static inline void
593 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
594                    struct rte_mbuf *mbuf)
595 {
596         uint32_t val = 0;
597         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
598         struct Vmxnet3_RxDesc *rxd =
599                 (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
600         vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
601
602         if (ring_id == 0)
603                 val = VMXNET3_RXD_BTYPE_HEAD;
604         else
605                 val = VMXNET3_RXD_BTYPE_BODY;
606
607         buf_info->m = mbuf;
608         buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
609         buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);
610
611         rxd->addr = buf_info->bufPA;
612         rxd->btype = val;
613         rxd->len = buf_info->len;
614         rxd->gen = ring->gen;
615
616         vmxnet3_cmd_ring_adv_next2fill(ring);
617 }
618 /*
619  *  Allocates mbufs and posts Rx descriptors with the buffer details so that
620  *  the device can receive packets into those buffers.
621  *  Ring layout:
622  *      Of the two rings, the 1st ring contains buffers of type 0 and type 1.
623  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
624  *      by a frame fit in the 1st ring (1st buf of type 0, the rest of type 1).
625  *      The 2nd ring contains buffers of type 1 only and is used mostly
626  *      for LRO.
627  */
628 static int
629 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
630 {
631         int err = 0;
632         uint32_t i = 0, val = 0;
633         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
634
635         if (ring_id == 0) {
636                 /* Usually: One HEAD type buf per packet
637                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
638                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
639                  */
640
641                 /* We use a single buffer per packet, so ring 0 holds only HEAD buffers */
642                 val = VMXNET3_RXD_BTYPE_HEAD;
643         } else {
644                 /* All BODY type buffers for 2nd ring */
645                 val = VMXNET3_RXD_BTYPE_BODY;
646         }
647
648         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
649                 struct Vmxnet3_RxDesc *rxd;
650                 struct rte_mbuf *mbuf;
651                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
652
653                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
654
655                 /* Allocate blank mbuf for the current Rx Descriptor */
656                 mbuf = rte_mbuf_raw_alloc(rxq->mp);
657                 if (unlikely(mbuf == NULL)) {
658                         PMD_RX_LOG(ERR, "Error allocating mbuf");
659                         rxq->stats.rx_buf_alloc_failure++;
660                         err = ENOMEM;
661                         break;
662                 }
663
664                 /*
665                  * Store the mbuf pointer in buf_info[next2fill]; the buf_info
666                  * entry plays the same role as the cookie in a virtio virtqueue.
667                  */
668                 buf_info->m = mbuf;
669                 buf_info->len = (uint16_t)(mbuf->buf_len -
670                                            RTE_PKTMBUF_HEADROOM);
671                 buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);
672
673                 /* Load Rx Descriptor with the buffer's GPA */
674                 rxd->addr = buf_info->bufPA;
675
676                 /* After this point rxd->addr MUST not be NULL */
677                 rxd->btype = val;
678                 rxd->len = buf_info->len;
679                 /* Flip gen bit at the end to change ownership */
680                 rxd->gen = ring->gen;
681
682                 vmxnet3_cmd_ring_adv_next2fill(ring);
683                 i++;
684         }
685
686         /* Return error only if no buffers are posted at present */
687         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
688                 return -err;
689         else
690                 return i;
691 }
692
693
694 /* Receive side checksum and other offloads */
695 static void
696 vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
697 {
698         /* Check for RSS */
699         if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
700                 rxm->ol_flags |= PKT_RX_RSS_HASH;
701                 rxm->hash.rss = rcd->rssHash;
702         }
703
704         /* Check packet type, checksum errors, etc. Only support IPv4 for now. */
705         if (rcd->v4) {
706                 struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
707                 struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
708
709                 if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
710                         rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
711                 else
712                         rxm->packet_type = RTE_PTYPE_L3_IPV4;
713
714                 if (!rcd->cnc) {
715                         if (!rcd->ipc)
716                                 rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
717
718                         if ((rcd->tcp || rcd->udp) && !rcd->tuc)
719                                 rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
720                 }
721         }
722 }
723
724 /*
725  * Process the Rx Completion Ring of given vmxnet3_rx_queue
726  * for nb_pkts burst and return the number of packets received
727  */
728 uint16_t
729 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
730 {
731         uint16_t nb_rx;
732         uint32_t nb_rxd, idx;
733         uint8_t ring_idx;
734         vmxnet3_rx_queue_t *rxq;
735         Vmxnet3_RxCompDesc *rcd;
736         vmxnet3_buf_info_t *rbi;
737         Vmxnet3_RxDesc *rxd;
738         struct rte_mbuf *rxm = NULL;
739         struct vmxnet3_hw *hw;
740
741         nb_rx = 0;
742         ring_idx = 0;
743         nb_rxd = 0;
744         idx = 0;
745
746         rxq = rx_queue;
747         hw = rxq->hw;
748
749         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
750
751         if (unlikely(rxq->stopped)) {
752                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
753                 return 0;
754         }
755
756         while (rcd->gen == rxq->comp_ring.gen) {
757                 struct rte_mbuf *newm;
758
759                 if (nb_rx >= nb_pkts)
760                         break;
761
762                 newm = rte_mbuf_raw_alloc(rxq->mp);
763                 if (unlikely(newm == NULL)) {
764                         PMD_RX_LOG(ERR, "Error allocating mbuf");
765                         rxq->stats.rx_buf_alloc_failure++;
766                         break;
767                 }
768
769                 idx = rcd->rxdIdx;
770                 ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
771                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
772                 RTE_SET_USED(rxd); /* used only for assert when enabled */
773                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
774
775                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
776
777                 RTE_ASSERT(rcd->len <= rxd->len);
778                 RTE_ASSERT(rbi->m);
779
780                 /* Get the packet buffer pointer from buf_info */
781                 rxm = rbi->m;
782
783                 /* Clear descriptor associated buf_info to be reused */
784                 rbi->m = NULL;
785                 rbi->bufPA = 0;
786
787                 /* Update the index that we received a packet */
788                 rxq->cmd_ring[ring_idx].next2comp = idx;
789
790                 /* For RCD with EOP set, check if there is frame error */
791                 if (unlikely(rcd->eop && rcd->err)) {
792                         rxq->stats.drop_total++;
793                         rxq->stats.drop_err++;
794
795                         if (!rcd->fcs) {
796                                 rxq->stats.drop_fcs++;
797                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
798                         }
799                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
800                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
801                                          rxq->comp_ring.base), rcd->rxdIdx);
802                         rte_pktmbuf_free_seg(rxm);
803                         if (rxq->start_seg) {
804                                 struct rte_mbuf *start = rxq->start_seg;
805
806                                 rxq->start_seg = NULL;
807                                 rte_pktmbuf_free(start);
808                         }
809                         goto rcd_done;
810                 }
811
812                 /* Initialize newly received packet buffer */
813                 rxm->port = rxq->port_id;
814                 rxm->nb_segs = 1;
815                 rxm->next = NULL;
816                 rxm->pkt_len = (uint16_t)rcd->len;
817                 rxm->data_len = (uint16_t)rcd->len;
818                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
819                 rxm->ol_flags = 0;
820                 rxm->vlan_tci = 0;
821
822                 /*
823                  * If this is the first buffer of the received packet,
824                  * set the pointer to the first mbuf of the packet
825                  * Otherwise, update the total length and the number of segments
826                  * of the current scattered packet, and update the pointer to
827                  * the last mbuf of the current packet.
828                  */
829                 if (rcd->sop) {
830                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
831
832                         if (unlikely(rcd->len == 0)) {
833                                 RTE_ASSERT(rcd->eop);
834
835                                 PMD_RX_LOG(DEBUG,
836                                            "Rx buf was skipped. rxring[%d][%d])",
837                                            ring_idx, idx);
838                                 rte_pktmbuf_free_seg(rxm);
839                                 goto rcd_done;
840                         }
841
842                         if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
843                                 uint8_t *rdd = rxq->data_ring.base +
844                                         idx * rxq->data_desc_size;
845
846                                 RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
847                                 rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
848                                            rdd, rcd->len);
849                         }
850
851                         rxq->start_seg = rxm;
852                         vmxnet3_rx_offload(rcd, rxm);
853                 } else {
854                         struct rte_mbuf *start = rxq->start_seg;
855
856                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
857
858                         start->pkt_len += rxm->data_len;
859                         start->nb_segs++;
860
861                         rxq->last_seg->next = rxm;
862                 }
863                 rxq->last_seg = rxm;
864
865                 if (rcd->eop) {
866                         struct rte_mbuf *start = rxq->start_seg;
867
868                         /* Check for hardware stripped VLAN tag */
869                         if (rcd->ts) {
870                                 start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
871                                 start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
872                         }
873
874                         rx_pkts[nb_rx++] = start;
875                         rxq->start_seg = NULL;
876                 }
877
878 rcd_done:
879                 rxq->cmd_ring[ring_idx].next2comp = idx;
880                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
881                                           rxq->cmd_ring[ring_idx].size);
882
883                 /* It's time to renew descriptors */
884                 vmxnet3_renew_desc(rxq, ring_idx, newm);
885                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
886                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
887                                                rxq->cmd_ring[ring_idx].next2fill);
888                 }
889
890                 /* Advance to the next descriptor in comp_ring */
891                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
892
893                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
894                 nb_rxd++;
895                 if (nb_rxd > rxq->cmd_ring[0].size) {
896                         PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
897                                    " relinquish control.");
898                         break;
899                 }
900         }
901
902         return nb_rx;
903 }
904
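/* Tx queue setup: the command ring, completion ring and data ring are
 * carved out of a single contiguous DMA memzone, laid out back to back.
 */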
905 int
906 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
907                            uint16_t queue_idx,
908                            uint16_t nb_desc,
909                            unsigned int socket_id,
910                            __rte_unused const struct rte_eth_txconf *tx_conf)
911 {
912         struct vmxnet3_hw *hw = dev->data->dev_private;
913         const struct rte_memzone *mz;
914         struct vmxnet3_tx_queue *txq;
915         struct vmxnet3_cmd_ring *ring;
916         struct vmxnet3_comp_ring *comp_ring;
917         struct vmxnet3_data_ring *data_ring;
918         int size;
919
920         PMD_INIT_FUNC_TRACE();
921
922         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
923             ETH_TXQ_FLAGS_NOXSUMSCTP) {
924                 PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
925                 return -EINVAL;
926         }
927
928         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
929                           RTE_CACHE_LINE_SIZE);
930         if (txq == NULL) {
931                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
932                 return -ENOMEM;
933         }
934
935         txq->queue_id = queue_idx;
936         txq->port_id = dev->data->port_id;
937         txq->shared = &hw->tqd_start[queue_idx];
938         txq->hw = hw;
939         txq->qid = queue_idx;
940         txq->stopped = TRUE;
941         txq->txdata_desc_size = hw->txdata_desc_size;
942
943         ring = &txq->cmd_ring;
944         comp_ring = &txq->comp_ring;
945         data_ring = &txq->data_ring;
946
947         /* Tx vmxnet ring length should be between 512-4096 */
948         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
949                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
950                              VMXNET3_DEF_TX_RING_SIZE);
951                 return -EINVAL;
952         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
953                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
954                              VMXNET3_TX_RING_MAX_SIZE);
955                 return -EINVAL;
956         } else {
957                 ring->size = nb_desc;
958                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
959         }
960         comp_ring->size = data_ring->size = ring->size;
961
962         /* Tx vmxnet rings structure initialization */
963         ring->next2fill = 0;
964         ring->next2comp = 0;
965         ring->gen = VMXNET3_INIT_GEN;
966         comp_ring->next2proc = 0;
967         comp_ring->gen = VMXNET3_INIT_GEN;
968
969         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
970         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
971         size += txq->txdata_desc_size * data_ring->size;
972
973         mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
974                                       VMXNET3_RING_BA_ALIGN, socket_id);
975         if (mz == NULL) {
976                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
977                 return -ENOMEM;
978         }
979         txq->mz = mz;
980         memset(mz->addr, 0, mz->len);
981
982         /* cmd_ring initialization */
983         ring->base = mz->addr;
984         ring->basePA = mz->phys_addr;
985
986         /* comp_ring initialization */
987         comp_ring->base = ring->base + ring->size;
988         comp_ring->basePA = ring->basePA +
989                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
990
991         /* data_ring initialization */
992         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
993         data_ring->basePA = comp_ring->basePA +
994                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
995
996         /* cmd_ring0 buf_info allocation */
997         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
998                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
999         if (ring->buf_info == NULL) {
1000                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1001                 return -ENOMEM;
1002         }
1003
1004         /* Update the data portion with txq */
1005         dev->data->tx_queues[queue_idx] = txq;
1006
1007         return 0;
1008 }
1009
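/* Rx queue setup: two command rings (ring 0 for HEAD buffers, ring 1 for
 * BODY buffers used mainly by LRO), a completion ring sized for both, and
 * an optional Rx data ring on vmxnet3 version 3 and later.
 */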
1010 int
1011 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1012                            uint16_t queue_idx,
1013                            uint16_t nb_desc,
1014                            unsigned int socket_id,
1015                            __rte_unused const struct rte_eth_rxconf *rx_conf,
1016                            struct rte_mempool *mp)
1017 {
1018         const struct rte_memzone *mz;
1019         struct vmxnet3_rx_queue *rxq;
1020         struct vmxnet3_hw *hw = dev->data->dev_private;
1021         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1022         struct vmxnet3_comp_ring *comp_ring;
1023         struct vmxnet3_rx_data_ring *data_ring;
1024         int size;
1025         uint8_t i;
1026         char mem_name[32];
1027
1028         PMD_INIT_FUNC_TRACE();
1029
1030         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1031                           RTE_CACHE_LINE_SIZE);
1032         if (rxq == NULL) {
1033                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1034                 return -ENOMEM;
1035         }
1036
1037         rxq->mp = mp;
1038         rxq->queue_id = queue_idx;
1039         rxq->port_id = dev->data->port_id;
1040         rxq->shared = &hw->rqd_start[queue_idx];
1041         rxq->hw = hw;
1042         rxq->qid1 = queue_idx;
1043         rxq->qid2 = queue_idx + hw->num_rx_queues;
1044         rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1045         rxq->data_desc_size = hw->rxdata_desc_size;
1046         rxq->stopped = TRUE;
1047
1048         ring0 = &rxq->cmd_ring[0];
1049         ring1 = &rxq->cmd_ring[1];
1050         comp_ring = &rxq->comp_ring;
1051         data_ring = &rxq->data_ring;
1052
1053         /* Rx vmxnet rings length should be between 256-4096 */
1054         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1055                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1056                 return -EINVAL;
1057         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1058                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1059                 return -EINVAL;
1060         } else {
1061                 ring0->size = nb_desc;
1062                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1063                 ring1->size = ring0->size;
1064         }
1065
1066         comp_ring->size = ring0->size + ring1->size;
1067         data_ring->size = ring0->size;
1068
1069         /* Rx vmxnet rings structure initialization */
1070         ring0->next2fill = 0;
1071         ring1->next2fill = 0;
1072         ring0->next2comp = 0;
1073         ring1->next2comp = 0;
1074         ring0->gen = VMXNET3_INIT_GEN;
1075         ring1->gen = VMXNET3_INIT_GEN;
1076         comp_ring->next2proc = 0;
1077         comp_ring->gen = VMXNET3_INIT_GEN;
1078
1079         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1080         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1081         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1082                 size += rxq->data_desc_size * data_ring->size;
1083
1084         mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1085                                       VMXNET3_RING_BA_ALIGN, socket_id);
1086         if (mz == NULL) {
1087                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1088                 return -ENOMEM;
1089         }
1090         rxq->mz = mz;
1091         memset(mz->addr, 0, mz->len);
1092
1093         /* cmd_ring0 initialization */
1094         ring0->base = mz->addr;
1095         ring0->basePA = mz->phys_addr;
1096
1097         /* cmd_ring1 initialization */
1098         ring1->base = ring0->base + ring0->size;
1099         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1100
1101         /* comp_ring initialization */
1102         comp_ring->base = ring1->base + ring1->size;
1103         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1104                 ring1->size;
1105
1106         /* data_ring initialization */
1107         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1108                 data_ring->base =
1109                         (uint8_t *)(comp_ring->base + comp_ring->size);
1110                 data_ring->basePA = comp_ring->basePA +
1111                         sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1112         }
1113
1114         /* cmd_ring0-cmd_ring1 buf_info allocation */
1115         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1116
1117                 ring = &rxq->cmd_ring[i];
1118                 ring->rid = i;
1119                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1120
1121                 ring->buf_info = rte_zmalloc(mem_name,
1122                                              ring->size * sizeof(vmxnet3_buf_info_t),
1123                                              RTE_CACHE_LINE_SIZE);
1124                 if (ring->buf_info == NULL) {
1125                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1126                         return -ENOMEM;
1127                 }
1128         }
1129
1130         /* Update the data portion with rxq */
1131         dev->data->rx_queues[queue_idx] = rxq;
1132
1133         return 0;
1134 }
1135
1136 /*
1137  * Initializes the receive unit:
1138  * loads mbufs into the Rx rings in advance so the device can receive packets.
1139  */
1140 int
1141 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1142 {
1143         struct vmxnet3_hw *hw = dev->data->dev_private;
1144
1145         int i, ret;
1146         uint8_t j;
1147
1148         PMD_INIT_FUNC_TRACE();
1149
1150         for (i = 0; i < hw->num_rx_queues; i++) {
1151                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1152
1153                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1154                         /* Post buffers for the entire command ring */
1155                         ret = vmxnet3_post_rx_bufs(rxq, j);
1156                         if (ret <= 0) {
1157                                 PMD_INIT_LOG(ERR,
1158                                              "ERROR: Posting Rxq: %d buffers ring: %d",
1159                                              i, j);
1160                                 return -ret;
1161                         }
1162                         /*
1163                          * Updating device with the index:next2fill to fill the
1164                          * mbufs for coming packets.
1165                          */
1166                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1167                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1168                                                        rxq->cmd_ring[j].next2fill);
1169                         }
1170                 }
1171                 rxq->stopped = FALSE;
1172                 rxq->start_seg = NULL;
1173         }
1174
1175         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1176                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1177
1178                 txq->stopped = FALSE;
1179         }
1180
1181         return 0;
1182 }
1183
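/* Default 40-byte Toeplitz RSS key, used when the application does not
 * supply its own key in the port RSS configuration.
 */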
1184 static uint8_t rss_intel_key[40] = {
1185         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1186         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1187         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1188         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1189         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1190 };
1191
1192 /*
1193  * Configure RSS feature
1194  */
1195 int
1196 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1197 {
1198         struct vmxnet3_hw *hw = dev->data->dev_private;
1199         struct VMXNET3_RSSConf *dev_rss_conf;
1200         struct rte_eth_rss_conf *port_rss_conf;
1201         uint64_t rss_hf;
1202         uint8_t i, j;
1203
1204         PMD_INIT_FUNC_TRACE();
1205
1206         dev_rss_conf = hw->rss_conf;
1207         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1208
1209         /* loading hashFunc */
1210         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1211         /* loading hashKeySize */
1212         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1213         /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1214         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1215
1216         if (port_rss_conf->rss_key == NULL) {
1217                 /* Default hash key */
1218                 port_rss_conf->rss_key = rss_intel_key;
1219         }
1220
1221         /* loading hashKey */
1222         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1223                dev_rss_conf->hashKeySize);
1224
1225         /* loading indTable */
1226         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1227                 if (j == dev->data->nb_rx_queues)
1228                         j = 0;
1229                 dev_rss_conf->indTable[i] = j;
1230         }
1231
1232         /* loading hashType */
1233         dev_rss_conf->hashType = 0;
1234         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1235         if (rss_hf & ETH_RSS_IPV4)
1236                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1237         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1238                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1239         if (rss_hf & ETH_RSS_IPV6)
1240                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1241         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1242                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1243
1244         return VMXNET3_SUCCESS;
1245 }