deb_dpdk.git (Imported Upstream version 16.04): drivers/net/vmxnet3/vmxnet3_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_eal.h>
56 #include <rte_per_lcore.h>
57 #include <rte_lcore.h>
58 #include <rte_atomic.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_ring.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_ip.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73
74 #include "base/vmxnet3_defs.h"
75 #include "vmxnet3_ring.h"
76
77 #include "vmxnet3_logs.h"
78 #include "vmxnet3_ethdev.h"
79
80 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
81
82 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
83 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
84 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
85 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
86 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
87 #endif
88
89 static struct rte_mbuf *
90 rte_rxmbuf_alloc(struct rte_mempool *mp)
91 {
92         struct rte_mbuf *m;
93
94         m = __rte_mbuf_raw_alloc(mp);
95         __rte_mbuf_sanity_check_raw(m, 0);
96         return m;
97 }
98
99 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
100 static void
101 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
102 {
103         uint32_t avail = 0;
104
105         if (rxq == NULL)
106                 return;
107
108         PMD_RX_LOG(DEBUG,
109                    "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
110                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
111         PMD_RX_LOG(DEBUG,
112                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
113                    (unsigned long)rxq->cmd_ring[0].basePA,
114                    (unsigned long)rxq->cmd_ring[1].basePA,
115                    (unsigned long)rxq->comp_ring.basePA);
116
117         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
118         PMD_RX_LOG(DEBUG,
119                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
120                    (uint32_t)rxq->cmd_ring[0].size, avail,
121                    rxq->comp_ring.next2proc,
122                    rxq->cmd_ring[0].size - avail);
123
124         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
125         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
126                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
127                    rxq->cmd_ring[1].size - avail);
128
129 }
130
131 static void
132 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
133 {
134         uint32_t avail = 0;
135
136         if (txq == NULL)
137                 return;
138
139         PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
140                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
141         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
142                    (unsigned long)txq->cmd_ring.basePA,
143                    (unsigned long)txq->comp_ring.basePA,
144                    (unsigned long)txq->data_ring.basePA);
145
146         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
147         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
148                    (uint32_t)txq->cmd_ring.size, avail,
149                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
150 }
151 #endif
152
153 static void
154 vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
155 {
156         while (ring->next2comp != ring->next2fill) {
157                 /* No need to worry about tx desc ownership, device is quiesced by now. */
158                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
159
160                 if (buf_info->m) {
161                         rte_pktmbuf_free(buf_info->m);
162                         buf_info->m = NULL;
163                         buf_info->bufPA = 0;
164                         buf_info->len = 0;
165                 }
166                 vmxnet3_cmd_ring_adv_next2comp(ring);
167         }
168 }
169
170 static void
171 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
172 {
173         vmxnet3_cmd_ring_release_mbufs(ring);
174         rte_free(ring->buf_info);
175         ring->buf_info = NULL;
176 }
177
178
179 void
180 vmxnet3_dev_tx_queue_release(void *txq)
181 {
182         vmxnet3_tx_queue_t *tq = txq;
183
184         if (tq != NULL) {
185                 /* Release the cmd_ring */
186                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
187         }
188 }
189
190 void
191 vmxnet3_dev_rx_queue_release(void *rxq)
192 {
193         int i;
194         vmxnet3_rx_queue_t *rq = rxq;
195
196         if (rq != NULL) {
197                 /* Release both the cmd_rings */
198                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
199                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
200         }
201 }
202
203 static void
204 vmxnet3_dev_tx_queue_reset(void *txq)
205 {
206         vmxnet3_tx_queue_t *tq = txq;
207         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
208         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
209         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
210         int size;
211
212         if (tq != NULL) {
213                 /* Release the cmd_ring mbufs */
214                 vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
215         }
216
217         /* Tx vmxnet rings structure initialization */
218         ring->next2fill = 0;
219         ring->next2comp = 0;
220         ring->gen = VMXNET3_INIT_GEN;
221         comp_ring->next2proc = 0;
222         comp_ring->gen = VMXNET3_INIT_GEN;
223
224         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
225         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
226         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
227
228         memset(ring->base, 0, size);
229 }
230
231 static void
232 vmxnet3_dev_rx_queue_reset(void *rxq)
233 {
234         int i;
235         vmxnet3_rx_queue_t *rq = rxq;
236         struct vmxnet3_cmd_ring *ring0, *ring1;
237         struct vmxnet3_comp_ring *comp_ring;
238         int size;
239
240         if (rq != NULL) {
241                 /* Release both the cmd_rings mbufs */
242                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
243                         vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
244         }
245
246         ring0 = &rq->cmd_ring[0];
247         ring1 = &rq->cmd_ring[1];
248         comp_ring = &rq->comp_ring;
249
250         /* Rx vmxnet rings structure initialization */
251         ring0->next2fill = 0;
252         ring1->next2fill = 0;
253         ring0->next2comp = 0;
254         ring1->next2comp = 0;
255         ring0->gen = VMXNET3_INIT_GEN;
256         ring1->gen = VMXNET3_INIT_GEN;
257         comp_ring->next2proc = 0;
258         comp_ring->gen = VMXNET3_INIT_GEN;
259
260         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
261         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
262
263         memset(ring0->base, 0, size);
264 }
265
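/*
 * Stop every Tx and Rx queue of the device and reset its rings,
 * releasing any mbufs still referenced by the ring buf_info entries.
 */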
266 void
267 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
268 {
269         unsigned i;
270
271         PMD_INIT_FUNC_TRACE();
272
273         for (i = 0; i < dev->data->nb_tx_queues; i++) {
274                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
275
276                 if (txq != NULL) {
277                         txq->stopped = TRUE;
278                         vmxnet3_dev_tx_queue_reset(txq);
279                 }
280         }
281
282         for (i = 0; i < dev->data->nb_rx_queues; i++) {
283                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
284
285                 if (rxq != NULL) {
286                         rxq->stopped = TRUE;
287                         vmxnet3_dev_rx_queue_reset(rxq);
288                 }
289         }
290 }
291
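/*
 * Reclaim the command descriptors of one transmitted packet: free the
 * mbuf chain referenced by the EOP descriptor at eop_idx and advance
 * next2comp past every descriptor belonging to that packet.
 * Returns the number of command descriptors reclaimed.
 */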
292 static int
293 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
294 {
295         int completed = 0;
296         struct rte_mbuf *mbuf;
297
298         /* Release cmd_ring descriptor and free mbuf */
299         VMXNET3_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
300
301         mbuf = txq->cmd_ring.buf_info[eop_idx].m;
302         if (mbuf == NULL)
303                 rte_panic("EOP desc does not point to a valid mbuf");
304         rte_pktmbuf_free(mbuf);
305
306         txq->cmd_ring.buf_info[eop_idx].m = NULL;
307
308         while (txq->cmd_ring.next2comp != eop_idx) {
309                 /* no out-of-order completion */
310                 VMXNET3_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
311                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
312                 completed++;
313         }
314
315         /* Mark the txd for which tcd was generated as completed */
316         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
317
318         return completed + 1;
319 }
320
321 static void
322 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
323 {
324         int completed = 0;
325         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
326         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
327                 (comp_ring->base + comp_ring->next2proc);
328
329         while (tcd->gen == comp_ring->gen) {
330                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
331
332                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
333                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
334                                                     comp_ring->next2proc);
335         }
336
337         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
338 }
339
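/*
 * Transmit burst: reclaim completed descriptors first, then build one Tx
 * descriptor per mbuf segment. Small single-segment packets are copied
 * into the data ring and their descriptor points there instead of at the
 * mbuf. TSO and L4 checksum offload fields are written to the SOP
 * descriptor, whose generation bit is flipped last to hand the packet to
 * the device. The TXPROD doorbell is rung once txNumDeferred reaches
 * txThreshold.
 */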
340 uint16_t
341 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
342                   uint16_t nb_pkts)
343 {
344         uint16_t nb_tx;
345         vmxnet3_tx_queue_t *txq = tx_queue;
346         struct vmxnet3_hw *hw = txq->hw;
347         Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
348         uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
349
350         if (unlikely(txq->stopped)) {
351                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
352                 return 0;
353         }
354
355         /* Free up the comp_descriptors aggressively */
356         vmxnet3_tq_tx_complete(txq);
357
358         nb_tx = 0;
359         while (nb_tx < nb_pkts) {
360                 Vmxnet3_GenericDesc *gdesc;
361                 vmxnet3_buf_info_t *tbi;
362                 uint32_t first2fill, avail, dw2;
363                 struct rte_mbuf *txm = tx_pkts[nb_tx];
364                 struct rte_mbuf *m_seg = txm;
365                 int copy_size = 0;
366                 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
367                 /* # of descriptors needed for a packet. */
368                 unsigned count = txm->nb_segs;
369
370                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
371                 if (count > avail) {
372                         /* Is command ring full? */
373                         if (unlikely(avail == 0)) {
374                                 PMD_TX_LOG(DEBUG, "No free ring descriptors");
375                                 txq->stats.tx_ring_full++;
376                                 txq->stats.drop_total += (nb_pkts - nb_tx);
377                                 break;
378                         }
379
380                         /* Command ring is not full but cannot handle the
381                          * multi-segmented packet. Let's try the next packet
382                          * in this case.
383                          */
384                         PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
385                                    "(avail %d needed %d)", avail, count);
386                         txq->stats.drop_total++;
387                         if (tso)
388                                 txq->stats.drop_tso++;
389                         rte_pktmbuf_free(txm);
390                         nb_tx++;
391                         continue;
392                 }
393
394                 /* Drop non-TSO packet that is excessively fragmented */
395                 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
396                         PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
397                                    "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
398                         txq->stats.drop_too_many_segs++;
399                         txq->stats.drop_total++;
400                         rte_pktmbuf_free(txm);
401                         nb_tx++;
402                         continue;
403                 }
404
405                 if (txm->nb_segs == 1 && rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
406                         struct Vmxnet3_TxDataDesc *tdd;
407
408                         tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
409                         copy_size = rte_pktmbuf_pkt_len(txm);
410                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
411                 }
412
413                 /* use the previous gen bit for the SOP desc */
414                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
415                 first2fill = txq->cmd_ring.next2fill;
416                 do {
417                         /* Remember the transmit buffer for cleanup */
418                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
419
420                         /* NB: the following assumes that VMXNET3 maximum
421                          * transmit buffer size (16K) is greater than
422                          * the maximum mbuf segment size.
423                          */
424                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
425                         if (copy_size)
426                                 gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
427                                                                 txq->cmd_ring.next2fill *
428                                                                 sizeof(struct Vmxnet3_TxDataDesc));
429                         else
430                                 gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);
431
432                         gdesc->dword[2] = dw2 | m_seg->data_len;
433                         gdesc->dword[3] = 0;
434
435                         /* move to the next2fill descriptor */
436                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
437
438                         /* use the right gen for non-SOP desc */
439                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
440                 } while ((m_seg = m_seg->next) != NULL);
441
442                 /* set the last buf_info for the pkt */
443                 tbi->m = txm;
444                 /* Update the EOP descriptor */
445                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
446
447                 /* Add VLAN tag if present */
448                 gdesc = txq->cmd_ring.base + first2fill;
449                 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
450                         gdesc->txd.ti = 1;
451                         gdesc->txd.tci = txm->vlan_tci;
452                 }
453
454                 if (tso) {
455                         uint16_t mss = txm->tso_segsz;
456
457                         VMXNET3_ASSERT(mss > 0);
458
459                         gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
460                         gdesc->txd.om = VMXNET3_OM_TSO;
461                         gdesc->txd.msscof = mss;
462
463                         deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
464                 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
465                         gdesc->txd.om = VMXNET3_OM_CSUM;
466                         gdesc->txd.hlen = txm->l2_len + txm->l3_len;
467
468                         switch (txm->ol_flags & PKT_TX_L4_MASK) {
469                         case PKT_TX_TCP_CKSUM:
470                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
471                                 break;
472                         case PKT_TX_UDP_CKSUM:
473                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
474                                 break;
475                         default:
476                                 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
477                                            txm->ol_flags & PKT_TX_L4_MASK);
478                                 abort();
479                         }
480                         deferred++;
481                 } else {
482                         gdesc->txd.hlen = 0;
483                         gdesc->txd.om = VMXNET3_OM_NONE;
484                         gdesc->txd.msscof = 0;
485                         deferred++;
486                 }
487
488                 /* flip the GEN bit on the SOP */
489                 rte_compiler_barrier();
490                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
491
492                 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
493                 nb_tx++;
494         }
495
496         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
497
498         if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
499                 txq_ctrl->txNumDeferred = 0;
500                 /* Notify vSwitch that packets are available. */
501                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
502                                        txq->cmd_ring.next2fill);
503         }
504
505         return nb_tx;
506 }
507
508 /*
509  *  Allocate mbufs and clusters, and post Rx descriptors with the buffer
510  *  details so that the device can receive packets into those buffers.
511  *      Ring layout:
512  *      Of the two rings, the 1st ring holds buffers of type 0 and type 1.
513  *      bufs_per_pkt is set such that for non-LRO cases all the buffers
514  *      required by a frame fit in the 1st ring (1st buf of type 0, the
515  *      rest of type 1). The 2nd ring holds only type 1 buffers and is
516  *      used mostly for LRO.
517  *
518  */
519 static int
520 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
521 {
522         int err = 0;
523         uint32_t i = 0, val = 0;
524         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
525
526         if (ring_id == 0) {
527                 /* Usually: One HEAD type buf per packet
528                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
529                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
530                  */
531
532                 /* We use single packet buffer so all heads here */
533                 val = VMXNET3_RXD_BTYPE_HEAD;
534         } else {
535                 /* All BODY type buffers for 2nd ring */
536                 val = VMXNET3_RXD_BTYPE_BODY;
537         }
538
539         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
540                 struct Vmxnet3_RxDesc *rxd;
541                 struct rte_mbuf *mbuf;
542                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
543
544                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
545
546                 /* Allocate blank mbuf for the current Rx Descriptor */
547                 mbuf = rte_rxmbuf_alloc(rxq->mp);
548                 if (unlikely(mbuf == NULL)) {
549                         PMD_RX_LOG(ERR, "Error allocating mbuf");
550                         rxq->stats.rx_buf_alloc_failure++;
551                         err = ENOMEM;
552                         break;
553                 }
554
555                 /*
556                  * Load the mbuf pointer into this slot's buf_info entry;
557                  * buf_info plays the role of the cookie in a virtio virtqueue.
558                  */
559                 buf_info->m = mbuf;
560                 buf_info->len = (uint16_t)(mbuf->buf_len -
561                                            RTE_PKTMBUF_HEADROOM);
562                 buf_info->bufPA =
563                         rte_mbuf_data_dma_addr_default(mbuf);
564
565                 /* Load Rx Descriptor with the buffer's GPA */
566                 rxd->addr = buf_info->bufPA;
567
568                 /* After this point rxd->addr MUST not be NULL */
569                 rxd->btype = val;
570                 rxd->len = buf_info->len;
571                 /* Flip gen bit at the end to change ownership */
572                 rxd->gen = ring->gen;
573
574                 vmxnet3_cmd_ring_adv_next2fill(ring);
575                 i++;
576         }
577
578         /* Return error only if no buffers are posted at present */
579         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
580                 return -err;
581         else
582                 return i;
583 }
584
585
586 /* Receive side checksum and other offloads */
587 static void
588 vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
589 {
590         /* Check for hardware stripped VLAN tag */
591         if (rcd->ts) {
592                 rxm->ol_flags |= PKT_RX_VLAN_PKT;
593                 rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
594         }
595
596         /* Check for RSS */
597         if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
598                 rxm->ol_flags |= PKT_RX_RSS_HASH;
599                 rxm->hash.rss = rcd->rssHash;
600         }
601
602         /* Check packet type, checksum errors, etc. Only support IPv4 for now. */
603         if (rcd->v4) {
604                 struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
605                 struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
606
607                 if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
608                         rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
609                 else
610                         rxm->packet_type = RTE_PTYPE_L3_IPV4;
611
612                 if (!rcd->cnc) {
613                         if (!rcd->ipc)
614                                 rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
615
616                         if ((rcd->tcp || rcd->udp) && !rcd->tuc)
617                                 rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
618                 }
619         }
620 }
621
622 /*
623  * Process the Rx Completion Ring of given vmxnet3_rx_queue
624  * for nb_pkts burst and return the number of packets received
625  */
626 uint16_t
627 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
628 {
629         uint16_t nb_rx;
630         uint32_t nb_rxd, idx;
631         uint8_t ring_idx;
632         vmxnet3_rx_queue_t *rxq;
633         Vmxnet3_RxCompDesc *rcd;
634         vmxnet3_buf_info_t *rbi;
635         Vmxnet3_RxDesc *rxd;
636         struct rte_mbuf *rxm = NULL;
637         struct vmxnet3_hw *hw;
638
639         nb_rx = 0;
640         ring_idx = 0;
641         nb_rxd = 0;
642         idx = 0;
643
644         rxq = rx_queue;
645         hw = rxq->hw;
646
647         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
648
649         if (unlikely(rxq->stopped)) {
650                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
651                 return 0;
652         }
653
654         while (rcd->gen == rxq->comp_ring.gen) {
655                 if (nb_rx >= nb_pkts)
656                         break;
657
658                 idx = rcd->rxdIdx;
659                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
660                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
661                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
662
663                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
664
665                 VMXNET3_ASSERT(rcd->len <= rxd->len);
666                 VMXNET3_ASSERT(rbi->m);
667
668                 /* Get the packet buffer pointer from buf_info */
669                 rxm = rbi->m;
670
671                 /* Clear the buf_info associated with this descriptor so it can be reused */
672                 rbi->m = NULL;
673                 rbi->bufPA = 0;
674
675                 /* Update next2comp to the index of the descriptor just completed */
676                 rxq->cmd_ring[ring_idx].next2comp = idx;
677
678                 /* For RCD with EOP set, check if there is frame error */
679                 if (unlikely(rcd->eop && rcd->err)) {
680                         rxq->stats.drop_total++;
681                         rxq->stats.drop_err++;
682
683                         if (!rcd->fcs) {
684                                 rxq->stats.drop_fcs++;
685                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
686                         }
687                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
688                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
689                                          rxq->comp_ring.base), rcd->rxdIdx);
690                         rte_pktmbuf_free_seg(rxm);
691                         goto rcd_done;
692                 }
693
694
695                 /* Initialize newly received packet buffer */
696                 rxm->port = rxq->port_id;
697                 rxm->nb_segs = 1;
698                 rxm->next = NULL;
699                 rxm->pkt_len = (uint16_t)rcd->len;
700                 rxm->data_len = (uint16_t)rcd->len;
701                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
702                 rxm->ol_flags = 0;
703                 rxm->vlan_tci = 0;
704
705                 /*
706                  * If this is the first buffer of the received packet,
707                  * set the pointer to the first mbuf of the packet.
708                  * Otherwise, update the total length and the number of segments
709                  * of the current scattered packet, and update the pointer to
710                  * the last mbuf of the current packet.
711                  */
712                 if (rcd->sop) {
713                         VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
714
715                         if (unlikely(rcd->len == 0)) {
716                                 VMXNET3_ASSERT(rcd->eop);
717
718                                 PMD_RX_LOG(DEBUG,
719                                            "Rx buf was skipped. rxring[%d][%d])",
720                                            ring_idx, idx);
721                                 rte_pktmbuf_free_seg(rxm);
722                                 goto rcd_done;
723                         }
724
725                         rxq->start_seg = rxm;
726                         vmxnet3_rx_offload(rcd, rxm);
727                 } else {
728                         struct rte_mbuf *start = rxq->start_seg;
729
730                         VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
731
732                         start->pkt_len += rxm->data_len;
733                         start->nb_segs++;
734
735                         rxq->last_seg->next = rxm;
736                 }
737                 rxq->last_seg = rxm;
738
739                 if (rcd->eop) {
740                         rx_pkts[nb_rx++] = rxq->start_seg;
741                         rxq->start_seg = NULL;
742                 }
743
744 rcd_done:
745                 rxq->cmd_ring[ring_idx].next2comp = idx;
746                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
747
748                 /* Allocate new buffers and repost the freed descriptors */
749                 vmxnet3_post_rx_bufs(rxq, ring_idx);
750                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
751                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
752                                                rxq->cmd_ring[ring_idx].next2fill);
753                 }
754
755                 /* Advance to the next descriptor in comp_ring */
756                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
757
758                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
759                 nb_rxd++;
760                 if (nb_rxd > rxq->cmd_ring[0].size) {
761                         PMD_RX_LOG(ERR,
762                                    "Used up quota of receiving packets,"
763                                    " relinquish control.");
764                         break;
765                 }
766         }
767
768         return nb_rx;
769 }
770
771 /*
772  * Create a memzone for the device rings. malloc cannot be used because the
773  * physical address is needed. If the memzone already exists, a pointer to
774  * the existing one is returned.
775  */
776 static const struct rte_memzone *
777 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
778                       uint16_t queue_id, uint32_t ring_size, int socket_id)
779 {
780         char z_name[RTE_MEMZONE_NAMESIZE];
781         const struct rte_memzone *mz;
782
783         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
784                         dev->driver->pci_drv.name, ring_name,
785                         dev->data->port_id, queue_id);
786
787         mz = rte_memzone_lookup(z_name);
788         if (mz)
789                 return mz;
790
791         return rte_memzone_reserve_aligned(z_name, ring_size,
792                         socket_id, 0, VMXNET3_RING_BA_ALIGN);
793 }
794
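/*
 * Tx queue setup: validate and align the requested ring size, reserve a
 * single memzone covering the command, completion and data rings, and
 * allocate the buf_info array used to track mbufs for completion cleanup.
 */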
795 int
796 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
797                            uint16_t queue_idx,
798                            uint16_t nb_desc,
799                            unsigned int socket_id,
800                            __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
801 {
802         struct vmxnet3_hw *hw = dev->data->dev_private;
803         const struct rte_memzone *mz;
804         struct vmxnet3_tx_queue *txq;
805         struct vmxnet3_cmd_ring *ring;
806         struct vmxnet3_comp_ring *comp_ring;
807         struct vmxnet3_data_ring *data_ring;
808         int size;
809
810         PMD_INIT_FUNC_TRACE();
811
812         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
813             ETH_TXQ_FLAGS_NOXSUMSCTP) {
814                 PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
815                 return -EINVAL;
816         }
817
818         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
819         if (txq == NULL) {
820                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
821                 return -ENOMEM;
822         }
823
824         txq->queue_id = queue_idx;
825         txq->port_id = dev->data->port_id;
826         txq->shared = &hw->tqd_start[queue_idx];
827         txq->hw = hw;
828         txq->qid = queue_idx;
829         txq->stopped = TRUE;
830
831         ring = &txq->cmd_ring;
832         comp_ring = &txq->comp_ring;
833         data_ring = &txq->data_ring;
834
835         /* Tx vmxnet ring length should be between 512 and 4096 */
836         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
837                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
838                              VMXNET3_DEF_TX_RING_SIZE);
839                 return -EINVAL;
840         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
841                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
842                              VMXNET3_TX_RING_MAX_SIZE);
843                 return -EINVAL;
844         } else {
845                 ring->size = nb_desc;
846                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
847         }
848         comp_ring->size = data_ring->size = ring->size;
849
850         /* Tx vmxnet rings structure initialization */
851         ring->next2fill = 0;
852         ring->next2comp = 0;
853         ring->gen = VMXNET3_INIT_GEN;
854         comp_ring->next2proc = 0;
855         comp_ring->gen = VMXNET3_INIT_GEN;
856
857         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
858         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
859         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
860
861         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
862         if (mz == NULL) {
863                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
864                 return -ENOMEM;
865         }
866         memset(mz->addr, 0, mz->len);
867
868         /* cmd_ring initialization */
869         ring->base = mz->addr;
870         ring->basePA = mz->phys_addr;
871
872         /* comp_ring initialization */
873         comp_ring->base = ring->base + ring->size;
874         comp_ring->basePA = ring->basePA +
875                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
876
877         /* data_ring initialization */
878         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
879         data_ring->basePA = comp_ring->basePA +
880                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
881
882         /* cmd_ring0 buf_info allocation */
883         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
884                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
885         if (ring->buf_info == NULL) {
886                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
887                 return -ENOMEM;
888         }
889
890         /* Update the data portion with txq */
891         dev->data->tx_queues[queue_idx] = txq;
892
893         return 0;
894 }
895
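/*
 * Rx queue setup: validate and align the requested ring size, reserve a
 * single memzone covering both command rings and the completion ring, and
 * allocate a buf_info array for each command ring.
 */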
896 int
897 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
898                            uint16_t queue_idx,
899                            uint16_t nb_desc,
900                            unsigned int socket_id,
901                            __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
902                            struct rte_mempool *mp)
903 {
904         const struct rte_memzone *mz;
905         struct vmxnet3_rx_queue *rxq;
906         struct vmxnet3_hw     *hw = dev->data->dev_private;
907         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
908         struct vmxnet3_comp_ring *comp_ring;
909         int size;
910         uint8_t i;
911         char mem_name[32];
912
913         PMD_INIT_FUNC_TRACE();
914
915         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
916         if (rxq == NULL) {
917                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
918                 return -ENOMEM;
919         }
920
921         rxq->mp = mp;
922         rxq->queue_id = queue_idx;
923         rxq->port_id = dev->data->port_id;
924         rxq->shared = &hw->rqd_start[queue_idx];
925         rxq->hw = hw;
926         rxq->qid1 = queue_idx;
927         rxq->qid2 = queue_idx + hw->num_rx_queues;
928         rxq->stopped = TRUE;
929
930         ring0 = &rxq->cmd_ring[0];
931         ring1 = &rxq->cmd_ring[1];
932         comp_ring = &rxq->comp_ring;
933
934         /* Rx vmxnet ring length should be between 256 and 4096 */
935         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
936                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
937                 return -EINVAL;
938         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
939                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
940                 return -EINVAL;
941         } else {
942                 ring0->size = nb_desc;
943                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
944                 ring1->size = ring0->size;
945         }
946
947         comp_ring->size = ring0->size + ring1->size;
948
949         /* Rx vmxnet rings structure initialization */
950         ring0->next2fill = 0;
951         ring1->next2fill = 0;
952         ring0->next2comp = 0;
953         ring1->next2comp = 0;
954         ring0->gen = VMXNET3_INIT_GEN;
955         ring1->gen = VMXNET3_INIT_GEN;
956         comp_ring->next2proc = 0;
957         comp_ring->gen = VMXNET3_INIT_GEN;
958
959         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
960         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
961
962         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
963         if (mz == NULL) {
964                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
965                 return -ENOMEM;
966         }
967         memset(mz->addr, 0, mz->len);
968
969         /* cmd_ring0 initialization */
970         ring0->base = mz->addr;
971         ring0->basePA = mz->phys_addr;
972
973         /* cmd_ring1 initialization */
974         ring1->base = ring0->base + ring0->size;
975         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
976
977         /* comp_ring initialization */
978         comp_ring->base = ring1->base + ring1->size;
979         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
980                 ring1->size;
981
982         /* cmd_ring0-cmd_ring1 buf_info allocation */
983         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
984
985                 ring = &rxq->cmd_ring[i];
986                 ring->rid = i;
987                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
988
989                 ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
990                 if (ring->buf_info == NULL) {
991                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
992                         return -ENOMEM;
993                 }
994         }
995
996         /* Update the data portion with rxq */
997         dev->data->rx_queues[queue_idx] = rxq;
998
999         return 0;
1000 }
1001
1002 /*
1003  * Initialize the receive unit: pre-post mbufs to every Rx ring
1004  * and mark the Rx and Tx queues as started.
1005  */
1006 int
1007 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1008 {
1009         struct vmxnet3_hw *hw = dev->data->dev_private;
1010
1011         int i, ret;
1012         uint8_t j;
1013
1014         PMD_INIT_FUNC_TRACE();
1015
1016         for (i = 0; i < hw->num_rx_queues; i++) {
1017                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1018
1019                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1020                         /* Post as many buffers as the ring can hold */
1021                         ret = vmxnet3_post_rx_bufs(rxq, j);
1022                         if (ret <= 0) {
1023                                 PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
1024                                 return -ret;
1025                         }
1026                         /* Update the device with next2fill so it can use the newly posted mbufs */
1027                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1028                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1029                                                        rxq->cmd_ring[j].next2fill);
1030                         }
1031                 }
1032                 rxq->stopped = FALSE;
1033                 rxq->start_seg = NULL;
1034         }
1035
1036         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1037                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1038
1039                 txq->stopped = FALSE;
1040         }
1041
1042         return 0;
1043 }
1044
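/* Default 40-byte Toeplitz RSS key, used when the application does not supply one */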
1045 static uint8_t rss_intel_key[40] = {
1046         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1047         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1048         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1049         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1050         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1051 };
1052
1053 /*
1054  * Configure RSS feature
1055  */
1056 int
1057 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1058 {
1059         struct vmxnet3_hw *hw = dev->data->dev_private;
1060         struct VMXNET3_RSSConf *dev_rss_conf;
1061         struct rte_eth_rss_conf *port_rss_conf;
1062         uint64_t rss_hf;
1063         uint8_t i, j;
1064
1065         PMD_INIT_FUNC_TRACE();
1066
1067         dev_rss_conf = hw->rss_conf;
1068         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1069
1070         /* loading hashFunc */
1071         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1072         /* loading hashKeySize */
1073         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1074         /* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1075         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1076
1077         if (port_rss_conf->rss_key == NULL) {
1078                 /* Default hash key */
1079                 port_rss_conf->rss_key = rss_intel_key;
1080         }
1081
1082         /* loading hashKey */
1083         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
1084
1085         /* loading indTable */
1086         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1087                 if (j == dev->data->nb_rx_queues)
1088                         j = 0;
1089                 dev_rss_conf->indTable[i] = j;
1090         }
1091
1092         /* loading hashType */
1093         dev_rss_conf->hashType = 0;
1094         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1095         if (rss_hf & ETH_RSS_IPV4)
1096                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1097         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1098                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1099         if (rss_hf & ETH_RSS_IPV6)
1100                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1101         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1102                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1103
1104         return VMXNET3_SUCCESS;
1105 }