/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdbool.h>
#include <errno.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"
static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif
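/*
 * Debug-only helpers: dump ring base addresses and fill levels for an
 * Rx/Tx queue. They are compiled out unless the *_NOT_USED debug macro
 * above is defined.
 */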
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}
static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif
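/*
 * Helpers that free any mbufs still attached to a command ring. They are
 * only called after the device has been quiesced, so descriptor ownership
 * does not have to be checked.
 */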
static void
vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		if (buf_info->m)
			rte_pktmbuf_free(buf_info->m);
		buf_info->m = NULL;
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	uint32_t i;

	for (i = 0; i < ring->size; i++) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];

		if (buf_info->m)
			rte_pktmbuf_free_seg(buf_info->m);
		buf_info->m = NULL;
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}
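/*
 * Queue teardown: free any posted mbufs, the buf_info shadow arrays, the
 * descriptor-ring memzone and finally the queue structure itself.
 */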
void
vmxnet3_dev_tx_queue_release(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;

	if (tq != NULL) {
		/* Release mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
		/* Release the cmd_ring */
		vmxnet3_cmd_ring_release(&tq->cmd_ring);
		/* Release the memzone */
		rte_memzone_free(tq->mz);
		/* Release the queue */
		rte_free(tq);
	}
}
void
vmxnet3_dev_rx_queue_release(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;

	if (rq != NULL) {
		/* Release mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
		/* Release both the cmd_rings */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
		/* Release the memzone */
		rte_memzone_free(rq->mz);
		/* Release the queue */
		rte_free(rq);
	}
}
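/*
 * Reset a queue to its post-setup state: drop any posted mbufs, rewind
 * the fill/completion indices, restore the initial generation bit and
 * zero the descriptor rings.
 */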
static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	if (tq != NULL) {
		/* Release the cmd_ring mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
	}

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	memset(ring->base, 0, size);
}
static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	int size;

	if (rq != NULL) {
		/* Release both the cmd_rings mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
	}

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	memset(ring0->base, 0, size);
}
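/* Reset every configured Tx and Rx queue of the port. */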
void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
		if (txq != NULL)
			vmxnet3_dev_tx_queue_reset(txq);
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
		if (rxq != NULL)
			vmxnet3_dev_rx_queue_reset(rxq);
	}
}
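/*
 * Walk the Tx command ring from next2comp up to the EOP descriptor reported
 * by a completion entry, free the transmitted mbuf chain and return the
 * number of command descriptors released.
 */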
static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
	if (mbuf == NULL)
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
		completed++;
	}

	/* Mark the txd for which tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
}
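/*
 * Drain the Tx completion ring: every entry carrying the current
 * generation bit names the EOP descriptor of a finished packet.
 */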
static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}
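/*
 * Burst transmit. For each packet: reserve one Tx descriptor per mbuf
 * segment, optionally copy small single-segment frames into the data ring,
 * fill VLAN/TSO/checksum offload fields on the SOP descriptor, then flip
 * its generation bit to hand ownership to the device. The TXPROD doorbell
 * is written only once enough sends have been deferred.
 */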
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;
		int copy_size = 0;
		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (count > avail) {
			/* Is command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);
				break;
			}

			/* Command ring is not full but cannot handle the
			 * multi-segmented packet. Let's try the next packet
			 * in this case.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
			if (tso)
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Drop non-TSO packet that is excessively fragmented */
		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		if (txm->nb_segs == 1 &&
		    rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
		}

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that VMXNET3 maximum
			 * transmit buffer size (16K) is greater than
			 * maximum size of mbuf segment size.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
			if (copy_size)
				gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
								   txq->cmd_ring.next2fill *
								   sizeof(struct Vmxnet3_TxDataDesc));
			else
				gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);

			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* set the last buf_info for the pkt */
		tbi->m = txm;
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		if (tso) {
			uint16_t mss = txm->tso_segsz;

			RTE_ASSERT(mss > 0);

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			gdesc->txd.msscof = mss;

			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			switch (txm->ol_flags & PKT_TX_L4_MASK) {
			case PKT_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
				break;
			case PKT_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
				break;
			default:
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & PKT_TX_L4_MASK);
				break;
			}
			deferred++;
		} else {
			gdesc->txd.hlen = 0;
			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
			deferred++;
		}

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
		nb_tx++;
	}

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}
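/*
 * Re-post a single Rx descriptor with a fresh mbuf. Ring 0 carries HEAD
 * buffers and ring 1 BODY buffers; ownership passes to the device when
 * the gen bit is written last.
 */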
static inline void
vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
		   struct rte_mbuf *mbuf)
{
	uint32_t val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
	struct Vmxnet3_RxDesc *rxd =
		(struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

	if (ring_id == 0)
		val = VMXNET3_RXD_BTYPE_HEAD;
	else
		val = VMXNET3_RXD_BTYPE_BODY;

	buf_info->m = mbuf;
	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
	buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

	rxd->addr = buf_info->bufPA;
	rxd->btype = val;
	rxd->len = buf_info->len;
	rxd->gen = ring->gen;

	vmxnet3_cmd_ring_adv_next2fill(ring);
}
/*
 *  Allocates mbufs and clusters. Post rx descriptors with buffer details
 *  so that device can receive packets in those buffers.
 *  Ring layout:
 *      Among the two rings, 1st ring contains buffers of type 0 and type 1.
 *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
 *      by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
 *      2nd ring contains buffers of type 1 alone. The second ring is mostly
 *      used for LRO; since LRO is not supported yet, it stays empty.
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		/*
		 * Load mbuf pointer into buf_info[ring_size]
		 * buf_info structure is equivalent to cookie for virtio-virtqueue
		 */
		buf_info->m = mbuf;
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
		buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

		/* Load Rx Descriptor with the buffer's GPA */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->btype = val;
		rxd->len = buf_info->len;
		/* Flip gen bit at the end to change ownership */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);
		i++;
	}

	/* Return error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}
/* Receive side checksum and other offloads */
static void
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
	/* Check for RSS */
	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;
	}

	/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
	if (rcd->v4) {
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
		else
			rxm->packet_type = RTE_PTYPE_L3_IPV4;

		if (!rcd->cnc) {
			if (!rcd->ipc)
				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
		}
	}
}
/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;
	uint32_t nb_rxd, idx;
	uint8_t ring_idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	nb_rx = 0;
	ring_idx = 0;
	nb_rxd = 0;
	idx = 0;

	rxq = rx_queue;
	hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	while (rcd->gen == rxq->comp_ring.gen) {
		struct rte_mbuf *newm;

		if (nb_rx >= nb_pkts)
			break;

		newm = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(newm == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			break;
		}

		idx = rcd->rxdIdx;
		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		RTE_SET_USED(rxd); /* used only for assert when enabled */
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		RTE_ASSERT(rcd->len <= rxd->len);
		RTE_ASSERT(rbi->m);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info to be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			if (rxq->start_seg) {
				struct rte_mbuf *start = rxq->start_seg;

				rxq->start_seg = NULL;
				rte_pktmbuf_free(start);
			}
			goto rcd_done;
		}

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (rcd->sop) {
			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			if (unlikely(rcd->len == 0)) {
				RTE_ASSERT(rcd->eop);

				PMD_RX_LOG(DEBUG,
					   "Rx buf was skipped. rxring[%d][%d])",
					   ring_idx, idx);
				rte_pktmbuf_free_seg(rxm);
				goto rcd_done;
			}

			rxq->start_seg = rxm;
			vmxnet3_rx_offload(rcd, rxm);
		} else {
			struct rte_mbuf *start = rxq->start_seg;

			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			start->pkt_len += rxm->data_len;
			start->nb_segs++;

			rxq->last_seg->next = rxm;
		}
		rxq->last_seg = rxm;

		if (rcd->eop) {
			struct rte_mbuf *start = rxq->start_seg;

			/* Check for hardware stripped VLAN tag */
			if (rcd->ts) {
				start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
				start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
			}

			rx_pkts[nb_rx++] = start;
			rxq->start_seg = NULL;
		}

rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
					  rxq->cmd_ring[ring_idx].size);

		/* It's time to renew descriptors */
		vmxnet3_renew_desc(rxq, ring_idx, newm);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	return nb_rx;
}
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __rte_unused const struct rte_eth_txconf *tx_conf)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
	    ETH_TXQ_FLAGS_NOXSUMSCTP) {
		PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
		return -EINVAL;
	}

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
	txq->hw = hw;
	txq->qid = queue_idx;
	txq->stopped = TRUE;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
				      VMXNET3_RING_BA_ALIGN, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	txq->mz = mz;
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
		(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}
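/*
 * Set up an Rx queue: allocate the queue structure, carve both command
 * rings and the completion ring out of a single DMA memzone and allocate
 * a buf_info shadow array per command ring.
 */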
int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __rte_unused const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	int size;
	uint8_t i;
	char mem_name[32];

	PMD_INIT_FUNC_TRACE();

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;
	rxq->stopped = TRUE;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;

	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
				      VMXNET3_RING_BA_ALIGN, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	rxq->mz = mz;
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
		ring1->size;

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
		ring = &rxq->cmd_ring[i];
		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name,
					     ring->size * sizeof(vmxnet3_buf_info_t),
					     RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}
/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Passing 0 as alloc_num will allocate full ring */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR,
					     "ERROR: Posting Rxq: %d buffers ring: %d",
					     i, j);
				return -ret;
			}
			/*
			 * Updating device with the index:next2fill to fill the
			 * mbufs for coming packets.
			 */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
		rxq->start_seg = NULL;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
	}

	return 0;
}
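/* Default 40-byte Toeplitz hash key (the widely used Intel RSS key),
 * applied when the application does not supply one.
 */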
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};
/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
	       dev_rss_conf->hashKeySize);

	/* loading indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;
		dev_rss_conf->indTable[i] = j;
	}

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}