4 * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <sys/queue.h>
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
56 #include <rte_per_lcore.h>
57 #include <rte_lcore.h>
58 #include <rte_atomic.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_mempool.h>
61 #include <rte_malloc.h>
63 #include <rte_ether.h>
64 #include <rte_ethdev.h>
65 #include <rte_prefetch.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
73 #include "base/vmxnet3_defs.h"
74 #include "vmxnet3_ring.h"
76 #include "vmxnet3_logs.h"
77 #include "vmxnet3_ethdev.h"
/* BAR0 producer-index registers for the two RX command rings, indexed by ring id. */
79 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
81 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
82 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
83 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
84 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
85 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
88 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
/*
 * Debug-only helper (compiled out unless the _NOT_USED guard is defined):
 * logs the virtual/physical base addresses of both RX command rings and the
 * completion ring, plus size/free/next2proc/queued counters per command ring.
 */
90 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
98 "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
99 rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
101 "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
102 (unsigned long)rxq->cmd_ring[0].basePA,
103 (unsigned long)rxq->cmd_ring[1].basePA,
104 (unsigned long)rxq->comp_ring.basePA);
/* Command ring 0: queued = size - free descriptors. */
106 avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
108 "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
109 (uint32_t)rxq->cmd_ring[0].size, avail,
110 rxq->comp_ring.next2proc,
111 rxq->cmd_ring[0].size - avail);
/* Command ring 1: same accounting as ring 0. */
113 avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
114 PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
115 (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
116 rxq->cmd_ring[1].size - avail);
/*
 * Debug-only helper: logs the TX queue's command, completion and data ring
 * base addresses (virtual and physical) and the command ring fill state.
 */
121 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
128 PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
129 txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
130 PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
131 (unsigned long)txq->cmd_ring.basePA,
132 (unsigned long)txq->comp_ring.basePA,
133 (unsigned long)txq->data_ring.basePA);
/* queued = size - free descriptors on the command ring. */
135 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
136 PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
137 (uint32_t)txq->cmd_ring.size, avail,
138 txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
/*
 * Free every mbuf still attached to in-flight TX descriptors, walking from
 * next2comp up to next2fill. Caller must have quiesced the device first.
 */
143 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
145 while (ring->next2comp != ring->next2fill) {
146 /* No need to worry about desc ownership, device is quiesced by now. */
147 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
/* rte_pktmbuf_free releases the whole chain anchored at this descriptor. */
150 rte_pktmbuf_free(buf_info->m);
155 vmxnet3_cmd_ring_adv_next2comp(ring);
/*
 * Free the mbuf segment posted in every slot of an RX command ring.
 * Unlike the TX variant, this walks the full ring since each RX slot
 * holds at most one segment. Device must be quiesced.
 */
160 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
164 for (i = 0; i < ring->size; i++) {
165 /* No need to worry about desc ownership, device is quiesced by now. */
166 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
/* Single segment per RX slot, hence free_seg rather than free. */
169 rte_pktmbuf_free_seg(buf_info->m);
174 vmxnet3_cmd_ring_adv_next2comp(ring);
/* Release the per-descriptor buf_info bookkeeping array; NULL it to guard
 * against double-free on repeated teardown. */
179 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
181 rte_free(ring->buf_info);
182 ring->buf_info = NULL;
/*
 * ethdev tx_queue_release callback: free in-flight mbufs, the command ring's
 * buf_info array, and the descriptor memzone backing the queue's rings.
 */
186 vmxnet3_dev_tx_queue_release(void *txq)
188 vmxnet3_tx_queue_t *tq = txq;
/* Release the cmd_ring mbufs first, while buf_info is still valid. */
192 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
193 /* Release the cmd_ring */
194 vmxnet3_cmd_ring_release(&tq->cmd_ring);
195 /* Release the memzone */
196 rte_memzone_free(tq->mz);
/*
 * ethdev rx_queue_release callback: free posted mbufs and buf_info for both
 * RX command rings, then the descriptor memzone.
 */
201 vmxnet3_dev_rx_queue_release(void *rxq)
204 vmxnet3_rx_queue_t *rq = rxq;
/* Free mbufs before releasing buf_info arrays that reference them. */
208 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
209 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
211 /* Release both the cmd_rings */
212 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
213 vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
215 /* Release the memzone */
216 rte_memzone_free(rq->mz);
/*
 * Reset a TX queue to its post-setup state: drop in-flight mbufs, restore
 * ring indices and generation bits, and zero the whole descriptor area
 * (cmd ring + comp ring + data ring, which are laid out contiguously).
 */
221 vmxnet3_dev_tx_queue_reset(void *txq)
223 vmxnet3_tx_queue_t *tq = txq;
224 struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
225 struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
226 struct vmxnet3_data_ring *data_ring = &tq->data_ring;
230 /* Release the cmd_ring mbufs */
231 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
234 /* Tx vmxnet rings structure initialization*/
237 ring->gen = VMXNET3_INIT_GEN;
238 comp_ring->next2proc = 0;
239 comp_ring->gen = VMXNET3_INIT_GEN;
/* Total bytes spanned by the three contiguous rings (cmd base is first). */
241 size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
242 size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
243 size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
245 memset(ring->base, 0, size);
/*
 * Reset an RX queue to its post-setup state: free all posted mbufs in both
 * command rings, reinitialize fill/comp indices and generation bits, and
 * zero the contiguous descriptor area (ring0 + ring1 + comp ring).
 */
249 vmxnet3_dev_rx_queue_reset(void *rxq)
252 vmxnet3_rx_queue_t *rq = rxq;
253 struct vmxnet3_cmd_ring *ring0, *ring1;
254 struct vmxnet3_comp_ring *comp_ring;
258 /* Release both the cmd_rings mbufs */
259 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
260 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
263 ring0 = &rq->cmd_ring[0];
264 ring1 = &rq->cmd_ring[1];
265 comp_ring = &rq->comp_ring;
267 /* Rx vmxnet rings structure initialization */
268 ring0->next2fill = 0;
269 ring1->next2fill = 0;
270 ring0->next2comp = 0;
271 ring1->next2comp = 0;
272 ring0->gen = VMXNET3_INIT_GEN;
273 ring1->gen = VMXNET3_INIT_GEN;
274 comp_ring->next2proc = 0;
275 comp_ring->gen = VMXNET3_INIT_GEN;
/* ring0->base is the start of the contiguous descriptor area. */
277 size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
278 size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
280 memset(ring0->base, 0, size);
/*
 * Reset every configured TX and RX queue on the port back to its initial
 * state (used on device stop/restart paths).
 */
284 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
288 PMD_INIT_FUNC_TRACE();
290 for (i = 0; i < dev->data->nb_tx_queues; i++) {
291 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
295 vmxnet3_dev_tx_queue_reset(txq);
299 for (i = 0; i < dev->data->nb_rx_queues; i++) {
300 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
304 vmxnet3_dev_rx_queue_reset(rxq);
/*
 * Complete one transmitted packet whose EOP descriptor index is eop_idx:
 * free its mbuf chain and advance next2comp over every descriptor the packet
 * occupied. Returns the number of descriptors reclaimed (completed + 1).
 */
310 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
313 struct rte_mbuf *mbuf;
315 /* Release cmd_ring descriptor and free mbuf */
316 RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
318 mbuf = txq->cmd_ring.buf_info[eop_idx].m;
320 rte_panic("EOP desc does not point to a valid mbuf");
321 rte_pktmbuf_free(mbuf);
/* Clear the cookie so teardown paths do not double-free it. */
323 txq->cmd_ring.buf_info[eop_idx].m = NULL;
/* Walk the non-EOP descriptors of this packet up to eop_idx. */
325 while (txq->cmd_ring.next2comp != eop_idx) {
326 /* no out-of-order completion */
327 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
328 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
332 /* Mark the txd for which tcd was generated as completed */
333 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
335 return completed + 1;
/*
 * Drain the TX completion ring: for every completion descriptor owned by
 * the driver (gen bit matches), reclaim the corresponding packet's command
 * descriptors via vmxnet3_unmap_pkt and advance the completion ring.
 */
339 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
342 vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
343 struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
344 (comp_ring->base + comp_ring->next2proc);
/* Matching gen bit means the device has handed this descriptor back. */
346 while (tcd->gen == comp_ring->gen) {
347 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
349 vmxnet3_comp_ring_adv_next2proc(comp_ring);
350 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
351 comp_ring->next2proc);
354 PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
/*
 * TX burst function (eth_tx_burst_t). For each packet: reclaim completed
 * descriptors, check ring space, optionally copy small single-segment
 * packets through the data ring, fill one command descriptor per mbuf
 * segment, then program offloads (VLAN/TSO/L4 csum) on the SOP descriptor
 * and hand ownership to the device by flipping the SOP gen bit last.
 * Kicks the device doorbell once the deferred count crosses txThreshold.
 */
358 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
362 vmxnet3_tx_queue_t *txq = tx_queue;
363 struct vmxnet3_hw *hw = txq->hw;
364 Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
365 uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
367 if (unlikely(txq->stopped)) {
368 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
372 /* Free up the comp_descriptors aggressively */
373 vmxnet3_tq_tx_complete(txq);
376 while (nb_tx < nb_pkts) {
377 Vmxnet3_GenericDesc *gdesc;
378 vmxnet3_buf_info_t *tbi;
379 uint32_t first2fill, avail, dw2;
380 struct rte_mbuf *txm = tx_pkts[nb_tx];
381 struct rte_mbuf *m_seg = txm;
383 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
384 /* # of descriptors needed for a packet. */
385 unsigned count = txm->nb_segs;
387 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
389 /* Is command ring full? */
390 if (unlikely(avail == 0)) {
391 PMD_TX_LOG(DEBUG, "No free ring descriptors");
392 txq->stats.tx_ring_full++;
393 txq->stats.drop_total += (nb_pkts - nb_tx);
397 /* Command ring is not full but cannot handle the
398 * multi-segmented packet. Let's try the next packet
/* NOTE(review): this branch drops the too-large packet and continues. */
401 PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
402 "(avail %d needed %d)", avail, count);
403 txq->stats.drop_total++;
405 txq->stats.drop_tso++;
406 rte_pktmbuf_free(txm);
411 /* Drop non-TSO packet that is excessively fragmented */
412 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
413 PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
414 "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
415 txq->stats.drop_too_many_segs++;
416 txq->stats.drop_total++;
417 rte_pktmbuf_free(txm);
/* Small single-segment packets are copied into the data ring slot so the
 * device reads from the pre-mapped data ring rather than the mbuf. */
422 if (txm->nb_segs == 1 &&
423 rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
424 struct Vmxnet3_TxDataDesc *tdd;
426 tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
427 copy_size = rte_pktmbuf_pkt_len(txm);
428 rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
431 /* use the previous gen bit for the SOP desc */
432 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
433 first2fill = txq->cmd_ring.next2fill;
435 /* Remember the transmit buffer for cleanup */
436 tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
438 /* NB: the following assumes that VMXNET3 maximum
439 * transmit buffer size (16K) is greater than
440 * maximum size of mbuf segment size.
442 gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
/* Data-ring copy path: point the descriptor at the data ring slot. */
444 gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
445 txq->cmd_ring.next2fill *
446 sizeof(struct Vmxnet3_TxDataDesc));
/* Normal path: point the descriptor at the mbuf segment's DMA address. */
448 gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);
450 gdesc->dword[2] = dw2 | m_seg->data_len;
453 /* move to the next2fill descriptor */
454 vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
456 /* use the right gen for non-SOP desc */
457 dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
458 } while ((m_seg = m_seg->next) != NULL);
460 /* set the last buf_info for the pkt */
462 /* Update the EOP descriptor */
463 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
465 /* Add VLAN tag if present */
466 gdesc = txq->cmd_ring.base + first2fill;
467 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
469 gdesc->txd.tci = txm->vlan_tci;
/* TSO: header length covers L2+L3+L4; device counts one deferred
 * completion per resulting segment. */
473 uint16_t mss = txm->tso_segsz;
477 gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
478 gdesc->txd.om = VMXNET3_OM_TSO;
479 gdesc->txd.msscof = mss;
481 deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
482 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
483 gdesc->txd.om = VMXNET3_OM_CSUM;
484 gdesc->txd.hlen = txm->l2_len + txm->l3_len;
/* msscof carries the checksum field offset for the CSUM offload mode. */
486 switch (txm->ol_flags & PKT_TX_L4_MASK) {
487 case PKT_TX_TCP_CKSUM:
488 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
490 case PKT_TX_UDP_CKSUM:
491 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
494 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
495 txm->ol_flags & PKT_TX_L4_MASK);
/* No offload requested. */
501 gdesc->txd.om = VMXNET3_OM_NONE;
502 gdesc->txd.msscof = 0;
506 /* flip the GEN bit on the SOP */
/* Barrier ensures all descriptor writes land before ownership transfer. */
507 rte_compiler_barrier();
508 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
510 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
514 PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
516 if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
517 txq_ctrl->txNumDeferred = 0;
518 /* Notify vSwitch that packets are available. */
519 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
520 txq->cmd_ring.next2fill);
/*
 * Repost one RX descriptor at ring->next2fill with a fresh mbuf: record the
 * mbuf in buf_info, program the descriptor's DMA address/length/type, and
 * flip its gen bit last to hand it to the device.
 */
527 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
528 struct rte_mbuf *mbuf)
531 struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
532 struct Vmxnet3_RxDesc *rxd =
533 (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
534 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
/* Buffer type: HEAD for ring 0, BODY for ring 1. */
537 val = VMXNET3_RXD_BTYPE_HEAD;
539 val = VMXNET3_RXD_BTYPE_BODY;
/* Usable length excludes the mbuf headroom the stack reserves. */
542 buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
543 buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);
545 rxd->addr = buf_info->bufPA;
547 rxd->len = buf_info->len;
/* gen write transfers descriptor ownership to the device. */
548 rxd->gen = ring->gen;
550 vmxnet3_cmd_ring_adv_next2fill(ring);
553 * Allocates mbufs and clusters. Post rx descriptors with buffer details
554 * so that device can receive packets in those buffers.
556 * Among the two rings, 1st ring contains buffers of type 0 and type 1.
557 * bufs_per_pkt is set such that for non-LRO cases all the buffers required
558 * by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
559 * 2nd ring contains buffers of type 1 alone. Second ring mostly be used
563 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
566 uint32_t i = 0, val = 0;
567 struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
570 /* Usually: One HEAD type buf per packet
571 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
572 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
575 /* We use single packet buffer so all heads here */
576 val = VMXNET3_RXD_BTYPE_HEAD;
578 /* All BODY type buffers for 2nd ring */
579 val = VMXNET3_RXD_BTYPE_BODY;
/* Fill every currently-free slot in the ring with a fresh mbuf. */
582 while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
583 struct Vmxnet3_RxDesc *rxd;
584 struct rte_mbuf *mbuf;
585 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
587 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
589 /* Allocate blank mbuf for the current Rx Descriptor */
590 mbuf = rte_mbuf_raw_alloc(rxq->mp);
591 if (unlikely(mbuf == NULL)) {
592 PMD_RX_LOG(ERR, "Error allocating mbuf");
593 rxq->stats.rx_buf_alloc_failure++;
599 * Load mbuf pointer into buf_info[ring_size]
600 * buf_info structure is equivalent to cookie for virtio-virtqueue
603 buf_info->len = (uint16_t)(mbuf->buf_len -
604 RTE_PKTMBUF_HEADROOM);
605 buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);
607 /* Load Rx Descriptor with the buffer's GPA */
608 rxd->addr = buf_info->bufPA;
610 /* After this point rxd->addr MUST not be NULL */
612 rxd->len = buf_info->len;
613 /* Flip gen bit at the end to change ownership */
614 rxd->gen = ring->gen;
616 vmxnet3_cmd_ring_adv_next2fill(ring);
620 /* Return error only if no buffers are posted at present */
621 if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
628 /* Receive side checksum and other offloads */
630 vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
633 if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
634 rxm->ol_flags |= PKT_RX_RSS_HASH;
635 rxm->hash.rss = rcd->rssHash;
638 /* Check packet type, checksum errors, etc. Only support IPv4 for now. */
640 struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
641 struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
643 if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
644 rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
646 rxm->packet_type = RTE_PTYPE_L3_IPV4;
650 rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
652 if ((rcd->tcp || rcd->udp) && !rcd->tuc)
653 rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
659 * Process the Rx Completion Ring of given vmxnet3_rx_queue
660 * for nb_pkts burst and return the number of packets received
663 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
666 uint32_t nb_rxd, idx;
668 vmxnet3_rx_queue_t *rxq;
669 Vmxnet3_RxCompDesc *rcd;
670 vmxnet3_buf_info_t *rbi;
672 struct rte_mbuf *rxm = NULL;
673 struct vmxnet3_hw *hw;
683 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
685 if (unlikely(rxq->stopped)) {
686 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
/* Process completions the device has handed back (gen bits match). */
690 while (rcd->gen == rxq->comp_ring.gen) {
691 struct rte_mbuf *newm;
693 if (nb_rx >= nb_pkts)
/* Allocate the replacement mbuf up front; skip this completion on failure
 * so the ring slot is not left without a buffer. */
696 newm = rte_mbuf_raw_alloc(rxq->mp);
697 if (unlikely(newm == NULL)) {
698 PMD_RX_LOG(ERR, "Error allocating mbuf");
699 rxq->stats.rx_buf_alloc_failure++;
/* Map the completion's rqID back to command ring 0 or 1. */
704 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
705 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
706 RTE_SET_USED(rxd); /* used only for assert when enabled */
707 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
709 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
711 RTE_ASSERT(rcd->len <= rxd->len);
714 /* Get the packet buffer pointer from buf_info */
717 /* Clear descriptor associated buf_info to be reused */
721 /* Update the index that we received a packet */
722 rxq->cmd_ring[ring_idx].next2comp = idx;
724 /* For RCD with EOP set, check if there is frame error */
725 if (unlikely(rcd->eop && rcd->err)) {
726 rxq->stats.drop_total++;
727 rxq->stats.drop_err++;
730 rxq->stats.drop_fcs++;
731 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
733 PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
734 (int)(rcd - (struct Vmxnet3_RxCompDesc *)
735 rxq->comp_ring.base), rcd->rxdIdx);
/* Drop the errored segment and any partially-assembled chain. */
736 rte_pktmbuf_free_seg(rxm);
737 if (rxq->start_seg) {
738 struct rte_mbuf *start = rxq->start_seg;
740 rxq->start_seg = NULL;
741 rte_pktmbuf_free(start);
746 /* Initialize newly received packet buffer */
747 rxm->port = rxq->port_id;
750 rxm->pkt_len = (uint16_t)rcd->len;
751 rxm->data_len = (uint16_t)rcd->len;
752 rxm->data_off = RTE_PKTMBUF_HEADROOM;
757 * If this is the first buffer of the received packet,
758 * set the pointer to the first mbuf of the packet
759 * Otherwise, update the total length and the number of segments
760 * of the current scattered packet, and update the pointer to
761 * the last mbuf of the current packet.
764 RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
766 if (unlikely(rcd->len == 0)) {
767 RTE_ASSERT(rcd->eop);
770 "Rx buf was skipped. rxring[%d][%d])",
772 rte_pktmbuf_free_seg(rxm);
776 rxq->start_seg = rxm;
777 vmxnet3_rx_offload(rcd, rxm);
/* Non-SOP segment: append to the in-progress scattered packet. */
779 struct rte_mbuf *start = rxq->start_seg;
781 RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
783 start->pkt_len += rxm->data_len;
786 rxq->last_seg->next = rxm;
/* EOP reached: finish VLAN handling and deliver the packet. */
791 struct rte_mbuf *start = rxq->start_seg;
793 /* Check for hardware stripped VLAN tag */
795 start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
796 start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
799 rx_pkts[nb_rx++] = start;
800 rxq->start_seg = NULL;
804 rxq->cmd_ring[ring_idx].next2comp = idx;
805 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
806 rxq->cmd_ring[ring_idx].size);
808 /* It's time to renew descriptors */
809 vmxnet3_renew_desc(rxq, ring_idx, newm);
810 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
811 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
812 rxq->cmd_ring[ring_idx].next2fill);
815 /* Advance to the next descriptor in comp_ring */
816 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
818 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
/* Quota guard: stop after processing one full ring's worth of descriptors. */
820 if (nb_rxd > rxq->cmd_ring[0].size) {
821 PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
822 " relinquish control.");
/*
 * ethdev tx_queue_setup callback: validate offload flags and ring size,
 * allocate the queue structure and a single DMA memzone holding the
 * command, completion and data rings contiguously, then the buf_info
 * cookie array. Registers the queue in dev->data->tx_queues.
 */
831 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
834 unsigned int socket_id,
835 __rte_unused const struct rte_eth_txconf *tx_conf)
837 struct vmxnet3_hw *hw = dev->data->dev_private;
838 const struct rte_memzone *mz;
839 struct vmxnet3_tx_queue *txq;
840 struct vmxnet3_cmd_ring *ring;
841 struct vmxnet3_comp_ring *comp_ring;
842 struct vmxnet3_data_ring *data_ring;
845 PMD_INIT_FUNC_TRACE();
/* SCTP checksum offload is the one TX checksum this device cannot do. */
847 if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
848 ETH_TXQ_FLAGS_NOXSUMSCTP) {
849 PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
853 txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
854 RTE_CACHE_LINE_SIZE);
856 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
860 txq->queue_id = queue_idx;
861 txq->port_id = dev->data->port_id;
/* Shared queue descriptor region negotiated with the device. */
862 txq->shared = &hw->tqd_start[queue_idx];
864 txq->qid = queue_idx;
867 ring = &txq->cmd_ring;
868 comp_ring = &txq->comp_ring;
869 data_ring = &txq->data_ring;
871 /* Tx vmxnet ring length should be between 512-4096 */
872 if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
873 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
874 VMXNET3_DEF_TX_RING_SIZE);
876 } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
877 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
878 VMXNET3_TX_RING_MAX_SIZE);
/* Round size down to the device-required multiple via the mask. */
881 ring->size = nb_desc;
882 ring->size &= ~VMXNET3_RING_SIZE_MASK;
884 comp_ring->size = data_ring->size = ring->size;
886 /* Tx vmxnet rings structure initialization*/
889 ring->gen = VMXNET3_INIT_GEN;
890 comp_ring->next2proc = 0;
891 comp_ring->gen = VMXNET3_INIT_GEN;
893 size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
894 size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
895 size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
897 mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
898 VMXNET3_RING_BA_ALIGN, socket_id);
900 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
904 memset(mz->addr, 0, mz->len);
906 /* cmd_ring initialization */
907 ring->base = mz->addr;
908 ring->basePA = mz->phys_addr;
910 /* comp_ring initialization */
911 comp_ring->base = ring->base + ring->size;
912 comp_ring->basePA = ring->basePA +
913 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
915 /* data_ring initialization */
916 data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
917 data_ring->basePA = comp_ring->basePA +
918 (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
920 /* cmd_ring0 buf_info allocation */
921 ring->buf_info = rte_zmalloc("tx_ring_buf_info",
922 ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
923 if (ring->buf_info == NULL) {
924 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
928 /* Update the data portion with txq */
929 dev->data->tx_queues[queue_idx] = txq;
/*
 * ethdev rx_queue_setup callback: validate ring size, allocate the queue
 * structure and one DMA memzone holding cmd_ring0, cmd_ring1 and the
 * completion ring back-to-back, plus a buf_info cookie array per command
 * ring. qid1/qid2 map the device's two rqIDs back to this queue.
 */
935 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
938 unsigned int socket_id,
939 __rte_unused const struct rte_eth_rxconf *rx_conf,
940 struct rte_mempool *mp)
942 const struct rte_memzone *mz;
943 struct vmxnet3_rx_queue *rxq;
944 struct vmxnet3_hw *hw = dev->data->dev_private;
945 struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
946 struct vmxnet3_comp_ring *comp_ring;
951 PMD_INIT_FUNC_TRACE();
953 rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
954 RTE_CACHE_LINE_SIZE);
956 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
961 rxq->queue_id = queue_idx;
962 rxq->port_id = dev->data->port_id;
963 rxq->shared = &hw->rqd_start[queue_idx];
/* Ring 0 completions report rqID == qid1; ring 1 uses qid2. */
965 rxq->qid1 = queue_idx;
966 rxq->qid2 = queue_idx + hw->num_rx_queues;
969 ring0 = &rxq->cmd_ring[0];
970 ring1 = &rxq->cmd_ring[1];
971 comp_ring = &rxq->comp_ring;
973 /* Rx vmxnet rings length should be between 256-4096 */
974 if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
975 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
977 } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
978 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
/* Round down to the device-required multiple; ring1 mirrors ring0. */
981 ring0->size = nb_desc;
982 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
983 ring1->size = ring0->size;
/* Completion ring must cover both command rings. */
986 comp_ring->size = ring0->size + ring1->size;
988 /* Rx vmxnet rings structure initialization */
989 ring0->next2fill = 0;
990 ring1->next2fill = 0;
991 ring0->next2comp = 0;
992 ring1->next2comp = 0;
993 ring0->gen = VMXNET3_INIT_GEN;
994 ring1->gen = VMXNET3_INIT_GEN;
995 comp_ring->next2proc = 0;
996 comp_ring->gen = VMXNET3_INIT_GEN;
998 size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
999 size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1001 mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1002 VMXNET3_RING_BA_ALIGN, socket_id);
1004 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1008 memset(mz->addr, 0, mz->len);
1010 /* cmd_ring0 initialization */
1011 ring0->base = mz->addr;
1012 ring0->basePA = mz->phys_addr;
1014 /* cmd_ring1 initialization */
1015 ring1->base = ring0->base + ring0->size;
1016 ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1018 /* comp_ring initialization */
1019 comp_ring->base = ring1->base + ring1->size;
1020 comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1023 /* cmd_ring0-cmd_ring1 buf_info allocation */
1024 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1026 ring = &rxq->cmd_ring[i];
1028 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1030 ring->buf_info = rte_zmalloc(mem_name,
1031 ring->size * sizeof(vmxnet3_buf_info_t),
1032 RTE_CACHE_LINE_SIZE);
1033 if (ring->buf_info == NULL) {
1034 PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1039 /* Update the data portion with rxq */
1040 dev->data->rx_queues[queue_idx] = rxq;
1046 * Initializes Receive Unit
1047 * Load mbufs in rx queue in advance
1050 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1052 struct vmxnet3_hw *hw = dev->data->dev_private;
1057 PMD_INIT_FUNC_TRACE();
/* Pre-post buffers on both command rings of every RX queue. */
1059 for (i = 0; i < hw->num_rx_queues; i++) {
1060 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1062 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1063 /* Passing 0 as alloc_num will allocate full ring */
1064 ret = vmxnet3_post_rx_bufs(rxq, j);
1067 "ERROR: Posting Rxq: %d buffers ring: %d",
1072 * Updating device with the index:next2fill to fill the
1073 * mbufs for coming packets.
1075 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1076 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1077 rxq->cmd_ring[j].next2fill);
/* Mark the queue live and clear any stale scatter-gather state. */
1080 rxq->stopped = FALSE;
1081 rxq->start_seg = NULL;
1084 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1085 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1087 txq->stopped = FALSE;
/* Default 40-byte Toeplitz RSS hash key, used when the application does not
 * supply one via rte_eth_rss_conf. */
1093 static uint8_t rss_intel_key[40] = {
1094 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1095 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1096 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1097 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1098 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1102 * Configure RSS feature
1105 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1107 struct vmxnet3_hw *hw = dev->data->dev_private;
1108 struct VMXNET3_RSSConf *dev_rss_conf;
1109 struct rte_eth_rss_conf *port_rss_conf;
1113 PMD_INIT_FUNC_TRACE();
1115 dev_rss_conf = hw->rss_conf;
1116 port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1118 /* loading hashFunc */
1119 dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1120 /* loading hashKeySize */
1121 dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1122 /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
1123 dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1125 if (port_rss_conf->rss_key == NULL) {
1126 /* Default hash key */
1127 port_rss_conf->rss_key = rss_intel_key;
1130 /* loading hashKey */
1131 memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1132 dev_rss_conf->hashKeySize);
1134 /* loading indTable */
/* Round-robin the indirection table entries across the configured queues. */
1135 for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1136 if (j == dev->data->nb_rx_queues)
1138 dev_rss_conf->indTable[i] = j;
1141 /* loading hashType */
/* Translate DPDK rss_hf flags into the device's hashType bitmask. */
1142 dev_rss_conf->hashType = 0;
1143 rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1144 if (rss_hf & ETH_RSS_IPV4)
1145 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1146 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1147 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1148 if (rss_hf & ETH_RSS_IPV6)
1149 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1150 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1151 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1153 return VMXNET3_SUCCESS;