 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <errno.h>
#include <sys/queue.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_memcpy.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"
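
/*
 * Note: the device exposes one Rx producer ("doorbell") register per hardware
 * command ring; rxprod_reg[] below is indexed by the command ring number so
 * the refill path can kick the matching register.
 */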
static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}
static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif
static void
vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		if (buf_info->m) {
			rte_pktmbuf_free(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	uint32_t i;

	for (i = 0; i < ring->size; i++) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];

		if (buf_info->m) {
			rte_pktmbuf_free_seg(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}
void
vmxnet3_dev_tx_queue_release(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;

	if (tq != NULL) {
		/* Release mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
		/* Release the cmd_ring */
		vmxnet3_cmd_ring_release(&tq->cmd_ring);
	}
}

void
vmxnet3_dev_rx_queue_release(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;

	if (rq != NULL) {
		/* Release mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

		/* Release both the cmd_rings */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
	}
}
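
/*
 * Note: the queue-reset helpers below clear the descriptor memory with a
 * single memset() over the summed ring sizes. This relies on the cmd,
 * completion (and, for Tx, data) rings being carved out of one contiguous
 * DMA zone, as laid out in the queue setup functions further down.
 */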
static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	if (tq != NULL) {
		/* Release the cmd_ring mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
	}

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	memset(ring->base, 0, size);
}
static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	int size;

	if (rq != NULL) {
		/* Release both the cmd_rings mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
	}

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	memset(ring0->base, 0, size);
}
void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		if (txq != NULL) {
			txq->stopped = TRUE;
			vmxnet3_dev_tx_queue_reset(txq);
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		if (rxq != NULL) {
			rxq->stopped = TRUE;
			vmxnet3_dev_rx_queue_reset(rxq);
		}
	}
}
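
/*
 * Free the mbuf attached to the EOP descriptor at eop_idx and walk
 * next2comp forward over every descriptor of that packet (completions are
 * reported per packet, not per descriptor). Returns the number of command
 * ring descriptors released.
 */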
static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
	if (mbuf == NULL)
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
		completed++;
	}

	/* Mark the txd for which tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
}
static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}
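
/*
 * Transmit burst. Rough per-packet flow (as implemented below): reclaim
 * completed descriptors, check that the command ring can hold all segments,
 * optionally copy small single-segment frames into the data ring, fill one
 * Tx descriptor per segment, mark the last one EOP|CQ, apply VLAN/TSO/csum
 * offload fields on the SOP descriptor and flip its generation bit last so
 * the device only sees a fully built chain.
 */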
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;
		int copy_size = 0;
		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (count > avail) {
			/* Is command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);
				break;
			}

			/* Command ring is not full but cannot handle the
			 * multi-segmented packet. Let's try the next packet
			 * in this case.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
			if (tso)
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Drop non-TSO packet that is excessively fragmented */
		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}
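
		/*
		 * Copy small, single-segment frames into the per-queue data
		 * ring and point the descriptor at the data ring slot rather
		 * than the mbuf; this is presumably cheaper for the backend
		 * than mapping a tiny guest buffer.
		 */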
		if (txm->nb_segs == 1 &&
		    rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
		}

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that VMXNET3 maximum
			 * transmit buffer size (16K) is greater than
			 * maximum size of mbuf segment size.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
			if (copy_size)
				gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
								   txq->cmd_ring.next2fill *
								   sizeof(struct Vmxnet3_TxDataDesc));
			else
				gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);

			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* set the last buf_info for the pkt */
		tbi->m = txm;
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		if (tso) {
			uint16_t mss = txm->tso_segsz;

			RTE_ASSERT(mss > 0);

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			gdesc->txd.msscof = mss;

			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			switch (txm->ol_flags & PKT_TX_L4_MASK) {
			case PKT_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
				break;
			case PKT_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
				break;
			default:
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & PKT_TX_L4_MASK);
				abort();
			}
			deferred++;
		} else {
			gdesc->txd.hlen = 0;
			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
			deferred++;
		}

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
		nb_tx++;
	}

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
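
	/*
	 * Doorbell batching: the producer index is only written to BAR0 once
	 * the number of descriptors queued since the last kick (txNumDeferred)
	 * reaches the threshold advertised by the device, limiting the number
	 * of (expensive) register writes per burst.
	 */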
	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}
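
/*
 * Refill one Rx descriptor with a freshly allocated mbuf. Ring 0 descriptors
 * are given the HEAD buffer type and ring 1 descriptors the BODY type; the
 * generation bit is written last so the device only sees a complete entry.
 */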
static void
vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
		   struct rte_mbuf *mbuf)
{
	uint32_t val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
	struct Vmxnet3_RxDesc *rxd =
		(struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

	if (ring_id == 0)
		val = VMXNET3_RXD_BTYPE_HEAD;
	else
		val = VMXNET3_RXD_BTYPE_BODY;

	buf_info->m = mbuf;
	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
	buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

	rxd->addr = buf_info->bufPA;
	rxd->btype = val;
	rxd->len = buf_info->len;
	rxd->gen = ring->gen;

	vmxnet3_cmd_ring_adv_next2fill(ring);
}
/*
 *  Allocates mbufs and clusters. Post rx descriptors with buffer details
 *  so that device can receive packets in those buffers.
 *  Ring layout:
 *      Among the two rings, 1st ring contains buffers of type 0 and type 1.
 *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
 *      by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
 *      2nd ring contains buffers of type 1 alone. The 2nd ring is mostly used
 *      for LRO.
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		/*
		 * Load mbuf pointer into buf_info[ring_size]
		 * buf_info structure is equivalent to cookie for virtio-virtqueue
		 */
		buf_info->m = mbuf;
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
		buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

		/* Load Rx Descriptor with the buffer's GPA */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->btype = val;
		rxd->len = buf_info->len;
		/* Flip gen bit at the end to change ownership */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);
		i++;
	}

	/* Return error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}
/* Receive side checksum and other offloads */
static void
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
	/* Check for RSS */
	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;
	}

	/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
	if (rcd->v4) {
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
		else
			rxm->packet_type = RTE_PTYPE_L3_IPV4;

		if (!rcd->cnc) {
			if (!rcd->ipc)
				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
		}
	}
}
/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
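/*
 * Per completion entry, roughly: allocate a replacement mbuf first (so the
 * ring never loses a buffer), map the completion back to the command ring
 * via rqID/rxdIdx, chain SOP/non-SOP segments into a scattered packet, hand
 * finished packets to the caller on EOP, then renew the descriptor and, if
 * requested, update the Rx producer register.
 */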
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;
	uint32_t nb_rxd, idx;
	uint8_t ring_idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	nb_rx = 0;
	ring_idx = 0;
	nb_rxd = 0;
	idx = 0;

	rxq = rx_queue;
	hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	while (rcd->gen == rxq->comp_ring.gen) {
		struct rte_mbuf *newm;

		if (nb_rx >= nb_pkts)
			break;

		newm = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(newm == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			break;
		}

		idx = rcd->rxdIdx;
		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		RTE_SET_USED(rxd); /* used only for assert when enabled */
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		RTE_ASSERT(rcd->len <= rxd->len);
		RTE_ASSERT(rbi->m);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info to be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			goto rcd_done;
		}

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (rcd->sop) {
			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			if (unlikely(rcd->len == 0)) {
				RTE_ASSERT(rcd->eop);
				PMD_RX_LOG(DEBUG,
					   "Rx buf was skipped. rxring[%d][%d])",
					   ring_idx, idx);
				rte_pktmbuf_free_seg(rxm);
				goto rcd_done;
			}

			rxq->start_seg = rxm;
			vmxnet3_rx_offload(rcd, rxm);
		} else {
			struct rte_mbuf *start = rxq->start_seg;

			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			start->pkt_len += rxm->data_len;
			start->nb_segs++;

			rxq->last_seg->next = rxm;
		}
		rxq->last_seg = rxm;

		if (rcd->eop) {
			struct rte_mbuf *start = rxq->start_seg;

			/* Check for hardware stripped VLAN tag */
			if (rcd->ts) {
				start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
				start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
			}

			rx_pkts[nb_rx++] = start;
			rxq->start_seg = NULL;
		}

rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
					  rxq->cmd_ring[ring_idx].size);

		/* It's time to renew descriptors */
		vmxnet3_renew_desc(rxq, ring_idx, newm);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	return nb_rx;
}
/*
 * Create memzone for device rings. malloc can't be used as the physical
 * address is needed. If the memzone is already created, then this function
 * returns a pointer to the old one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->driver->pci_drv.driver.name, ring_name,
		 dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);
	if (mz)
		return mz;

	return rte_memzone_reserve_aligned(z_name, ring_size,
					   socket_id, 0, VMXNET3_RING_BA_ALIGN);
}
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __rte_unused const struct rte_eth_txconf *tx_conf)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
	    ETH_TXQ_FLAGS_NOXSUMSCTP) {
		PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
		return -EINVAL;
	}

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->hw = hw;
	txq->qid = queue_idx;
	txq->stopped = TRUE;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
			(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}
int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __rte_unused const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	int size;
	uint8_t i;
	char mem_name[32];

	PMD_INIT_FUNC_TRACE();

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;
	rxq->stopped = TRUE;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;

	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
		ring1->size;

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {

		ring = &rxq->cmd_ring[i];
		ring->rid = i;
		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name,
					     ring->size * sizeof(vmxnet3_buf_info_t),
					     RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}
/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Passing 0 as alloc_num will allocate full ring */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR,
					     "ERROR: Posting Rxq: %d buffers ring: %d",
					     i, j);
				return -ret;
			}
			/*
			 * Updating device with the index:next2fill to fill the
			 * mbufs for coming packets.
			 */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
		rxq->start_seg = NULL;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
	}

	return VMXNET3_SUCCESS;
}
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};
/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
	       dev_rss_conf->hashKeySize);

	/* loading indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;
		dev_rss_conf->indTable[i] = j;
	}

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}