/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"
static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
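/*
 * Informational: these are the BAR0 producer-index doorbell registers, one
 * per hardware rx command ring. After new buffers are posted, the matching
 * register is written with next2fill (see the vmxnet3_post_rx_bufs() callers
 * below) so the device knows how far it may consume the ring.
 */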
static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}
static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif
static void
vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		if (buf_info->m) {
			rte_pktmbuf_free(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}
static void
vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	uint32_t i;

	for (i = 0; i < ring->size; i++) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];

		if (buf_info->m) {
			rte_pktmbuf_free_seg(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}
static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}
void
vmxnet3_dev_tx_queue_release(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;

	if (tq != NULL) {
		/* Release mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
		/* Release the cmd_ring */
		vmxnet3_cmd_ring_release(&tq->cmd_ring);
	}
}
void
vmxnet3_dev_rx_queue_release(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;

	if (rq != NULL) {
		/* Release mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

		/* Release both the cmd_rings */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
	}
}
static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	if (tq != NULL) {
		/* Release the cmd_ring mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
	}

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	memset(ring->base, 0, size);
}
static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	int size;

	if (rq != NULL) {
		/* Release both the cmd_rings mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
	}

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	memset(ring0->base, 0, size);
}
void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		if (txq != NULL) {
			txq->stopped = TRUE;
			vmxnet3_dev_tx_queue_reset(txq);
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		if (rxq != NULL) {
			rxq->stopped = TRUE;
			vmxnet3_dev_rx_queue_reset(rxq);
		}
	}
}
static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
	if (mbuf == NULL)
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
		completed++;
	}

	/* Mark the txd for which tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
}
static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}
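/*
 * Informational: descriptor ownership on vmxnet3 is tracked with a
 * generation ("gen") bit rather than a separate owner field. Each side flips
 * its notion of the current generation every time a ring wraps, so a
 * completion entry belongs to the driver exactly while
 * tcd->gen == comp_ring->gen, which is the loop condition used above.
 */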
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;
		int copy_size = 0;
		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (count > avail) {
			/* Is command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);
				break;
			}

			/* Command ring is not full but cannot handle the
			 * multi-segmented packet. Let's try the next packet
			 * in this case.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
			if (tso)
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Drop non-TSO packet that is excessively fragmented */
		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		if (txm->nb_segs == 1 && rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
		}

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that VMXNET3 maximum
			 * transmit buffer size (16K) is greater than
			 * maximum size of mbuf segment size.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
			if (copy_size)
				gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
								   txq->cmd_ring.next2fill *
								   sizeof(struct Vmxnet3_TxDataDesc));
			else
				gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);

			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* set the last buf_info for the pkt */
		tbi->m = txm;
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		if (tso) {
			uint16_t mss = txm->tso_segsz;

			RTE_ASSERT(mss > 0);

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			gdesc->txd.msscof = mss;

			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			switch (txm->ol_flags & PKT_TX_L4_MASK) {
			case PKT_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
				break;
			case PKT_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
				break;
			default:
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & PKT_TX_L4_MASK);
				abort();
			}
			deferred++;
		} else {
			gdesc->txd.hlen = 0;
			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
			deferred++;
		}

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
		nb_tx++;
	}

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}
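/*
 * Usage note (illustrative sketch, not part of the driver): before handing
 * an mbuf to rte_eth_tx_burst() on a vmxnet3 port, an application requesting
 * offloads is expected to fill the fields consumed above, e.g.:
 *
 *	m->l2_len = sizeof(struct ether_hdr);
 *	m->l3_len = sizeof(struct ipv4_hdr);
 *	m->ol_flags |= PKT_TX_TCP_CKSUM;	// L4 checksum offload
 *	// or, for TSO:
 *	m->l4_len = sizeof(struct tcp_hdr);
 *	m->tso_segsz = 1460;			// example segment size
 *	m->ol_flags |= PKT_TX_TCP_SEG;
 *
 * The field and flag names match those read by vmxnet3_xmit_pkts() above.
 */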
/*
 * Allocates mbufs and clusters. Posts rx descriptors with buffer details
 * so that the device can receive packets in those buffers.
 * Ring layout:
 *	Among the two rings, the 1st ring contains buffers of type 0 and type 1.
 *	bufs_per_pkt is set such that for non-LRO cases all the buffers required
 *	by a frame fit in the 1st ring (1st buf of type 0, rest of type 1).
 *	The 2nd ring contains buffers of type 1 alone and is mostly used for LRO.
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		/*
		 * Load mbuf pointer into buf_info[ring_size]
		 * buf_info structure is equivalent to cookie for virtio-virtqueue
		 */
		buf_info->m = mbuf;
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
		buf_info->bufPA =
			rte_mbuf_data_dma_addr_default(mbuf);

		/* Load Rx Descriptor with the buffer's GPA */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->btype = val;
		rxd->len = buf_info->len;
		/* Flip gen bit at the end to change ownership */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);
		i++;
	}

	/* Return error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}
/* Receive side checksum and other offloads */
static void
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
	/* Check for RSS */
	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;
	}

	/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
	if (rcd->v4) {
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
		else
			rxm->packet_type = RTE_PTYPE_L3_IPV4;

		if (!rcd->cnc) {
			if (!rcd->ipc)
				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
		}
	}
}
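/*
 * Usage note (illustrative sketch): after rte_eth_rx_burst(), an application
 * can consume the fields set above, for example:
 *
 *	if (m->ol_flags & PKT_RX_IP_CKSUM_BAD)
 *		drop(m);				// hypothetical helper
 *	else if (m->ol_flags & PKT_RX_RSS_HASH)
 *		worker = m->hash.rss % nb_workers;	// spread by RSS hash
 */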
/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;
	uint32_t nb_rxd, idx;
	uint8_t ring_idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	nb_rx = 0;
	ring_idx = 0;
	nb_rxd = 0;
	idx = 0;

	rxq = rx_queue;
	hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	while (rcd->gen == rxq->comp_ring.gen) {
		if (nb_rx >= nb_pkts)
			break;

		idx = rcd->rxdIdx;
		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		RTE_SET_USED(rxd); /* used only for assert when enabled */
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		RTE_ASSERT(rcd->len <= rxd->len);
		RTE_ASSERT(rbi->m);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info to be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			goto rcd_done;
		}

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (rcd->sop) {
			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			if (unlikely(rcd->len == 0)) {
				RTE_ASSERT(rcd->eop);

				PMD_RX_LOG(DEBUG,
					   "Rx buf was skipped. rxring[%d][%d])",
					   ring_idx, idx);
				rte_pktmbuf_free_seg(rxm);
				goto rcd_done;
			}

			rxq->start_seg = rxm;
			vmxnet3_rx_offload(rcd, rxm);
		} else {
			struct rte_mbuf *start = rxq->start_seg;

			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			start->pkt_len += rxm->data_len;
			start->nb_segs++;

			rxq->last_seg->next = rxm;
		}
		rxq->last_seg = rxm;

		if (rcd->eop) {
			struct rte_mbuf *start = rxq->start_seg;

			/* Check for hardware stripped VLAN tag */
			if (rcd->ts) {
				start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
				start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
			}

			rx_pkts[nb_rx++] = start;
			rxq->start_seg = NULL;
		}

rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);

		/* It's time to allocate some new buf and renew descriptors */
		vmxnet3_post_rx_bufs(rxq, ring_idx);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR,
				   "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	return nb_rx;
}
/*
 * Create memzone for device rings. malloc can't be used as the physical address is
 * needed. If the memzone is already created, then this function returns a ptr
 * to the old one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->driver->pci_drv.name, ring_name,
		 dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);
	if (mz)
		return mz;

	return rte_memzone_reserve_aligned(z_name, ring_size,
					   socket_id, 0, VMXNET3_RING_BA_ALIGN);
}
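/*
 * With the naming scheme above, the tx descriptor zone for port 0, queue 1
 * would be looked up under a name of the form "<driver-name>_txdesc_0_1"
 * (the exact prefix depends on the registered PCI driver name). Re-running
 * queue setup therefore reuses the existing zone instead of leaking it.
 */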
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
	    ETH_TXQ_FLAGS_NOXSUMSCTP) {
		PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
		return -EINVAL;
	}

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->hw = hw;
	txq->qid = queue_idx;
	txq->stopped = TRUE;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
		(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}
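/*
 * Note on the descriptor memory laid out above: a single memzone holds the
 * three tx rings back to back, so only one physically contiguous allocation
 * is needed per queue:
 *
 *	base / basePA
 *	+----------------------------+
 *	| Vmxnet3_TxDesc     * size  |  command ring
 *	+----------------------------+
 *	| Vmxnet3_TxCompDesc * size  |  completion ring
 *	+----------------------------+
 *	| Vmxnet3_TxDataDesc * size  |  data ring (small-packet copies)
 *	+----------------------------+
 */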
int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	int size;
	uint8_t i;
	char mem_name[32];

	PMD_INIT_FUNC_TRACE();

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;
	rxq->stopped = TRUE;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;

	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
			    ring1->size;

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {

		ring = &rxq->cmd_ring[i];

		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}
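/*
 * Informational: each rx queue owns two hardware command rings. The device
 * reports a completion with an rqID of either qid1 (command ring 0) or qid2
 * (command ring 1, i.e. qid1 + num_rx_queues); vmxnet3_recv_pkts() above uses
 * that value to pick the command ring an arriving buffer belongs to.
 */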
/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;

	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Passing 0 as alloc_num will allocate full ring */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
				return -ret;
			}
			/* Updating device with the index:next2fill to fill the mbufs for coming packets */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
		rxq->start_seg = NULL;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
	}

	return 0;
}
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};

/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);

	/* loading indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;

		dev_rss_conf->indTable[i] = j;
	}

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}
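/*
 * Usage note (illustrative sketch, not driver code): an application enables
 * the hash types consumed above through the standard ethdev configuration,
 * for example:
 *
 *	struct rte_eth_conf conf = {
 *		.rxmode = { .mq_mode = ETH_MQ_RX_RSS },
 *		.rx_adv_conf.rss_conf = {
 *			.rss_key = NULL,	// fall back to rss_intel_key above
 *			.rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *		},
 *	};
 *	rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 *
 * vmxnet3_rss_configure() then translates rss_hf into the device hashType and
 * spreads the indirection table across the configured rx queues.
 */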