From 005e4d50416d15368a6687076c7c934f89470f72 Mon Sep 17 00:00:00 2001 From: Steven Date: Fri, 2 Nov 2018 16:28:52 -0700 Subject: [PATCH] vmxnet3: reduce calling vmxnet3_reg_write_inline In output.c, we buffer the descriptors and call vmxnet3_reg_write_inline once outside the loop. This change improves performance dramatically. When refilling the RX rings, there is no need to inform the device unless the device explicitly requests it (ctrl.update_prod == 1). Change-Id: I7031d58bff0d249e913d14236d416c91eb6ab94a Signed-off-by: Steven --- src/plugins/vmxnet3/input.c | 5 +++-- src/plugins/vmxnet3/output.c | 11 +++++++---- src/plugins/vmxnet3/vmxnet3.h | 10 ++++++++-- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/plugins/vmxnet3/input.c b/src/plugins/vmxnet3/input.c index 9392d57747d..dffad6c7d47 100644 --- a/src/plugins/vmxnet3/input.c +++ b/src/plugins/vmxnet3/input.c @@ -100,8 +100,9 @@ vmxnet3_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, next = nexts; rx_comp = &rxq->rx_comp[comp_ring->next]; - while (PREDICT_TRUE (n_rx_packets < VLIB_FRAME_SIZE) && - (comp_ring->gen == (rx_comp->flags & VMXNET3_RXCF_GEN))) + while (PREDICT_TRUE ((n_rx_packets < VLIB_FRAME_SIZE) && + (comp_ring->gen == + (rx_comp->flags & VMXNET3_RXCF_GEN)))) { vlib_buffer_t *b0; u32 bi0; diff --git a/src/plugins/vmxnet3/output.c b/src/plugins/vmxnet3/output.c index c80ad0706cc..2886c31edf7 100644 --- a/src/plugins/vmxnet3/output.c +++ b/src/plugins/vmxnet3/output.c @@ -109,7 +109,7 @@ VNET_DEVICE_CLASS_TX_FN (vmxnet3_device_class) (vlib_main_t * vm, u16 n_left = frame->n_vectors; vmxnet3_txq_t *txq; u32 thread_index = vm->thread_index; - u16 qid = thread_index; + u16 qid = thread_index, produce; if (PREDICT_FALSE (!(vd->flags & VMXNET3_DEVICE_F_LINK_UP))) { @@ -124,7 +124,8 @@ VNET_DEVICE_CLASS_TX_FN (vmxnet3_device_class) (vlib_main_t * vm, vmxnet3_txq_release (vm, vd, txq); - while (n_left) + produce = txq->tx_ring.produce; + while (PREDICT_TRUE 
(n_left)) { u16 space_needed = 1, i; vlib_buffer_t *b; @@ -195,13 +196,15 @@ VNET_DEVICE_CLASS_TX_FN (vmxnet3_device_class) (vlib_main_t * vm, * Device can start reading the packet */ txq->tx_desc[first_idx].flags[0] ^= VMXNET3_TXF_GEN; - vmxnet3_reg_write_inline (vd, 0, VMXNET3_REG_TXPROD, - txq->tx_ring.produce); buffers++; n_left--; } + if (PREDICT_TRUE (produce != txq->tx_ring.produce)) + vmxnet3_reg_write_inline (vd, 0, VMXNET3_REG_TXPROD, + txq->tx_ring.produce); + clib_spinlock_unlock_if_init (&txq->lock); return (frame->n_vectors - n_left); diff --git a/src/plugins/vmxnet3/vmxnet3.h b/src/plugins/vmxnet3/vmxnet3.h index 87ccd407073..a3306895ec0 100644 --- a/src/plugins/vmxnet3/vmxnet3.h +++ b/src/plugins/vmxnet3/vmxnet3.h @@ -590,6 +590,7 @@ vmxnet3_rxq_refill_ring0 (vlib_main_t * vm, vmxnet3_device_t * vd, vmxnet3_rx_desc *rxd; u16 n_refill, n_alloc; vmxnet3_rx_ring *ring; + vmxnet3_queues *q; ring = &rxq->rx_ring[0]; n_refill = rxq->size - ring->fill; @@ -620,7 +621,9 @@ vmxnet3_rxq_refill_ring0 (vlib_main_t * vm, vmxnet3_device_t * vd, n_alloc--; } - vmxnet3_reg_write_inline (vd, 0, VMXNET3_REG_RXPROD, ring->produce); + q = &vd->dma->queues; + if (PREDICT_FALSE (q->rx.ctrl.update_prod)) + vmxnet3_reg_write_inline (vd, 0, VMXNET3_REG_RXPROD, ring->produce); return 0; } @@ -632,6 +635,7 @@ vmxnet3_rxq_refill_ring1 (vlib_main_t * vm, vmxnet3_device_t * vd, vmxnet3_rx_desc *rxd; u16 n_refill, n_alloc; vmxnet3_rx_ring *ring; + vmxnet3_queues *q; ring = &rxq->rx_ring[1]; n_refill = rxq->size - ring->fill; @@ -662,7 +666,9 @@ vmxnet3_rxq_refill_ring1 (vlib_main_t * vm, vmxnet3_device_t * vd, n_alloc--; } - vmxnet3_reg_write_inline (vd, 0, VMXNET3_REG_RXPROD2, ring->produce); + q = &vd->dma->queues; + if (PREDICT_FALSE (q->rx.ctrl.update_prod)) + vmxnet3_reg_write_inline (vd, 0, VMXNET3_REG_RXPROD2, ring->produce); return 0; } -- 2.16.6