vmxnet3: reduce calling vmxnet3_reg_write_inline 79/15679/3
author Steven <sluong@cisco.com>
Fri, 2 Nov 2018 23:28:52 +0000 (16:28 -0700)
committer Damjan Marion <dmarion@me.com>
Wed, 7 Nov 2018 01:13:01 +0000 (01:13 +0000)
In output.c, we queue the descriptors for all packets in the frame and call
vmxnet3_reg_write_inline once, after the loop, instead of once per packet.
This change improves performance dramatically.

When refilling the RX rings, there is no need to inform the device unless
the device explicitly requests it (ctrl.update_prod == 1).

Change-Id: I7031d58bff0d249e913d14236d416c91eb6ab94a
Signed-off-by: Steven <sluong@cisco.com>
src/plugins/vmxnet3/input.c
src/plugins/vmxnet3/output.c
src/plugins/vmxnet3/vmxnet3.h

index 9392d57..dffad6c 100644 (file)
@@ -100,8 +100,9 @@ vmxnet3_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
   next = nexts;
   rx_comp = &rxq->rx_comp[comp_ring->next];
 
-  while (PREDICT_TRUE (n_rx_packets < VLIB_FRAME_SIZE) &&
-        (comp_ring->gen == (rx_comp->flags & VMXNET3_RXCF_GEN)))
+  while (PREDICT_TRUE ((n_rx_packets < VLIB_FRAME_SIZE) &&
+                      (comp_ring->gen ==
+                       (rx_comp->flags & VMXNET3_RXCF_GEN))))
     {
       vlib_buffer_t *b0;
       u32 bi0;
index c80ad07..2886c31 100644 (file)
@@ -109,7 +109,7 @@ VNET_DEVICE_CLASS_TX_FN (vmxnet3_device_class) (vlib_main_t * vm,
   u16 n_left = frame->n_vectors;
   vmxnet3_txq_t *txq;
   u32 thread_index = vm->thread_index;
-  u16 qid = thread_index;
+  u16 qid = thread_index, produce;
 
   if (PREDICT_FALSE (!(vd->flags & VMXNET3_DEVICE_F_LINK_UP)))
     {
@@ -124,7 +124,8 @@ VNET_DEVICE_CLASS_TX_FN (vmxnet3_device_class) (vlib_main_t * vm,
 
   vmxnet3_txq_release (vm, vd, txq);
 
-  while (n_left)
+  produce = txq->tx_ring.produce;
+  while (PREDICT_TRUE (n_left))
     {
       u16 space_needed = 1, i;
       vlib_buffer_t *b;
@@ -195,13 +196,15 @@ VNET_DEVICE_CLASS_TX_FN (vmxnet3_device_class) (vlib_main_t * vm,
        * Device can start reading the packet
        */
       txq->tx_desc[first_idx].flags[0] ^= VMXNET3_TXF_GEN;
-      vmxnet3_reg_write_inline (vd, 0, VMXNET3_REG_TXPROD,
-                               txq->tx_ring.produce);
 
       buffers++;
       n_left--;
     }
 
+  if (PREDICT_TRUE (produce != txq->tx_ring.produce))
+    vmxnet3_reg_write_inline (vd, 0, VMXNET3_REG_TXPROD,
+                             txq->tx_ring.produce);
+
   clib_spinlock_unlock_if_init (&txq->lock);
 
   return (frame->n_vectors - n_left);
index 87ccd40..a330689 100644 (file)
@@ -590,6 +590,7 @@ vmxnet3_rxq_refill_ring0 (vlib_main_t * vm, vmxnet3_device_t * vd,
   vmxnet3_rx_desc *rxd;
   u16 n_refill, n_alloc;
   vmxnet3_rx_ring *ring;
+  vmxnet3_queues *q;
 
   ring = &rxq->rx_ring[0];
   n_refill = rxq->size - ring->fill;
@@ -620,7 +621,9 @@ vmxnet3_rxq_refill_ring0 (vlib_main_t * vm, vmxnet3_device_t * vd,
       n_alloc--;
     }
 
-  vmxnet3_reg_write_inline (vd, 0, VMXNET3_REG_RXPROD, ring->produce);
+  q = &vd->dma->queues;
+  if (PREDICT_FALSE (q->rx.ctrl.update_prod))
+    vmxnet3_reg_write_inline (vd, 0, VMXNET3_REG_RXPROD, ring->produce);
 
   return 0;
 }
@@ -632,6 +635,7 @@ vmxnet3_rxq_refill_ring1 (vlib_main_t * vm, vmxnet3_device_t * vd,
   vmxnet3_rx_desc *rxd;
   u16 n_refill, n_alloc;
   vmxnet3_rx_ring *ring;
+  vmxnet3_queues *q;
 
   ring = &rxq->rx_ring[1];
   n_refill = rxq->size - ring->fill;
@@ -662,7 +666,9 @@ vmxnet3_rxq_refill_ring1 (vlib_main_t * vm, vmxnet3_device_t * vd,
       n_alloc--;
     }
 
-  vmxnet3_reg_write_inline (vd, 0, VMXNET3_REG_RXPROD2, ring->produce);
+  q = &vd->dma->queues;
+  if (PREDICT_FALSE (q->rx.ctrl.update_prod))
+    vmxnet3_reg_write_inline (vd, 0, VMXNET3_REG_RXPROD2, ring->produce);
 
   return 0;
 }