bond: tx performance enhancement part deux 29/15029/4
author Steven <sluong@cisco.com>
Fri, 28 Sep 2018 03:06:26 +0000 (20:06 -0700)
committer Damjan Marion <dmarion@me.com>
Fri, 28 Sep 2018 19:39:50 +0000 (19:39 +0000)
- Reduce per-packet cost by buffering the output packet buffer indexes in a per-port queue and
processing that queue outside the packet processing loop.
- Move unnecessary variable initialization outside of the while loop.
- There is no need to save the old interface if tracing is not enabled.

Test results for a 256-byte packet comparison. Other packet sizes show similar improvement.

With the patch
--------------
BondEthernet0-output             active              52836        13526016               0          1.71e1          256.00
BondEthernet0-tx                 active              52836        13526016               0          2.68e1          256.00
TenGigabitEthernet6/0/0-output   active              52836         6762896               0          9.17e0          127.99
TenGigabitEthernet6/0/0-tx       active              52836         6762896               0          6.97e1          127.99
TenGigabitEthernet6/0/1-output   active              52836         6763120               0          9.40e0          128.00
TenGigabitEthernet6/0/1-tx       active              52836         6763120               0          7.00e1          128.00
bond-input                       active              52836        13526016               0          1.76e1          256.00

Without the patch
-----------------
BondEthernet0-output             active              60858        15579648               0          1.73e1          256.00
BondEthernet0-tx                 active              60858        15579648               0          2.94e1          256.00
TenGigabitEthernet6/0/0-output   active              60858         7789626               0          9.29e0          127.99
TenGigabitEthernet6/0/0-tx       active              60858         7789626               0          7.01e1          127.99
TenGigabitEthernet6/0/1-output   active              60858         7790022               0          9.31e0          128.00
TenGigabitEthernet6/0/1-tx       active              60858         7790022               0          7.10e1          128.00
bond-input                       active              60858        15579648               0          1.77e1          256.00

Change-Id: Ib6d73a63ceeaa2f1397ceaf4c5391c57fd865b04
Signed-off-by: Steven <sluong@cisco.com>
src/vnet/bonding/cli.c
src/vnet/bonding/device.c
src/vnet/bonding/node.h

index ec34b47..522d13a 100644 (file)
@@ -198,7 +198,6 @@ bond_delete_if (vlib_main_t * vm, u32 sw_if_index)
   slave_if_t *sif;
   vnet_hw_interface_t *hw;
   u32 *sif_sw_if_index;
-  u32 thread_index;
   u32 **s_list = 0;
   u32 i;
 
@@ -232,12 +231,6 @@ bond_delete_if (vlib_main_t * vm, u32 sw_if_index)
 
   clib_bitmap_free (bif->port_number_bitmap);
   hash_unset (bm->bond_by_sw_if_index, bif->sw_if_index);
-  for (thread_index = 0; thread_index < vlib_get_thread_main ()->n_vlib_mains;
-       thread_index++)
-    {
-      vec_free (bif->per_thread_info[thread_index].frame);
-    }
-  vec_free (bif->per_thread_info);
   memset (bif, 0, sizeof (*bif));
   pool_put (bm->interfaces, bif);
 
@@ -310,9 +303,6 @@ bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args)
   sw = vnet_get_hw_sw_interface (vnm, bif->hw_if_index);
   bif->sw_if_index = sw->sw_if_index;
   bif->group = bif->sw_if_index;
-  vec_validate_aligned (bif->per_thread_info,
-                       vlib_get_thread_main ()->n_vlib_mains - 1,
-                       CLIB_CACHE_LINE_BYTES);
   if (vlib_get_thread_main ()->n_vlib_mains > 1)
     clib_spinlock_init (&bif->lockp);
 
@@ -431,6 +421,8 @@ bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args)
   vnet_interface_main_t *im = &vnm->interface_main;
   vnet_hw_interface_t *bif_hw, *sif_hw;
   vnet_sw_interface_t *sw;
+  u32 thread_index;
+  u32 sif_if_index;
 
   bif = bond_get_master_by_sw_if_index (args->group);
   if (!bif)
@@ -527,6 +519,20 @@ bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args)
       bond_enable_collecting_distributing (vm, sif);
     }
 
+  vec_foreach_index (thread_index, bm->per_thread_data)
+  {
+    bond_per_thread_data_t *ptd = vec_elt_at_index (bm->per_thread_data,
+                                                   thread_index);
+
+    vec_validate_aligned (ptd->per_port_queue, sif->sw_if_index,
+                         CLIB_CACHE_LINE_BYTES);
+
+    vec_foreach_index (sif_if_index, ptd->per_port_queue)
+    {
+      ptd->per_port_queue[sif_if_index].n_buffers = 0;
+    }
+  }
+
   args->rv = vnet_feature_enable_disable ("device-input", "bond-input",
                                          sif_hw->hw_if_index, 1, 0, 0);
 
@@ -755,6 +761,9 @@ bond_cli_init (vlib_main_t * vm)
   bm->vlib_main = vm;
   bm->vnet_main = vnet_get_main ();
   vec_validate_aligned (bm->slave_by_sw_if_index, 1, CLIB_CACHE_LINE_BYTES);
+  vec_validate_aligned (bm->per_thread_data,
+                       vlib_get_thread_main ()->n_vlib_mains - 1,
+                       CLIB_CACHE_LINE_BYTES);
 
   return 0;
 }
index 7f98284..79ca2fa 100644 (file)
@@ -136,29 +136,25 @@ bond_load_balance_broadcast (vlib_main_t * vm, vlib_node_runtime_t * node,
                             bond_if_t * bif, vlib_buffer_t * b0,
                             uword slave_count)
 {
-  vnet_main_t *vnm = vnet_get_main ();
+  bond_main_t *bm = &bond_main;
   vlib_buffer_t *c0;
   int port;
-  u32 *to_next = 0;
   u32 sw_if_index;
-  vlib_frame_t *f;
   u16 thread_index = vm->thread_index;
+  bond_per_thread_data_t *ptd = vec_elt_at_index (bm->per_thread_data,
+                                                 thread_index);
 
   for (port = 1; port < slave_count; port++)
     {
       sw_if_index = *vec_elt_at_index (bif->active_slaves, port);
-      if (bif->per_thread_info[thread_index].frame[port] == 0)
-       bif->per_thread_info[thread_index].frame[port] =
-         vnet_get_frame_to_sw_interface (vnm, sw_if_index);
-      f = bif->per_thread_info[thread_index].frame[port];
-      to_next = vlib_frame_vector_args (f);
-      to_next += f->n_vectors;
       c0 = vlib_buffer_copy (vm, b0);
       if (PREDICT_TRUE (c0 != 0))
        {
          vnet_buffer (c0)->sw_if_index[VLIB_TX] = sw_if_index;
-         to_next[0] = vlib_get_buffer_index (vm, c0);
-         f->n_vectors++;
+         ptd->per_port_queue[sw_if_index].buffers[ptd->per_port_queue
+                                                  [sw_if_index].n_buffers] =
+           vlib_get_buffer_index (vm, c0);
+         ptd->per_port_queue[sw_if_index].n_buffers++;
        }
     }
 
@@ -399,16 +395,18 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
   u32 *from = vlib_frame_vector_args (frame);
   ethernet_header_t *eth;
-  u32 port, n_left;
-  u32 sw_if_index, sw_if_index1, sw_if_index2, sw_if_index3;
+  u32 n_left;
+  u32 sw_if_index;
   bond_packet_trace_t *t0;
   uword n_trace = vlib_get_trace_count (vm, node);
   u16 thread_index = vm->thread_index;
   vnet_main_t *vnm = vnet_get_main ();
   u32 *to_next;
-  u32 sif_if_index, sif_if_index1, sif_if_index2, sif_if_index3;
   vlib_frame_t *f;
   uword slave_count;
+  u32 port0 = 0, port1 = 0, port2 = 0, port3 = 0;
+  bond_per_thread_data_t *ptd = vec_elt_at_index (bm->per_thread_data,
+                                                 thread_index);
 
   if (PREDICT_FALSE (bif->admin_up == 0))
     {
@@ -438,14 +436,10 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
       return frame->n_vectors;
     }
 
-  vec_validate_aligned (bif->per_thread_info[thread_index].frame, slave_count,
-                       CLIB_CACHE_LINE_BYTES);
-
   b = bufs;
   while (n_left >= 4)
     {
-      u32 next0 = 0, next1 = 0, next2 = 0, next3 = 0;
-      u32 port0 = 0, port1 = 0, port2 = 0, port3 = 0;
+      u32 sif_if_index0, sif_if_index1, sif_if_index2, sif_if_index3;
 
       // Prefetch next iteration
       if (n_left >= 8)
@@ -468,11 +462,6 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
       VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[2]);
       VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[3]);
 
-      sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
-      sw_if_index1 = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
-      sw_if_index2 = vnet_buffer (b[2])->sw_if_index[VLIB_TX];
-      sw_if_index3 = vnet_buffer (b[3])->sw_if_index[VLIB_TX];
-
       if (PREDICT_TRUE (slave_count > 1))
        {
          port0 =
@@ -493,69 +482,22 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
                                                             slave_count);
        }
 
-      sif_if_index = *vec_elt_at_index (bif->active_slaves, port0);
+      sif_if_index0 = *vec_elt_at_index (bif->active_slaves, port0);
       sif_if_index1 = *vec_elt_at_index (bif->active_slaves, port1);
       sif_if_index2 = *vec_elt_at_index (bif->active_slaves, port2);
       sif_if_index3 = *vec_elt_at_index (bif->active_slaves, port3);
 
-      vnet_buffer (b[0])->sw_if_index[VLIB_TX] = sif_if_index;
-      vnet_buffer (b[1])->sw_if_index[VLIB_TX] = sif_if_index1;
-      vnet_buffer (b[2])->sw_if_index[VLIB_TX] = sif_if_index2;
-      vnet_buffer (b[3])->sw_if_index[VLIB_TX] = sif_if_index3;
-
-      if (PREDICT_FALSE ((bif->per_thread_info[thread_index].frame[port0]
-                         == 0)))
-       bif->per_thread_info[thread_index].frame[port0] =
-         vnet_get_frame_to_sw_interface (vnm, sif_if_index);
-
-      if (PREDICT_FALSE ((bif->per_thread_info[thread_index].frame[port1]
-                         == 0)))
-       bif->per_thread_info[thread_index].frame[port1] =
-         vnet_get_frame_to_sw_interface (vnm, sif_if_index1);
-
-      if (PREDICT_FALSE ((bif->per_thread_info[thread_index].frame[port2]
-                         == 0)))
-       bif->per_thread_info[thread_index].frame[port2] =
-         vnet_get_frame_to_sw_interface (vnm, sif_if_index2);
-
-      if (PREDICT_FALSE ((bif->per_thread_info[thread_index].frame[port3]
-                         == 0)))
-       bif->per_thread_info[thread_index].frame[port3] =
-         vnet_get_frame_to_sw_interface (vnm, sif_if_index3);
-
-      f = bif->per_thread_info[thread_index].frame[port0];
-      to_next = vlib_frame_vector_args (f);
-      to_next += f->n_vectors;
-      to_next[0] = vlib_get_buffer_index (vm, b[0]);
-      f->n_vectors++;
-
-      f = bif->per_thread_info[thread_index].frame[port1];
-      to_next = vlib_frame_vector_args (f);
-      to_next += f->n_vectors;
-      to_next[0] = vlib_get_buffer_index (vm, b[1]);
-      f->n_vectors++;
-
-      f = bif->per_thread_info[thread_index].frame[port2];
-      to_next = vlib_frame_vector_args (f);
-      to_next += f->n_vectors;
-      to_next[0] = vlib_get_buffer_index (vm, b[2]);
-      f->n_vectors++;
-
-      f = bif->per_thread_info[thread_index].frame[port3];
-      to_next = vlib_frame_vector_args (f);
-      to_next += f->n_vectors;
-      to_next[0] = vlib_get_buffer_index (vm, b[3]);
-      f->n_vectors++;
-
+      /* Do the tracing before the interface is overwritten */
       if (PREDICT_FALSE (n_trace > 0))
        {
+         u32 next0 = 0, next1 = 0, next2 = 0, next3 = 0;
          vlib_trace_buffer (vm, node, next0, b[0], 0 /* follow_chain */ );
          vlib_set_trace_count (vm, node, --n_trace);
          t0 = vlib_add_trace (vm, node, b[0], sizeof (*t0));
          eth = (ethernet_header_t *) vlib_buffer_get_current (b[0]);
          t0->ethernet = *eth;
-         t0->sw_if_index = sw_if_index;
-         t0->bond_sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
+         t0->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
+         t0->bond_sw_if_index = sif_if_index0;
 
          if (PREDICT_TRUE (n_trace > 0))
            {
@@ -565,8 +507,8 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
              t0 = vlib_add_trace (vm, node, b[1], sizeof (*t0));
              eth = (ethernet_header_t *) vlib_buffer_get_current (b[1]);
              t0->ethernet = *eth;
-             t0->sw_if_index = sw_if_index1;
-             t0->bond_sw_if_index = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
+             t0->sw_if_index = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
+             t0->bond_sw_if_index = sif_if_index1;
 
              if (PREDICT_TRUE (n_trace > 0))
                {
@@ -576,9 +518,8 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
                  t0 = vlib_add_trace (vm, node, b[2], sizeof (*t0));
                  eth = (ethernet_header_t *) vlib_buffer_get_current (b[2]);
                  t0->ethernet = *eth;
-                 t0->sw_if_index = sw_if_index2;
-                 t0->bond_sw_if_index =
-                   vnet_buffer (b[2])->sw_if_index[VLIB_TX];
+                 t0->sw_if_index = vnet_buffer (b[2])->sw_if_index[VLIB_TX];
+                 t0->bond_sw_if_index = sif_if_index2;
 
                  if (PREDICT_TRUE (n_trace > 0))
                    {
@@ -589,68 +530,93 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
                      eth =
                        (ethernet_header_t *) vlib_buffer_get_current (b[3]);
                      t0->ethernet = *eth;
-                     t0->sw_if_index = sw_if_index3;
-                     t0->bond_sw_if_index =
+                     t0->sw_if_index =
                        vnet_buffer (b[3])->sw_if_index[VLIB_TX];
+                     t0->bond_sw_if_index = sif_if_index3;
                    }
                }
            }
        }
-      from += 4;
+
+      vnet_buffer (b[0])->sw_if_index[VLIB_TX] = sif_if_index0;
+      vnet_buffer (b[1])->sw_if_index[VLIB_TX] = sif_if_index1;
+      vnet_buffer (b[2])->sw_if_index[VLIB_TX] = sif_if_index2;
+      vnet_buffer (b[3])->sw_if_index[VLIB_TX] = sif_if_index3;
+
+      ptd->per_port_queue[sif_if_index0].buffers[ptd->per_port_queue
+                                                [sif_if_index0].n_buffers] =
+       vlib_get_buffer_index (vm, b[0]);
+      ptd->per_port_queue[sif_if_index0].n_buffers++;
+
+      ptd->per_port_queue[sif_if_index1].buffers[ptd->per_port_queue
+                                                [sif_if_index1].n_buffers] =
+       vlib_get_buffer_index (vm, b[1]);
+      ptd->per_port_queue[sif_if_index1].n_buffers++;
+
+      ptd->per_port_queue[sif_if_index2].buffers[ptd->per_port_queue
+                                                [sif_if_index2].n_buffers] =
+       vlib_get_buffer_index (vm, b[2]);
+      ptd->per_port_queue[sif_if_index2].n_buffers++;
+
+      ptd->per_port_queue[sif_if_index3].buffers[ptd->per_port_queue
+                                                [sif_if_index3].n_buffers] =
+       vlib_get_buffer_index (vm, b[3]);
+      ptd->per_port_queue[sif_if_index3].n_buffers++;
+
       n_left -= 4;
       b += 4;
     }
 
   while (n_left > 0)
     {
-      u32 next0 = 0;
-      u32 port0 = 0;
+      u32 sif_if_index0;
 
       VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]);
 
-      sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
-
       if (PREDICT_TRUE (slave_count > 1))
        port0 =
          (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif,
                                                           b[0], slave_count);
-      sif_if_index = *vec_elt_at_index (bif->active_slaves, port0);
-      vnet_buffer (b[0])->sw_if_index[VLIB_TX] = sif_if_index;
-      if (PREDICT_FALSE
-         ((bif->per_thread_info[thread_index].frame[port0] == 0)))
-       bif->per_thread_info[thread_index].frame[port0] =
-         vnet_get_frame_to_sw_interface (vnm, sif_if_index);
-      f = bif->per_thread_info[thread_index].frame[port0];
-      to_next = vlib_frame_vector_args (f);
-      to_next += f->n_vectors;
-      to_next[0] = vlib_get_buffer_index (vm, b[0]);
-      f->n_vectors++;
+      sif_if_index0 = *vec_elt_at_index (bif->active_slaves, port0);
 
+      /* Do the tracing before the old interface is overwritten */
       if (PREDICT_FALSE (n_trace > 0))
        {
+         u32 next0 = 0;
+
          vlib_trace_buffer (vm, node, next0, b[0], 0 /* follow_chain */ );
          vlib_set_trace_count (vm, node, --n_trace);
          t0 = vlib_add_trace (vm, node, b[0], sizeof (*t0));
          eth = (ethernet_header_t *) vlib_buffer_get_current (b[0]);
          t0->ethernet = *eth;
-         t0->sw_if_index = sw_if_index;
-         t0->bond_sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
+         t0->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
+         t0->bond_sw_if_index = sif_if_index0;
        }
 
-      from += 1;
+      vnet_buffer (b[0])->sw_if_index[VLIB_TX] = sif_if_index0;
+
+      ptd->per_port_queue[sif_if_index0].buffers[ptd->per_port_queue
+                                                [sif_if_index0].n_buffers] =
+       vlib_get_buffer_index (vm, b[0]);
+      ptd->per_port_queue[sif_if_index0].n_buffers++;
+
       n_left -= 1;
       b += 1;
     }
 
-  for (port = 0; port < slave_count; port++)
+  for (port0 = 0; port0 < slave_count; port0++)
     {
-      f = bif->per_thread_info[thread_index].frame[port];
-      if (f == 0)
-       continue;
-
-      sw_if_index = *vec_elt_at_index (bif->active_slaves, port);
-      vnet_put_frame_to_sw_interface (vnm, sw_if_index, f);
-      bif->per_thread_info[thread_index].frame[port] = 0;
+      sw_if_index = *vec_elt_at_index (bif->active_slaves, port0);
+      if (PREDICT_TRUE (ptd->per_port_queue[sw_if_index].n_buffers))
+       {
+         f = vnet_get_frame_to_sw_interface (vnm, sw_if_index);
+         f->n_vectors = ptd->per_port_queue[sw_if_index].n_buffers;
+         to_next = vlib_frame_vector_args (f);
+         clib_memcpy (to_next, ptd->per_port_queue[sw_if_index].buffers,
+                      f->n_vectors << 2);
+         vnet_put_frame_to_sw_interface (vnm, sw_if_index, f);
+         ptd->per_port_queue[sw_if_index].n_buffers = 0;
+       }
     }
 
   vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters
index 104b7b1..6b13a46 100644 (file)
@@ -139,9 +139,15 @@ typedef CLIB_PACKED (struct
 
 typedef struct
 {
-  vlib_frame_t **frame;
+  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+  u32 buffers[VLIB_FRAME_SIZE];
+  u32 n_buffers;
+} bond_per_port_queue_t;
 
-} bond_if_per_thread_t;
+typedef struct
+{
+  bond_per_port_queue_t *per_port_queue;
+} bond_per_thread_data_t;
 
 typedef struct
 {
@@ -175,7 +181,6 @@ typedef struct
   u8 hw_address[6];
 
   clib_spinlock_t lockp;
-  bond_if_per_thread_t *per_thread_info;
 } bond_if_t;
 
 typedef struct
@@ -292,7 +297,7 @@ typedef struct
   /* pool of bonding interfaces */
   bond_if_t *interfaces;
 
-  /* pool of lacp neighbors */
+  /* pool of slave interfaces */
   slave_if_t *neighbors;
 
   /* rapidly find a bond by vlib software interface index */
@@ -308,6 +313,8 @@ typedef struct
   lacp_enable_disable_func lacp_enable_disable;
 
   uword *slave_by_sw_if_index;
+
+  bond_per_thread_data_t *per_thread_data;
 } bond_main_t;
 
 /* bond packet trace capture */