From 16de39e1a1d922d42b33dcd1b33c3716e3f766f6 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 26 Sep 2018 10:15:41 +0200 Subject: [PATCH] bond: tx perf improvement, part trois Introduce bond_tx_inline, which takes the load-balance algorithm (lb) as a constant argument so that gcc can optimize each specialized copy. The numbers appear a tad better for 256-byte frames. with the patch -------------- Thread 2 vpp_wk_1 (lcore 3) Time 4.3, average vectors/node 224.00, last 128 main loops 40.00 per node 222.61 vector rates in 8.4836e6, out 1.6967e7, drop 0.0000e0, punt 0.0000e0 Name State Calls Vectors Suspends Clocks Vectors/Call BondEthernet0-output active 141054 36109824 0 2.51e1 256.00 BondEthernet0-tx active 141054 36109824 0 2.55e1 256.00 TenGigabitEthernet6/0/0-output active 141054 18055469 0 9.43e0 128.00 TenGigabitEthernet6/0/0-tx active 141054 18055469 0 6.97e1 128.00 TenGigabitEthernet6/0/1-output active 141054 18054355 0 9.54e0 127.99 TenGigabitEthernet6/0/1-tx active 141054 18054355 0 7.05e1 127.99 bond-input active 141054 36109824 0 1.76e1 256.00 dpdk-input polling 70527 36109824 0 5.03e1 512.00 ethernet-input active 141054 36109824 0 6.12e1 256.00 ip4-input active 141054 36109824 0 3.26e1 256.00 ip4-lookup active 141054 36109824 0 2.94e1 256.00 ip4-rewrite active 141054 36109824 0 3.27e1 256.00 without the patch ----------------- Thread 2 vpp_wk_1 (lcore 3) Time 4.3, average vectors/node 224.00, last 128 main loops 40.00 per node 222.61 vector rates in 8.4443e6, out 1.6889e7, drop 0.0000e0, punt 0.0000e0 Name State Calls Vectors Suspends Clocks Vectors/Call BondEthernet0-output active 142744 36542464 0 2.51e1 256.00 BondEthernet0-tx active 142744 36542464 0 2.67e1 256.00 TenGigabitEthernet6/0/0-output active 142744 18270813 0 9.19e0 127.99 TenGigabitEthernet6/0/0-tx active 142744 18270813 0 6.98e1 127.99 TenGigabitEthernet6/0/1-output active 142744 18271651 0 9.43e0 128.00 TenGigabitEthernet6/0/1-tx active 142744 18271651 0 7.02e1 128.00 bond-input active 142744 36542464 0 1.76e1 256.00 dpdk-input 
polling 71372 36542464 0 5.08e1 512.00 ethernet-input active 142744 36542464 0 6.15e1 256.00 ip4-input active 142744 36542464 0 3.23e1 256.00 ip4-lookup active 142744 36542464 0 2.96e1 256.00 ip4-rewrite active 142744 36542464 0 3.28e1 256.00 Change-Id: I9fd43eda3c735cbff680ac6d2f01ecdae81f0eda Signed-off-by: Damjan Marion --- src/vnet/bonding/device.c | 229 +++++++++++++++++++++++++++++++++------------- src/vnet/bonding/node.h | 4 +- 2 files changed, 166 insertions(+), 67 deletions(-) diff --git a/src/vnet/bonding/device.c b/src/vnet/bonding/device.c index 79ca2faf164..8a78728ccd8 100644 --- a/src/vnet/bonding/device.c +++ b/src/vnet/bonding/device.c @@ -379,63 +379,28 @@ bond_load_balance_active_backup (vlib_main_t * vm, return 0; } -static bond_load_balance_func_t bond_load_balance_table[] = { -#define _(v,f,s, p) { bond_load_balance_##p }, - foreach_bond_lb_algo -#undef _ -}; - -VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +static_always_inline void +bond_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, bond_if_t * bif, + uword slave_count, u32 lb_alg) { - vnet_interface_output_runtime_t *rund = (void *) node->runtime_data; bond_main_t *bm = &bond_main; - bond_if_t *bif = pool_elt_at_index (bm->interfaces, rund->dev_instance); - vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; - u32 *from = vlib_frame_vector_args (frame); - ethernet_header_t *eth; - u32 n_left; - u32 sw_if_index; + vnet_main_t *vnm = vnet_get_main (); + u16 thread_index = vm->thread_index; bond_packet_trace_t *t0; uword n_trace = vlib_get_trace_count (vm, node); - u16 thread_index = vm->thread_index; - vnet_main_t *vnm = vnet_get_main (); u32 *to_next; vlib_frame_t *f; - uword slave_count; + ethernet_header_t *eth; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + u32 *from = vlib_frame_vector_args (frame); + u32 n_left = frame->n_vectors; + u32 sw_if_index; u32 port0 = 0, port1 = 0, port2 = 0, port3 = 
0; bond_per_thread_data_t *ptd = vec_elt_at_index (bm->per_thread_data, thread_index); - if (PREDICT_FALSE (bif->admin_up == 0)) - { - vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); - vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters + - VNET_INTERFACE_COUNTER_DROP, - thread_index, bif->sw_if_index, - frame->n_vectors); - vlib_error_count (vm, node->node_index, BOND_TX_ERROR_IF_DOWN, - frame->n_vectors); - return frame->n_vectors; - } - - n_left = frame->n_vectors; vlib_get_buffers (vm, from, bufs, n_left); - - slave_count = vec_len (bif->active_slaves); - if (PREDICT_FALSE (slave_count == 0)) - { - vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); - vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters + - VNET_INTERFACE_COUNTER_DROP, - thread_index, bif->sw_if_index, - frame->n_vectors); - vlib_error_count (vm, node->node_index, BOND_TX_ERROR_NO_SLAVE, - frame->n_vectors); - return frame->n_vectors; - } - b = bufs; while (n_left >= 4) { @@ -464,22 +429,72 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm, if (PREDICT_TRUE (slave_count > 1)) { - port0 = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, - bif, b[0], - slave_count); - port1 = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, - bif, b[1], - slave_count); - port2 = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, - bif, b[2], - slave_count); - port3 = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, - bif, b[3], - slave_count); + if (lb_alg == BOND_LB_L2) + { + port0 = bond_load_balance_l2 (vm, node, bif, b[0], slave_count); + port1 = bond_load_balance_l2 (vm, node, bif, b[1], slave_count); + port2 = bond_load_balance_l2 (vm, node, bif, b[2], slave_count); + port3 = bond_load_balance_l2 (vm, node, bif, b[3], slave_count); + } + else if (lb_alg == BOND_LB_L34) + { + port0 = bond_load_balance_l34 (vm, node, bif, b[0], + slave_count); + port1 = bond_load_balance_l34 (vm, 
node, bif, b[1], + slave_count); + port2 = bond_load_balance_l34 (vm, node, bif, b[2], + slave_count); + port3 = bond_load_balance_l34 (vm, node, bif, b[3], + slave_count); + } + else if (lb_alg == BOND_LB_L23) + { + port0 = bond_load_balance_l23 (vm, node, bif, b[0], + slave_count); + port1 = bond_load_balance_l23 (vm, node, bif, b[1], + slave_count); + port2 = bond_load_balance_l23 (vm, node, bif, b[2], + slave_count); + port3 = bond_load_balance_l23 (vm, node, bif, b[3], + slave_count); + } + else if (lb_alg == BOND_LB_RR) + { + port0 = bond_load_balance_round_robin (vm, node, bif, b[0], + slave_count); + port1 = bond_load_balance_round_robin (vm, node, bif, b[1], + slave_count); + port2 = bond_load_balance_round_robin (vm, node, bif, b[2], + slave_count); + port3 = bond_load_balance_round_robin (vm, node, bif, b[3], + slave_count); + } + else if (lb_alg == BOND_LB_BC) + { + port0 = bond_load_balance_broadcast (vm, node, bif, b[0], + slave_count); + port1 = bond_load_balance_broadcast (vm, node, bif, b[1], + slave_count); + port2 = bond_load_balance_broadcast (vm, node, bif, b[2], + slave_count); + port3 = bond_load_balance_broadcast (vm, node, bif, b[3], + slave_count); + } + else if (lb_alg == BOND_LB_AB) + { + port0 = bond_load_balance_active_backup (vm, node, bif, b[0], + slave_count); + port1 = bond_load_balance_active_backup (vm, node, bif, b[1], + slave_count); + port2 = bond_load_balance_active_backup (vm, node, bif, b[2], + slave_count); + port3 = bond_load_balance_active_backup (vm, node, bif, b[3], + slave_count); + } + else + { + ASSERT (0); + } } sif_if_index0 = *vec_elt_at_index (bif->active_slaves, port0); @@ -574,9 +589,42 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm, VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]); if (PREDICT_TRUE (slave_count > 1)) - port0 = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, - b[0], slave_count); + { /* test the caller-supplied constant, as the 4-wide loop does */ + if (lb_alg == BOND_LB_L2) + { + port0 = bond_load_balance_l2 (vm, node, bif, b[0], 
slave_count); + } + else if (lb_alg == BOND_LB_L34) + { + port0 = bond_load_balance_l34 (vm, node, bif, b[0], + slave_count); + } + else if (lb_alg == BOND_LB_L23) + { + port0 = bond_load_balance_l23 (vm, node, bif, b[0], + slave_count); + } + else if (lb_alg == BOND_LB_RR) + { + port0 = bond_load_balance_round_robin (vm, node, bif, b[0], + slave_count); + } + else if (lb_alg == BOND_LB_BC) + { + port0 = bond_load_balance_broadcast (vm, node, bif, b[0], + slave_count); + } + else if (lb_alg == BOND_LB_AB) + { + port0 = bond_load_balance_active_backup (vm, node, bif, b[0], + slave_count); + } + else + { + ASSERT (0); + } + } + sif_if_index0 = *vec_elt_at_index (bif->active_slaves, port0); /* Do the tracing before the old interface is overwritten */ @@ -622,6 +670,57 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm, vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters + VNET_INTERFACE_COUNTER_TX, thread_index, bif->sw_if_index, frame->n_vectors); +} + +VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vnet_interface_output_runtime_t *rund = (void *) node->runtime_data; + bond_main_t *bm = &bond_main; + u16 thread_index = vm->thread_index; + bond_if_t *bif = pool_elt_at_index (bm->interfaces, rund->dev_instance); + uword slave_count; + + if (PREDICT_FALSE (bif->admin_up == 0)) + { + vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); + vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters + + VNET_INTERFACE_COUNTER_DROP, + thread_index, bif->sw_if_index, + frame->n_vectors); + vlib_error_count (vm, node->node_index, BOND_TX_ERROR_IF_DOWN, + frame->n_vectors); + return frame->n_vectors; + } + + slave_count = vec_len (bif->active_slaves); + if (PREDICT_FALSE (slave_count == 0)) + { + vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); + vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters + 
VNET_INTERFACE_COUNTER_DROP, + thread_index, bif->sw_if_index, + frame->n_vectors); + vlib_error_count (vm, node->node_index, BOND_TX_ERROR_NO_SLAVE, + frame->n_vectors); + return frame->n_vectors; + } + + if (bif->lb == BOND_LB_L2) + bond_tx_inline (vm, node, frame, bif, slave_count, BOND_LB_L2); + else if (bif->lb == BOND_LB_L34) + bond_tx_inline (vm, node, frame, bif, slave_count, BOND_LB_L34); + else if (bif->lb == BOND_LB_L23) + bond_tx_inline (vm, node, frame, bif, slave_count, BOND_LB_L23); + else if (bif->lb == BOND_LB_RR) + bond_tx_inline (vm, node, frame, bif, slave_count, BOND_LB_RR); + else if (bif->lb == BOND_LB_BC) + bond_tx_inline (vm, node, frame, bif, slave_count, BOND_LB_BC); + else if (bif->lb == BOND_LB_AB) + bond_tx_inline (vm, node, frame, bif, slave_count, BOND_LB_AB); + else + ASSERT (0); return frame->n_vectors; } diff --git a/src/vnet/bonding/node.h b/src/vnet/bonding/node.h index 6b13a46a355..e1359d0a5f0 100644 --- a/src/vnet/bonding/node.h +++ b/src/vnet/bonding/node.h @@ -51,13 +51,13 @@ typedef enum /* configurable load-balances */ #define foreach_bond_lb \ _ (2, L23, "l23", l23) \ - _ (1, l34 , "l34", l34) \ + _ (1, L34 , "l34", l34) \ _ (0, L2, "l2", l2) /* load-balance functions implemented in bond-output */ #define foreach_bond_lb_algo \ _ (0, L2, "l2", l2) \ - _ (1, l34 , "l34", l34) \ + _ (1, L34 , "l34", l34) \ _ (2, L23, "l23", l23) \ _ (3, RR, "round-robin", round_robin) \ _ (4, BC, "broadcast", broadcast) \ -- 2.16.6