X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fbonding%2Fdevice.c;h=53123dd7c4f1041fd04e5fb7f146868d4d1aefa4;hb=067cd6229a47ea3ba8b59a2a04090e80afb5bd2c;hp=a27524089aa1afbe74ddeeb26bb5cd1c36426669;hpb=4f8863b21405d1ab3e067e978a60be72a343358b;p=vpp.git diff --git a/src/vnet/bonding/device.c b/src/vnet/bonding/device.c index a27524089aa..53123dd7c4f 100644 --- a/src/vnet/bonding/device.c +++ b/src/vnet/bonding/device.c @@ -22,6 +22,9 @@ #include #include #include +#include +#include +#include #define foreach_bond_tx_error \ _(NONE, "no error") \ @@ -99,18 +102,6 @@ bond_set_l2_mode_function (vnet_main_t * vnm, ethernet_set_rx_redirect (vnm, sif_hw, 1); } } - else if ((bif_hw->l2_if_count == 0) && (l2_if_adjust == -1)) - { - /* Just removed last L2 subinterface on this port */ - vec_foreach (sw_if_index, bif->slaves) - { - sif_hw = vnet_get_sup_hw_interface (vnm, *sw_if_index); - ethernet_set_flags (vnm, sif_hw->hw_if_index, 0); - - /* Allow ip packets to go directly to ip4-input etc */ - ethernet_set_rx_redirect (vnm, sif_hw, 0); - } - } return 0; } @@ -138,7 +129,7 @@ bond_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) return 0; } -static inline u32 +static_always_inline u32 bond_load_balance_broadcast (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, vlib_buffer_t * b0, uword slave_count) @@ -149,7 +140,7 @@ bond_load_balance_broadcast (vlib_main_t * vm, vlib_node_runtime_t * node, u32 *to_next = 0; u32 sw_if_index; vlib_frame_t *f; - u16 thread_index = vlib_get_thread_index (); + u16 thread_index = vm->thread_index; for (port = 1; port < slave_count; port++) { @@ -172,29 +163,26 @@ bond_load_balance_broadcast (vlib_main_t * vm, vlib_node_runtime_t * node, return 0; } -static inline u32 +static_always_inline u32 bond_load_balance_l2 (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, vlib_buffer_t * b0, uword slave_count) { ethernet_header_t *eth = (ethernet_header_t *) vlib_buffer_get_current (b0); - u32 a = 0, b = 0, c = 0, t1, t2; - u16 t11, t22; + u32 c; + u64 *dst = (u64 *) & eth->dst_address[0]; + u64 a = clib_mem_unaligned (dst, u64); + u32 *src = (u32 *) & eth->src_address[2]; + u32 b = clib_mem_unaligned (src, u32); - memcpy (&t1, eth->src_address, sizeof (t1)); - memcpy (&t11, ð->src_address[4], sizeof (t11)); - a = t1 ^ t11; + c = lb_hash_hash_2_tuples (a, b); - memcpy (&t2, eth->dst_address, sizeof (t2)); - memcpy (&t22, ð->dst_address[4], sizeof (t22)); - b = t2 ^ t22; - - hash_v3_mix32 (a, b, c); - hash_v3_finalize32 (a, b, c); - - return c % slave_count; + if (BOND_MODULO_SHORTCUT (slave_count)) + return (c & (slave_count - 1)); + else + return c % slave_count; } -static inline u16 * +static_always_inline u16 * bond_locate_ethertype (ethernet_header_t * eth) { u16 *ethertype_p; @@ -217,7 +205,7 @@ bond_locate_ethertype (ethernet_header_t * eth) return ethertype_p; } -static inline u32 +static_always_inline u32 bond_load_balance_l23 (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, vlib_buffer_t * b0, uword slave_count) { @@ -225,9 +213,10 @@ bond_load_balance_l23 (vlib_main_t * vm, vlib_node_runtime_t * node, u8 ip_version; ip4_header_t *ip4; u16 ethertype, *ethertype_p; + u32 *mac1, *mac2, *mac3; ethertype_p = bond_locate_ethertype (eth); - ethertype = *ethertype_p; + ethertype = clib_mem_unaligned (ethertype_p, u16); if ((ethertype != htons (ETHERNET_TYPE_IP4)) && (ethertype != htons (ETHERNET_TYPE_IP6))) @@ -238,55 +227,63 @@ bond_load_balance_l23 (vlib_main_t * vm, vlib_node_runtime_t * node, if (ip_version == 0x4) { - u16 t11, t22; - u32 a = 0, b = 0, c = 0, t1, t2; - - memcpy (&t1, eth->src_address, sizeof (t1)); - memcpy (&t11, ð->src_address[4], sizeof (t11)); - a = t1 ^ t11; - - memcpy (&t2, eth->dst_address, sizeof (t2)); - memcpy (&t22, ð->dst_address[4], sizeof (t22)); - b = t2 ^ t22; - - c = ip4->src_address.data_u32 ^ ip4->dst_address.data_u32; - - hash_v3_mix32 (a, b, c); - hash_v3_finalize32 (a, b, c); - - return c % slave_count; + u32 a, c; + + mac1 = (u32 *) & eth->dst_address[0]; + mac2 = (u32 *) & eth->dst_address[4]; + mac3 = (u32 *) & eth->src_address[2]; + + a = clib_mem_unaligned (mac1, u32) ^ clib_mem_unaligned (mac2, u32) ^ + clib_mem_unaligned (mac3, u32); + c = + lb_hash_hash_2_tuples (clib_mem_unaligned (&ip4->address_pair, u64), + a); + if (BOND_MODULO_SHORTCUT (slave_count)) + return (c & (slave_count - 1)); + else + return c % slave_count; } else if (ip_version == 0x6) { - u64 a, b, c; - u64 t1 = 0, t2 = 0; + u64 a; + u32 c; ip6_header_t *ip6 = (ip6_header_t *) (eth + 1); - memcpy (&t1, eth->src_address, sizeof (eth->src_address)); - memcpy (&t2, eth->dst_address, sizeof (eth->dst_address)); - a = t1 ^ t2; - - b = (ip6->src_address.as_u64[0] ^ ip6->src_address.as_u64[1]); - c = (ip6->dst_address.as_u64[0] ^ ip6->dst_address.as_u64[1]); - - hash_mix64 (a, b, c); - return c % slave_count; + mac1 = (u32 *) & eth->dst_address[0]; + mac2 = (u32 *) & eth->dst_address[4]; + mac3 = (u32 *) & eth->src_address[2]; + + a = clib_mem_unaligned (mac1, u32) ^ clib_mem_unaligned (mac2, u32) ^ + clib_mem_unaligned (mac3, u32); + c = + lb_hash_hash (clib_mem_unaligned + (&ip6->src_address.as_uword[0], uword), + clib_mem_unaligned (&ip6->src_address.as_uword[1], + uword), + clib_mem_unaligned (&ip6->dst_address.as_uword[0], + uword), + clib_mem_unaligned (&ip6->dst_address.as_uword[1], + uword), a); + if (BOND_MODULO_SHORTCUT (slave_count)) + return (c & (slave_count - 1)); + else + return c % slave_count; } return (bond_load_balance_l2 (vm, node, bif, b0, slave_count)); } -static inline u32 +static_always_inline u32 bond_load_balance_l34 (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, vlib_buffer_t * b0, uword slave_count) { ethernet_header_t *eth = (ethernet_header_t *) vlib_buffer_get_current (b0); u8 ip_version; - uword is_tcp_udp = 0; + uword is_tcp_udp; ip4_header_t *ip4; u16 ethertype, *ethertype_p; ethertype_p = bond_locate_ethertype (eth); - ethertype = *ethertype_p; + ethertype = clib_mem_unaligned (ethertype_p, u16); if ((ethertype != htons (ETHERNET_TYPE_IP4)) && (ethertype != htons (ETHERNET_TYPE_IP6))) @@ -297,29 +294,30 @@ bond_load_balance_l34 (vlib_main_t * vm, vlib_node_runtime_t * node, if (ip_version == 0x4) { - u32 a = 0, b = 0, c = 0, t1, t2; + u32 a, c, t1, t2; tcp_header_t *tcp = (void *) (ip4 + 1); + is_tcp_udp = (ip4->protocol == IP_PROTOCOL_TCP) || (ip4->protocol == IP_PROTOCOL_UDP); - - a = ip4->src_address.data_u32 ^ ip4->dst_address.data_u32; - - t1 = is_tcp_udp ? tcp->src : 0; - t2 = is_tcp_udp ? tcp->dst : 0; - b = t1 + (t2 << 16); - - hash_v3_mix32 (a, b, c); - hash_v3_finalize32 (a, b, c); - - return c % slave_count; + t1 = is_tcp_udp ? clib_mem_unaligned (&tcp->src, u16) : 0; + t2 = is_tcp_udp ? clib_mem_unaligned (&tcp->dst, u16) : 0; + a = t1 ^ t2; + c = + lb_hash_hash_2_tuples (clib_mem_unaligned (&ip4->address_pair, u64), + a); + if (BOND_MODULO_SHORTCUT (slave_count)) + return (c & (slave_count - 1)); + else + return c % slave_count; } else if (ip_version == 0x6) { - u64 a, b, c; - u64 t1, t2; + u64 a; + u32 c, t1, t2; ip6_header_t *ip6 = (ip6_header_t *) (eth + 1); tcp_header_t *tcp = (void *) (ip6 + 1); + is_tcp_udp = 0; if (PREDICT_TRUE ((ip6->protocol == IP_PROTOCOL_TCP) || (ip6->protocol == IP_PROTOCOL_UDP))) { @@ -337,33 +335,43 @@ bond_load_balance_l34 (vlib_main_t * vm, vlib_node_runtime_t * node, tcp = (tcp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3)); } } - a = (ip6->src_address.as_u64[0] ^ ip6->src_address.as_u64[1]); - b = (ip6->dst_address.as_u64[0] ^ ip6->dst_address.as_u64[1]); - - t1 = is_tcp_udp ? tcp->src : 0; - t2 = is_tcp_udp ? tcp->dst : 0; - c = (t2 << 16) | t1; - hash_mix64 (a, b, c); - - return c % slave_count; + t1 = is_tcp_udp ? clib_mem_unaligned (&tcp->src, u16) : 0; + t2 = is_tcp_udp ? clib_mem_unaligned (&tcp->dst, u16) : 0; + a = t1 ^ t2; + c = + lb_hash_hash (clib_mem_unaligned + (&ip6->src_address.as_uword[0], uword), + clib_mem_unaligned (&ip6->src_address.as_uword[1], + uword), + clib_mem_unaligned (&ip6->dst_address.as_uword[0], + uword), + clib_mem_unaligned (&ip6->dst_address.as_uword[1], + uword), a); + if (BOND_MODULO_SHORTCUT (slave_count)) + return (c & (slave_count - 1)); + else + return c % slave_count; } return (bond_load_balance_l2 (vm, node, bif, b0, slave_count)); } -static inline u32 +static_always_inline u32 bond_load_balance_round_robin (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, vlib_buffer_t * b0, uword slave_count) { bif->lb_rr_last_index++; - bif->lb_rr_last_index %= slave_count; + if (BOND_MODULO_SHORTCUT (slave_count)) + bif->lb_rr_last_index &= slave_count - 1; + else + bif->lb_rr_last_index %= slave_count; return bif->lb_rr_last_index; } -static inline u32 +static_always_inline u32 bond_load_balance_active_backup (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, vlib_buffer_t * b0, @@ -391,12 +399,11 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, u32 *from = vlib_frame_vector_args (frame); u32 n_left_from; ethernet_header_t *eth; - u32 next0 = 0, next1 = 0, next2 = 0, next3 = 0; - u32 port, port1, port2, port3; + u32 port; u32 sw_if_index, sw_if_index1, sw_if_index2, sw_if_index3; bond_packet_trace_t *t0; uword n_trace = vlib_get_trace_count (vm, node); - u16 thread_index = vlib_get_thread_index (); + u16 thread_index = vm->thread_index; vnet_main_t *vnm = vnet_get_main (); u32 *to_next; u32 sif_if_index, sif_if_index1, sif_if_index2, sif_if_index3; @@ -447,6 +454,9 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, { while (n_left_from >= 4) { + u32 next0 = 0, next1 = 0, next2 = 0, next3 = 0; + u32 port0 = 0, port1 = 0, port2 = 0, port3 = 0; + // Prefetch next iteration if (n_left_from >= 8) { @@ -457,10 +467,10 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, p6 = vlib_get_buffer (vm, from[6]); p7 = vlib_get_buffer (vm, from[7]); - vlib_prefetch_buffer_header (p4, STORE); - vlib_prefetch_buffer_header (p5, STORE); - vlib_prefetch_buffer_header (p6, STORE); - vlib_prefetch_buffer_header (p7, STORE); + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, LOAD); CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, LOAD); @@ -488,20 +498,27 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_TX]; sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_TX]; - port = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, - b0, slave_count); - port1 = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, - b1, slave_count); - port2 = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, - b2, slave_count); - port3 = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, - b3, slave_count); - - sif_if_index = *vec_elt_at_index (bif->active_slaves, port); + if (PREDICT_TRUE (slave_count != 1)) + { + port0 = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, + bif, b0, + slave_count); + port1 = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, + bif, b1, + slave_count); + port2 = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, + bif, b2, + slave_count); + port3 = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, + bif, b3, + slave_count); + } + + sif_if_index = *vec_elt_at_index (bif->active_slaves, port0); sif_if_index1 = *vec_elt_at_index (bif->active_slaves, port1); sif_if_index2 = *vec_elt_at_index (bif->active_slaves, port2); sif_if_index3 = *vec_elt_at_index (bif->active_slaves, port3); @@ -511,23 +528,27 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (b2)->sw_if_index[VLIB_TX] = sif_if_index2; vnet_buffer (b3)->sw_if_index[VLIB_TX] = sif_if_index3; - if (bif->per_thread_info[thread_index].frame[port] == 0) - bif->per_thread_info[thread_index].frame[port] = + if (PREDICT_FALSE ((bif->per_thread_info[thread_index].frame[port0] + == 0))) + bif->per_thread_info[thread_index].frame[port0] = vnet_get_frame_to_sw_interface (vnm, sif_if_index); - if (bif->per_thread_info[thread_index].frame[port1] == 0) + if (PREDICT_FALSE ((bif->per_thread_info[thread_index].frame[port1] + == 0))) bif->per_thread_info[thread_index].frame[port1] = vnet_get_frame_to_sw_interface (vnm, sif_if_index1); - if (bif->per_thread_info[thread_index].frame[port2] == 0) + if (PREDICT_FALSE ((bif->per_thread_info[thread_index].frame[port2] + == 0))) bif->per_thread_info[thread_index].frame[port2] = vnet_get_frame_to_sw_interface (vnm, sif_if_index2); - if (bif->per_thread_info[thread_index].frame[port3] == 0) + if (PREDICT_FALSE ((bif->per_thread_info[thread_index].frame[port3] + == 0))) bif->per_thread_info[thread_index].frame[port3] = vnet_get_frame_to_sw_interface (vnm, sif_if_index3); - f = bif->per_thread_info[thread_index].frame[port]; + f = bif->per_thread_info[thread_index].frame[port0]; to_next = vlib_frame_vector_args (f); to_next += f->n_vectors; to_next[0] = vlib_get_buffer_index (vm, b0); @@ -609,13 +630,16 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, while (n_left_from > 0) { + u32 next0 = 0; + u32 port0 = 0; + // Prefetch next iteration if (n_left_from > 1) { vlib_buffer_t *p2; p2 = vlib_get_buffer (vm, from[1]); - vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p2, LOAD); CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD); } @@ -626,15 +650,18 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX]; - port = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, - b0, slave_count); - sif_if_index = *vec_elt_at_index (bif->active_slaves, port); + if (PREDICT_TRUE (slave_count != 1)) + port0 = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, + b0, + slave_count); + sif_if_index = *vec_elt_at_index (bif->active_slaves, port0); vnet_buffer (b0)->sw_if_index[VLIB_TX] = sif_if_index; - if (bif->per_thread_info[thread_index].frame[port] == 0) - bif->per_thread_info[thread_index].frame[port] = + if (PREDICT_FALSE + ((bif->per_thread_info[thread_index].frame[port0] == 0))) + bif->per_thread_info[thread_index].frame[port0] = vnet_get_frame_to_sw_interface (vnm, sif_if_index); - f = bif->per_thread_info[thread_index].frame[port]; + f = bif->per_thread_info[thread_index].frame[port0]; to_next = vlib_frame_vector_args (f); to_next += f->n_vectors; to_next[0] = vlib_get_buffer_index (vm, b0); @@ -675,6 +702,52 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, return frame->n_vectors; } +static walk_rc_t +bond_active_interface_switch_cb (vnet_main_t * vnm, u32 sw_if_index, + void *arg) +{ + bond_main_t *bm = &bond_main; + + send_ip4_garp (bm->vlib_main, sw_if_index); + send_ip6_na (bm->vlib_main, sw_if_index); + + return (WALK_CONTINUE); +} + +static uword +bond_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + vnet_main_t *vnm = vnet_get_main (); + uword event_type, *event_data = 0; + + while (1) + { + u32 i; + u32 hw_if_index; + + vlib_process_wait_for_event (vm); + event_type = vlib_process_get_events (vm, &event_data); + ASSERT (event_type == BOND_SEND_GARP_NA); + for (i = 0; i < vec_len (event_data); i++) + { + hw_if_index = event_data[i]; + /* walk hw interface to process all subinterfaces */ + vnet_hw_interface_walk_sw (vnm, hw_if_index, + bond_active_interface_switch_cb, 0); + } + vec_reset_length (event_data); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (bond_process_node) = { + .function = bond_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "bond-process", +}; +/* *INDENT-ON* */ + /* *INDENT-OFF* */ VNET_DEVICE_CLASS (bond_dev_class) = { .name = "bond",