From 2be95c11966af3531ad3d77a6186e300b94f6cb9 Mon Sep 17 00:00:00 2001
From: Neale Ranns
Date: Sat, 19 Nov 2016 13:50:04 +0000
Subject: [PATCH] Dual Loop Load-Balance Nodes

Convert the ip4, ip6 and MPLS load-balance nodes from single-buffer
loops into dual loops that process two buffers per iteration and
prefetch the two that follow, and record the flow hash in the
lookup/load-balance traces.

Change-Id: I23bc477aa03f97dd546747b3297a878055d32e1c
Signed-off-by: Neale Ranns
---
 vnet/vnet/ip/ip4_forward.c   | 141 +++++++++++++++---
 vnet/vnet/ip/ip6_forward.c   | 209 +++++++++++++++++++-------
 vnet/vnet/mpls/mpls_lookup.c | 340 ++++++++++++++++++++++++++++++++++++-------
 3 files changed, 561 insertions(+), 129 deletions(-)
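Before the per-file hunks, it may help to see the shape of the change in isolation. The sketch below is not VPP code: prefetch() and process() are hypothetical stand-ins for the buffer-header/data prefetches and the per-packet lookup work. It shows the dual-loop pattern this patch applies to all three nodes: handle two packets per iteration while prefetching the next two, then drain the remainder one packet at a time.

#include <stddef.h>

/* Hypothetical stand-ins for the prefetch and per-packet work. */
static void prefetch (void *pkt) { (void) pkt; }
static void process  (void *pkt) { (void) pkt; }

static void
dual_loop (void **pkts, size_t n)
{
  size_t i = 0;

  /* Dual loop: two packets per iteration; the next two are prefetched
   * so their headers are warm in cache by the following iteration. */
  while (n - i >= 4)
    {
      prefetch (pkts[i + 2]);
      prefetch (pkts[i + 3]);

      process (pkts[i + 0]);
      process (pkts[i + 1]);
      i += 2;
    }

  /* Single loop: drain whatever is left over. */
  while (i < n)
    process (pkts[i++]);
}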
diff --git a/vnet/vnet/ip/ip4_forward.c b/vnet/vnet/ip/ip4_forward.c
index 85ad10e62b6..fa8ec4e644c 100644
--- a/vnet/vnet/ip/ip4_forward.c
+++ b/vnet/vnet/ip/ip4_forward.c
@@ -517,6 +517,101 @@ ip4_load_balance (vlib_main_t * vm,
                            to_next, n_left_to_next);
 
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          ip_lookup_next_t next0, next1;
+          const load_balance_t *lb0, *lb1;
+          vlib_buffer_t * p0, *p1;
+          u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
+          const ip4_header_t *ip0, *ip1;
+          const dpo_id_t *dpo0, *dpo1;
+
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t * p2, * p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, STORE);
+            vlib_prefetch_buffer_header (p3, STORE);
+
+            CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
+            CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
+          }
+
+          pi0 = to_next[0] = from[0];
+          pi1 = to_next[1] = from[1];
+
+          from += 2;
+          n_left_from -= 2;
+          to_next += 2;
+          n_left_to_next -= 2;
+
+          p0 = vlib_get_buffer (vm, pi0);
+          p1 = vlib_get_buffer (vm, pi1);
+
+          ip0 = vlib_buffer_get_current (p0);
+          ip1 = vlib_buffer_get_current (p1);
+          lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+          lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+          lb0 = load_balance_get(lbi0);
+          lb1 = load_balance_get(lbi1);
+
+          /*
+           * This node is for via-FIBs, so we can re-use a flow hash
+           * written by an earlier node, if there is one.
+           * We don't want to use the same hash value at each level in the
+           * recursion graph, as that would lead to polarisation.
+           */
+          hc0 = hc1 = 0;
+
+          if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+            {
+              if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
+                {
+                  hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
+                }
+              else
+                {
+                  hc0 = vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, lb0->lb_hash_config);
+                }
+            }
+          if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+            {
+              if (PREDICT_TRUE (vnet_buffer(p1)->ip.flow_hash))
+                {
+                  hc1 = vnet_buffer(p1)->ip.flow_hash = vnet_buffer(p1)->ip.flow_hash >> 1;
+                }
+              else
+                {
+                  hc1 = vnet_buffer(p1)->ip.flow_hash = ip4_compute_flow_hash(ip1, lb1->lb_hash_config);
+                }
+            }
+
+          dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1));
+          dpo1 = load_balance_get_bucket_i(lb1, hc1 & (lb1->lb_n_buckets_minus_1));
+
+          next0 = dpo0->dpoi_next_node;
+          next1 = dpo1->dpoi_next_node;
+
+          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+          vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+          vlib_increment_combined_counter
+              (cm, cpu_index, lbi0, 1,
+               vlib_buffer_length_in_chain (vm, p0));
+          vlib_increment_combined_counter
+              (cm, cpu_index, lbi1, 1,
+               vlib_buffer_length_in_chain (vm, p1));
+
+          vlib_validate_buffer_enqueue_x2 (vm, node, next,
+                                           to_next, n_left_to_next,
+                                           pi0, pi1, next0, next1);
+        }
+
       while (n_left_from > 0 && n_left_to_next > 0)
         {
           ip_lookup_next_t next0;
@@ -528,6 +623,10 @@ ip4_load_balance (vlib_main_t * vm,
 
           pi0 = from[0];
           to_next[0] = pi0;
+          from += 1;
+          to_next += 1;
+          n_left_to_next -= 1;
+          n_left_from -= 1;
 
           p0 = vlib_get_buffer (vm, pi0);
 
@@ -535,12 +634,21 @@ ip4_load_balance (vlib_main_t * vm,
           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
 
           lb0 = load_balance_get(lbi0);
-          hc0 = lb0->lb_hash_config;
-          vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
 
-          dpo0 = load_balance_get_bucket_i(lb0,
-                                           vnet_buffer(p0)->ip.flow_hash &
-                                           (lb0->lb_n_buckets_minus_1));
+          hc0 = 0;
+          if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+            {
+              if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
+                {
+                  hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
+                }
+              else
+                {
+                  hc0 = vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, lb0->lb_hash_config);
+                }
+            }
+
+          dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1));
 
           next0 = dpo0->dpoi_next_node;
           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
@@ -549,22 +657,9 @@ ip4_load_balance (vlib_main_t * vm,
             (cm, cpu_index, lbi0, 1,
              vlib_buffer_length_in_chain (vm, p0));
 
-          from += 1;
-          to_next += 1;
-          n_left_to_next -= 1;
-          n_left_from -= 1;
-
-          if (PREDICT_FALSE (next0 != next))
-            {
-              n_left_to_next += 1;
-              vlib_put_next_frame (vm, node, next, n_left_to_next);
-              next = next0;
-              vlib_get_next_frame (vm, node, next,
-                                   to_next, n_left_to_next);
-              to_next[0] = pi0;
-              to_next += 1;
-              n_left_to_next -= 1;
-            }
+          vlib_validate_buffer_enqueue_x1 (vm, node, next,
+                                           to_next, n_left_to_next,
+                                           pi0, next0);
         }
 
       vlib_put_next_frame (vm, node, next, n_left_to_next);
@@ -573,15 +668,13 @@ ip4_load_balance (vlib_main_t * vm,
   return frame->n_vectors;
 }
 
-static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args);
-
 VLIB_REGISTER_NODE (ip4_load_balance_node) = {
   .function = ip4_load_balance,
   .name = "ip4-load-balance",
   .vector_size = sizeof (u32),
   .sibling_of = "ip4-lookup",
 
-  .format_trace = format_ip4_forward_next_trace,
+  .format_trace = format_ip4_lookup_trace,
 };
 
 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
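The flow-hash handling added above is the heart of the change, and the same logic recurs below for ip6 and MPLS. A minimal sketch, assuming a hypothetical compute_flow_hash() helper and a simplified packet type (neither is the VPP API): if an earlier level already wrote a hash, shift it right so this level uses different bits and successive levels do not polarise; otherwise compute the hash from the packet. The bucket is then selected by masking, which relies on the bucket count being a power of two.

#include <stdint.h>

/* Hypothetical packet type and hash helper, for illustration only. */
typedef struct { uint32_t flow_hash; } pkt_t;

static uint32_t
compute_flow_hash (const pkt_t *p, uint32_t hash_config)
{
  /* Stand-in: a real implementation hashes the header fields
   * selected by hash_config. */
  (void) p;
  return 0x9e3779b9 ^ hash_config;
}

/* One level of recursive load-balance bucket selection.
 * n_buckets must be a power of two. */
static uint32_t
lb_bucket (pkt_t *p, uint32_t n_buckets, uint32_t hash_config)
{
  uint32_t hc = 0;

  if (n_buckets > 1)
    {
      if (p->flow_hash)
        /* An earlier level hashed this packet already: shift so this
         * level uses different bits and the levels don't polarise. */
        hc = p->flow_hash = p->flow_hash >> 1;
      else
        /* First level: compute the hash from the packet itself. */
        hc = p->flow_hash = compute_flow_hash (p, hash_config);
    }
  return hc & (n_buckets - 1);  /* mask works because n_buckets is 2^k */
}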
diff --git a/vnet/vnet/ip/ip6_forward.c b/vnet/vnet/ip/ip6_forward.c
index 325ef9b7b67..bc346786283 100644
--- a/vnet/vnet/ip/ip6_forward.c
+++ b/vnet/vnet/ip/ip6_forward.c
@@ -703,8 +703,8 @@ VLIB_NODE_FUNCTION_MULTIARCH (ip6_lookup_node, ip6_lookup)
 
 always_inline uword
 ip6_load_balance (vlib_main_t * vm,
-                  vlib_node_runtime_t * node,
-                  vlib_frame_t * frame)
+                  vlib_node_runtime_t * node,
+                  vlib_frame_t * frame)
 {
   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
   u32 n_left_from, n_left_to_next, * from, * to_next;
@@ -722,63 +722,172 @@ ip6_load_balance (vlib_main_t * vm,
   while (n_left_from > 0)
     {
       vlib_get_next_frame (vm, node, next,
-                           to_next, n_left_to_next);
+                           to_next, n_left_to_next);
 
-      while (n_left_from > 0 && n_left_to_next > 0)
-        {
-          ip_lookup_next_t next0;
-          const load_balance_t *lb0;
-          vlib_buffer_t * p0;
-          u32 pi0, lbi0, hc0;
-          const ip6_header_t *ip0;
-          const dpo_id_t *dpo0;
-
-          pi0 = from[0];
-          to_next[0] = pi0;
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          ip_lookup_next_t next0, next1;
+          const load_balance_t *lb0, *lb1;
+          vlib_buffer_t * p0, *p1;
+          u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
+          const ip6_header_t *ip0, *ip1;
+          const dpo_id_t *dpo0, *dpo1;
+
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t * p2, * p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, STORE);
+            vlib_prefetch_buffer_header (p3, STORE);
+
+            CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
+            CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
+          }
+
+          pi0 = to_next[0] = from[0];
+          pi1 = to_next[1] = from[1];
+
+          from += 2;
+          n_left_from -= 2;
+          to_next += 2;
+          n_left_to_next -= 2;
+
+          p0 = vlib_get_buffer (vm, pi0);
+          p1 = vlib_get_buffer (vm, pi1);
+
+          ip0 = vlib_buffer_get_current (p0);
+          ip1 = vlib_buffer_get_current (p1);
+          lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+          lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+          lb0 = load_balance_get(lbi0);
+          lb1 = load_balance_get(lbi1);
+
+          /*
+           * This node is for via-FIBs, so we can re-use a flow hash
+           * written by an earlier node, if there is one.
+           * We don't want to use the same hash value at each level in the
+           * recursion graph, as that would lead to polarisation.
+           */
+          hc0 = hc1 = 0;
+
+          if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+            {
+              if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
+                {
+                  hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
+                }
+              else
+                {
+                  hc0 = vnet_buffer(p0)->ip.flow_hash = ip6_compute_flow_hash(ip0, lb0->lb_hash_config);
+                }
+            }
+          if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+            {
+              if (PREDICT_TRUE (vnet_buffer(p1)->ip.flow_hash))
+                {
+                  hc1 = vnet_buffer(p1)->ip.flow_hash = vnet_buffer(p1)->ip.flow_hash >> 1;
+                }
+              else
+                {
+                  hc1 = vnet_buffer(p1)->ip.flow_hash = ip6_compute_flow_hash(ip1, lb1->lb_hash_config);
+                }
+            }
+
+          dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1));
+          dpo1 = load_balance_get_bucket_i(lb1, hc1 & (lb1->lb_n_buckets_minus_1));
+
+          next0 = dpo0->dpoi_next_node;
+          next1 = dpo1->dpoi_next_node;
+
+          /* Only process the HBH Option Header if explicitly configured to do so */
+          if (PREDICT_FALSE(ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+            {
+              next0 = (dpo_is_adj(dpo0) && im->hbh_enabled) ?
+                  (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
+            }
+          /* Only process the HBH Option Header if explicitly configured to do so */
+          if (PREDICT_FALSE(ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+            {
+              next1 = (dpo_is_adj(dpo1) && im->hbh_enabled) ?
+                  (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next1;
+            }
 
-          p0 = vlib_get_buffer (vm, pi0);
+          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+          vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
 
-          ip0 = vlib_buffer_get_current (p0);
-          lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+          vlib_increment_combined_counter
+              (cm, cpu_index, lbi0, 1,
+               vlib_buffer_length_in_chain (vm, p0));
+          vlib_increment_combined_counter
+              (cm, cpu_index, lbi1, 1,
+               vlib_buffer_length_in_chain (vm, p1));
 
-          lb0 = load_balance_get(lbi0);
-          hc0 = lb0->lb_hash_config;
-          vnet_buffer(p0)->ip.flow_hash = ip6_compute_flow_hash(ip0, hc0);
+          vlib_validate_buffer_enqueue_x2 (vm, node, next,
+                                           to_next, n_left_to_next,
+                                           pi0, pi1, next0, next1);
+        }
 
-          dpo0 = load_balance_get_bucket_i(lb0,
-                                           vnet_buffer(p0)->ip.flow_hash &
-                                           (lb0->lb_n_buckets - 1));
-          next0 = dpo0->dpoi_next_node;
-          /* Only process the HBH Option Header if explicitly configured to do so */
-          if (PREDICT_FALSE(ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
-            {
-              next0 = (dpo_is_adj(dpo0) && im->hbh_enabled) ?
-                  (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
-            }
-          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          ip_lookup_next_t next0;
+          const load_balance_t *lb0;
+          vlib_buffer_t * p0;
+          u32 pi0, lbi0, hc0;
+          const ip6_header_t *ip0;
+          const dpo_id_t *dpo0;
+
+          pi0 = from[0];
+          to_next[0] = pi0;
+          from += 1;
+          to_next += 1;
+          n_left_to_next -= 1;
+          n_left_from -= 1;
+
+          p0 = vlib_get_buffer (vm, pi0);
+
+          ip0 = vlib_buffer_get_current (p0);
+          lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+          lb0 = load_balance_get(lbi0);
+
+          hc0 = 0;
+          if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+            {
+              if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
+                {
+                  hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
+                }
+              else
+                {
+                  hc0 = vnet_buffer(p0)->ip.flow_hash = ip6_compute_flow_hash(ip0, lb0->lb_hash_config);
+                }
+            }
+          dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1));
+
+          next0 = dpo0->dpoi_next_node;
+          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+          /* Only process the HBH Option Header if explicitly configured to do so */
+          if (PREDICT_FALSE(ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+            {
+              next0 = (dpo_is_adj(dpo0) && im->hbh_enabled) ?
+                  (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
+            }
 
-          vlib_increment_combined_counter
+          vlib_increment_combined_counter
             (cm, cpu_index, lbi0, 1,
              vlib_buffer_length_in_chain (vm, p0));
 
-          from += 1;
-          to_next += 1;
-          n_left_to_next -= 1;
-          n_left_from -= 1;
-
-          if (PREDICT_FALSE (next0 != next))
-            {
-              n_left_to_next += 1;
-              vlib_put_next_frame (vm, node, next, n_left_to_next);
-              next = next0;
-              vlib_get_next_frame (vm, node, next,
-                                   to_next, n_left_to_next);
-              to_next[0] = pi0;
-              to_next += 1;
-              n_left_to_next -= 1;
-            }
-        }
+          vlib_validate_buffer_enqueue_x1 (vm, node, next,
+                                           to_next, n_left_to_next,
+                                           pi0, next0);
+        }
 
       vlib_put_next_frame (vm, node, next, n_left_to_next);
     }
@@ -791,8 +900,8 @@ VLIB_REGISTER_NODE (ip6_load_balance_node) = {
   .name = "ip6-load-balance",
   .vector_size = sizeof (u32),
   .sibling_of = "ip6-lookup",
+  .format_trace = format_ip6_lookup_trace,
 
-  .n_next_nodes = 0,
 };
 
 VLIB_NODE_FUNCTION_MULTIARCH (ip6_load_balance_node, ip6_load_balance)
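The hop-by-hop handling above reduces to a guarded next-node override: a packet is only diverted to the HBH node when the first extension header is hop-by-hop, the chosen DPO is an adjacency (i.e. we are about to forward), and HBH processing is enabled. A condensed sketch with hypothetical booleans standing in for the protocol test, the DPO type check and the im->hbh_enabled runtime flag:

#include <stdbool.h>

enum next_node { NEXT_FROM_DPO, NEXT_HOP_BY_HOP };

/* The packet is only diverted to the hop-by-hop node when all three
 * conditions hold; otherwise the DPO's own next node stands. */
static enum next_node
select_next (bool first_hdr_is_hbh, bool dpo_is_adjacency, bool hbh_enabled,
             enum next_node next_from_dpo)
{
  if (first_hdr_is_hbh && dpo_is_adjacency && hbh_enabled)
    return NEXT_HOP_BY_HOP;
  return next_from_dpo;
}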
diff --git a/vnet/vnet/mpls/mpls_lookup.c b/vnet/vnet/mpls/mpls_lookup.c
index 9d29cecff90..34ba79e40fd 100644
--- a/vnet/vnet/mpls/mpls_lookup.c
+++ b/vnet/vnet/mpls/mpls_lookup.c
@@ -28,6 +28,7 @@ typedef struct {
   u32 lb_index;
   u32 lfib_index;
   u32 label_net_byte_order;
+  u32 hash;
 } mpls_lookup_trace_t;
 
 static u8 *
@@ -37,9 +38,9 @@ format_mpls_lookup_trace (u8 * s, va_list * args)
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   mpls_lookup_trace_t * t = va_arg (*args, mpls_lookup_trace_t *);
 
-  s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d "
+  s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d hash %d "
               "label %d eos %d",
-              t->next_index, t->lfib_index, t->lb_index,
+              t->next_index, t->lfib_index, t->lb_index, t->hash,
               vnet_mpls_uc_get_label(
                   clib_net_to_host_u32(t->label_net_byte_order)),
               vnet_mpls_uc_get_s(t->label_net_byte_order));
@@ -79,6 +80,128 @@ mpls_lookup (vlib_main_t * vm,
       vlib_get_next_frame (vm, node, next_index,
                            to_next, n_left_to_next);
 
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          u32 lbi0, next0, lfib_index0, bi0, hash_c0;
+          const mpls_unicast_header_t * h0;
+          const load_balance_t *lb0;
+          const dpo_id_t *dpo0;
+          vlib_buffer_t * b0;
+          u32 lbi1, next1, lfib_index1, bi1, hash_c1;
+          const mpls_unicast_header_t * h1;
+          const load_balance_t *lb1;
+          const dpo_id_t *dpo1;
+          vlib_buffer_t * b1;
+
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t * p2, * p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, STORE);
+            vlib_prefetch_buffer_header (p3, STORE);
+
+            CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE);
+            CLIB_PREFETCH (p3->data, sizeof (h0[0]), STORE);
+          }
+
+          bi0 = to_next[0] = from[0];
+          bi1 = to_next[1] = from[1];
+
+          from += 2;
+          n_left_from -= 2;
+          to_next += 2;
+          n_left_to_next -= 2;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          b1 = vlib_get_buffer (vm, bi1);
+          h0 = vlib_buffer_get_current (b0);
+          h1 = vlib_buffer_get_current (b1);
+
+          lfib_index0 = vec_elt(mm->fib_index_by_sw_if_index,
+                                vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+          lfib_index1 = vec_elt(mm->fib_index_by_sw_if_index,
+                                vnet_buffer(b1)->sw_if_index[VLIB_RX]);
+
+          lbi0 = mpls_fib_table_forwarding_lookup (lfib_index0, h0);
+          lbi1 = mpls_fib_table_forwarding_lookup (lfib_index1, h1);
+          lb0 = load_balance_get(lbi0);
+          lb1 = load_balance_get(lbi1);
+
+          hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0;
+          hash_c1 = vnet_buffer(b1)->ip.flow_hash = 0;
+
+          if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
+            {
+              hash_c0 = vnet_buffer (b0)->ip.flow_hash =
+                  mpls_compute_flow_hash(h0, lb0->lb_hash_config);
+            }
+          if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
+            {
+              hash_c1 = vnet_buffer (b1)->ip.flow_hash =
+                  mpls_compute_flow_hash(h1, lb1->lb_hash_config);
+            }
+
+          ASSERT (lb0->lb_n_buckets > 0);
+          ASSERT (is_pow2 (lb0->lb_n_buckets));
+          ASSERT (lb1->lb_n_buckets > 0);
+          ASSERT (is_pow2 (lb1->lb_n_buckets));
+
+          dpo0 = load_balance_get_bucket_i(lb0,
+                                           (hash_c0 &
+                                            (lb0->lb_n_buckets_minus_1)));
+          dpo1 = load_balance_get_bucket_i(lb1,
+                                           (hash_c1 &
+                                            (lb1->lb_n_buckets_minus_1)));
+
+          next0 = dpo0->dpoi_next_node;
+          next1 = dpo1->dpoi_next_node;
+
+          vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+          vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+          vlib_increment_combined_counter
+              (cm, cpu_index, lbi0, 1,
+               vlib_buffer_length_in_chain (vm, b0));
+          vlib_increment_combined_counter
+              (cm, cpu_index, lbi1, 1,
+               vlib_buffer_length_in_chain (vm, b1));
+
+          /*
+           * pop the label that was just used in the lookup
+           */
+          vlib_buffer_advance(b0, sizeof(*h0));
+          vlib_buffer_advance(b1, sizeof(*h1));
+
+          if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
+                                                        b0, sizeof (*tr));
+              tr->next_index = next0;
+              tr->lb_index = lbi0;
+              tr->lfib_index = lfib_index0;
+              tr->hash = hash_c0;
+              tr->label_net_byte_order = h0->label_exp_s_ttl;
+            }
+
+          if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
+                                                        b1, sizeof (*tr));
+              tr->next_index = next1;
+              tr->lb_index = lbi1;
+              tr->lfib_index = lfib_index1;
+              tr->hash = hash_c1;
+              tr->label_net_byte_order = h1->label_exp_s_ttl;
+            }
+
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, bi1, next0, next1);
+        }
+
       while (n_left_from > 0 && n_left_to_next > 0)
         {
           u32 lbi0, next0, lfib_index0, bi0, hash_c0;
@@ -101,26 +224,26 @@ mpls_lookup (vlib_main_t * vm,
                                vnet_buffer(b0)->sw_if_index[VLIB_RX]);
 
           lbi0 = mpls_fib_table_forwarding_lookup (lfib_index0, h0);
-          lb0 = load_balance_get(lbi0);
+          lb0 = load_balance_get(lbi0);
 
           hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0;
           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
             {
-              hash_c0 = vnet_buffer (b0)->ip.flow_hash =
+              hash_c0 = vnet_buffer (b0)->ip.flow_hash =
                   mpls_compute_flow_hash(h0, lb0->lb_hash_config);
             }
 
-          ASSERT (lb0->lb_n_buckets > 0);
-          ASSERT (is_pow2 (lb0->lb_n_buckets));
+          ASSERT (lb0->lb_n_buckets > 0);
+          ASSERT (is_pow2 (lb0->lb_n_buckets));
 
-          dpo0 = load_balance_get_bucket_i(lb0,
+          dpo0 = load_balance_get_bucket_i(lb0,
                                            (hash_c0 &
                                             (lb0->lb_n_buckets_minus_1)));
 
-          next0 = dpo0->dpoi_next_node;
-          vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+          next0 = dpo0->dpoi_next_node;
+          vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
-          vlib_increment_combined_counter
+          vlib_increment_combined_counter
             (cm, cpu_index, lbi0, 1,
              vlib_buffer_length_in_chain (vm, b0));
 
@@ -132,10 +255,11 @@ mpls_lookup (vlib_main_t * vm,
           if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
             {
               mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
-                                                        b0, sizeof (*tr));
+                                                        b0, sizeof (*tr));
               tr->next_index = next0;
               tr->lb_index = lbi0;
               tr->lfib_index = lfib_index0;
+              tr->hash = hash_c0;
               tr->label_net_byte_order = h0->label_exp_s_ttl;
             }
 
@@ -177,6 +301,7 @@ VLIB_NODE_FUNCTION_MULTIARCH (mpls_lookup_node, mpls_lookup)
 typedef struct {
   u32 next_index;
   u32 lb_index;
+  u32 hash;
 } mpls_load_balance_trace_t;
 
 static u8 *
@@ -186,20 +311,20 @@ format_mpls_load_balance_trace (u8 * s, va_list * args)
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   mpls_load_balance_trace_t * t = va_arg (*args, mpls_load_balance_trace_t *);
 
-  s = format (s, "MPLS: next [%d], LB index %d ",
-              t->next_index, t->lb_index);
+  s = format (s, "MPLS: next [%d], LB index %d hash %d",
+              t->next_index, t->lb_index, t->hash);
   return s;
 }
 
 always_inline uword
 mpls_load_balance (vlib_main_t * vm,
-                   vlib_node_runtime_t * node,
-                   vlib_frame_t * frame)
+                   vlib_node_runtime_t * node,
+                   vlib_frame_t * frame)
 {
   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
   u32 n_left_from, n_left_to_next, * from, * to_next;
-  ip_lookup_next_t next;
   u32 cpu_index = os_get_cpu_number();
+  u32 next;
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -208,57 +333,162 @@ mpls_load_balance (vlib_main_t * vm,
   while (n_left_from > 0)
     {
       vlib_get_next_frame (vm, node, next,
-                           to_next, n_left_to_next);
+                           to_next, n_left_to_next);
+
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          mpls_lookup_next_t next0, next1;
+          const load_balance_t *lb0, *lb1;
+          vlib_buffer_t * p0, *p1;
+          u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
+          const mpls_unicast_header_t *mpls0, *mpls1;
+          const dpo_id_t *dpo0, *dpo1;
+
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t * p2, * p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, STORE);
+            vlib_prefetch_buffer_header (p3, STORE);
+
+            CLIB_PREFETCH (p2->data, sizeof (mpls0[0]), STORE);
+            CLIB_PREFETCH (p3->data, sizeof (mpls0[0]), STORE);
+          }
+
+          pi0 = to_next[0] = from[0];
+          pi1 = to_next[1] = from[1];
+
+          from += 2;
+          n_left_from -= 2;
+          to_next += 2;
+          n_left_to_next -= 2;
+
+          p0 = vlib_get_buffer (vm, pi0);
+          p1 = vlib_get_buffer (vm, pi1);
+
+          mpls0 = vlib_buffer_get_current (p0);
+          mpls1 = vlib_buffer_get_current (p1);
+          lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+          lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+          lb0 = load_balance_get(lbi0);
+          lb1 = load_balance_get(lbi1);
+
+          /*
+           * This node is for via-FIBs, so we can re-use a flow hash
+           * written by an earlier node, if there is one.
+           * We don't want to use the same hash value at each level in the
+           * recursion graph, as that would lead to polarisation.
+           */
+          hc0 = hc1 = 0;
+
+          if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+            {
+              if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
+                {
+                  hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
+                }
+              else
+                {
+                  hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, lb0->lb_hash_config);
+                }
+            }
+          if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+            {
+              if (PREDICT_TRUE (vnet_buffer(p1)->ip.flow_hash))
+                {
+                  hc1 = vnet_buffer(p1)->ip.flow_hash = vnet_buffer(p1)->ip.flow_hash >> 1;
+                }
+              else
+                {
+                  hc1 = vnet_buffer(p1)->ip.flow_hash = mpls_compute_flow_hash(mpls1, lb1->lb_hash_config);
+                }
+            }
+
+          dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1));
+          dpo1 = load_balance_get_bucket_i(lb1, hc1 & (lb1->lb_n_buckets_minus_1));
+
+          next0 = dpo0->dpoi_next_node;
+          next1 = dpo1->dpoi_next_node;
+
+          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+          vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+          vlib_increment_combined_counter
+              (cm, cpu_index, lbi0, 1,
+               vlib_buffer_length_in_chain (vm, p0));
+          vlib_increment_combined_counter
+              (cm, cpu_index, lbi1, 1,
+               vlib_buffer_length_in_chain (vm, p1));
+
+          if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node,
+                                                              p0, sizeof (*tr));
+              tr->next_index = next0;
+              tr->lb_index = lbi0;
+              tr->hash = hc0;
+            }
+          if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node,
+                                                              p1, sizeof (*tr));
+              tr->next_index = next1;
+              tr->lb_index = lbi1;
+              tr->hash = hc1;
+            }
+
+          vlib_validate_buffer_enqueue_x2 (vm, node, next,
+                                           to_next, n_left_to_next,
+                                           pi0, pi1, next0, next1);
+        }
 
-      while (n_left_from > 0 && n_left_to_next > 0)
-        {
-          const mpls_unicast_header_t *hdr0;
-          const load_balance_t *lb0;
-          u32 pi0, lbi0, hc0, next0;
-          const dpo_id_t *dpo0;
-          vlib_buffer_t * p0;
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          mpls_lookup_next_t next0;
+          const load_balance_t *lb0;
+          vlib_buffer_t * p0;
+          u32 pi0, lbi0, hc0;
+          const mpls_unicast_header_t *mpls0;
+          const dpo_id_t *dpo0;
 
-          pi0 = from[0];
-          to_next[0] = pi0;
+          pi0 = from[0];
+          to_next[0] = pi0;
+          from += 1;
+          to_next += 1;
+          n_left_to_next -= 1;
+          n_left_from -= 1;
+
+          p0 = vlib_get_buffer (vm, pi0);
 
-          p0 = vlib_get_buffer (vm, pi0);
+          mpls0 = vlib_buffer_get_current (p0);
+          lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
 
-          hdr0 = vlib_buffer_get_current (p0);
-          lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+          lb0 = load_balance_get(lbi0);
 
-          lb0 = load_balance_get(lbi0);
-          hc0 = lb0->lb_hash_config;
-          vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(hdr0, hc0);
+          hc0 = 0;
+          if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+            {
+              if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
+                {
+                  hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
+                }
+              else
+                {
+                  hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, lb0->lb_hash_config);
+                }
+            }
 
-          dpo0 = load_balance_get_bucket_i(lb0,
-                                           vnet_buffer(p0)->ip.flow_hash &
-                                           (lb0->lb_n_buckets_minus_1));
+          dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1));
 
-          next0 = dpo0->dpoi_next_node;
-          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+          next0 = dpo0->dpoi_next_node;
+          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
-          vlib_increment_combined_counter
+          vlib_increment_combined_counter
             (cm, cpu_index, lbi0, 1,
              vlib_buffer_length_in_chain (vm, p0));
 
-          from += 1;
-          to_next += 1;
-          n_left_to_next -= 1;
-          n_left_from -= 1;
-
-          if (PREDICT_FALSE (next0 != next))
-            {
-              n_left_to_next += 1;
-              vlib_put_next_frame (vm, node, next, n_left_to_next);
-              next = next0;
-              vlib_get_next_frame (vm, node, next,
-                                   to_next, n_left_to_next);
-              to_next[0] = pi0;
-              to_next += 1;
-              n_left_to_next -= 1;
-            }
-        }
+          vlib_validate_buffer_enqueue_x1 (vm, node, next,
+                                           to_next, n_left_to_next,
+                                           pi0, next0);
+        }
 
       vlib_put_next_frame (vm, node, next, n_left_to_next);
     }
-- 
2.16.6
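A final note on the ASSERT (is_pow2 (lb0->lb_n_buckets)) checks in mpls_lookup: they are what make the hash & (lb_n_buckets_minus_1) bucket selection used throughout this patch valid, since masking by n - 1 only equals modulo-n when n is a power of two. A small self-contained demonstration, using a local is_pow2() helper rather than the clib one:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Local helper, equivalent in spirit to clib's is_pow2(). */
static int
is_pow2 (uint32_t x)
{
  return x && !(x & (x - 1));
}

int
main (void)
{
  uint32_t n_buckets = 8;          /* must be a power of two */
  uint32_t hash = 0x9e3779b9;      /* any flow hash value */

  assert (n_buckets > 0);
  assert (is_pow2 (n_buckets));

  /* Masking with n_buckets - 1 is the cheap equivalent of
   * hash % n_buckets when n_buckets is a power of two. */
  uint32_t bucket = hash & (n_buckets - 1);
  assert (bucket == hash % n_buckets);

  printf ("bucket %u of %u\n", (unsigned) bucket, (unsigned) n_buckets);
  return 0;
}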