From: Neale Ranns Date: Thu, 16 Mar 2017 11:34:55 +0000 (-0400) Subject: MPLS performance improvments. X-Git-Tag: v17.04-rc1~25 X-Git-Url: https://gerrit.fd.io/r/gitweb?a=commitdiff_plain;ds=sidebyside;h=696e88da9799056036f329676213f3c0c0a1db9c;p=vpp.git MPLS performance improvments. 1 - Quad loop lookup and label imposition. 2 - optimise imposition for the 1 label case 3 - input gets TTL from header directly (no byte swap) Change-Id: I59204c9e5d134b0df75d7afa43e360f946d1ffe7 Signed-off-by: Neale Ranns --- diff --git a/src/vnet.am b/src/vnet.am index 223d5d93dda..9e099f33b2c 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -531,7 +531,7 @@ libvnet_la_SOURCES += \ vnet/mpls/mpls_lookup.c \ vnet/mpls/mpls_output.c \ vnet/mpls/mpls_features.c \ - vnet/mpls/node.c \ + vnet/mpls/mpls_input.c \ vnet/mpls/interface.c \ vnet/mpls/mpls_tunnel.c \ vnet/mpls/pg.c \ diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c index bbdc9666503..be9b28507ba 100644 --- a/src/vnet/dpo/mpls_label_dpo.c +++ b/src/vnet/dpo/mpls_label_dpo.c @@ -160,6 +160,33 @@ typedef struct mpls_label_imposition_trace_t_ mpls_unicast_header_t hdr; } mpls_label_imposition_trace_t; +always_inline mpls_unicast_header_t * +mpls_label_paint (vlib_buffer_t * b0, + mpls_label_dpo_t *mld0, + u8 ttl0) +{ + mpls_unicast_header_t *hdr0; + + vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes)); + + hdr0 = vlib_buffer_get_current(b0); + + if (PREDICT_TRUE(1 == mld0->mld_n_labels)) + { + /* optimise for the common case of one label */ + *hdr0 = mld0->mld_hdr[0]; + } + else + { + clib_memcpy(hdr0, mld0->mld_hdr, mld0->mld_n_hdr_bytes); + hdr0 = hdr0 + (mld0->mld_n_labels - 1); + } + /* fixup the TTL for the inner most label */ + ((char*)hdr0)[3] = ttl0; + + return (hdr0); +} + always_inline uword mpls_label_imposition_inline (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -180,45 +207,59 @@ mpls_label_imposition_inline (vlib_main_t * vm, vlib_get_next_frame(vm, node, next_index, to_next, 
n_left_to_next); - while (n_left_from >= 4 && n_left_to_next >= 2) + while (n_left_from >= 8 && n_left_to_next >= 4) { - mpls_unicast_header_t *hdr0, *hdr1; - mpls_label_dpo_t *mld0, *mld1; - u32 bi0, mldi0, bi1, mldi1; - vlib_buffer_t * b0, *b1; - u32 next0, next1; - u8 ttl0, ttl1; + u32 bi0, mldi0, bi1, mldi1, bi2, mldi2, bi3, mldi3; + mpls_unicast_header_t *hdr0, *hdr1, *hdr2, *hdr3; + mpls_label_dpo_t *mld0, *mld1, *mld2, *mld3; + vlib_buffer_t * b0, *b1, * b2, *b3; + u32 next0, next1, next2, next3; + u8 ttl0, ttl1,ttl2, ttl3 ; bi0 = to_next[0] = from[0]; bi1 = to_next[1] = from[1]; + bi2 = to_next[2] = from[2]; + bi3 = to_next[3] = from[3]; /* Prefetch next iteration. */ { - vlib_buffer_t * p2, * p3; + vlib_buffer_t * p2, * p3, *p4, *p5; p2 = vlib_get_buffer (vm, from[2]); p3 = vlib_get_buffer (vm, from[3]); + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); vlib_prefetch_buffer_header (p2, STORE); vlib_prefetch_buffer_header (p3, STORE); + vlib_prefetch_buffer_header (p4, STORE); + vlib_prefetch_buffer_header (p5, STORE); CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE); CLIB_PREFETCH (p3->data, sizeof (hdr0[0]), STORE); + CLIB_PREFETCH (p4->data, sizeof (hdr0[0]), STORE); + CLIB_PREFETCH (p5->data, sizeof (hdr0[0]), STORE); } - from += 2; - to_next += 2; - n_left_from -= 2; - n_left_to_next -= 2; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; b0 = vlib_get_buffer (vm, bi0); b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); /* dst lookup was done by ip4 lookup */ mldi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; mldi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX]; + mldi2 = vnet_buffer(b2)->ip.adj_index[VLIB_TX]; + mldi3 = vnet_buffer(b3)->ip.adj_index[VLIB_TX]; mld0 = mpls_label_dpo_get(mldi0); mld1 = mpls_label_dpo_get(mldi1); + mld2 = mpls_label_dpo_get(mldi2); + mld3 = mpls_label_dpo_get(mldi3); if (payload_is_ip4) { @@ -227,23 +268,37 @@ 
mpls_label_imposition_inline (vlib_main_t * vm, */ ip4_header_t * ip0 = vlib_buffer_get_current(b0); ip4_header_t * ip1 = vlib_buffer_get_current(b1); + ip4_header_t * ip2 = vlib_buffer_get_current(b2); + ip4_header_t * ip3 = vlib_buffer_get_current(b3); u32 checksum0; u32 checksum1; + u32 checksum2; + u32 checksum3; checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100); checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100); + checksum2 = ip2->checksum + clib_host_to_net_u16 (0x0100); + checksum3 = ip3->checksum + clib_host_to_net_u16 (0x0100); checksum0 += checksum0 >= 0xffff; checksum1 += checksum1 >= 0xffff; + checksum2 += checksum2 >= 0xffff; + checksum3 += checksum3 >= 0xffff; ip0->checksum = checksum0; ip1->checksum = checksum1; + ip2->checksum = checksum2; + ip3->checksum = checksum3; ip0->ttl -= 1; ip1->ttl -= 1; + ip2->ttl -= 1; + ip3->ttl -= 1; ttl1 = ip1->ttl; ttl0 = ip0->ttl; + ttl3 = ip3->ttl; + ttl2 = ip2->ttl; } else if (payload_is_ip6) { @@ -252,13 +307,18 @@ mpls_label_imposition_inline (vlib_main_t * vm, */ ip6_header_t * ip0 = vlib_buffer_get_current(b0); ip6_header_t * ip1 = vlib_buffer_get_current(b1); - + ip6_header_t * ip2 = vlib_buffer_get_current(b2); + ip6_header_t * ip3 = vlib_buffer_get_current(b3); ip0->hop_limit -= 1; ip1->hop_limit -= 1; + ip2->hop_limit -= 1; + ip3->hop_limit -= 1; ttl0 = ip0->hop_limit; ttl1 = ip1->hop_limit; + ttl2 = ip2->hop_limit; + ttl3 = ip3->hop_limit; } else { @@ -294,30 +354,45 @@ mpls_label_imposition_inline (vlib_main_t * vm, { ttl1 = 255; } + if (PREDICT_TRUE(vnet_buffer(b2)->mpls.first)) + { + ASSERT(1 != vnet_buffer (b2)->mpls.ttl); + + ttl2 = vnet_buffer(b2)->mpls.ttl - 1; + } + else + { + ttl2 = 255; + } + if (PREDICT_TRUE(vnet_buffer(b3)->mpls.first)) + { + ASSERT(1 != vnet_buffer (b3)->mpls.ttl); + ttl3 = vnet_buffer(b3)->mpls.ttl - 1; + } + else + { + ttl3 = 255; + } } vnet_buffer(b0)->mpls.first = 0; vnet_buffer(b1)->mpls.first = 0; + vnet_buffer(b2)->mpls.first = 0; + 
vnet_buffer(b3)->mpls.first = 0; /* Paint the MPLS header */ - vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes)); - vlib_buffer_advance(b1, -(mld1->mld_n_hdr_bytes)); - - hdr0 = vlib_buffer_get_current(b0); - hdr1 = vlib_buffer_get_current(b1); - - clib_memcpy(hdr0, mld0->mld_hdr, mld0->mld_n_hdr_bytes); - clib_memcpy(hdr1, mld1->mld_hdr, mld1->mld_n_hdr_bytes); - - /* fixup the TTL for the inner most label */ - hdr0 = hdr0 + (mld0->mld_n_labels - 1); - hdr1 = hdr1 + (mld1->mld_n_labels - 1); - ((char*)hdr0)[3] = ttl0; - ((char*)hdr1)[3] = ttl1; + hdr0 = mpls_label_paint(b0, mld0, ttl0); + hdr1 = mpls_label_paint(b1, mld1, ttl1); + hdr2 = mpls_label_paint(b2, mld2, ttl2); + hdr3 = mpls_label_paint(b3, mld3, ttl3); next0 = mld0->mld_dpo.dpoi_next_node; next1 = mld1->mld_dpo.dpoi_next_node; + next2 = mld2->mld_dpo.dpoi_next_node; + next3 = mld3->mld_dpo.dpoi_next_node; vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index; vnet_buffer(b1)->ip.adj_index[VLIB_TX] = mld1->mld_dpo.dpoi_index; + vnet_buffer(b2)->ip.adj_index[VLIB_TX] = mld2->mld_dpo.dpoi_index; + vnet_buffer(b3)->ip.adj_index[VLIB_TX] = mld3->mld_dpo.dpoi_index; if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -331,10 +406,23 @@ mpls_label_imposition_inline (vlib_main_t * vm, vlib_add_trace (vm, node, b1, sizeof (*tr)); tr->hdr = *hdr1; } + if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_label_imposition_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + tr->hdr = *hdr2; + } + if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_label_imposition_trace_t *tr = + vlib_add_trace (vm, node, b3, sizeof (*tr)); + tr->hdr = *hdr3; + } - vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, + vlib_validate_buffer_enqueue_x4(vm, node, next_index, to_next, n_left_to_next, - bi0, bi1, next0, next1); + bi0, bi1, bi2, bi3, + next0, next1, next2, next3); } while (n_left_from > 0 && n_left_to_next > 0) diff --git a/src/vnet/dpo/mpls_label_dpo.h 
b/src/vnet/dpo/mpls_label_dpo.h index 89bcb093b04..e23f3d262ff 100644 --- a/src/vnet/dpo/mpls_label_dpo.h +++ b/src/vnet/dpo/mpls_label_dpo.h @@ -61,8 +61,8 @@ typedef struct mpls_label_dpo_t * Should this get any bigger then we will need to reconsider how many labels * can be pushed in one object. */ -_Static_assert((sizeof(mpls_label_dpo_t) <= CLIB_CACHE_LINE_BYTES), - "MPLS label DPO is larger than one cache line."); +STATIC_ASSERT((sizeof(mpls_label_dpo_t) <= CLIB_CACHE_LINE_BYTES), + "MPLS label DPO is larger than one cache line."); /** * @brief Create an MPLS label object diff --git a/src/vnet/mpls/error.def b/src/vnet/mpls/error.def index de8b9665dfb..34a46522894 100644 --- a/src/vnet/mpls/error.def +++ b/src/vnet/mpls/error.def @@ -18,11 +18,11 @@ mpls_error (NONE, "no error") mpls_error (UNKNOWN_PROTOCOL, "unknown protocol") mpls_error (UNSUPPORTED_VERSION, "unsupported version") -mpls_error (PKTS_DECAP, "MPLS-GRE input packets decapsulated") -mpls_error (PKTS_ENCAP, "MPLS-GRE output packets encapsulated") -mpls_error (NO_LABEL, "MPLS-GRE no label for fib/dst") -mpls_error (TTL_EXPIRED, "MPLS-GRE ttl expired") -mpls_error (S_NOT_SET, "MPLS-GRE s-bit not set") +mpls_error (PKTS_DECAP, "MPLS input packets decapsulated") +mpls_error (PKTS_ENCAP, "MPLS output packets encapsulated") +mpls_error (NO_LABEL, "MPLS no label for fib/dst") +mpls_error (TTL_EXPIRED, "MPLS ttl expired") +mpls_error (S_NOT_SET, "MPLS s-bit not set") mpls_error (BAD_LABEL, "invalid FIB id in label") mpls_error (NOT_IP4, "non-ip4 packets dropped") mpls_error (DISALLOWED_FIB, "disallowed FIB id") diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c index 7ae4aa00264..482577b17ef 100644 --- a/src/vnet/mpls/mpls.c +++ b/src/vnet/mpls/mpls.c @@ -161,6 +161,14 @@ u8 * format_mpls_unicast_header_net_byte_order (u8 * s, va_list * args) &h_host); } +typedef struct { + u32 fib_index; + u32 entry_index; + u32 dest; + u32 s_bit; + u32 label; +} show_mpls_fib_t; + int mpls_dest_cmp(void * a1, 
void * a2) { diff --git a/src/vnet/mpls/mpls.h b/src/vnet/mpls/mpls.h index b6fdbce7d70..300f2cfd6be 100644 --- a/src/vnet/mpls/mpls.h +++ b/src/vnet/mpls/mpls.h @@ -86,16 +86,12 @@ extern mpls_main_t mpls_main; extern clib_error_t * mpls_feature_init(vlib_main_t * vm); -format_function_t format_mpls_protocol; -format_function_t format_mpls_encap_index; - format_function_t format_mpls_eos_bit; format_function_t format_mpls_unicast_header_net_byte_order; format_function_t format_mpls_unicast_label; format_function_t format_mpls_header; extern vlib_node_registration_t mpls_input_node; -extern vlib_node_registration_t mpls_policy_encap_node; extern vlib_node_registration_t mpls_output_node; extern vlib_node_registration_t mpls_midchain_node; @@ -118,48 +114,6 @@ u8 mpls_sw_interface_is_enabled (u32 sw_if_index); int mpls_fib_reset_labels (u32 fib_id); -#define foreach_mpls_input_next \ -_(DROP, "error-drop") \ -_(LOOKUP, "mpls-lookup") - -typedef enum { -#define _(s,n) MPLS_INPUT_NEXT_##s, - foreach_mpls_input_next -#undef _ - MPLS_INPUT_N_NEXT, -} mpls_input_next_t; - -#define foreach_mpls_lookup_next \ -_(DROP, "error-drop") \ -_(IP4_INPUT, "ip4-input") \ -_(L2_OUTPUT, "l2-output") - -// FIXME remove. 
-typedef enum { -#define _(s,n) MPLS_LOOKUP_NEXT_##s, - foreach_mpls_lookup_next -#undef _ - MPLS_LOOKUP_N_NEXT, -} mpls_lookup_next_t; - -#define foreach_mpls_output_next \ -_(DROP, "error-drop") - -typedef enum { -#define _(s,n) MPLS_OUTPUT_NEXT_##s, - foreach_mpls_output_next -#undef _ - MPLS_OUTPUT_N_NEXT, -} mpls_output_next_t; - -typedef struct { - u32 fib_index; - u32 entry_index; - u32 dest; - u32 s_bit; - u32 label; -} show_mpls_fib_t; - int mpls_dest_cmp(void * a1, void * a2); diff --git a/src/vnet/mpls/node.c b/src/vnet/mpls/mpls_input.c similarity index 82% rename from src/vnet/mpls/node.c rename to src/vnet/mpls/mpls_input.c index 5b407fae2a2..893c45111d4 100644 --- a/src/vnet/mpls/node.c +++ b/src/vnet/mpls/mpls_input.c @@ -22,9 +22,20 @@ typedef struct { u32 next_index; - u32 label_host_byte_order; + u32 label_net_byte_order; } mpls_input_trace_t; +#define foreach_mpls_input_next \ +_(DROP, "error-drop") \ +_(LOOKUP, "mpls-lookup") + +typedef enum { +#define _(s,n) MPLS_INPUT_NEXT_##s, + foreach_mpls_input_next +#undef _ + MPLS_INPUT_N_NEXT, +} mpls_input_next_t; + static u8 * format_mpls_input_trace (u8 * s, va_list * args) { @@ -32,8 +43,9 @@ format_mpls_input_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); mpls_input_trace_t * t = va_arg (*args, mpls_input_trace_t *); char * next_name; - + u32 label; next_name = "BUG!"; + label = clib_net_to_host_u32(t->label_net_byte_order); #define _(a,b) if (t->next_index == MPLS_INPUT_NEXT_##a) next_name = b; foreach_mpls_input_next; @@ -41,8 +53,8 @@ format_mpls_input_trace (u8 * s, va_list * args) s = format (s, "MPLS: next %s[%d] label %d ttl %d", next_name, t->next_index, - vnet_mpls_uc_get_label(t->label_host_byte_order), - vnet_mpls_uc_get_ttl(t->label_host_byte_order)); + vnet_mpls_uc_get_label(label), + vnet_mpls_uc_get_ttl(label)); return s; } @@ -88,30 +100,29 @@ mpls_input_inline (vlib_main_t * vm, u32 n_left_to_next; vlib_get_next_frame (vm, node, 
next_index, - to_next, n_left_to_next); + to_next, n_left_to_next); while (n_left_from >= 4 && n_left_to_next >= 2) { - u32 label0, bi0, next0, sw_if_index0; - u32 label1, bi1, next1, sw_if_index1; - mpls_unicast_header_t *h0, *h1; + u32 bi0, next0, sw_if_index0; + u32 bi1, next1, sw_if_index1; vlib_buffer_t *b0, *b1; + char *h0, *h1; /* Prefetch next iteration. */ { - vlib_buffer_t * p2, * p3; + vlib_buffer_t * p2, * p3; - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); - vlib_prefetch_buffer_header (p2, STORE); - vlib_prefetch_buffer_header (p3, STORE); + vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p3, STORE); - CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE); - CLIB_PREFETCH (p3->data, sizeof (h1[0]), STORE); + CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE); + CLIB_PREFETCH (p3->data, sizeof (h1[0]), STORE); } - bi0 = to_next[0] = from[0]; bi1 = to_next[1] = from[1]; @@ -129,62 +140,59 @@ mpls_input_inline (vlib_main_t * vm, sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; - label0 = clib_net_to_host_u32 (h0->label_exp_s_ttl); - label1 = clib_net_to_host_u32 (h1->label_exp_s_ttl); - /* TTL expired? 
*/ - if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label0) == 0)) - { + if (PREDICT_FALSE(h0[3] == 0)) + { next0 = MPLS_INPUT_NEXT_DROP; b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED]; - } + } else - { + { next0 = MPLS_INPUT_NEXT_LOOKUP; - vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index0, &next0, b0); + vnet_feature_arc_start(mm->input_feature_arc_index, + sw_if_index0, &next0, b0); vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); - } + } - if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label1) == 0)) - { + if (PREDICT_FALSE(h1[3] == 0)) + { next1 = MPLS_INPUT_NEXT_DROP; b1->error = node->errors[MPLS_ERROR_TTL_EXPIRED]; - } + } else - { + { next1 = MPLS_INPUT_NEXT_LOOKUP; - vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index1, &next1, b1); + vnet_feature_arc_start(mm->input_feature_arc_index, + sw_if_index1, &next1, b1); vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1); - } + } if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { + { mpls_input_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->next_index = next0; - tr->label_host_byte_order = label0; - } + tr->label_net_byte_order = *((u32*)h0); + } if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) - { + { mpls_input_trace_t *tr = vlib_add_trace (vm, node, b1, sizeof (*tr)); tr->next_index = next1; - tr->label_host_byte_order = label1; - } + tr->label_net_byte_order = *((u32*)h1); + } vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, n_left_to_next, - bi0, bi1, next0, next1); + bi0, bi1, + next0, next1); } while (n_left_from > 0 && n_left_to_next > 0) { - u32 bi0; + u32 sw_if_index0, next0, bi0; vlib_buffer_t * b0; - mpls_unicast_header_t * h0; - u32 label0; - u32 next0 = 0; - u32 sw_if_index0; + char * h0; bi0 = from[0]; to_next[0] = bi0; @@ -197,9 +205,8 @@ mpls_input_inline (vlib_main_t * vm, h0 = vlib_buffer_get_current (b0); sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; - label0 = clib_net_to_host_u32 
(h0->label_exp_s_ttl); /* TTL expired? */ - if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label0) == 0)) + if (PREDICT_FALSE(h0[3] == 0)) { next0 = MPLS_INPUT_NEXT_DROP; b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED]; @@ -216,7 +223,7 @@ mpls_input_inline (vlib_main_t * vm, mpls_input_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->next_index = next0; - tr->label_host_byte_order = label0; + tr->label_net_byte_order = *(u32*)h0; } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c index 2d34cbde341..475bb20454b 100644 --- a/src/vnet/mpls/mpls_lookup.c +++ b/src/vnet/mpls/mpls_lookup.c @@ -80,7 +80,7 @@ mpls_lookup (vlib_main_t * vm, vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - while (n_left_from >= 4 && n_left_to_next >= 2) + while (n_left_from >= 8 && n_left_to_next >= 4) { u32 lbi0, next0, lfib_index0, bi0, hash_c0; const mpls_unicast_header_t * h0; @@ -92,46 +92,79 @@ mpls_lookup (vlib_main_t * vm, const load_balance_t *lb1; const dpo_id_t *dpo1; vlib_buffer_t * b1; + u32 lbi2, next2, lfib_index2, bi2, hash_c2; + const mpls_unicast_header_t * h2; + const load_balance_t *lb2; + const dpo_id_t *dpo2; + vlib_buffer_t * b2; + u32 lbi3, next3, lfib_index3, bi3, hash_c3; + const mpls_unicast_header_t * h3; + const load_balance_t *lb3; + const dpo_id_t *dpo3; + vlib_buffer_t * b3; /* Prefetch next iteration. 
*/ { - vlib_buffer_t * p2, * p3; + vlib_buffer_t * p2, * p3, *p4, *p5; p2 = vlib_get_buffer (vm, from[2]); p3 = vlib_get_buffer (vm, from[3]); + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); vlib_prefetch_buffer_header (p2, STORE); vlib_prefetch_buffer_header (p3, STORE); + vlib_prefetch_buffer_header (p4, STORE); + vlib_prefetch_buffer_header (p5, STORE); CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE); CLIB_PREFETCH (p3->data, sizeof (h0[0]), STORE); + CLIB_PREFETCH (p4->data, sizeof (h0[0]), STORE); + CLIB_PREFETCH (p5->data, sizeof (h0[0]), STORE); } bi0 = to_next[0] = from[0]; bi1 = to_next[1] = from[1]; + bi2 = to_next[2] = from[2]; + bi3 = to_next[3] = from[3]; - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; + from += 4; + n_left_from -= 4; + to_next += 4; + n_left_to_next -= 4; b0 = vlib_get_buffer (vm, bi0); b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); h0 = vlib_buffer_get_current (b0); h1 = vlib_buffer_get_current (b1); + h2 = vlib_buffer_get_current (b2); + h3 = vlib_buffer_get_current (b3); lfib_index0 = vec_elt(mm->fib_index_by_sw_if_index, vnet_buffer(b0)->sw_if_index[VLIB_RX]); lfib_index1 = vec_elt(mm->fib_index_by_sw_if_index, vnet_buffer(b1)->sw_if_index[VLIB_RX]); + lfib_index2 = vec_elt(mm->fib_index_by_sw_if_index, + vnet_buffer(b2)->sw_if_index[VLIB_RX]); + lfib_index3 = vec_elt(mm->fib_index_by_sw_if_index, + vnet_buffer(b3)->sw_if_index[VLIB_RX]); lbi0 = mpls_fib_table_forwarding_lookup (lfib_index0, h0); lbi1 = mpls_fib_table_forwarding_lookup (lfib_index1, h1); + lbi2 = mpls_fib_table_forwarding_lookup (lfib_index2, h2); + lbi3 = mpls_fib_table_forwarding_lookup (lfib_index3, h3); + lb0 = load_balance_get(lbi0); lb1 = load_balance_get(lbi1); + lb2 = load_balance_get(lbi2); + lb3 = load_balance_get(lbi3); hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0; hash_c1 = vnet_buffer(b1)->ip.flow_hash = 0; + hash_c2 = vnet_buffer(b2)->ip.flow_hash 
= 0; + hash_c3 = vnet_buffer(b3)->ip.flow_hash = 0; if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) { @@ -143,11 +176,25 @@ mpls_lookup (vlib_main_t * vm, hash_c1 = vnet_buffer (b1)->ip.flow_hash = mpls_compute_flow_hash(h1, lb1->lb_hash_config); } + if (PREDICT_FALSE(lb2->lb_n_buckets > 1)) + { + hash_c2 = vnet_buffer (b2)->ip.flow_hash = + mpls_compute_flow_hash(h2, lb2->lb_hash_config); + } + if (PREDICT_FALSE(lb3->lb_n_buckets > 1)) + { + hash_c3 = vnet_buffer (b3)->ip.flow_hash = + mpls_compute_flow_hash(h3, lb3->lb_hash_config); + } ASSERT (lb0->lb_n_buckets > 0); ASSERT (is_pow2 (lb0->lb_n_buckets)); ASSERT (lb1->lb_n_buckets > 0); ASSERT (is_pow2 (lb1->lb_n_buckets)); + ASSERT (lb2->lb_n_buckets > 0); + ASSERT (is_pow2 (lb2->lb_n_buckets)); + ASSERT (lb3->lb_n_buckets > 0); + ASSERT (is_pow2 (lb3->lb_n_buckets)); dpo0 = load_balance_get_bucket_i(lb0, (hash_c0 & @@ -155,12 +202,22 @@ mpls_lookup (vlib_main_t * vm, dpo1 = load_balance_get_bucket_i(lb1, (hash_c1 & (lb1->lb_n_buckets_minus_1))); + dpo2 = load_balance_get_bucket_i(lb2, + (hash_c2 & + (lb2->lb_n_buckets_minus_1))); + dpo3 = load_balance_get_bucket_i(lb3, + (hash_c3 & + (lb3->lb_n_buckets_minus_1))); next0 = dpo0->dpoi_next_node; next1 = dpo1->dpoi_next_node; + next2 = dpo2->dpoi_next_node; + next3 = dpo3->dpoi_next_node; vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; + vnet_buffer (b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index; + vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; vlib_increment_combined_counter (cm, cpu_index, lbi0, 1, @@ -168,6 +225,12 @@ mpls_lookup (vlib_main_t * vm, vlib_increment_combined_counter (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b1)); + vlib_increment_combined_counter + (cm, cpu_index, lbi2, 1, + vlib_buffer_length_in_chain (vm, b2)); + vlib_increment_combined_counter + (cm, cpu_index, lbi3, 1, + vlib_buffer_length_in_chain (vm, b3)); /* * before we pop the label copy th 
values we need to maintain. @@ -181,12 +244,20 @@ mpls_lookup (vlib_main_t * vm, vnet_buffer (b1)->mpls.ttl = ((char*)h1)[3]; vnet_buffer (b1)->mpls.exp = (((char*)h1)[2] & 0xe) >> 1; vnet_buffer (b1)->mpls.first = 1; + vnet_buffer (b2)->mpls.ttl = ((char*)h2)[3]; + vnet_buffer (b2)->mpls.exp = (((char*)h2)[2] & 0xe) >> 1; + vnet_buffer (b2)->mpls.first = 1; + vnet_buffer (b3)->mpls.ttl = ((char*)h3)[3]; + vnet_buffer (b3)->mpls.exp = (((char*)h3)[2] & 0xe) >> 1; + vnet_buffer (b3)->mpls.first = 1; /* * pop the label that was just used in the lookup */ vlib_buffer_advance(b0, sizeof(*h0)); vlib_buffer_advance(b1, sizeof(*h1)); + vlib_buffer_advance(b2, sizeof(*h2)); + vlib_buffer_advance(b3, sizeof(*h3)); if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -210,9 +281,32 @@ mpls_lookup (vlib_main_t * vm, tr->label_net_byte_order = h1->label_exp_s_ttl; } - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_lookup_trace_t *tr = vlib_add_trace (vm, node, + b2, sizeof (*tr)); + tr->next_index = next2; + tr->lb_index = lbi2; + tr->lfib_index = lfib_index2; + tr->hash = hash_c2; + tr->label_net_byte_order = h2->label_exp_s_ttl; + } + + if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_lookup_trace_t *tr = vlib_add_trace (vm, node, + b3, sizeof (*tr)); + tr->next_index = next3; + tr->lb_index = lbi3; + tr->lfib_index = lfib_index3; + tr->hash = hash_c3; + tr->label_net_byte_order = h3->label_exp_s_ttl; + } + + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, n_left_to_next, - bi0, bi1, next0, next1); + bi0, bi1, bi2, bi3, + next0, next1, next2, next3); } while (n_left_from > 0 && n_left_to_next > 0) @@ -361,10 +455,9 @@ mpls_load_balance (vlib_main_t * vm, while (n_left_from >= 4 && n_left_to_next >= 2) { - mpls_lookup_next_t next0, next1; const load_balance_t *lb0, *lb1; vlib_buffer_t * p0, *p1; - u32 pi0, lbi0, hc0, pi1, lbi1, hc1; + u32 pi0, lbi0, hc0, pi1, lbi1, 
hc1, next0, next1; const mpls_unicast_header_t *mpls0, *mpls1; const dpo_id_t *dpo0, *dpo1; @@ -465,10 +558,9 @@ mpls_load_balance (vlib_main_t * vm, while (n_left_from > 0 && n_left_to_next > 0) { - mpls_lookup_next_t next0; const load_balance_t *lb0; vlib_buffer_t * p0; - u32 pi0, lbi0, hc0; + u32 pi0, lbi0, hc0, next0; const mpls_unicast_header_t *mpls0; const dpo_id_t *dpo0; diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c index cf3540067cc..2d8bd0c961f 100644 --- a/src/vnet/mpls/mpls_output.c +++ b/src/vnet/mpls/mpls_output.c @@ -29,6 +29,16 @@ typedef struct { u8 packet_data[64 - 1*sizeof(u32)]; } mpls_output_trace_t; +#define foreach_mpls_output_next \ +_(DROP, "error-drop") + +typedef enum { +#define _(s,n) MPLS_OUTPUT_NEXT_##s, + foreach_mpls_output_next +#undef _ + MPLS_OUTPUT_N_NEXT, +} mpls_output_next_t; + static u8 * format_mpls_output_trace (u8 * s, va_list * args) {