X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fmpls%2Fmpls_output.c;h=a1d2d3baa88fea2297fbca1adc370f6771a0c4ec;hb=4a58e49cf;hp=2d8bd0c961f296b40d63ec76b4a8c86cf264ab3e;hpb=696e88da9799056036f329676213f3c0c0a1db9c;p=vpp.git diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c index 2d8bd0c961f..a1d2d3baa88 100644 --- a/src/vnet/mpls/mpls_output.c +++ b/src/vnet/mpls/mpls_output.c @@ -16,21 +16,25 @@ */ #include -#include #include #include +#include +#include typedef struct { /* Adjacency taken. */ u32 adj_index; u32 flow_hash; - - /* Packet data, possibly *after* rewrite. */ - u8 packet_data[64 - 1*sizeof(u32)]; } mpls_output_trace_t; +typedef enum { + MPLS_OUTPUT_MODE, + MPLS_OUTPUT_MIDCHAIN_MODE +}mpls_output_mode_t; + #define foreach_mpls_output_next \ -_(DROP, "error-drop") +_(DROP, "error-drop") \ +_(FRAG, "mpls-frag") typedef enum { #define _(s,n) MPLS_OUTPUT_NEXT_##s, @@ -45,16 +49,11 @@ format_mpls_output_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); mpls_output_trace_t * t = va_arg (*args, mpls_output_trace_t *); - uword indent = format_get_indent (s); s = format (s, "adj-idx %d : %U flow hash: 0x%08x", t->adj_index, format_ip_adjacency, t->adj_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash); - s = format (s, "\n%U%U", - format_white_space, indent, - format_ip_adjacency_packet_data, - t->adj_index, t->packet_data, sizeof (t->packet_data)); return s; } @@ -62,14 +61,14 @@ static inline uword mpls_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, - int is_midchain) + mpls_output_mode_t mode) { - u32 n_left_from, next_index, * from, * to_next, cpu_index; + u32 n_left_from, next_index, * from, * to_next, thread_index; vlib_node_runtime_t * error_node; u32 n_left_to_next; mpls_main_t *mm; - cpu_index = os_get_cpu_number(); + thread_index = vlib_get_thread_index(); error_node = vlib_node_get_runtime (vm, mpls_output_node.index); from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -86,12 +85,14 @@ mpls_output_inline (vlib_main_t * vm, ip_adjacency_t * adj0; mpls_unicast_header_t *hdr0; vlib_buffer_t * p0; - u32 pi0, rw_len0, adj_index0, next0, error0; + u32 pi0, adj_index0, next0, error0; + word rw_len0; ip_adjacency_t * adj1; mpls_unicast_header_t *hdr1; vlib_buffer_t * p1; - u32 pi1, rw_len1, adj_index1, next1, error1; + u32 pi1, adj_index1, next1, error1; + word rw_len1; /* Prefetch next iteration. */ { @@ -121,10 +122,6 @@ mpls_output_inline (vlib_main_t * vm, adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; - /* We should never rewrite a pkt using the MISS adjacency */ - ASSERT(adj_index0); - ASSERT(adj_index1); - adj0 = adj_get(adj_index0); adj1 = adj_get(adj_index1); hdr0 = vlib_buffer_get_current (p0); @@ -137,17 +134,19 @@ mpls_output_inline (vlib_main_t * vm, /* Update packet buffer attributes/set output interface. */ rw_len0 = adj0[0].rewrite_header.data_bytes; rw_len1 = adj1[0].rewrite_header.data_bytes; + vnet_buffer (p0)->mpls.save_rewrite_length = rw_len0; + vnet_buffer (p1)->mpls.save_rewrite_length = rw_len1; /* Bump the adj counters for packet and bytes */ vlib_increment_combined_counter (&adjacency_counters, - cpu_index, + thread_index, adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); vlib_increment_combined_counter (&adjacency_counters, - cpu_index, + thread_index, adj_index1, 1, vlib_buffer_length_in_chain (vm, p1) + rw_len1); @@ -156,8 +155,7 @@ mpls_output_inline (vlib_main_t * vm, if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <= adj0[0].rewrite_header.max_l3_packet_bytes)) { - p0->current_data -= rw_len0; - p0->current_length += rw_len0; + vlib_buffer_advance(p0, -rw_len0); vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index; @@ -171,14 +169,16 @@ mpls_output_inline (vlib_main_t * vm, } else { - error0 = IP4_ERROR_MTU_EXCEEDED; - next0 = MPLS_OUTPUT_NEXT_DROP; + error0 = IP4_ERROR_MTU_EXCEEDED; + next0 = MPLS_OUTPUT_NEXT_FRAG; + vlib_node_increment_counter (vm, mpls_output_node.index, + MPLS_ERROR_PKTS_NEED_FRAG, + 1); } if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p1) <= adj1[0].rewrite_header.max_l3_packet_bytes)) { - p1->current_data -= rw_len1; - p1->current_length += rw_len1; + vlib_buffer_advance(p1, -rw_len1); vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index; @@ -192,44 +192,48 @@ mpls_output_inline (vlib_main_t * vm, } else { - error1 = IP4_ERROR_MTU_EXCEEDED; - next1 = MPLS_OUTPUT_NEXT_DROP; + error1 = IP4_ERROR_MTU_EXCEEDED; + next1 = MPLS_OUTPUT_NEXT_FRAG; + vlib_node_increment_counter (vm, mpls_output_node.index, + MPLS_ERROR_PKTS_NEED_FRAG, + 1); } - if (is_midchain) + if (mode == MPLS_OUTPUT_MIDCHAIN_MODE) { - adj0->sub_type.midchain.fixup_func(vm, adj0, p0); - adj1->sub_type.midchain.fixup_func(vm, adj1, p1); - } - - p0->error = error_node->errors[error0]; - p1->error = error_node->errors[error1]; - - if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) - { - mpls_output_trace_t *tr = vlib_add_trace (vm, node, - p0, sizeof (*tr)); - tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX]; - tr->flow_hash = vnet_buffer(p0)->ip.flow_hash; - } - if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED)) - { - mpls_output_trace_t *tr = vlib_add_trace (vm, node, - p1, sizeof (*tr)); - tr->adj_index = vnet_buffer(p1)->ip.adj_index[VLIB_TX]; - tr->flow_hash = vnet_buffer(p1)->ip.flow_hash; - } + adj_midchain_fixup (vm, adj0, p0, VNET_LINK_MPLS); + adj_midchain_fixup (vm, adj1, p1, VNET_LINK_MPLS); + } - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - pi0, pi1, next0, next1); - } + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_output_trace_t *tr = + vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->adj_index = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + tr->flow_hash = vnet_buffer (p0)->ip.flow_hash; + } + if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_output_trace_t *tr = + vlib_add_trace (vm, node, p1, sizeof (*tr)); + tr->adj_index = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; + tr->flow_hash = vnet_buffer (p1)->ip.flow_hash; + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, + n_left_to_next, pi0, pi1, next0, + next1); + } while (n_left_from > 0 && n_left_to_next > 0) { ip_adjacency_t * adj0; - mpls_unicast_header_t *hdr0; + mpls_unicast_header_t *hdr0; vlib_buffer_t * p0; - u32 pi0, rw_len0, adj_index0, next0, error0; + u32 pi0, adj_index0, next0, error0; + word rw_len0; pi0 = to_next[0] = from[0]; @@ -237,32 +241,29 @@ mpls_output_inline (vlib_main_t * vm, adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - /* We should never rewrite a pkt using the MISS adjacency */ - ASSERT(adj_index0); - adj0 = adj_get(adj_index0); - hdr0 = vlib_buffer_get_current (p0); + hdr0 = vlib_buffer_get_current (p0); /* Guess we are only writing on simple Ethernet header. */ - vnet_rewrite_one_header (adj0[0], hdr0, + vnet_rewrite_one_header (adj0[0], hdr0, sizeof (ethernet_header_t)); - + /* Update packet buffer attributes/set output interface. */ rw_len0 = adj0[0].rewrite_header.data_bytes; - + vnet_buffer (p0)->mpls.save_rewrite_length = rw_len0; + vlib_increment_combined_counter (&adjacency_counters, - cpu_index, + thread_index, adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); - + /* Check MTU of outgoing interface. */ if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <= adj0[0].rewrite_header.max_l3_packet_bytes)) { - p0->current_data -= rw_len0; - p0->current_length += rw_len0; + vlib_buffer_advance(p0, -rw_len0); vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index; @@ -276,24 +277,27 @@ mpls_output_inline (vlib_main_t * vm, } else { - error0 = IP4_ERROR_MTU_EXCEEDED; - next0 = MPLS_OUTPUT_NEXT_DROP; + error0 = IP4_ERROR_MTU_EXCEEDED; + next0 = MPLS_OUTPUT_NEXT_FRAG; + vlib_node_increment_counter (vm, mpls_output_node.index, + MPLS_ERROR_PKTS_NEED_FRAG, + 1); } - if (is_midchain) + if (mode == MPLS_OUTPUT_MIDCHAIN_MODE) { - adj0->sub_type.midchain.fixup_func(vm, adj0, p0); - } + adj_midchain_fixup (vm, adj0, p0, VNET_LINK_MPLS); + } - p0->error = error_node->errors[error0]; + p0->error = error_node->errors[error0]; from += 1; n_left_from -= 1; to_next += 1; n_left_to_next -= 1; - - if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { - mpls_output_trace_t *tr = vlib_add_trace (vm, node, + mpls_output_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX]; tr->flow_hash = vnet_buffer(p0)->ip.flow_hash; @@ -319,16 +323,14 @@ static char * mpls_error_strings[] = { #undef mpls_error }; -static inline uword -mpls_output (vlib_main_t * vm, +VLIB_NODE_FN (mpls_output_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) { - return (mpls_output_inline(vm, node, from_frame, /* is_midchain */ 0)); + return (mpls_output_inline(vm, node, from_frame, MPLS_OUTPUT_MODE)); } VLIB_REGISTER_NODE (mpls_output_node) = { - .function = mpls_output, .name = "mpls-output", /* Takes a vector of packets. */ .vector_size = sizeof (u32), @@ -337,37 +339,201 @@ VLIB_REGISTER_NODE (mpls_output_node) = { .n_next_nodes = MPLS_OUTPUT_N_NEXT, .next_nodes = { -#define _(s,n) [MPLS_OUTPUT_NEXT_##s] = n, - foreach_mpls_output_next -#undef _ - }, - + [MPLS_OUTPUT_NEXT_DROP] = "mpls-drop", + [MPLS_OUTPUT_NEXT_FRAG] = "mpls-frag", + }, .format_trace = format_mpls_output_trace, }; -VLIB_NODE_FUNCTION_MULTIARCH (mpls_output_node, mpls_output) - -static inline uword -mpls_midchain (vlib_main_t * vm, +VLIB_NODE_FN (mpls_midchain_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) { - return (mpls_output_inline(vm, node, from_frame, /* is_midchain */ 1)); + return (mpls_output_inline(vm, node, from_frame, MPLS_OUTPUT_MIDCHAIN_MODE)); } VLIB_REGISTER_NODE (mpls_midchain_node) = { - .function = mpls_midchain, .name = "mpls-midchain", .vector_size = sizeof (u32), - .format_trace = format_mpls_output_trace, + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, .sibling_of = "mpls-output", + .format_trace = format_mpls_output_trace, }; -VLIB_NODE_FUNCTION_MULTIARCH (mpls_midchain_node, mpls_midchain) +static char *mpls_frag_error_strings[] = { +#define _(sym,string) string, + foreach_ip_frag_error +#undef _ +}; -/** +typedef struct mpls_frag_trace_t_ +{ + u16 pkt_size; + u16 mtu; +} mpls_frag_trace_t; + +typedef enum +{ + MPLS_FRAG_NEXT_REWRITE, + MPLS_FRAG_NEXT_REWRITE_MIDCHAIN, + MPLS_FRAG_NEXT_ICMP_ERROR, + MPLS_FRAG_NEXT_DROP, + MPLS_FRAG_N_NEXT, +} mpls_frag_next_t; + +static uword +mpls_frag (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, next_index, * from, * to_next, n_left_to_next, *frags; + vlib_node_runtime_t * error_node; + + error_node = vlib_node_get_runtime (vm, mpls_output_node.index); + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + frags = NULL; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + ip_adjacency_t * adj0; + vlib_buffer_t * p0; + mpls_frag_next_t next0; + u32 pi0, adj_index0; + ip_frag_error_t error0 = IP_FRAG_ERROR_NONE; + i16 encap_size; + u8 is_ip4; + + pi0 = to_next[0] = from[0]; + p0 = vlib_get_buffer (vm, pi0); + from += 1; + n_left_from -= 1; + is_ip4 = vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4; + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + adj0 = adj_get(adj_index0); + + /* the size of the MPLS stack */ + encap_size = vnet_buffer(p0)->l3_hdr_offset - p0->current_data; + + /* IP fragmentation */ + if (is_ip4) + error0 = ip4_frag_do_fragment (vm, pi0, + adj0->rewrite_header.max_l3_packet_bytes, + encap_size, &frags); + else + error0 = ip6_frag_do_fragment (vm, pi0, + adj0->rewrite_header.max_l3_packet_bytes, + encap_size, &frags); + + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_frag_trace_t *tr = + vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->mtu = adj0->rewrite_header.max_l3_packet_bytes; + tr->pkt_size = vlib_buffer_length_in_chain(vm, p0); + } + + if (PREDICT_TRUE(error0 == IP_FRAG_ERROR_NONE)) + { + /* Free original buffer chain */ + vlib_buffer_free_one (vm, pi0); /* Free original packet */ + next0 = (IP_LOOKUP_NEXT_MIDCHAIN == adj0->lookup_next_index ? + MPLS_FRAG_NEXT_REWRITE_MIDCHAIN : + MPLS_FRAG_NEXT_REWRITE); + } + else if (is_ip4 && error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET) + { + icmp4_error_set_vnet_buffer ( + p0, ICMP4_destination_unreachable, + ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set, + vnet_buffer (p0)->ip_frag.mtu); + next0 = MPLS_FRAG_NEXT_ICMP_ERROR; + } + else + { + vlib_error_count (vm, mpls_output_node.index, error0, 1); + vec_add1 (frags, pi0); /* Get rid of the original buffer */ + next0 = MPLS_FRAG_NEXT_DROP; + } + + /* Send fragments that were added in the frame */ + u32 *frag_from, frag_left; + + frag_from = frags; + frag_left = vec_len (frags); + + while (frag_left > 0) + { + while (frag_left > 0 && n_left_to_next > 0) + { + u32 i; + i = to_next[0] = frag_from[0]; + frag_from += 1; + frag_left -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, i); + p0->error = error_node->errors[error0]; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, i, + next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + vec_reset_length (frags); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vec_free (frags); + + return frame->n_vectors; +} + +static u8 * +format_mpls_frag_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_frag_trace_t *t = va_arg (*args, mpls_frag_trace_t *); + + s = format (s, "mtu:%d pkt-size:%d", t->mtu, t->pkt_size); + return s; +} + +VLIB_REGISTER_NODE (mpls_frag_node) = { + .function = mpls_frag, + .name = "mpls-frag", + .vector_size = sizeof (u32), + .format_trace = format_mpls_frag_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = IP_FRAG_N_ERROR, + .error_strings = mpls_frag_error_strings, + + .n_next_nodes = MPLS_FRAG_N_NEXT, + .next_nodes = { + [MPLS_FRAG_NEXT_REWRITE] = "mpls-output", + [MPLS_FRAG_NEXT_REWRITE_MIDCHAIN] = "mpls-midchain", + [MPLS_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error", + [MPLS_FRAG_NEXT_DROP] = "mpls-drop" + }, +}; + +/* * @brief Next index values from the MPLS incomplete adj node */ #define foreach_mpls_adj_incomplete_next \ @@ -399,8 +565,7 @@ typedef struct mpls_adj_incomplete_trace_t_ * We pay a cost for this 'routing' node, but an incomplete adj is the * exception case. */ -static inline uword -mpls_adj_incomplete (vlib_main_t * vm, +VLIB_NODE_FN (mpls_adj_incomplete_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) { @@ -431,7 +596,6 @@ mpls_adj_incomplete (vlib_main_t * vm, n_left_to_next -= 1; adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - ASSERT(adj_index0); adj0 = adj_get(adj_index0); @@ -442,9 +606,9 @@ mpls_adj_incomplete (vlib_main_t * vm, else { next0 = MPLS_ADJ_INCOMPLETE_NEXT_IP6; - } + } - if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { mpls_adj_incomplete_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); @@ -468,7 +632,7 @@ format_mpls_adj_incomplete_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); mpls_adj_incomplete_trace_t * t; - uword indent; + u32 indent; t = va_arg (*args, mpls_adj_incomplete_trace_t *); indent = format_get_indent (s); @@ -480,7 +644,6 @@ format_mpls_adj_incomplete_trace (u8 * s, va_list * args) } VLIB_REGISTER_NODE (mpls_adj_incomplete_node) = { - .function = mpls_adj_incomplete, .name = "mpls-adj-incomplete", .format_trace = format_mpls_adj_incomplete_trace, /* Takes a vector of packets. */ @@ -495,6 +658,3 @@ VLIB_REGISTER_NODE (mpls_adj_incomplete_node) = { #undef _ }, }; - -VLIB_NODE_FUNCTION_MULTIARCH (mpls_adj_incomplete_node, - mpls_adj_incomplete)