X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=vnet%2Fvnet%2Fip%2Fip6_hop_by_hop.c;h=f6e10f08d97ff5e57d7eb54ea238b586bda212a0;hb=816f3e1b879b43802ea8035d6a3f1cbf5db76825;hp=5bd34b8c85e1959cfe8c44e96520b75a9a532f89;hpb=08ff7e00bf0e7cf93a732e98a026a76a4349fd41;p=vpp.git diff --git a/vnet/vnet/ip/ip6_hop_by_hop.c b/vnet/vnet/ip/ip6_hop_by_hop.c index 5bd34b8c85e..f6e10f08d97 100644 --- a/vnet/vnet/ip/ip6_hop_by_hop.c +++ b/vnet/vnet/ip/ip6_hop_by_hop.c @@ -25,384 +25,209 @@ #include -ip6_hop_by_hop_main_t ip6_hop_by_hop_main; - -/* - * ip6 hop-by-hop option handling. We push pkts with h-b-h options to - * ip6_hop_by_hop_node_fn from ip6-lookup at a cost of ~2 clocks/pkt in - * the speed path. - * - * We parse through the h-b-h option TLVs, specifically looking for - * HBH_OPTION_TYPE_IOAM_DATA_LIST. [Someone needs to get bananas from - * IANA, aka to actually allocate the option TLV codes.] - * - * If we find the indicated option type, and we have remaining list - * elements in the trace list, allocate and populate the trace list - * element. - * - * At the ingress edge: punch in the h-b-h rewrite, then visit the - * standard h-b-h option handler. We have to be careful in the standard - * h-b-h handler, to avoid looping until we run out of rewrite space. - * Ask me how I know that. - * - * Remaining work: - * decide on egress point "pop and count" scheme - * time stamp handling: usec since the top of the hour? - * configure the node id - * trace list application data support - * cons up analysis / steering plug-in(s) - * add configuration binary APIs, vpp_api_test_support, yang models and - * orca code - * perf tune: dual loop, replace memcpy w/ N x 8-byte load/stores - * +/* Timestamp precision multipliers for seconds, milliseconds, microseconds + * and nanoseconds respectively. */ +static f64 trace_tsp_mul[4] = {1, 1e3, 1e6, 1e9}; -/* - * primary h-b-h handler trace support - * We work pretty hard on the problem for obvious reasons - */ -typedef struct { - u32 next_index; - u32 trace_len; - u8 option_data[256]; -} ip6_hop_by_hop_trace_t; +char *ppc_state[] = {"None", "Encap", "Decap"}; -static u8 * format_ioam_data_list_element (u8 * s, va_list * args) -{ - ioam_data_list_element_t *elt = va_arg (*args, ioam_data_list_element_t *); - u32 ttl_node_id_host_byte_order = - clib_net_to_host_u32 (elt->ttl_node_id); +ip6_hop_by_hop_ioam_main_t ip6_hop_by_hop_ioam_main; - s = format (s, "ttl %d node id %d ingress %d egress %d ts %u", - ttl_node_id_host_byte_order>>24, - ttl_node_id_host_byte_order & 0x00FFFFFF, - elt->ingress_if, - elt->egress_if, - elt->timestamp); - return s; -} +#define foreach_ip6_hbyh_ioam_input_next \ + _(IP6_REWRITE, "ip6-rewrite") \ + _(IP6_LOOKUP, "ip6-lookup") \ + _(DROP, "error-drop") + +typedef enum { +#define _(s,n) IP6_HBYH_IOAM_INPUT_NEXT_##s, + foreach_ip6_hbyh_ioam_input_next +#undef _ + IP6_HBYH_IOAM_INPUT_N_NEXT, +} ip6_hbyh_ioam_input_next_t; + +typedef union { + u64 as_u64; + u32 as_u32[2]; +} time_u64_t; -static u8 * format_ip6_hop_by_hop_trace (u8 * s, va_list * args) +static inline u8 +fetch_trace_data_size(u8 trace_type) { - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - ip6_hop_by_hop_trace_t * t = va_arg (*args, ip6_hop_by_hop_trace_t *); - ip6_hop_by_hop_header_t *hbh0; - ip6_hop_by_hop_option_t *opt0, *limit0; - ioam_trace_option_t * trace0; - ioam_data_list_element_t * elt0; - int elt_index; - u8 type0; - - hbh0 = (ip6_hop_by_hop_header_t *)t->option_data; + u8 trace_data_size = 0; + + if (trace_type == TRACE_TYPE_IF_TS_APP) + trace_data_size = sizeof(ioam_trace_if_ts_app_t); + else if(trace_type == TRACE_TYPE_IF) + trace_data_size = sizeof(ioam_trace_if_t); + else if(trace_type == TRACE_TYPE_TS) + trace_data_size = sizeof(ioam_trace_ts_t); + else if(trace_type == TRACE_TYPE_APP) + trace_data_size = sizeof(ioam_trace_app_t); + else if(trace_type == TRACE_TYPE_TS_APP) + trace_data_size = sizeof(ioam_trace_ts_app_t); + + return trace_data_size; +} - s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d\n", - t->next_index, (hbh0->length+1)<<3, t->trace_len); - - opt0 = (ip6_hop_by_hop_option_t *) (hbh0+1); - limit0 = (ip6_hop_by_hop_option_t *) ((u8 *)hbh0) + t->trace_len; +static u8 * format_ioam_data_list_element (u8 * s, va_list * args) +{ + u32 *elt = va_arg (*args, u32 *); + u8 *trace_type_p = va_arg (*args, u8 *); + u8 trace_type = *trace_type_p; - while (opt0 < limit0) + + if (trace_type & BIT_TTL_NODEID) { - type0 = opt0->type & HBH_OPTION_TYPE_MASK; - elt_index = 0; - switch (type0) - { - case HBH_OPTION_TYPE_IOAM_DATA_LIST: - trace0 = (ioam_trace_option_t *)opt0; - s = format (s, " Trace %d elts left\n", - trace0->data_list_elts_left); - elt0 = &trace0->elts[0]; - while ((u8 *) elt0 < - ((u8 *)(&trace0->elts[0]) + trace0->hdr.length - 1 - /* -1 accounts for elts_left */)) - { - s = format (s, " [%d] %U\n",elt_index, - format_ioam_data_list_element, elt0); - elt_index++; - elt0++; - } - - opt0 = (ip6_hop_by_hop_option_t *) - (((u8 *)opt0) + opt0->length - + sizeof (ip6_hop_by_hop_option_t)); - break; - - case HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK: - s = format (s, " POW opt present\n"); - opt0 = (ip6_hop_by_hop_option_t *) - (((u8 *)opt0) + sizeof (ioam_pow_option_t)); - break; - - case 0: /* Pad, just stop */ - opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1; - break; - - default: - s = format (s, "Unknown %d", type0); - opt0 = (ip6_hop_by_hop_option_t *) - (((u8 *)opt0) + opt0->length - + sizeof (ip6_hop_by_hop_option_t)); - break; - } + u32 ttl_node_id_host_byte_order = clib_net_to_host_u32 (*elt); + s = format (s, "ttl 0x%x node id 0x%x ", + ttl_node_id_host_byte_order>>24, + ttl_node_id_host_byte_order & 0x00FFFFFF); + + elt++; + } + + if (trace_type & BIT_ING_INTERFACE && trace_type & BIT_ING_INTERFACE) + { + u32 ingress_host_byte_order = clib_net_to_host_u32(*elt); + s = format (s, "ingress 0x%x egress 0x%x ", + ingress_host_byte_order >> 16, + ingress_host_byte_order &0xFFFF); + elt++; + } + + if (trace_type & BIT_TIMESTAMP) + { + u32 ts_in_host_byte_order = clib_net_to_host_u32 (*elt); + s = format (s, "ts 0x%x \n", ts_in_host_byte_order); + elt++; } + + if (trace_type & BIT_APPDATA) + { + u32 appdata_in_host_byte_order = clib_net_to_host_u32 (*elt); + s = format (s, "app 0x%x ", appdata_in_host_byte_order); + elt++; + } + return s; } -vlib_node_registration_t ip6_hop_by_hop_node; - -#define foreach_ip6_hop_by_hop_error \ -_(PROCESSED, "Pkts with ip6 hop-by-hop options") - -typedef enum { -#define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym, - foreach_ip6_hop_by_hop_error -#undef _ - IP6_HOP_BY_HOP_N_ERROR, -} ip6_hop_by_hop_error_t; +u8 * +ip6_hbh_ioam_trace_data_list_trace_handler (u8 *s, ip6_hop_by_hop_option_t *opt) +{ + ioam_trace_option_t *trace; + u8 trace_data_size_in_words = 0; + u32 *elt; + int elt_index = 0; -static char * ip6_hop_by_hop_error_strings[] = { -#define _(sym,string) string, - foreach_ip6_hop_by_hop_error -#undef _ -}; + trace = (ioam_trace_option_t *)opt; +#if 0 + s = format (s, " Trace Type 0x%x , %d elts left ts msb(s) 0x%x\n", trace->ioam_trace_type, trace->data_list_elts_left, + t->timestamp_msbs); +#endif + s = format (s, " Trace Type 0x%x , %d elts left\n", trace->ioam_trace_type, trace->data_list_elts_left); + trace_data_size_in_words = fetch_trace_data_size(trace->ioam_trace_type)/4; + elt = &trace->elts[0]; + while ((u8 *) elt < ((u8 *)(&trace->elts[0]) + trace->hdr.length - 2 + /* -2 accounts for ioam_trace_type,elts_left */)) { + s = format (s, " [%d] %U\n",elt_index, + format_ioam_data_list_element, + elt, &trace->ioam_trace_type); + elt_index++; + elt += trace_data_size_in_words; + } + return (s); +} -static uword -ip6_hop_by_hop_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +int +ip6_hbh_ioam_trace_data_list_handler (vlib_buffer_t *b, ip6_header_t *ip, ip6_hop_by_hop_option_t *opt) { ip6_main_t * im = &ip6_main; ip_lookup_main_t * lm = &im->lookup_main; - ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main; - u32 n_left_from, * from, * to_next; - ip_lookup_next_t next_index; - u32 processed = 0; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, - to_next, n_left_to_next); - -#if 0 /* $$$ DUAL-LOOP ME */ - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 next0 = IP6_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT; - u32 next1 = IP6_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT; - u32 sw_if_index0, sw_if_index1; - u8 tmp0[6], tmp1[6]; - ethernet_header_t *en0, *en1; - u32 bi0, bi1; - vlib_buffer_t * b0, * b1; - - /* Prefetch next iteration. */ - { - vlib_buffer_t * p2, * p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); - } - - /* speculatively enqueue b0 and b1 to the current next frame */ - to_next[0] = bi0 = from[0]; - to_next[1] = bi1 = from[1]; - from += 2; - to_next += 2; - n_left_from -= 2; - n_left_to_next -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - /* $$$$$ Dual loop: process 2 x packets here $$$$$ */ - ASSERT (b0->current_data == 0); - ASSERT (b1->current_data == 0); - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b0); - - sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; - sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; - - /* $$$$$ End of processing 2 x packets $$$$$ */ + ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main; + u8 elt_index = 0; + ioam_trace_option_t *trace = (ioam_trace_option_t *)opt; + u32 adj_index = vnet_buffer (b)->ip.adj_index[VLIB_TX]; + ip_adjacency_t *adj = ip_get_adjacency (lm, adj_index); + time_u64_t time_u64; + u32 *elt; + int rv = 0; + + time_u64.as_u64 = 0; + + if (PREDICT_TRUE (trace->data_list_elts_left)) { + trace->data_list_elts_left--; + /* fetch_trace_data_size returns in bytes. Convert it to 4-bytes + * to skip to this node's location. + */ + elt_index = trace->data_list_elts_left * fetch_trace_data_size(trace->ioam_trace_type) / 4; + elt = &trace->elts[elt_index]; + if (trace->ioam_trace_type & BIT_TTL_NODEID) { + *elt = clib_host_to_net_u32 ((ip->hop_limit<<24) | hm->node_id); + elt++; + } - if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) - { - if (b0->flags & VLIB_BUFFER_IS_TRACED) - { - ip6_hop_by_hop_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - t->sw_if_index = sw_if_index0; - t->next_index = next0; - } - if (b1->flags & VLIB_BUFFER_IS_TRACED) - { - ip6_hop_by_hop_trace_t *t = - vlib_add_trace (vm, node, b1, sizeof (*t)); - t->sw_if_index = sw_if_index1; - t->next_index = next1; - } - } - - /* verify speculative enqueues, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } -#endif + if (trace->ioam_trace_type & BIT_ING_INTERFACE) { + *elt = (vnet_buffer(b)->sw_if_index[VLIB_RX]&0xFFFF) << 16 | (adj->rewrite_header.sw_if_index & 0xFFFF); + *elt = clib_host_to_net_u32(*elt); + elt++; + } + + if (trace->ioam_trace_type & BIT_TIMESTAMP) { + /* Send least significant 32 bits */ + f64 time_f64 = (f64)(((f64)hm->unix_time_0) + (vlib_time_now(hm->vlib_main) - hm->vlib_time_0)); + + time_u64.as_u64 = time_f64 * trace_tsp_mul[hm->trace_tsp]; + *elt = clib_host_to_net_u32(time_u64.as_u32[0]); + elt++; + } - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t * b0; - u32 next0; - u32 adj_index0; - ip6_header_t * ip0; - ip_adjacency_t * adj0; - ip6_hop_by_hop_header_t *hbh0; - ip6_hop_by_hop_option_t *opt0, *limit0; - ioam_trace_option_t * trace0; - ioam_data_list_element_t * elt0; - u8 type0; - - /* speculatively enqueue b0 to the current next frame */ - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; + if (trace->ioam_trace_type & BIT_APPDATA) { + /* $$$ set elt0->app_data */ + *elt = clib_host_to_net_u32(hm->app_data); + elt++; + } + } + return (rv); +} - b0 = vlib_get_buffer (vm, bi0); +/* The main h-b-h tracer will be invoked, no need to do much here */ +int +ip6_hbh_add_register_option (u8 option, + u8 size, + int rewrite_options(u8 *rewrite_string, u8 rewrite_size)) +{ + ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main; - ip0 = vlib_buffer_get_current (b0); - adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; - adj0 = ip_get_adjacency (lm, adj_index0); - hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1); - opt0 = (ip6_hop_by_hop_option_t *)(hbh0+1); - limit0 = (ip6_hop_by_hop_option_t *) - ((u8 *)hbh0 + ((hbh0->length+1)<<3)); - - /* Scan the set of h-b-h options, process ones that we understand */ - while (opt0 < limit0) - { - type0 = opt0->type & HBH_OPTION_TYPE_MASK; - switch (type0) - { - case HBH_OPTION_TYPE_IOAM_DATA_LIST: - trace0 = (ioam_trace_option_t *)opt0; - if (PREDICT_TRUE (trace0->data_list_elts_left)) - { - trace0->data_list_elts_left--; - elt0 = &trace0->elts[trace0->data_list_elts_left]; - elt0->ttl_node_id = - clib_host_to_net_u32 ((ip0->hop_limit<<24) - | hm->node_id); - elt0->ingress_if = - vnet_buffer(b0)->sw_if_index[VLIB_RX]; - elt0->egress_if = adj0->rewrite_header.sw_if_index; - elt0->timestamp = 123; /* $$$$ */ - /* $$$ set elt0->app_data */ - } - - opt0 = (ip6_hop_by_hop_option_t *) - (((u8 *)opt0) + opt0->length - + sizeof (ip6_hop_by_hop_option_t)); - break; - - case HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK: - opt0 = (ip6_hop_by_hop_option_t *) - (((u8 *)opt0) + sizeof (ioam_pow_option_t)); - break; - - case 0: /* Pad */ - opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1; - goto out0; - } - } + ASSERT (option < ARRAY_LEN (hm->add_options)); - out0: + /* Already registered */ + if (hm->add_options[option]) + return (-1); - /* - * Since we push pkts here from the h-b-h header imposition code - * we have to be careful what we wish for... - */ - next0 = adj0->lookup_next_index != IP_LOOKUP_NEXT_ADD_HOP_BY_HOP ? - adj0->lookup_next_index : adj0->saved_lookup_next_index; + hm->add_options[option] = rewrite_options; + hm->options_size[option] = size; + + return (0); +} - if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) - && (b0->flags & VLIB_BUFFER_IS_TRACED))) - { - ip6_hop_by_hop_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - u32 trace_len = (hbh0->length+1)<<3; - t->next_index = next0; - /* Capture the h-b-h option verbatim */ - trace_len = trace_len < ARRAY_LEN(t->option_data) ? - trace_len : ARRAY_LEN(t->option_data); - t->trace_len = trace_len; - memcpy (t->option_data, hbh0, trace_len); - } - - processed++; +int +ip6_hbh_add_unregister_option (u8 option) +{ + ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main; - /* verify speculative enqueue, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } + ASSERT (option < ARRAY_LEN (hm->add_options)); - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } + /* Not registered */ + if (!hm->add_options[option]) + return (-1); - vlib_node_increment_counter (vm, ip6_hop_by_hop_node.index, - IP6_HOP_BY_HOP_ERROR_PROCESSED, processed); - return frame->n_vectors; + hm->add_options[option] = NULL; + hm->options_size[option] = 0; + return (0); } -VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = { - .function = ip6_hop_by_hop_node_fn, - .name = "ip6-hop-by-hop", - .vector_size = sizeof (u32), - .format_trace = format_ip6_hop_by_hop_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = ARRAY_LEN(ip6_hop_by_hop_error_strings), - .error_strings = ip6_hop_by_hop_error_strings, - - /* See ip/lookup.h */ - .n_next_nodes = IP_LOOKUP_N_NEXT, - .next_nodes = { - [IP_LOOKUP_NEXT_MISS] = "ip6-miss", - [IP_LOOKUP_NEXT_DROP] = "ip6-drop", - [IP_LOOKUP_NEXT_PUNT] = "ip6-punt", - [IP_LOOKUP_NEXT_LOCAL] = "ip6-local", - [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", - [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", - [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify", - [IP_LOOKUP_NEXT_MAP] = "ip6-map", - [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t", - [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd", - /* Next 3 arcs probably never used */ - [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", - [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", - [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", - }, -}; - -/* The main h-b-h tracer will be invoked, no need to do much here */ typedef struct { u32 next_index; } ip6_add_hop_by_hop_trace_t; @@ -443,7 +268,7 @@ ip6_add_hop_by_hop_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main; + ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main; u32 n_left_from, * from, * to_next; ip_lookup_next_t next_index; u32 processed = 0; @@ -570,7 +395,7 @@ ip6_add_hop_by_hop_node_fn (vlib_main_t * vm, hbh0 = (ip6_hop_by_hop_header_t *)(ip0 + 1); /* $$$ tune, rewrite_length is a multiple of 8 */ - memcpy (hbh0, rewrite, rewrite_length); + clib_memcpy (hbh0, rewrite, rewrite_length); /* Patch the protocol chain, insert the h-b-h (type 0) header */ hbh0->protocol = ip0->protocol; ip0->protocol = 0; @@ -578,7 +403,7 @@ ip6_add_hop_by_hop_node_fn (vlib_main_t * vm, ip0->payload_length = clib_host_to_net_u16 (new_l0); /* Populate the (first) h-b-h list elt */ - next0 = IP_LOOKUP_NEXT_HOP_BY_HOP; + next0 = IP6_HBYH_IOAM_INPUT_NEXT_IP6_LOOKUP; if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) && (b0->flags & VLIB_BUFFER_IS_TRACED))) @@ -615,25 +440,15 @@ VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = { .error_strings = ip6_add_hop_by_hop_error_strings, /* See ip/lookup.h */ - .n_next_nodes = IP_LOOKUP_N_NEXT, + .n_next_nodes = IP6_HBYH_IOAM_INPUT_N_NEXT, .next_nodes = { - [IP_LOOKUP_NEXT_MISS] = "ip6-miss", - [IP_LOOKUP_NEXT_DROP] = "ip6-drop", - [IP_LOOKUP_NEXT_PUNT] = "ip6-punt", - [IP_LOOKUP_NEXT_LOCAL] = "ip6-local", - [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", - [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", - [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify", - [IP_LOOKUP_NEXT_MAP] = "ip6-map", - [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t", - [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd", - /* Next 3 arcs probably never used */ - [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", - [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", - [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", +#define _(s,n) [IP6_HBYH_IOAM_INPUT_NEXT_##s] = n, + foreach_ip6_hbyh_ioam_input_next +#undef _ }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_add_hop_by_hop_node, ip6_add_hop_by_hop_node_fn) /* The main h-b-h tracer was already invoked, no need to do much here */ typedef struct { @@ -652,11 +467,44 @@ static u8 * format_ip6_pop_hop_by_hop_trace (u8 * s, va_list * args) return s; } +int +ip6_hbh_pop_register_option (u8 option, + int options(ip6_header_t *ip, ip6_hop_by_hop_option_t *opt)) +{ + ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main; + + ASSERT (option < ARRAY_LEN (hm->pop_options)); + + /* Already registered */ + if (hm->pop_options[option]) + return (-1); + + hm->pop_options[option] = options; + + return (0); +} + +int +ip6_hbh_pop_unregister_option (u8 option) +{ + ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main; + + ASSERT (option < ARRAY_LEN (hm->pop_options)); + + /* Not registered */ + if (!hm->pop_options[option]) + return (-1); + + hm->pop_options[option] = NULL; + return (0); +} + vlib_node_registration_t ip6_pop_hop_by_hop_node; #define foreach_ip6_pop_hop_by_hop_error \ _(PROCESSED, "Pkts w/ removed ip6 hop-by-hop options") \ -_(NO_HOHO, "Pkts w/ no ip6 hop-by-hop options") +_(NO_HOHO, "Pkts w/ no ip6 hop-by-hop options") \ +_(OPTION_FAILED, "ip6 pop hop-by-hop failed to process") typedef enum { #define _(sym,str) IP6_POP_HOP_BY_HOP_ERROR_##sym, @@ -671,23 +519,56 @@ static char * ip6_pop_hop_by_hop_error_strings[] = { #undef _ }; +static inline void ioam_pop_hop_by_hop_processing (vlib_main_t * vm, + ip6_header_t *ip0, + ip6_hop_by_hop_header_t *hbh0) +{ + ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main; + ip6_hop_by_hop_option_t *opt0, *limit0; + u8 type0; + + if (!hbh0 || !ip0) return; + + opt0 = (ip6_hop_by_hop_option_t *)(hbh0+1); + limit0 = (ip6_hop_by_hop_option_t *) + ((u8 *)hbh0 + ((hbh0->length+1)<<3)); + + /* Scan the set of h-b-h options, process ones that we understand */ + while (opt0 < limit0) + { + type0 = opt0->type; + switch (type0) + { + case 0: /* Pad1 */ + opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1; + continue; + case 1: /* PadN */ + break; + default: + if (hm->pop_options[type0]) + { + if ((*hm->pop_options[type0])(ip0, opt0) < 0) + { + vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index, + IP6_POP_HOP_BY_HOP_ERROR_OPTION_FAILED, 1); + } + } + } + opt0 = (ip6_hop_by_hop_option_t *) (((u8 *)opt0) + opt0->length + sizeof (ip6_hop_by_hop_option_t)); + } +} + static uword ip6_pop_hop_by_hop_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main; ip6_main_t * im = &ip6_main; ip_lookup_main_t * lm = &im->lookup_main; u32 n_left_from, * from, * to_next; ip_lookup_next_t next_index; u32 processed = 0; u32 no_header = 0; - u32 (*ioam_end_of_path_cb) (vlib_main_t *, vlib_node_runtime_t *, - vlib_buffer_t *, ip6_header_t *, - ip_adjacency_t *); - - ioam_end_of_path_cb = hm->ioam_end_of_path_cb; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -799,37 +680,28 @@ ip6_pop_hop_by_hop_node_fn (vlib_main_t * vm, adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; adj0 = ip_get_adjacency (lm, adj_index0); + /* Default use the next_index from the adjacency. */ + next0 = adj0->lookup_next_index; + /* Perfectly normal to end up here w/ out h-b-h header */ - if (PREDICT_TRUE (ip0->protocol == 0)) - { - hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1); + hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1); - /* Collect data from trace via callback */ - next0 = ioam_end_of_path_cb ? - ioam_end_of_path_cb (vm, node, b0, ip0, adj0) - : adj0->saved_lookup_next_index; - - - /* Pop the trace data */ - vlib_buffer_advance (b0, (hbh0->length+1)<<3); - new_l0 = clib_net_to_host_u16 (ip0->payload_length) - - ((hbh0->length+1)<<3); - ip0->payload_length = clib_host_to_net_u16 (new_l0); - ip0->protocol = hbh0->protocol; - copy_src0 = (u64 *)ip0; - copy_dst0 = copy_src0 + (hbh0->length+1); - copy_dst0 [4] = copy_src0[4]; - copy_dst0 [3] = copy_src0[3]; - copy_dst0 [2] = copy_src0[2]; - copy_dst0 [1] = copy_src0[1]; - copy_dst0 [0] = copy_src0[0]; - processed++; - } - else - { - next0 = adj0->saved_lookup_next_index; - no_header++; - } + /* TODO:Temporarily doing it here.. do this validation in end_of_path_cb */ + ioam_pop_hop_by_hop_processing(vm, ip0, hbh0); + /* Pop the trace data */ + vlib_buffer_advance (b0, (hbh0->length+1)<<3); + new_l0 = clib_net_to_host_u16 (ip0->payload_length) - + ((hbh0->length+1)<<3); + ip0->payload_length = clib_host_to_net_u16 (new_l0); + ip0->protocol = hbh0->protocol; + copy_src0 = (u64 *)ip0; + copy_dst0 = copy_src0 + (hbh0->length+1); + copy_dst0 [4] = copy_src0[4]; + copy_dst0 [3] = copy_src0[3]; + copy_dst0 [2] = copy_src0[2]; + copy_dst0 [1] = copy_src0[1]; + copy_dst0 [0] = copy_src0[0]; + processed++; if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) && (b0->flags & VLIB_BUFFER_IS_TRACED))) @@ -861,60 +733,60 @@ VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) = { .vector_size = sizeof (u32), .format_trace = format_ip6_pop_hop_by_hop_trace, .type = VLIB_NODE_TYPE_INTERNAL, - + .sibling_of = "ip6-lookup", .n_errors = ARRAY_LEN(ip6_pop_hop_by_hop_error_strings), .error_strings = ip6_pop_hop_by_hop_error_strings, /* See ip/lookup.h */ - .n_next_nodes = IP_LOOKUP_N_NEXT, - .next_nodes = { - [IP_LOOKUP_NEXT_MISS] = "ip6-miss", - [IP_LOOKUP_NEXT_DROP] = "ip6-drop", - [IP_LOOKUP_NEXT_PUNT] = "ip6-punt", - [IP_LOOKUP_NEXT_LOCAL] = "ip6-local", - [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", - [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", - [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify", - [IP_LOOKUP_NEXT_MAP] = "ip6-map", - [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t", - [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd", - /* Next 3 arcs probably never used */ - [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", - [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", - [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", - }, + .n_next_nodes = 0, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_pop_hop_by_hop_node, + ip6_pop_hop_by_hop_node_fn) static clib_error_t * -ip6_hop_by_hop_init (vlib_main_t * vm) +ip6_hop_by_hop_ioam_init (vlib_main_t * vm) { - ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main; + ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main; hm->vlib_main = vm; hm->vnet_main = vnet_get_main(); - - return 0; + hm->unix_time_0 = (u32) time (0); /* Store starting time */ + hm->vlib_time_0 = vlib_time_now (vm); + hm->ioam_flag = IOAM_HBYH_MOD; + hm->trace_tsp = TSP_MICROSECONDS; /* Micro seconds */ + memset(hm->add_options, 0, sizeof(hm->add_options)); + memset(hm->pop_options, 0, sizeof(hm->pop_options)); + memset(hm->options_size, 0, sizeof(hm->options_size)); + + /* + * Register the handlers + * XXX: This should be done dynamically based on OAM feature being enabled or not. + */ + if (ip6_hbh_register_option(HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST, ip6_hbh_ioam_trace_data_list_handler, + ip6_hbh_ioam_trace_data_list_trace_handler) < 0) + return (clib_error_create("registration of HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST failed")); + + return (0); } -VLIB_INIT_FUNCTION (ip6_hop_by_hop_init); +VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_init); -int ip6_ioam_set_rewrite (u8 **rwp, u32 trace_option_elts, int has_pow_option) +int ip6_ioam_set_rewrite (u8 **rwp, u32 trace_type, u32 trace_option_elts, + int has_pot_option, int has_ppc_option) { + ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main; u8 *rewrite = 0; u32 size, rnd_size; ip6_hop_by_hop_header_t *hbh; ioam_trace_option_t * trace_option; - ioam_pow_option_t * pow_option; u8 *current; + u8 trace_data_size = 0; vec_free (*rwp); - if (trace_option_elts == 0 && has_pow_option == 0) - return 0; - - if (trace_option_elts * sizeof (ioam_data_list_element_t) > 254) - return VNET_API_ERROR_INVALID_VALUE; + if (trace_option_elts == 0 && has_pot_option == 0) + return -1; /* Work out how much space we need */ size = sizeof (ip6_hop_by_hop_header_t); @@ -922,12 +794,20 @@ int ip6_ioam_set_rewrite (u8 **rwp, u32 trace_option_elts, int has_pow_option) if (trace_option_elts) { size += sizeof (ip6_hop_by_hop_option_t); - size += trace_option_elts * (sizeof (ioam_data_list_element_t)); + + trace_data_size = fetch_trace_data_size(trace_type); + if (trace_data_size == 0) + return VNET_API_ERROR_INVALID_VALUE; + + if (trace_option_elts * trace_data_size > 254) + return VNET_API_ERROR_INVALID_VALUE; + + size += trace_option_elts * trace_data_size; } - if (has_pow_option) + if (has_pot_option && hm->add_options[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] != 0) { size += sizeof (ip6_hop_by_hop_option_t); - size += sizeof (ioam_pow_option_t); + size += hm->options_size[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT]; } /* Round to a multiple of 8 octets */ @@ -944,71 +824,203 @@ int ip6_ioam_set_rewrite (u8 **rwp, u32 trace_option_elts, int has_pow_option) if (trace_option_elts) { trace_option = (ioam_trace_option_t *)current; - trace_option->hdr.type = HBH_OPTION_TYPE_IOAM_DATA_LIST + trace_option->hdr.type = HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST | HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE; - trace_option->hdr.length = 1 /*data_list_elts_left */ + - trace_option_elts * sizeof (ioam_data_list_element_t); + trace_option->hdr.length = + 2 /*ioam_trace_type,data_list_elts_left */ + + trace_option_elts * trace_data_size; + trace_option->ioam_trace_type = trace_type & TRACE_TYPE_MASK; trace_option->data_list_elts_left = trace_option_elts; current += sizeof (ioam_trace_option_t) + - trace_option_elts * sizeof (ioam_data_list_element_t); + trace_option_elts * trace_data_size; } - if (has_pow_option) + if (has_pot_option && hm->add_options[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] != 0) { - pow_option = (ioam_pow_option_t *)current; - pow_option->hdr.type = HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK - | HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE; - pow_option->hdr.length = sizeof (ioam_pow_option_t) - - sizeof (ip6_hop_by_hop_option_t); - current += sizeof (ioam_pow_option_t); + if (0 == hm->add_options[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT](current, + hm->options_size[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT])) + current += sizeof (hm->options_size[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT]); } *rwp = rewrite; return 0; } +clib_error_t * +clear_ioam_rewrite_fn(void) +{ + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + + vec_free(hm->rewrite); + hm->rewrite = 0; + hm->node_id = 0; + hm->app_data = 0; + hm->trace_type = 0; + hm->trace_option_elts = 0; + hm->has_pot_option = 0; + hm->has_ppc_option = 0; + hm->trace_tsp = TSP_MICROSECONDS; + + return 0; +} + +clib_error_t * clear_ioam_rewrite_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + return(clear_ioam_rewrite_fn()); +} + +VLIB_CLI_COMMAND (ip6_clear_ioam_trace_cmd, static) = { + .path = "clear ioam rewrite", + .short_help = "clear ioam rewrite", + .function = clear_ioam_rewrite_command_fn, +}; + +clib_error_t * +ip6_ioam_trace_profile_set(u32 trace_option_elts, u32 trace_type, u32 node_id, + u32 app_data, int has_pot_option, u32 trace_tsp, + int has_ppc_option) +{ + int rv; + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + rv = ip6_ioam_set_rewrite (&hm->rewrite, trace_type, trace_option_elts, + has_pot_option, has_ppc_option); + + switch (rv) + { + case 0: + hm->node_id = node_id; + hm->app_data = app_data; + hm->trace_type = trace_type; + hm->trace_option_elts = trace_option_elts; + hm->has_pot_option = has_pot_option; + hm->has_ppc_option = has_ppc_option; + hm->trace_tsp = trace_tsp; + break; + + default: + return clib_error_return_code(0, rv, 0, "ip6_ioam_set_rewrite returned %d", rv); + } + + return 0; +} + + static clib_error_t * -ip6_ioam_set_rewrite_command_fn (vlib_main_t * vm, +ip6_set_ioam_rewrite_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main; u32 trace_option_elts = 0; - int has_pow_option = 0; - int rv; + u32 trace_type = 0, node_id = 0; + u32 app_data = 0, trace_tsp = TSP_MICROSECONDS; + int has_pot_option = 0; + int has_ppc_option = 0; + clib_error_t * rv = 0; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "trace-elts %d", &trace_option_elts)) - ; - else if (unformat (input, "pow")) - has_pow_option = 1; + if (unformat (input, "trace-type 0x%x trace-elts %d " + "trace-tsp %d node-id 0x%x app-data 0x%x", + &trace_type, &trace_option_elts, &trace_tsp, + &node_id, &app_data)) + ; + else if (unformat (input, "pot")) + has_pot_option = 1; + else if (unformat (input, "ppc encap")) + has_ppc_option = PPC_ENCAP; + else if (unformat (input, "ppc decap")) + has_ppc_option = PPC_DECAP; + else if (unformat (input, "ppc none")) + has_ppc_option = PPC_NONE; else break; } - rv = ip6_ioam_set_rewrite (&hm->rewrite, trace_option_elts, has_pow_option); - - switch (rv) - { - case 0: - break; - default: - return clib_error_return (0, "ip6_ioam_set_rewrite returned %d", rv); - } + + rv = ip6_ioam_trace_profile_set(trace_option_elts, trace_type, node_id, + app_data, has_pot_option, trace_tsp, has_ppc_option); + + return rv; +} + + +VLIB_CLI_COMMAND (ip6_set_ioam_rewrite_cmd, static) = { + .path = "set ioam rewrite", + .short_help = "set ioam rewrite trace-type <0x1f|0x3|0x9|0x11|0x19> trace-elts trace-tsp <0|1|2|3> node-id app-data [pot] [ppc ]", + .function = ip6_set_ioam_rewrite_command_fn, +}; +static clib_error_t * +ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + u8 *s = 0; + + + if (!is_zero_ip6_address(&hm->adj)) + { + s = format(s, " REWRITE FLOW CONFIGS - \n"); + s = format(s, " Destination Address : %U\n", + format_ip6_address, &hm->adj, sizeof(ip6_address_t)); + s = format(s, " Flow operation : %d (%s)\n", hm->ioam_flag, + (hm->ioam_flag == IOAM_HBYH_ADD) ? "Add" : + ((hm->ioam_flag == IOAM_HBYH_MOD) ? "Mod" : "Pop")); + } + else + { + s = format(s, " REWRITE FLOW CONFIGS - Not configured\n"); + } + + if (hm->trace_option_elts) + { + s = format(s, " HOP BY HOP OPTIONS - TRACE CONFIG - \n"); + s = format(s, " Trace Type : 0x%x (%d)\n", + hm->trace_type, hm->trace_type); + s = format(s, " Trace timestamp precision : %d (%s)\n", hm->trace_tsp, + (hm->trace_tsp == TSP_SECONDS) ? "Seconds" : + ((hm->trace_tsp == TSP_MILLISECONDS) ? "Milliseconds" : + (((hm->trace_tsp == TSP_MICROSECONDS) ? "Microseconds" : "Nanoseconds")))); + s = format(s, " Num of trace nodes : %d\n", + hm->trace_option_elts); + s = format(s, " Node-id : 0x%x (%d)\n", + hm->node_id, hm->node_id); + s = format(s, " App Data : 0x%x (%d)\n", + hm->app_data, hm->app_data); + } + else + { + s = format(s, " HOP BY HOP OPTIONS - TRACE CONFIG - Not configured\n"); + } + + s = format(s, " POT OPTION - %d (%s)\n", + hm->has_pot_option, (hm->has_pot_option?"Enabled":"Disabled")); + if (hm->has_pot_option) + s = format(s, "Try 'show ioam pot and show pot profile' for more information\n"); + + s = format(s, " EDGE TO EDGE - PPC OPTION - %d (%s)\n", + hm->has_ppc_option, ppc_state[hm->has_ppc_option]); + if (hm->has_ppc_option) + s = format(s, "Try 'show ioam ppc' for more information\n"); + + vlib_cli_output(vm, "%v", s); + vec_free(s); return 0; } -VLIB_CLI_COMMAND (ip6_ioam_set_rewrite_cmd, static) = { - .path = "ioam set rewrite", - .short_help = "ioam set rewrite [trace-elts ] [pow]", - .function = ip6_ioam_set_rewrite_command_fn, +VLIB_CLI_COMMAND (ip6_show_ioam_run_cmd, static) = { + .path = "show ioam summary", + .short_help = "Summary of IOAM configuration", + .function = ip6_show_ioam_summary_cmd_fn, }; - + int ip6_ioam_set_destination (ip6_address_t *addr, u32 mask_width, u32 vrf_id, int is_add, int is_pop, int is_none) { ip6_main_t * im = &ip6_main; + ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main; ip_lookup_main_t * lm = &im->lookup_main; ip_adjacency_t * adj; u32 fib_index; @@ -1067,16 +1079,19 @@ int ip6_ioam_set_destination (ip6_address_t *addr, u32 mask_width, u32 vrf_id, adj->saved_lookup_next_index = adj->lookup_next_index; if (is_add) - adj->lookup_next_index = IP_LOOKUP_NEXT_ADD_HOP_BY_HOP; + adj->lookup_next_index = IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP; if (is_pop) - adj->lookup_next_index = IP_LOOKUP_NEXT_POP_HOP_BY_HOP; + adj->lookup_next_index = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP; + hm->adj = *addr; + hm->ioam_flag = (is_add ? IOAM_HBYH_ADD : + (is_pop ? IOAM_HBYH_POP : IOAM_HBYH_MOD)); return 0; } static clib_error_t * -ip6_ioam_set_destination_command_fn (vlib_main_t * vm, +ip6_set_ioam_destination_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { @@ -1124,16 +1139,16 @@ ip6_ioam_set_destination_command_fn (vlib_main_t * vm, return 0; } -VLIB_CLI_COMMAND (ip6_ioam_set_destination_cmd, static) = { - .path = "ioam set destination", - .short_help = "ioam set destination / add | pop | none", - .function = ip6_ioam_set_destination_command_fn, +VLIB_CLI_COMMAND (ip6_set_ioam_destination_cmd, static) = { + .path = "set ioam destination", + .short_help = "set ioam destination / add | pop | none", + .function = ip6_set_ioam_destination_command_fn, }; - + + void vnet_register_ioam_end_of_path_callback (void *cb) { - ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main; + ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main; hm->ioam_end_of_path_cb = cb; } -