2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
15 #include <vlib/vlib.h>
16 #include <vnet/vnet.h>
17 #include <vnet/pg/pg.h>
18 #include <vppinfra/error.h>
20 #include <vnet/ip/ip.h>
22 #include <vppinfra/hash.h>
23 #include <vppinfra/error.h>
24 #include <vppinfra/elog.h>
26 #include <vnet/ip/ip6_hop_by_hop.h>
28 /* Timestamp precision multipliers for seconds, milliseconds, microseconds
29 * and nanoseconds respectively.
static f64 trace_tsp_mul[4] = {1, 1e3, 1e6, 1e9};
/* Human-readable names for the edge-to-edge PPC states, indexed by
 * hm->has_ppc_option (PPC_NONE / PPC_ENCAP / PPC_DECAP); used by the
 * "show ioam summary" CLI output. */
char *ppc_state[] = {"None", "Encap", "Decap"};
/* Single global instance holding all iOAM hop-by-hop state */
ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
38 * ip6 hop-by-hop option handling. We push pkts with h-b-h options to
39 * ip6_hop_by_hop_node_fn from ip6-lookup at a cost of ~2 clocks/pkt in
42 * We parse through the h-b-h option TLVs, specifically looking for
43 * HBH_OPTION_TYPE_IOAM_DATA_LIST. [Someone needs to get bananas from
44 * IANA, aka to actually allocate the option TLV codes.]
46 * If we find the indicated option type, and we have remaining list
47 * elements in the trace list, allocate and populate the trace list
50 * At the ingress edge: punch in the h-b-h rewrite, then visit the
51 * standard h-b-h option handler. We have to be careful in the standard
52 * h-b-h handler, to avoid looping until we run out of rewrite space.
53 * Ask me how I know that.
56 * decide on egress point "pop and count" scheme
57 * time stamp handling: usec since the top of the hour?
58 * configure the node id
59 * trace list application data support
60 * cons up analysis / steering plug-in(s)
61 * add configuration binary APIs, vpp_api_test_support, yang models and
63 * perf tune: dual loop, replace memcpy w/ N x 8-byte load/stores
68 * primary h-b-h handler trace support
69 * We work pretty hard on the problem for obvious reasons
74 u32 timestamp_msbs; /* Store the top set of bits of timestamp */
76 } ip6_hop_by_hop_trace_t;
84 fetch_trace_data_size(u8 trace_type)
86 u8 trace_data_size = 0;
88 if (trace_type == TRACE_TYPE_IF_TS_APP)
89 trace_data_size = sizeof(ioam_trace_if_ts_app_t);
90 else if(trace_type == TRACE_TYPE_IF)
91 trace_data_size = sizeof(ioam_trace_if_t);
92 else if(trace_type == TRACE_TYPE_TS)
93 trace_data_size = sizeof(ioam_trace_ts_t);
94 else if(trace_type == TRACE_TYPE_APP)
95 trace_data_size = sizeof(ioam_trace_app_t);
96 else if(trace_type == TRACE_TYPE_TS_APP)
97 trace_data_size = sizeof(ioam_trace_ts_app_t);
99 return trace_data_size;
102 static u8 * format_ioam_data_list_element (u8 * s, va_list * args)
104 u32 *elt = va_arg (*args, u32 *);
105 u8 *trace_type_p = va_arg (*args, u8 *);
106 u8 trace_type = *trace_type_p;
109 if (trace_type & BIT_TTL_NODEID)
111 u32 ttl_node_id_host_byte_order = clib_net_to_host_u32 (*elt);
112 s = format (s, "ttl 0x%x node id 0x%x ",
113 ttl_node_id_host_byte_order>>24,
114 ttl_node_id_host_byte_order & 0x00FFFFFF);
119 if (trace_type & BIT_ING_INTERFACE && trace_type & BIT_ING_INTERFACE)
121 u32 ingress_host_byte_order = clib_net_to_host_u32(*elt);
122 s = format (s, "ingress 0x%x egress 0x%x ",
123 ingress_host_byte_order >> 16,
124 ingress_host_byte_order &0xFFFF);
128 if (trace_type & BIT_TIMESTAMP)
130 u32 ts_in_host_byte_order = clib_net_to_host_u32 (*elt);
131 s = format (s, "ts 0x%x \n", ts_in_host_byte_order);
135 if (trace_type & BIT_APPDATA)
137 u32 appdata_in_host_byte_order = clib_net_to_host_u32 (*elt);
138 s = format (s, "app 0x%x ", appdata_in_host_byte_order);
145 static u8 * format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
147 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
148 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
149 ip6_hop_by_hop_trace_t * t = va_arg (*args, ip6_hop_by_hop_trace_t *);
150 ip6_hop_by_hop_header_t *hbh0;
151 ip6_hop_by_hop_option_t *opt0, *limit0;
152 ioam_trace_option_t * trace0;
153 u8 trace_data_size_in_words = 0;
158 hbh0 = (ip6_hop_by_hop_header_t *)t->option_data;
160 s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d\n",
161 t->next_index, (hbh0->length+1)<<3, t->trace_len);
163 opt0 = (ip6_hop_by_hop_option_t *) (hbh0+1);
164 limit0 = (ip6_hop_by_hop_option_t *) ((u8 *)hbh0) + t->trace_len;
166 while (opt0 < limit0)
168 type0 = opt0->type & HBH_OPTION_TYPE_MASK;
172 case HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST:
173 trace0 = (ioam_trace_option_t *)opt0;
174 s = format (s, " Trace Type 0x%x , %d elts left ts msb(s) 0x%x\n",
175 trace0->ioam_trace_type, trace0->data_list_elts_left,
177 trace_data_size_in_words =
178 fetch_trace_data_size(trace0->ioam_trace_type)/4;
179 elt0 = &trace0->elts[0];
181 ((u8 *)(&trace0->elts[0]) + trace0->hdr.length - 2
182 /* -2 accounts for ioam_trace_type,elts_left */))
184 s = format (s, " [%d] %U\n",elt_index,
185 format_ioam_data_list_element,
186 elt0, &trace0->ioam_trace_type);
188 elt0 += trace_data_size_in_words;
191 opt0 = (ip6_hop_by_hop_option_t *)
192 (((u8 *)opt0) + opt0->length
193 + sizeof (ip6_hop_by_hop_option_t));
196 case HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK:
197 s = format (s, " POW opt present\n");
198 opt0 = (ip6_hop_by_hop_option_t *)
199 (((u8 *)opt0) + sizeof (ioam_pow_option_t));
202 case 0: /* Pad, just stop */
203 opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1;
207 s = format (s, "Unknown %d", type0);
208 opt0 = (ip6_hop_by_hop_option_t *)
209 (((u8 *)opt0) + opt0->length
210 + sizeof (ip6_hop_by_hop_option_t));
217 vlib_node_registration_t ip6_hop_by_hop_node;
219 #define foreach_ip6_hop_by_hop_error \
220 _(PROCESSED, "Pkts with ip6 hop-by-hop options") \
221 _(UNKNOWN_OPTION, "Unknown ip6 hop-by-hop options")
224 #define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
225 foreach_ip6_hop_by_hop_error
227 IP6_HOP_BY_HOP_N_ERROR,
228 } ip6_hop_by_hop_error_t;
230 static char * ip6_hop_by_hop_error_strings[] = {
231 #define _(sym,string) string,
232 foreach_ip6_hop_by_hop_error
/*
 * ip6-hop-by-hop node: for each packet, walk the hop-by-hop option TLVs
 * and, when an iOAM trace option with elements left is found, record
 * ttl/node-id, ingress/egress interfaces, timestamp and app data in the
 * next free trace-list element.  Packets then continue to the lookup
 * next saved in the adjacency (careful: we may have been pushed here by
 * the h-b-h imposition node itself).
 */
ip6_hop_by_hop_node_fn (vlib_main_t * vm,
                        vlib_node_runtime_t * node,
                        vlib_frame_t * frame)
  ip6_main_t * im = &ip6_main;
  ip_lookup_main_t * lm = &im->lookup_main;
  ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
  u32 n_left_from, * from, * to_next;
  ip_lookup_next_t next_index;
  /* per-frame tallies, folded into node counters at the end */
  u32 processed = 0, unknown_opts = 0;
  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  while (n_left_from > 0)
      vlib_get_next_frame (vm, node, next_index,
                           to_next, n_left_to_next);
/* NOTE(review): the dual loop below is compiled out; it still contains
 * a copy/paste bug (ip1 reads b0, not b1) that must be fixed before it
 * is ever enabled. */
#if 0 /* $$$ DUAL-LOOP ME */
      while (n_left_from >= 4 && n_left_to_next >= 2)
          u32 next0 = IP6_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
          u32 next1 = IP6_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
          u32 sw_if_index0, sw_if_index1;
          ethernet_header_t *en0, *en1;
          vlib_buffer_t * b0, * b1;
          /* Prefetch next iteration. */
          vlib_buffer_t * p2, * p3;
          p2 = vlib_get_buffer (vm, from[2]);
          p3 = vlib_get_buffer (vm, from[3]);
          vlib_prefetch_buffer_header (p2, LOAD);
          vlib_prefetch_buffer_header (p3, LOAD);
          CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
          CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
          /* speculatively enqueue b0 and b1 to the current next frame */
          to_next[0] = bi0 = from[0];
          to_next[1] = bi1 = from[1];
          b0 = vlib_get_buffer (vm, bi0);
          b1 = vlib_get_buffer (vm, bi1);
          /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
          ASSERT (b0->current_data == 0);
          ASSERT (b1->current_data == 0);
          ip0 = vlib_buffer_get_current (b0);
          ip1 = vlib_buffer_get_current (b0);
          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
          sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
          /* $$$$$ End of processing 2 x packets $$$$$ */
          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
              if (b0->flags & VLIB_BUFFER_IS_TRACED)
                  ip6_hop_by_hop_trace_t *t =
                     vlib_add_trace (vm, node, b0, sizeof (*t));
                  t->sw_if_index = sw_if_index0;
                  t->next_index = next0;
              if (b1->flags & VLIB_BUFFER_IS_TRACED)
                  ip6_hop_by_hop_trace_t *t =
                     vlib_add_trace (vm, node, b1, sizeof (*t));
                  t->sw_if_index = sw_if_index1;
                  t->next_index = next1;
          /* verify speculative enqueues, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, bi1, next0, next1);
      while (n_left_from > 0 && n_left_to_next > 0)
          ip_adjacency_t * adj0;
          ip6_hop_by_hop_header_t *hbh0;
          ip6_hop_by_hop_option_t *opt0, *limit0;
          ioam_trace_option_t * trace0;
          /* speculatively enqueue b0 to the current next frame */
          b0 = vlib_get_buffer (vm, bi0);
          ip0 = vlib_buffer_get_current (b0);
          /* TX adjacency tells us where the packet goes after this node */
          adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
          adj0 = ip_get_adjacency (lm, adj_index0);
          hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
          opt0 = (ip6_hop_by_hop_option_t *)(hbh0+1);
          /* hbh0->length is in 8-octet units, excluding the first 8 */
          limit0 = (ip6_hop_by_hop_option_t *)
                   ((u8 *)hbh0 + ((hbh0->length+1)<<3));
          /* Scan the set of h-b-h options, process ones that we understand */
          while (opt0 < limit0)
              type0 = opt0->type & HBH_OPTION_TYPE_MASK;
              case HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST:
                trace0 = (ioam_trace_option_t *)opt0;
                /* elements are consumed highest-index-first */
                if (PREDICT_TRUE (trace0->data_list_elts_left))
                    trace0->data_list_elts_left--;
                    /* fetch_trace_data_size returns in bytes. Convert it to 4-bytes
                     * to skip to this node's location.
                     */
                    elt_index = trace0->data_list_elts_left *
                                fetch_trace_data_size(trace0->ioam_trace_type)/4;
                    elt0 = &trace0->elts[elt_index];
                    if (trace0->ioam_trace_type & BIT_TTL_NODEID)
                        clib_host_to_net_u32 ((ip0->hop_limit<<24)
                    if (trace0->ioam_trace_type & BIT_ING_INTERFACE)
                        /* RX sw_if_index in the high 16 bits, TX (adjacency) in the low */
                        (vnet_buffer(b0)->sw_if_index[VLIB_RX]&0xFFFF) << 16 | (adj0->rewrite_header.sw_if_index & 0xFFFF);
                        *elt0 = clib_host_to_net_u32(*elt0);
                    if (trace0->ioam_trace_type & BIT_TIMESTAMP)
                        /* Send least significant 32 bits */
                        f64 time_f64 = (f64)(((f64)hm->unix_time_0) +
                          (vlib_time_now(hm->vlib_main) - hm->vlib_time_0));
                          /* scale seconds by the configured precision multiplier */
                          time_f64 * trace_tsp_mul[hm->trace_tsp];
                        *elt0 = clib_host_to_net_u32(time_u64.as_u32[0]);
                    if (trace0->ioam_trace_type & BIT_APPDATA)
                        /* $$$ set elt0->app_data */
                        *elt0 = clib_host_to_net_u32(hm->app_data);
                opt0 = (ip6_hop_by_hop_option_t *)
                       (((u8 *)opt0) + opt0->length
                        + sizeof (ip6_hop_by_hop_option_t));
              case HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK:
                opt0 = (ip6_hop_by_hop_option_t *)
                       (((u8 *)opt0) + sizeof (ioam_pow_option_t));
                /* NOTE(review): cast binds first below, so "+ 1" advances
                 * sizeof(option) bytes, not the single byte a Pad1 option
                 * occupies -- same precedence issue as in the formatter. */
                opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1;
                opt0 = (ip6_hop_by_hop_option_t *)
                       (((u8 *)opt0) + opt0->length
                        + sizeof (ip6_hop_by_hop_option_t));
          /*
           * Since we push pkts here from the h-b-h header imposition code
           * we have to be careful what we wish for...
           */
          next0 = adj0->lookup_next_index != IP_LOOKUP_NEXT_ADD_HOP_BY_HOP ?
                  adj0->lookup_next_index : adj0->saved_lookup_next_index;
          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
              ip6_hop_by_hop_trace_t *t =
                 vlib_add_trace (vm, node, b0, sizeof (*t));
              u32 trace_len = (hbh0->length+1)<<3;
              t->next_index = next0;
              /* Capture the h-b-h option verbatim */
              trace_len = trace_len < ARRAY_LEN(t->option_data) ?
                          trace_len : ARRAY_LEN(t->option_data);
              t->trace_len = trace_len;
              t->timestamp_msbs = time_u64.as_u32[1];
              memcpy (t->option_data, hbh0, trace_len);
          /* verify speculative enqueue, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
  if (PREDICT_FALSE(unknown_opts > 0)) {
      vlib_node_increment_counter (vm, ip6_hop_by_hop_node.index,
                                   IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION, unknown_opts);
  vlib_node_increment_counter (vm, ip6_hop_by_hop_node.index,
                               IP6_HOP_BY_HOP_ERROR_PROCESSED, processed);
  return frame->n_vectors;
488 VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = {
489 .function = ip6_hop_by_hop_node_fn,
490 .name = "ip6-hop-by-hop",
491 .vector_size = sizeof (u32),
492 .format_trace = format_ip6_hop_by_hop_trace,
493 .type = VLIB_NODE_TYPE_INTERNAL,
495 .n_errors = ARRAY_LEN(ip6_hop_by_hop_error_strings),
496 .error_strings = ip6_hop_by_hop_error_strings,
498 /* See ip/lookup.h */
499 .n_next_nodes = IP_LOOKUP_N_NEXT,
500 .next_nodes = IP6_LOOKUP_NEXT_NODES,
503 /* The main h-b-h tracer will be invoked, no need to do much here */
506 } ip6_add_hop_by_hop_trace_t;
508 /* packet trace format function */
/* Packet-trace formatter for ip6-add-hop-by-hop: the main h-b-h tracer
 * does the heavy lifting, so this only prints the chosen next index. */
static u8 * format_ip6_add_hop_by_hop_trace (u8 * s, va_list * args)
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  ip6_add_hop_by_hop_trace_t * t = va_arg (*args,
                                           ip6_add_hop_by_hop_trace_t *);
  s = format (s, "IP6_ADD_HOP_BY_HOP: next index %d",
521 vlib_node_registration_t ip6_add_hop_by_hop_node;
523 #define foreach_ip6_add_hop_by_hop_error \
524 _(PROCESSED, "Pkts w/ added ip6 hop-by-hop options")
527 #define _(sym,str) IP6_ADD_HOP_BY_HOP_ERROR_##sym,
528 foreach_ip6_add_hop_by_hop_error
530 IP6_ADD_HOP_BY_HOP_N_ERROR,
531 } ip6_add_hop_by_hop_error_t;
533 static char * ip6_add_hop_by_hop_error_strings[] = {
534 #define _(sym,string) string,
535 foreach_ip6_add_hop_by_hop_error
/*
 * ip6-add-hop-by-hop node (ingress edge): make room in front of the
 * payload, slide the 40-byte ip6 header left by the rewrite length,
 * copy in the pre-built h-b-h rewrite string, patch the protocol chain
 * and payload length, then send the packet to ip6-hop-by-hop so the
 * first trace element gets populated.
 */
ip6_add_hop_by_hop_node_fn (vlib_main_t * vm,
                            vlib_node_runtime_t * node,
                            vlib_frame_t * frame)
  ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
  u32 n_left_from, * from, * to_next;
  ip_lookup_next_t next_index;
  /* rewrite string built by ip6_ioam_set_rewrite; length is a multiple of 8 */
  u8 * rewrite = hm->rewrite;
  u32 rewrite_length = vec_len (rewrite);
  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  while (n_left_from > 0)
      vlib_get_next_frame (vm, node, next_index,
                           to_next, n_left_to_next);
      /* NOTE(review): the dual loop below appears to be the compiled-out
       * template (its "#if 0" marker is not visible in this chunk); it
       * contains a copy/paste bug -- ip1 reads b0, not b1. */
      while (n_left_from >= 4 && n_left_to_next >= 2)
          u32 next0 = IP6_ADD_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
          u32 next1 = IP6_ADD_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
          u32 sw_if_index0, sw_if_index1;
          ethernet_header_t *en0, *en1;
          vlib_buffer_t * b0, * b1;
          /* Prefetch next iteration. */
          vlib_buffer_t * p2, * p3;
          p2 = vlib_get_buffer (vm, from[2]);
          p3 = vlib_get_buffer (vm, from[3]);
          vlib_prefetch_buffer_header (p2, LOAD);
          vlib_prefetch_buffer_header (p3, LOAD);
          CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
          CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
          /* speculatively enqueue b0 and b1 to the current next frame */
          to_next[0] = bi0 = from[0];
          to_next[1] = bi1 = from[1];
          b0 = vlib_get_buffer (vm, bi0);
          b1 = vlib_get_buffer (vm, bi1);
          /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
          ASSERT (b0->current_data == 0);
          ASSERT (b1->current_data == 0);
          ip0 = vlib_buffer_get_current (b0);
          ip1 = vlib_buffer_get_current (b0);
          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
          sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
          /* $$$$$ End of processing 2 x packets $$$$$ */
          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
              if (b0->flags & VLIB_BUFFER_IS_TRACED)
                  ip6_add_hop_by_hop_trace_t *t =
                     vlib_add_trace (vm, node, b0, sizeof (*t));
                  t->sw_if_index = sw_if_index0;
                  t->next_index = next0;
              if (b1->flags & VLIB_BUFFER_IS_TRACED)
                  ip6_add_hop_by_hop_trace_t *t =
                     vlib_add_trace (vm, node, b1, sizeof (*t));
                  t->sw_if_index = sw_if_index1;
                  t->next_index = next1;
          /* verify speculative enqueues, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, bi1, next0, next1);
      while (n_left_from > 0 && n_left_to_next > 0)
          ip6_hop_by_hop_header_t * hbh0;
          u64 * copy_src0, * copy_dst0;
          /* speculatively enqueue b0 to the current next frame */
          b0 = vlib_get_buffer (vm, bi0);
          ip0 = vlib_buffer_get_current (b0);
          /* Copy the ip header left by the required amount */
          copy_dst0 = (u64 *)(((u8 *)ip0) - rewrite_length);
          copy_src0 = (u64 *) ip0;
          /* 5 x u64 = 40 bytes = sizeof(ip6_header_t) */
          copy_dst0 [0] = copy_src0 [0];
          copy_dst0 [1] = copy_src0 [1];
          copy_dst0 [2] = copy_src0 [2];
          copy_dst0 [3] = copy_src0 [3];
          copy_dst0 [4] = copy_src0 [4];
          vlib_buffer_advance (b0, - (word)rewrite_length);
          ip0 = vlib_buffer_get_current (b0);
          hbh0 = (ip6_hop_by_hop_header_t *)(ip0 + 1);
          /* $$$ tune, rewrite_length is a multiple of 8 */
          memcpy (hbh0, rewrite, rewrite_length);
          /* Patch the protocol chain, insert the h-b-h (type 0) header */
          hbh0->protocol = ip0->protocol;
          new_l0 = clib_net_to_host_u16 (ip0->payload_length) + rewrite_length;
          ip0->payload_length = clib_host_to_net_u16 (new_l0);
          /* Populate the (first) h-b-h list elt */
          next0 = IP_LOOKUP_NEXT_HOP_BY_HOP;
          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
              ip6_add_hop_by_hop_trace_t *t =
                 vlib_add_trace (vm, node, b0, sizeof (*t));
              t->next_index = next0;
          /* verify speculative enqueue, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
  vlib_node_increment_counter (vm, ip6_add_hop_by_hop_node.index,
                               IP6_ADD_HOP_BY_HOP_ERROR_PROCESSED, processed);
  return frame->n_vectors;
705 VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = {
706 .function = ip6_add_hop_by_hop_node_fn,
707 .name = "ip6-add-hop-by-hop",
708 .vector_size = sizeof (u32),
709 .format_trace = format_ip6_add_hop_by_hop_trace,
710 .type = VLIB_NODE_TYPE_INTERNAL,
712 .n_errors = ARRAY_LEN(ip6_add_hop_by_hop_error_strings),
713 .error_strings = ip6_add_hop_by_hop_error_strings,
715 /* See ip/lookup.h */
716 .n_next_nodes = IP_LOOKUP_N_NEXT,
717 .next_nodes = IP6_LOOKUP_NEXT_NODES,
721 /* The main h-b-h tracer was already invoked, no need to do much here */
724 } ip6_pop_hop_by_hop_trace_t;
726 /* packet trace format function */
/* Packet-trace formatter for ip6-pop-hop-by-hop: the main h-b-h tracer
 * already ran, so this only prints the chosen next index. */
static u8 * format_ip6_pop_hop_by_hop_trace (u8 * s, va_list * args)
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  ip6_pop_hop_by_hop_trace_t * t = va_arg (*args, ip6_pop_hop_by_hop_trace_t *);
  s = format (s, "IP6_POP_HOP_BY_HOP: next index %d",
738 vlib_node_registration_t ip6_pop_hop_by_hop_node;
740 #define foreach_ip6_pop_hop_by_hop_error \
741 _(PROCESSED, "Pkts w/ removed ip6 hop-by-hop options") \
742 _(NO_HOHO, "Pkts w/ no ip6 hop-by-hop options")
745 #define _(sym,str) IP6_POP_HOP_BY_HOP_ERROR_##sym,
746 foreach_ip6_pop_hop_by_hop_error
748 IP6_POP_HOP_BY_HOP_N_ERROR,
749 } ip6_pop_hop_by_hop_error_t;
751 static char * ip6_pop_hop_by_hop_error_strings[] = {
752 #define _(sym,string) string,
753 foreach_ip6_pop_hop_by_hop_error
/*
 * ip6-pop-hop-by-hop node (egress edge): if the packet carries a
 * hop-by-hop header (ip6 protocol 0), hand the collected trace to the
 * registered end-of-path callback, strip the h-b-h header, fix up
 * payload length / protocol, and slide the ip6 header forward over the
 * removed bytes.  Packets without a h-b-h header just resume the saved
 * lookup next.
 */
ip6_pop_hop_by_hop_node_fn (vlib_main_t * vm,
                            vlib_node_runtime_t * node,
                            vlib_frame_t * frame)
  ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
  ip6_main_t * im = &ip6_main;
  ip_lookup_main_t * lm = &im->lookup_main;
  u32 n_left_from, * from, * to_next;
  ip_lookup_next_t next_index;
  /* optional analysis hook; may be null, see
   * vnet_register_ioam_end_of_path_callback() */
  u32 (*ioam_end_of_path_cb) (vlib_main_t *, vlib_node_runtime_t *,
                              vlib_buffer_t *, ip6_header_t *,
  ioam_end_of_path_cb = hm->ioam_end_of_path_cb;
  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  while (n_left_from > 0)
      vlib_get_next_frame (vm, node, next_index,
                           to_next, n_left_to_next);
      /* NOTE(review): the dual loop below appears to be the compiled-out
       * template (its "#if 0" marker is not visible in this chunk); it
       * contains a copy/paste bug -- ip1 reads b0, not b1. */
      while (n_left_from >= 4 && n_left_to_next >= 2)
          u32 next0 = IP6_POP_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
          u32 next1 = IP6_POP_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
          u32 sw_if_index0, sw_if_index1;
          ethernet_header_t *en0, *en1;
          vlib_buffer_t * b0, * b1;
          /* Prefetch next iteration. */
          vlib_buffer_t * p2, * p3;
          p2 = vlib_get_buffer (vm, from[2]);
          p3 = vlib_get_buffer (vm, from[3]);
          vlib_prefetch_buffer_header (p2, LOAD);
          vlib_prefetch_buffer_header (p3, LOAD);
          CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
          CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
          /* speculatively enqueue b0 and b1 to the current next frame */
          to_next[0] = bi0 = from[0];
          to_next[1] = bi1 = from[1];
          b0 = vlib_get_buffer (vm, bi0);
          b1 = vlib_get_buffer (vm, bi1);
          /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
          ASSERT (b0->current_data == 0);
          ASSERT (b1->current_data == 0);
          ip0 = vlib_buffer_get_current (b0);
          ip1 = vlib_buffer_get_current (b0);
          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
          sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
          /* $$$$$ End of processing 2 x packets $$$$$ */
          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
              if (b0->flags & VLIB_BUFFER_IS_TRACED)
                  ip6_pop_hop_by_hop_trace_t *t =
                     vlib_add_trace (vm, node, b0, sizeof (*t));
                  t->sw_if_index = sw_if_index0;
                  t->next_index = next0;
              if (b1->flags & VLIB_BUFFER_IS_TRACED)
                  ip6_pop_hop_by_hop_trace_t *t =
                     vlib_add_trace (vm, node, b1, sizeof (*t));
                  t->sw_if_index = sw_if_index1;
                  t->next_index = next1;
          /* verify speculative enqueues, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, bi1, next0, next1);
      while (n_left_from > 0 && n_left_to_next > 0)
          ip_adjacency_t * adj0;
          ip6_hop_by_hop_header_t *hbh0;
          u64 * copy_dst0, * copy_src0;
          /* speculatively enqueue b0 to the current next frame */
          b0 = vlib_get_buffer (vm, bi0);
          ip0 = vlib_buffer_get_current (b0);
          adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
          adj0 = ip_get_adjacency (lm, adj_index0);
          /* Perfectly normal to end up here w/ out h-b-h header */
          /* ip6 next-header 0 == hop-by-hop options present */
          if (PREDICT_TRUE (ip0->protocol == 0))
              hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
              /* Collect data from trace via callback */
              next0 = ioam_end_of_path_cb ?
                      ioam_end_of_path_cb (vm, node, b0, ip0, adj0)
                      : adj0->saved_lookup_next_index;
              /* Pop the trace data */
              vlib_buffer_advance (b0, (hbh0->length+1)<<3);
              new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
                       ((hbh0->length+1)<<3);
              ip0->payload_length = clib_host_to_net_u16 (new_l0);
              ip0->protocol = hbh0->protocol;
              /* Slide the 40-byte ip6 header forward over the popped
               * h-b-h bytes; descending copy order keeps the overlapping
               * move safe (dst > src). */
              copy_src0 = (u64 *)ip0;
              copy_dst0 = copy_src0 + (hbh0->length+1);
              copy_dst0 [4] = copy_src0[4];
              copy_dst0 [3] = copy_src0[3];
              copy_dst0 [2] = copy_src0[2];
              copy_dst0 [1] = copy_src0[1];
              copy_dst0 [0] = copy_src0[0];
              /* No h-b-h header: resume the saved lookup disposition */
              next0 = adj0->saved_lookup_next_index;
          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
              ip6_pop_hop_by_hop_trace_t *t =
                 vlib_add_trace (vm, node, b0, sizeof (*t));
              t->next_index = next0;
          /* verify speculative enqueue, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
  vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index,
                               IP6_POP_HOP_BY_HOP_ERROR_PROCESSED, processed);
  vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index,
                               IP6_POP_HOP_BY_HOP_ERROR_NO_HOHO, no_header);
  return frame->n_vectors;
941 VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) = {
942 .function = ip6_pop_hop_by_hop_node_fn,
943 .name = "ip6-pop-hop-by-hop",
944 .vector_size = sizeof (u32),
945 .format_trace = format_ip6_pop_hop_by_hop_trace,
946 .type = VLIB_NODE_TYPE_INTERNAL,
948 .n_errors = ARRAY_LEN(ip6_pop_hop_by_hop_error_strings),
949 .error_strings = ip6_pop_hop_by_hop_error_strings,
951 /* See ip/lookup.h */
952 .n_next_nodes = IP_LOOKUP_N_NEXT,
953 .next_nodes = IP6_LOOKUP_NEXT_NODES,
957 static clib_error_t *
958 ip6_hop_by_hop_init (vlib_main_t * vm)
960 ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
963 hm->vnet_main = vnet_get_main();
964 hm->unix_time_0 = (u32) time (0); /* Store starting time */
965 hm->vlib_time_0 = vlib_time_now (vm);
966 hm->ioam_flag = IOAM_HBYH_MOD;
967 hm->trace_tsp = TSP_MICROSECONDS; /* Micro seconds */
972 VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
974 int ip6_ioam_set_rewrite (u8 **rwp, u32 trace_type, u32 trace_option_elts,
975 int has_pow_option, int has_ppc_option)
979 ip6_hop_by_hop_header_t *hbh;
980 ioam_trace_option_t * trace_option;
981 ioam_pow_option_t * pow_option;
983 u8 trace_data_size = 0;
987 if (trace_option_elts == 0 && has_pow_option == 0)
990 /* Work out how much space we need */
991 size = sizeof (ip6_hop_by_hop_header_t);
993 if (trace_option_elts)
995 size += sizeof (ip6_hop_by_hop_option_t);
997 trace_data_size = fetch_trace_data_size(trace_type);
998 if (trace_data_size == 0)
999 return VNET_API_ERROR_INVALID_VALUE;
1001 if (trace_option_elts * trace_data_size > 254)
1002 return VNET_API_ERROR_INVALID_VALUE;
1004 size += trace_option_elts * trace_data_size;
1008 size += sizeof (ip6_hop_by_hop_option_t);
1009 size += sizeof (ioam_pow_option_t);
1012 /* Round to a multiple of 8 octets */
1013 rnd_size = (size + 7) & ~7;
1015 /* allocate it, zero-fill / pad by construction */
1016 vec_validate (rewrite, rnd_size-1);
1018 hbh = (ip6_hop_by_hop_header_t *) rewrite;
1019 /* Length of header in 8 octet units, not incl first 8 octets */
1020 hbh->length = (rnd_size>>3) - 1;
1021 current = (u8 *)(hbh+1);
1023 if (trace_option_elts)
1025 trace_option = (ioam_trace_option_t *)current;
1026 trace_option->hdr.type = HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST
1027 | HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE;
1028 trace_option->hdr.length =
1029 2 /*ioam_trace_type,data_list_elts_left */ +
1030 trace_option_elts * trace_data_size;
1031 trace_option->ioam_trace_type = trace_type & TRACE_TYPE_MASK;
1032 trace_option->data_list_elts_left = trace_option_elts;
1033 current += sizeof (ioam_trace_option_t) +
1034 trace_option_elts * trace_data_size;
1038 pow_option = (ioam_pow_option_t *)current;
1039 pow_option->hdr.type = HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK
1040 | HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE;
1041 pow_option->hdr.length = sizeof (ioam_pow_option_t) -
1042 sizeof (ip6_hop_by_hop_option_t);
1043 current += sizeof (ioam_pow_option_t);
1051 clear_ioam_rewrite_fn(void)
1053 ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
1055 vec_free(hm->rewrite);
1060 hm->trace_option_elts = 0;
1061 hm->has_pow_option = 0;
1062 hm->has_ppc_option = 0;
1063 hm->trace_tsp = TSP_MICROSECONDS;
1068 clib_error_t * clear_ioam_rewrite_command_fn (vlib_main_t * vm,
1069 unformat_input_t * input,
1070 vlib_cli_command_t * cmd)
1072 return(clear_ioam_rewrite_fn());
1075 VLIB_CLI_COMMAND (ip6_clear_ioam_trace_cmd, static) = {
1076 .path = "clear ioam rewrite",
1077 .short_help = "clear ioam rewrite",
1078 .function = clear_ioam_rewrite_command_fn,
/*
 * Configure the iOAM trace profile: (re)build the rewrite string and,
 * on success, cache the profile parameters in ip6_hop_by_hop_main so
 * the data path and CLI can see them.  Returns a clib error carrying
 * the ip6_ioam_set_rewrite return code on failure.
 */
ip6_ioam_trace_profile_set(u32 trace_option_elts, u32 trace_type, u32 node_id,
                           u32 app_data, int has_pow_option, u32 trace_tsp,
  ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
  rv = ip6_ioam_set_rewrite (&hm->rewrite, trace_type, trace_option_elts,
                             has_pow_option, has_ppc_option);
  /* success path: remember the accepted profile */
  hm->node_id = node_id;
  hm->app_data = app_data;
  hm->trace_type = trace_type;
  hm->trace_option_elts = trace_option_elts;
  hm->has_pow_option = has_pow_option;
  hm->has_ppc_option = has_ppc_option;
  hm->trace_tsp = trace_tsp;
  /* failure path: surface the rewrite error to the caller */
  return clib_error_return_code(0, rv, 0, "ip6_ioam_set_rewrite returned %d", rv);
/*
 * CLI handler for "set ioam rewrite ...": parse the trace profile
 * parameters plus optional pow / ppc flags, then hand them to
 * ip6_ioam_trace_profile_set.
 */
static clib_error_t *
ip6_set_ioam_rewrite_command_fn (vlib_main_t * vm,
                                 unformat_input_t * input,
                                 vlib_cli_command_t * cmd)
  u32 trace_option_elts = 0;
  u32 trace_type = 0, node_id = 0;
  u32 app_data = 0, trace_tsp = TSP_MICROSECONDS;
  int has_pow_option = 0;
  int has_ppc_option = 0;
  clib_error_t * rv = 0;
  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
      /* all five trace parameters must be supplied together, in order */
      if (unformat (input, "trace-type 0x%x trace-elts %d "
                    "trace-tsp %d node-id 0x%x app-data 0x%x",
                    &trace_type, &trace_option_elts, &trace_tsp,
                    &node_id, &app_data))
      else if (unformat (input, "pow"))
      else if (unformat (input, "ppc encap"))
        has_ppc_option = PPC_ENCAP;
      else if (unformat (input, "ppc decap"))
        has_ppc_option = PPC_DECAP;
      else if (unformat (input, "ppc none"))
        has_ppc_option = PPC_NONE;
  rv = ip6_ioam_trace_profile_set(trace_option_elts, trace_type, node_id,
                                  app_data, has_pow_option, trace_tsp, has_ppc_option);
1150 VLIB_CLI_COMMAND (ip6_set_ioam_rewrite_cmd, static) = {
1151 .path = "set ioam rewrite",
1152 .short_help = "set ioam rewrite trace-type <0x1f|0x3|0x9|0x11|0x19> trace-elts <nn> trace-tsp <0|1|2|3> node-id <node id in hex> app-data <app_data in hex> [pow] [ppc <encap|decap>]",
1153 .function = ip6_set_ioam_rewrite_command_fn,
/*
 * CLI handler for "show ioam summary": render the configured flow
 * destination, trace profile, POW and PPC state into a vector and emit
 * it in one vlib_cli_output call.
 */
static clib_error_t *
ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm,
                              unformat_input_t * input,
                              vlib_cli_command_t * cmd)
  ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
  /* a flow destination is configured iff hm->adj is non-zero */
  if (!is_zero_ip6_address(&hm->adj))
      s = format(s, " REWRITE FLOW CONFIGS - \n");
      s = format(s, " Destination Address : %U\n",
                 format_ip6_address, &hm->adj, sizeof(ip6_address_t));
      s = format(s, " Flow operation : %d (%s)\n", hm->ioam_flag,
                 (hm->ioam_flag == IOAM_HBYH_ADD) ? "Add" :
                 ((hm->ioam_flag == IOAM_HBYH_MOD) ? "Mod" : "Pop"));
      s = format(s, " REWRITE FLOW CONFIGS - Not configured\n");
  if (hm->trace_option_elts)
      s = format(s, " HOP BY HOP OPTIONS - TRACE CONFIG - \n");
      s = format(s, " Trace Type : 0x%x (%d)\n",
                 hm->trace_type, hm->trace_type);
      s = format(s, " Trace timestamp precision : %d (%s)\n", hm->trace_tsp,
                 (hm->trace_tsp == TSP_SECONDS) ? "Seconds" :
                 ((hm->trace_tsp == TSP_MILLISECONDS) ? "Milliseconds" :
                 (((hm->trace_tsp == TSP_MICROSECONDS) ? "Microseconds" : "Nanoseconds"))));
      s = format(s, " Num of trace nodes : %d\n",
                 hm->trace_option_elts);
      s = format(s, " Node-id : 0x%x (%d)\n",
                 hm->node_id, hm->node_id);
      s = format(s, " App Data : 0x%x (%d)\n",
                 hm->app_data, hm->app_data);
      s = format(s, " HOP BY HOP OPTIONS - TRACE CONFIG - Not configured\n");
  s = format(s, " POW OPTION - %d (%s)\n",
             hm->has_pow_option, (hm->has_pow_option?"Enabled":"Disabled"));
  if (hm->has_pow_option)
      s = format(s, "Try 'show ioam sc-profile' for more information\n");
  s = format(s, " EDGE TO EDGE - PPC OPTION - %d (%s)\n",
             hm->has_ppc_option, ppc_state[hm->has_ppc_option]);
  if (hm->has_ppc_option)
      s = format(s, "Try 'show ioam ppc' for more information\n");
  /* %v prints the accumulated vector */
  vlib_cli_output(vm, "%v", s);
1215 VLIB_CLI_COMMAND (ip6_show_ioam_run_cmd, static) = {
1216 .path = "show ioam summary",
1217 .short_help = "Summary of IOAM configuration",
1218 .function = ip6_show_ioam_summary_cmd_fn,
/*
 * Point the iOAM machinery at a FIB entry: look up the adjacency for
 * addr/mask_width in vrf_id, then retarget its lookup-next to the
 * add / pop hop-by-hop node (or restore the original for "none").
 * Exactly one of is_add / is_pop / is_none must be set.
 */
int ip6_ioam_set_destination (ip6_address_t *addr, u32 mask_width, u32 vrf_id,
                              int is_add, int is_pop, int is_none)
  ip6_main_t * im = &ip6_main;
  ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
  ip_lookup_main_t * lm = &im->lookup_main;
  ip_adjacency_t * adj;
  BVT(clib_bihash_kv) kv, value;
  if ((is_add + is_pop + is_none) != 1)
    return VNET_API_ERROR_INVALID_VALUE_2;
  /* Go find the adjacency we're supposed to tickle */
  p = hash_get (im->fib_index_by_table_id, vrf_id);
    return VNET_API_ERROR_NO_SUCH_FIB;
  len = vec_len (im->prefix_lengths_in_search_order);
  /* longest-match walk of the ip6 lookup table, restricted to the
   * exact requested mask width */
  for (i = 0; i < len; i++)
      int dst_address_length = im->prefix_lengths_in_search_order[i];
      ip6_address_t * mask = &im->fib_masks[dst_address_length];
      if (dst_address_length != mask_width)
      kv.key[0] = addr->as_u64[0] & mask->as_u64[0];
      kv.key[1] = addr->as_u64[1] & mask->as_u64[1];
      kv.key[2] = ((u64)((fib_index))<<32) | dst_address_length;
      rv = BV(clib_bihash_search_inline_2)(&im->ip6_lookup_table, &kv, &value);
    return VNET_API_ERROR_NO_SUCH_ENTRY;
  /* Got it, modify as directed... */
  adj_index = value.value;
  adj = ip_get_adjacency (lm, adj_index);
  /* Restore original lookup-next action */
  if (adj->saved_lookup_next_index)
      adj->lookup_next_index = adj->saved_lookup_next_index;
      adj->saved_lookup_next_index = 0;
  /* Save current action */
  if (is_add || is_pop)
    adj->saved_lookup_next_index = adj->lookup_next_index;
    adj->lookup_next_index = IP_LOOKUP_NEXT_ADD_HOP_BY_HOP;
    adj->lookup_next_index = IP_LOOKUP_NEXT_POP_HOP_BY_HOP;
  hm->ioam_flag = (is_add ? IOAM_HBYH_ADD :
                   (is_pop ? IOAM_HBYH_POP : IOAM_HBYH_MOD));
/*
 * CLI handler for "set ioam destination <addr>/<width> add|pop|none
 * [vrf-id <n>]": parse arguments, validate that exactly one operation
 * was given, then call ip6_ioam_set_destination.
 */
static clib_error_t *
ip6_set_ioam_destination_command_fn (vlib_main_t * vm,
                                     unformat_input_t * input,
                                     vlib_cli_command_t * cmd)
  u32 mask_width = ~0;
  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
      if (unformat (input, "%U/%d",
                    unformat_ip6_address, &addr, &mask_width))
      else if (unformat (input, "vrf-id %d", &vrf_id))
      else if (unformat (input, "add"))
      else if (unformat (input, "pop"))
      else if (unformat (input, "none"))
  /* add / pop / none are mutually exclusive and one is required */
  if ((is_add + is_pop + is_none) != 1)
    return clib_error_return (0, "One of (add, pop, none) required");
  /* mask_width still ~0 means no prefix was parsed */
  if (mask_width == ~0)
    return clib_error_return (0, "<address>/<mask-width> required");
  rv = ip6_ioam_set_destination (&addr, mask_width, vrf_id,
                                 is_add, is_pop, is_none);
  return clib_error_return (0, "ip6_ioam_set_destination returned %d", rv);
1344 VLIB_CLI_COMMAND (ip6_set_ioam_destination_cmd, static) = {
1345 .path = "set ioam destination",
1346 .short_help = "set ioam destination <ip6-address>/<width> add | pop | none",
1347 .function = ip6_set_ioam_destination_command_fn,
/* Register the analysis callback invoked by ip6-pop-hop-by-hop just
 * before the trace data is stripped; pass NULL to unregister.
 * NOTE(review): cb is taken as void * and stored unchecked -- it must
 * match the ioam_end_of_path_cb signature used by the pop node. */
void vnet_register_ioam_end_of_path_callback (void *cb)
  ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
  hm->ioam_end_of_path_cb = cb;