2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
15 #include <vlib/vlib.h>
16 #include <vnet/vnet.h>
17 #include <vnet/pg/pg.h>
18 #include <vppinfra/error.h>
20 #include <vnet/ip/ip.h>
22 #include <vppinfra/hash.h>
23 #include <vppinfra/error.h>
24 #include <vppinfra/elog.h>
26 #include <vnet/ip/ip6_hop_by_hop.h>
28 /* Timestamp precision multipliers for seconds, milliseconds, microseconds
29 * and nanoseconds respectively.
31 static f64 trace_tsp_mul[4] = {1, 1e3, 1e6, 1e9};
/* Display names for the PPC option state; indexed by hm->has_ppc_option
 * when the "show ioam summary" handler prints the configuration. */
33 char *ppc_state[] = {"None", "Encap", "Decap"};
/* Global iOAM hop-by-hop state read and written by the nodes and CLI
 * handlers in this file. */
35 ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
/*
 * Next-node list shared by the three node registrations in this file.
 * Each _(SYMBOL, "node-name") entry expands to an
 * IP6_HBYH_INPUT_NEXT_<SYMBOL> enumerator here and to a
 * [IP6_HBYH_INPUT_NEXT_<SYMBOL>] = "node-name" slot in the registrations.
 */
39 #define foreach_ip6_hbyh_input_next \
40 _(IP6_REWRITE, "ip6-rewrite") \
41 _(IP6_LOOKUP, "ip6-lookup") \
42 _(IP6_HBYH, "ip6-hop-by-hop")\
43 _(IP6_POP_HBYH, "ip6-pop-hop-by-hop")\
47 #define _(s,n) IP6_HBYH_INPUT_NEXT_##s,
48 foreach_ip6_hbyh_input_next
50 IP6_HBYH_INPUT_N_NEXT,
51 } ip6_hbyh_input_next_t;
54 * ip6 hop-by-hop option handling. We push pkts with h-b-h options to
55 * ip6_hop_by_hop_node_fn from ip6-lookup at a cost of ~2 clocks/pkt in
58 * We parse through the h-b-h option TLVs, specifically looking for
59 * HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST. [The option TLV codes still need
60 * to be formally allocated by IANA.]
62 * If we find the indicated option type, and we have remaining list
63 * elements in the trace list, allocate and populate the trace list
66 * At the ingress edge: punch in the h-b-h rewrite, then visit the
67 * standard h-b-h option handler. We have to be careful in the standard
68 * h-b-h handler, to avoid looping until we run out of rewrite space.
69 * Ask me how I know that.
72 * decide on egress point "pop and count" scheme
73 * time stamp handling: usec since the top of the hour?
74 * configure the node id
75 * trace list application data support
76 * cons up analysis / steering plug-in(s)
77 * add configuration binary APIs, vpp_api_test_support, yang models and
79 * perf tune: dual loop, replace memcpy w/ N x 8-byte load/stores
84 * primary h-b-h handler trace support
85 * We work pretty hard on the problem for obvious reasons
/* Trace record filled by the ip6-hop-by-hop node: the node stores the next
 * index, the captured length and a verbatim copy of the h-b-h header in it,
 * and format_ip6_hop_by_hop_trace decodes that copy. */
90 u32 timestamp_msbs; /* Store the top set of bits of timestamp */
92 } ip6_hop_by_hop_trace_t;
/*
 * Return the size in bytes of one trace-list element for the given iOAM
 * trace type, or 0 when the type equals none of the TRACE_TYPE_* values
 * tested here. Callers in this file divide the result by 4 to step through
 * the element array in 32-bit words.
 */
100 fetch_trace_data_size(u8 trace_type)
102 u8 trace_data_size = 0;
104 if (trace_type == TRACE_TYPE_IF_TS_APP)
105 trace_data_size = sizeof(ioam_trace_if_ts_app_t);
106 else if(trace_type == TRACE_TYPE_IF)
107 trace_data_size = sizeof(ioam_trace_if_t);
108 else if(trace_type == TRACE_TYPE_TS)
109 trace_data_size = sizeof(ioam_trace_ts_t);
110 else if(trace_type == TRACE_TYPE_APP)
111 trace_data_size = sizeof(ioam_trace_app_t);
112 else if(trace_type == TRACE_TYPE_TS_APP)
113 trace_data_size = sizeof(ioam_trace_ts_app_t);
115 return trace_data_size;
118 static u8 * format_ioam_data_list_element (u8 * s, va_list * args)
120 u32 *elt = va_arg (*args, u32 *);
121 u8 *trace_type_p = va_arg (*args, u8 *);
122 u8 trace_type = *trace_type_p;
125 if (trace_type & BIT_TTL_NODEID)
127 u32 ttl_node_id_host_byte_order = clib_net_to_host_u32 (*elt);
128 s = format (s, "ttl 0x%x node id 0x%x ",
129 ttl_node_id_host_byte_order>>24,
130 ttl_node_id_host_byte_order & 0x00FFFFFF);
135 if (trace_type & BIT_ING_INTERFACE && trace_type & BIT_ING_INTERFACE)
137 u32 ingress_host_byte_order = clib_net_to_host_u32(*elt);
138 s = format (s, "ingress 0x%x egress 0x%x ",
139 ingress_host_byte_order >> 16,
140 ingress_host_byte_order &0xFFFF);
144 if (trace_type & BIT_TIMESTAMP)
146 u32 ts_in_host_byte_order = clib_net_to_host_u32 (*elt);
147 s = format (s, "ts 0x%x \n", ts_in_host_byte_order);
151 if (trace_type & BIT_APPDATA)
153 u32 appdata_in_host_byte_order = clib_net_to_host_u32 (*elt);
154 s = format (s, "app 0x%x ", appdata_in_host_byte_order);
161 static u8 * format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
163 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
164 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
165 ip6_hop_by_hop_trace_t * t = va_arg (*args, ip6_hop_by_hop_trace_t *);
166 ip6_hop_by_hop_header_t *hbh0;
167 ip6_hop_by_hop_option_t *opt0, *limit0;
168 ioam_trace_option_t * trace0;
169 u8 trace_data_size_in_words = 0;
174 hbh0 = (ip6_hop_by_hop_header_t *)t->option_data;
176 s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d\n",
177 t->next_index, (hbh0->length+1)<<3, t->trace_len);
179 opt0 = (ip6_hop_by_hop_option_t *) (hbh0+1);
180 limit0 = (ip6_hop_by_hop_option_t *) ((u8 *)hbh0) + t->trace_len;
182 while (opt0 < limit0)
184 type0 = opt0->type & HBH_OPTION_TYPE_MASK;
188 case HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST:
189 trace0 = (ioam_trace_option_t *)opt0;
190 s = format (s, " Trace Type 0x%x , %d elts left ts msb(s) 0x%x\n",
191 trace0->ioam_trace_type, trace0->data_list_elts_left,
193 trace_data_size_in_words =
194 fetch_trace_data_size(trace0->ioam_trace_type)/4;
195 elt0 = &trace0->elts[0];
197 ((u8 *)(&trace0->elts[0]) + trace0->hdr.length - 2
198 /* -2 accounts for ioam_trace_type,elts_left */))
200 s = format (s, " [%d] %U\n",elt_index,
201 format_ioam_data_list_element,
202 elt0, &trace0->ioam_trace_type);
204 elt0 += trace_data_size_in_words;
207 opt0 = (ip6_hop_by_hop_option_t *)
208 (((u8 *)opt0) + opt0->length
209 + sizeof (ip6_hop_by_hop_option_t));
212 case HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK:
213 s = format (s, " POW opt present\n");
214 opt0 = (ip6_hop_by_hop_option_t *)
215 (((u8 *)opt0) + sizeof (ioam_pow_option_t));
218 case 0: /* Pad, just stop */
219 opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1;
223 s = format (s, "Unknown %d", type0);
224 opt0 = (ip6_hop_by_hop_option_t *)
225 (((u8 *)opt0) + opt0->length
226 + sizeof (ip6_hop_by_hop_option_t));
/* Registration object for the ip6-hop-by-hop node; the node function uses
 * its index when incrementing the counters below. */
233 vlib_node_registration_t ip6_hop_by_hop_node;
/* Counter list for ip6-hop-by-hop: each _(SYMBOL, "text") entry yields an
 * IP6_HOP_BY_HOP_ERROR_<SYMBOL> enumerator and the matching string. */
235 #define foreach_ip6_hop_by_hop_error \
236 _(PROCESSED, "Pkts with ip6 hop-by-hop options") \
237 _(UNKNOWN_OPTION, "Unknown ip6 hop-by-hop options")
240 #define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
241 foreach_ip6_hop_by_hop_error
243 IP6_HOP_BY_HOP_N_ERROR,
244 } ip6_hop_by_hop_error_t;
/* Counter descriptions, in the same order as the enumerators above. */
246 static char * ip6_hop_by_hop_error_strings[] = {
247 #define _(sym,string) string,
248 foreach_ip6_hop_by_hop_error
/*
 * ip6-hop-by-hop node function.
 *
 * For each buffer in the frame, walks the option TLVs that follow the IPv6
 * header, bounded by the h-b-h header length ((length+1)*8 bytes):
 *  - iOAM trace data list: when data_list_elts_left is non-zero it is
 *    decremented and this node's element is written at the slot selected by
 *    the new count. One 32-bit word in network byte order is written per
 *    enabled trace-type bit: hop limit in the top byte (the formatter prints
 *    the low 24 bits as the node id), rx sw_if_index and the adjacency's
 *    rewrite sw_if_index (16 bits each), the low 32 bits of the scaled
 *    timestamp, and hm->app_data.
 *  - proof-of-work option: skipped by sizeof (ioam_pow_option_t).
 *  - remaining visible cases: skipped (see the review note on the one-byte
 *    advance below).
 *
 * The next node is ip6-pop-hop-by-hop when the buffer's
 * l2_classify.opaque_index equals OI_DECAP, otherwise ip6-rewrite; the
 * opaque index is then reset to ~0. For traced buffers up to
 * ARRAY_LEN (option_data) bytes of the h-b-h header are copied verbatim.
 *
 * Increments the UNKNOWN_OPTION counter when non-zero and the PROCESSED
 * counter, then returns frame->n_vectors.
 */
253 ip6_hop_by_hop_node_fn (vlib_main_t * vm,
254 vlib_node_runtime_t * node,
255 vlib_frame_t * frame)
257 ip6_main_t * im = &ip6_main;
258 ip_lookup_main_t * lm = &im->lookup_main;
259 ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
260 u32 n_left_from, * from, * to_next;
261 ip_lookup_next_t next_index;
262 u32 processed = 0, unknown_opts = 0;
267 from = vlib_frame_vector_args (frame);
268 n_left_from = frame->n_vectors;
269 next_index = node->cached_next_index;
271 while (n_left_from > 0)
275 vlib_get_next_frame (vm, node, next_index,
276 to_next, n_left_to_next);
278 #if 0 /* $$$ DUAL-LOOP ME */
279 while (n_left_from >= 4 && n_left_to_next >= 2)
281 u32 next0 = IP6_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
282 u32 next1 = IP6_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
283 u32 sw_if_index0, sw_if_index1;
285 ethernet_header_t *en0, *en1;
287 vlib_buffer_t * b0, * b1;
289 /* Prefetch next iteration. */
291 vlib_buffer_t * p2, * p3;
293 p2 = vlib_get_buffer (vm, from[2]);
294 p3 = vlib_get_buffer (vm, from[3]);
296 vlib_prefetch_buffer_header (p2, LOAD);
297 vlib_prefetch_buffer_header (p3, LOAD);
299 CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
300 CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
303 /* speculatively enqueue b0 and b1 to the current next frame */
304 to_next[0] = bi0 = from[0];
305 to_next[1] = bi1 = from[1];
311 b0 = vlib_get_buffer (vm, bi0);
312 b1 = vlib_get_buffer (vm, bi1);
314 /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
315 ASSERT (b0->current_data == 0);
316 ASSERT (b1->current_data == 0);
318 ip0 = vlib_buffer_get_current (b0);
/* NOTE(review): ip1 is read from b0, not b1 - must be fixed before this
 * disabled dual-loop template is ever enabled. */
319 ip1 = vlib_buffer_get_current (b0);
321 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
322 sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
324 /* $$$$$ End of processing 2 x packets $$$$$ */
326 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
328 if (b0->flags & VLIB_BUFFER_IS_TRACED)
330 ip6_hop_by_hop_trace_t *t =
331 vlib_add_trace (vm, node, b0, sizeof (*t));
332 t->sw_if_index = sw_if_index0;
333 t->next_index = next0;
335 if (b1->flags & VLIB_BUFFER_IS_TRACED)
337 ip6_hop_by_hop_trace_t *t =
338 vlib_add_trace (vm, node, b1, sizeof (*t));
339 t->sw_if_index = sw_if_index1;
340 t->next_index = next1;
344 /* verify speculative enqueues, maybe switch current next frame */
345 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
346 to_next, n_left_to_next,
347 bi0, bi1, next0, next1);
351 while (n_left_from > 0 && n_left_to_next > 0)
358 ip_adjacency_t * adj0;
359 ip6_hop_by_hop_header_t *hbh0;
360 ip6_hop_by_hop_option_t *opt0, *limit0;
361 ioam_trace_option_t * trace0;
365 /* speculatively enqueue b0 to the current next frame */
373 b0 = vlib_get_buffer (vm, bi0);
375 ip0 = vlib_buffer_get_current (b0);
376 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
377 adj0 = ip_get_adjacency (lm, adj_index0);
378 hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
379 opt0 = (ip6_hop_by_hop_option_t *)(hbh0+1);
380 limit0 = (ip6_hop_by_hop_option_t *)
381 ((u8 *)hbh0 + ((hbh0->length+1)<<3));
383 /* Scan the set of h-b-h options, process ones that we understand */
384 while (opt0 < limit0)
386 type0 = opt0->type & HBH_OPTION_TYPE_MASK;
389 case HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST:
390 trace0 = (ioam_trace_option_t *)opt0;
391 if (PREDICT_TRUE (trace0->data_list_elts_left))
393 trace0->data_list_elts_left--;
394 /* fetch_trace_data_size returns in bytes. Convert it to 4-bytes
395 * to skip to this node's location.
397 elt_index = trace0->data_list_elts_left *
398 fetch_trace_data_size(trace0->ioam_trace_type)/4;
399 elt0 = &trace0->elts[elt_index];
400 if (trace0->ioam_trace_type & BIT_TTL_NODEID)
403 clib_host_to_net_u32 ((ip0->hop_limit<<24)
408 if (trace0->ioam_trace_type & BIT_ING_INTERFACE)
411 (vnet_buffer(b0)->sw_if_index[VLIB_RX]&0xFFFF) << 16 | (adj0->rewrite_header.sw_if_index & 0xFFFF);
412 *elt0 = clib_host_to_net_u32(*elt0);
416 if (trace0->ioam_trace_type & BIT_TIMESTAMP)
418 /* Send least significant 32 bits */
419 f64 time_f64 = (f64)(((f64)hm->unix_time_0) +
420 (vlib_time_now(hm->vlib_main) - hm->vlib_time_0));
/* NOTE(review): hm->trace_tsp indexes the 4-entry trace_tsp_mul table with
 * no range check visible here - confirm it is validated when configured. */
423 time_f64 * trace_tsp_mul[hm->trace_tsp];
424 *elt0 = clib_host_to_net_u32(time_u64.as_u32[0]);
428 if (trace0->ioam_trace_type & BIT_APPDATA)
430 /* $$$ set elt0->app_data */
431 *elt0 = clib_host_to_net_u32(hm->app_data);
436 opt0 = (ip6_hop_by_hop_option_t *)
437 (((u8 *)opt0) + opt0->length
438 + sizeof (ip6_hop_by_hop_option_t));
441 case HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK:
442 opt0 = (ip6_hop_by_hop_option_t *)
443 (((u8 *)opt0) + sizeof (ioam_pow_option_t));
/* NOTE(review): the cast binds tighter than '+', so this advances by one
 * ip6_hop_by_hop_option_t rather than one byte; presumably
 * (((u8 *)opt0) + 1) was intended - confirm and fix. */
447 opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1;
451 opt0 = (ip6_hop_by_hop_option_t *)
452 (((u8 *)opt0) + opt0->length
453 + sizeof (ip6_hop_by_hop_option_t));
460 next0 = (vnet_buffer(b0)->l2_classify.opaque_index == OI_DECAP) ?
461 IP6_HBYH_INPUT_NEXT_IP6_POP_HBYH : IP6_HBYH_INPUT_NEXT_IP6_REWRITE;
462 vnet_buffer(b0)->l2_classify.opaque_index = ~0;
464 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
465 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
467 ip6_hop_by_hop_trace_t *t =
468 vlib_add_trace (vm, node, b0, sizeof (*t));
469 u32 trace_len = (hbh0->length+1)<<3;
470 t->next_index = next0;
471 /* Capture the h-b-h option verbatim */
472 trace_len = trace_len < ARRAY_LEN(t->option_data) ?
473 trace_len : ARRAY_LEN(t->option_data);
474 t->trace_len = trace_len;
475 clib_memcpy (t->option_data, hbh0, trace_len);
480 /* verify speculative enqueue, maybe switch current next frame */
481 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
482 to_next, n_left_to_next,
486 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
489 if (PREDICT_FALSE(unknown_opts > 0)) {
490 vlib_node_increment_counter (vm, ip6_hop_by_hop_node.index,
491 IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION, unknown_opts);
494 vlib_node_increment_counter (vm, ip6_hop_by_hop_node.index,
495 IP6_HOP_BY_HOP_ERROR_PROCESSED, processed);
496 return frame->n_vectors;
/* Graph-node registration for "ip6-hop-by-hop": binds the node function,
 * the trace formatter and the counter strings, and generates the next-node
 * table from foreach_ip6_hbyh_input_next. */
499 VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = {
500 .function = ip6_hop_by_hop_node_fn,
501 .name = "ip6-hop-by-hop",
502 .vector_size = sizeof (u32),
503 .format_trace = format_ip6_hop_by_hop_trace,
504 .type = VLIB_NODE_TYPE_INTERNAL,
506 .n_errors = ARRAY_LEN(ip6_hop_by_hop_error_strings),
507 .error_strings = ip6_hop_by_hop_error_strings,
509 .n_next_nodes = IP6_HBYH_INPUT_N_NEXT,
511 #define _(s,n) [IP6_HBYH_INPUT_NEXT_##s] = n,
512 foreach_ip6_hbyh_input_next
/* Trace record and formatter for ip6-add-hop-by-hop. The node stores the
 * chosen next index in the record; the formatter consumes the vlib_main_t
 * and vlib_node_t arguments without using them and prints one line
 * beginning "IP6_ADD_HOP_BY_HOP: next index". */
517 /* The main h-b-h tracer will be invoked, no need to do much here */
520 } ip6_add_hop_by_hop_trace_t;
522 /* packet trace format function */
523 static u8 * format_ip6_add_hop_by_hop_trace (u8 * s, va_list * args)
525 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
526 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
527 ip6_add_hop_by_hop_trace_t * t = va_arg (*args,
528 ip6_add_hop_by_hop_trace_t *);
530 s = format (s, "IP6_ADD_HOP_BY_HOP: next index %d",
/* Registration object for the ip6-add-hop-by-hop node; its index is used
 * when the node function increments the PROCESSED counter. */
535 vlib_node_registration_t ip6_add_hop_by_hop_node;
/* Counter list for ip6-add-hop-by-hop: each _(SYMBOL, "text") entry yields
 * an IP6_ADD_HOP_BY_HOP_ERROR_<SYMBOL> enumerator and the matching string. */
537 #define foreach_ip6_add_hop_by_hop_error \
538 _(PROCESSED, "Pkts w/ added ip6 hop-by-hop options")
541 #define _(sym,str) IP6_ADD_HOP_BY_HOP_ERROR_##sym,
542 foreach_ip6_add_hop_by_hop_error
544 IP6_ADD_HOP_BY_HOP_N_ERROR,
545 } ip6_add_hop_by_hop_error_t;
/* Counter descriptions, in the same order as the enumerators above. */
547 static char * ip6_add_hop_by_hop_error_strings[] = {
548 #define _(sym,string) string,
549 foreach_ip6_add_hop_by_hop_error
/*
 * ip6-add-hop-by-hop node function: inserts the preconfigured h-b-h
 * rewrite (hm->rewrite) behind the IPv6 header of each buffer.
 *
 * Per packet (single loop): the first five 64-bit words at the current
 * buffer position (the IPv6 header) are copied rewrite_length bytes towards
 * the buffer start, the buffer is advanced by -rewrite_length, the rewrite
 * bytes are copied right after the moved header, the h-b-h "next protocol"
 * takes over the IPv6 header's protocol value, and the payload length grows
 * by rewrite_length. Packets continue to ip6-lookup.
 *
 * NOTE(review): no check that hm->rewrite is non-empty or that the buffer
 * has rewrite_length bytes of headroom is visible here - confirm upstream.
 * NOTE(review): the two-packet loop looks like unfinished template code
 * (ip1 is read from b0 and the NEXT_INTERFACE_OUTPUT names are not in this
 * file's next enum) - presumably compiled out; confirm.
 *
 * Increments the PROCESSED counter and returns frame->n_vectors.
 */
554 ip6_add_hop_by_hop_node_fn (vlib_main_t * vm,
555 vlib_node_runtime_t * node,
556 vlib_frame_t * frame)
558 ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
559 u32 n_left_from, * from, * to_next;
560 ip_lookup_next_t next_index;
562 u8 * rewrite = hm->rewrite;
563 u32 rewrite_length = vec_len (rewrite);
565 from = vlib_frame_vector_args (frame);
566 n_left_from = frame->n_vectors;
567 next_index = node->cached_next_index;
569 while (n_left_from > 0)
573 vlib_get_next_frame (vm, node, next_index,
574 to_next, n_left_to_next);
577 while (n_left_from >= 4 && n_left_to_next >= 2)
579 u32 next0 = IP6_ADD_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
580 u32 next1 = IP6_ADD_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
581 u32 sw_if_index0, sw_if_index1;
583 ethernet_header_t *en0, *en1;
585 vlib_buffer_t * b0, * b1;
587 /* Prefetch next iteration. */
589 vlib_buffer_t * p2, * p3;
591 p2 = vlib_get_buffer (vm, from[2]);
592 p3 = vlib_get_buffer (vm, from[3]);
594 vlib_prefetch_buffer_header (p2, LOAD);
595 vlib_prefetch_buffer_header (p3, LOAD);
597 CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
598 CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
601 /* speculatively enqueue b0 and b1 to the current next frame */
602 to_next[0] = bi0 = from[0];
603 to_next[1] = bi1 = from[1];
609 b0 = vlib_get_buffer (vm, bi0);
610 b1 = vlib_get_buffer (vm, bi1);
612 /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
613 ASSERT (b0->current_data == 0);
614 ASSERT (b1->current_data == 0);
616 ip0 = vlib_buffer_get_current (b0);
617 ip1 = vlib_buffer_get_current (b0);
619 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
620 sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
622 /* $$$$$ End of processing 2 x packets $$$$$ */
624 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
626 if (b0->flags & VLIB_BUFFER_IS_TRACED)
628 ip6_add_hop_by_hop_trace_t *t =
629 vlib_add_trace (vm, node, b0, sizeof (*t));
630 t->sw_if_index = sw_if_index0;
631 t->next_index = next0;
633 if (b1->flags & VLIB_BUFFER_IS_TRACED)
635 ip6_add_hop_by_hop_trace_t *t =
636 vlib_add_trace (vm, node, b1, sizeof (*t));
637 t->sw_if_index = sw_if_index1;
638 t->next_index = next1;
642 /* verify speculative enqueues, maybe switch current next frame */
643 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
644 to_next, n_left_to_next,
645 bi0, bi1, next0, next1);
649 while (n_left_from > 0 && n_left_to_next > 0)
655 ip6_hop_by_hop_header_t * hbh0;
656 u64 * copy_src0, * copy_dst0;
659 /* speculatively enqueue b0 to the current next frame */
667 b0 = vlib_get_buffer (vm, bi0);
669 ip0 = vlib_buffer_get_current (b0);
671 /* Copy the ip header left by the required amount */
672 copy_dst0 = (u64 *)(((u8 *)ip0) - rewrite_length);
673 copy_src0 = (u64 *) ip0;
675 copy_dst0 [0] = copy_src0 [0];
676 copy_dst0 [1] = copy_src0 [1];
677 copy_dst0 [2] = copy_src0 [2];
678 copy_dst0 [3] = copy_src0 [3];
679 copy_dst0 [4] = copy_src0 [4];
680 vlib_buffer_advance (b0, - (word)rewrite_length);
681 ip0 = vlib_buffer_get_current (b0);
683 hbh0 = (ip6_hop_by_hop_header_t *)(ip0 + 1);
684 /* $$$ tune, rewrite_length is a multiple of 8 */
685 clib_memcpy (hbh0, rewrite, rewrite_length);
686 /* Patch the protocol chain, insert the h-b-h (type 0) header */
687 hbh0->protocol = ip0->protocol;
689 new_l0 = clib_net_to_host_u16 (ip0->payload_length) + rewrite_length;
690 ip0->payload_length = clib_host_to_net_u16 (new_l0);
692 /* Populate the (first) h-b-h list elt */
693 next0 = IP6_HBYH_INPUT_NEXT_IP6_LOOKUP;
695 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
696 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
698 ip6_add_hop_by_hop_trace_t *t =
699 vlib_add_trace (vm, node, b0, sizeof (*t));
700 t->next_index = next0;
705 /* verify speculative enqueue, maybe switch current next frame */
706 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
707 to_next, n_left_to_next,
711 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
714 vlib_node_increment_counter (vm, ip6_add_hop_by_hop_node.index,
715 IP6_ADD_HOP_BY_HOP_ERROR_PROCESSED, processed);
716 return frame->n_vectors;
/* Graph-node registration for "ip6-add-hop-by-hop": binds the node
 * function, the trace formatter and the counter strings, and generates the
 * next-node table from foreach_ip6_hbyh_input_next. */
719 VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = {
720 .function = ip6_add_hop_by_hop_node_fn,
721 .name = "ip6-add-hop-by-hop",
722 .vector_size = sizeof (u32),
723 .format_trace = format_ip6_add_hop_by_hop_trace,
724 .type = VLIB_NODE_TYPE_INTERNAL,
726 .n_errors = ARRAY_LEN(ip6_add_hop_by_hop_error_strings),
727 .error_strings = ip6_add_hop_by_hop_error_strings,
729 /* See ip/lookup.h */
730 .n_next_nodes = IP6_HBYH_INPUT_N_NEXT,
732 #define _(s,n) [IP6_HBYH_INPUT_NEXT_##s] = n,
733 foreach_ip6_hbyh_input_next
/* Trace record and formatter for ip6-pop-hop-by-hop. The node stores the
 * chosen next index in the record; the formatter consumes the vlib_main_t
 * and vlib_node_t arguments without using them and prints one line
 * beginning "IP6_POP_HOP_BY_HOP: next index". */
739 /* The main h-b-h tracer was already invoked, no need to do much here */
742 } ip6_pop_hop_by_hop_trace_t;
744 /* packet trace format function */
745 static u8 * format_ip6_pop_hop_by_hop_trace (u8 * s, va_list * args)
747 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
748 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
749 ip6_pop_hop_by_hop_trace_t * t = va_arg (*args, ip6_pop_hop_by_hop_trace_t *);
751 s = format (s, "IP6_POP_HOP_BY_HOP: next index %d",
/* Registration object for the ip6-pop-hop-by-hop node; its index is used
 * when the node function increments the counters below. */
756 vlib_node_registration_t ip6_pop_hop_by_hop_node;
/* Counter list for ip6-pop-hop-by-hop: each _(SYMBOL, "text") entry yields
 * an IP6_POP_HOP_BY_HOP_ERROR_<SYMBOL> enumerator and the matching string. */
758 #define foreach_ip6_pop_hop_by_hop_error \
759 _(PROCESSED, "Pkts w/ removed ip6 hop-by-hop options") \
760 _(NO_HOHO, "Pkts w/ no ip6 hop-by-hop options")
763 #define _(sym,str) IP6_POP_HOP_BY_HOP_ERROR_##sym,
764 foreach_ip6_pop_hop_by_hop_error
766 IP6_POP_HOP_BY_HOP_N_ERROR,
767 } ip6_pop_hop_by_hop_error_t;
/* Counter descriptions, in the same order as the enumerators above. */
769 static char * ip6_pop_hop_by_hop_error_strings[] = {
770 #define _(sym,string) string,
771 foreach_ip6_pop_hop_by_hop_error
/*
 * ip6-pop-hop-by-hop node function: removes the h-b-h header from packets
 * at the egress edge.
 *
 * Per packet (single loop), when the IPv6 header's protocol field is 0 an
 * h-b-h header follows. Two outcomes are visible:
 *  - l2_classify.opaque_index == OI_DECAP: the packet is sent to ip6-lookup
 *    ("first pass" per the inline comment).
 *  - otherwise the registered end-of-path callback (if any) picks the next
 *    node, defaulting to ip6-rewrite, and the header is popped: the buffer
 *    is advanced by (length+1)*8 bytes, the payload length shrinks by the
 *    same amount, the IPv6 protocol field takes the h-b-h next-protocol
 *    value, and the five 64-bit words of the IPv6 header are copied to the
 *    new start, highest word first because the destination lies above the
 *    source and may overlap it.
 * NOTE(review): the braces/else lines separating these branches are not
 * visible in this view - confirm the exact branch structure.
 * Packets without an h-b-h header go to ip6-lookup.
 *
 * NOTE(review): the two-packet loop looks like unfinished template code
 * (ip1 is read from b0) - presumably compiled out; confirm.
 *
 * Increments the PROCESSED and NO_HOHO counters and returns
 * frame->n_vectors.
 */
776 ip6_pop_hop_by_hop_node_fn (vlib_main_t * vm,
777 vlib_node_runtime_t * node,
778 vlib_frame_t * frame)
780 ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
781 ip6_main_t * im = &ip6_main;
782 ip_lookup_main_t * lm = &im->lookup_main;
783 u32 n_left_from, * from, * to_next;
784 ip_lookup_next_t next_index;
787 u32 (*ioam_end_of_path_cb) (vlib_main_t *, vlib_node_runtime_t *,
788 vlib_buffer_t *, ip6_header_t *,
791 ioam_end_of_path_cb = hm->ioam_end_of_path_cb;
793 from = vlib_frame_vector_args (frame);
794 n_left_from = frame->n_vectors;
795 next_index = node->cached_next_index;
797 while (n_left_from > 0)
801 vlib_get_next_frame (vm, node, next_index,
802 to_next, n_left_to_next);
805 while (n_left_from >= 4 && n_left_to_next >= 2)
807 u32 next0 = IP6_POP_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
808 u32 next1 = IP6_POP_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
809 u32 sw_if_index0, sw_if_index1;
811 ethernet_header_t *en0, *en1;
813 vlib_buffer_t * b0, * b1;
815 /* Prefetch next iteration. */
817 vlib_buffer_t * p2, * p3;
819 p2 = vlib_get_buffer (vm, from[2]);
820 p3 = vlib_get_buffer (vm, from[3]);
822 vlib_prefetch_buffer_header (p2, LOAD);
823 vlib_prefetch_buffer_header (p3, LOAD);
825 CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
826 CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
829 /* speculatively enqueue b0 and b1 to the current next frame */
830 to_next[0] = bi0 = from[0];
831 to_next[1] = bi1 = from[1];
837 b0 = vlib_get_buffer (vm, bi0);
838 b1 = vlib_get_buffer (vm, bi1);
840 /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
841 ASSERT (b0->current_data == 0);
842 ASSERT (b1->current_data == 0);
844 ip0 = vlib_buffer_get_current (b0);
845 ip1 = vlib_buffer_get_current (b0);
847 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
848 sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
850 /* $$$$$ End of processing 2 x packets $$$$$ */
852 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
854 if (b0->flags & VLIB_BUFFER_IS_TRACED)
856 ip6_pop_hop_by_hop_trace_t *t =
857 vlib_add_trace (vm, node, b0, sizeof (*t));
858 t->sw_if_index = sw_if_index0;
859 t->next_index = next0;
861 if (b1->flags & VLIB_BUFFER_IS_TRACED)
863 ip6_pop_hop_by_hop_trace_t *t =
864 vlib_add_trace (vm, node, b1, sizeof (*t));
865 t->sw_if_index = sw_if_index1;
866 t->next_index = next1;
870 /* verify speculative enqueues, maybe switch current next frame */
871 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
872 to_next, n_left_to_next,
873 bi0, bi1, next0, next1);
877 while (n_left_from > 0 && n_left_to_next > 0)
884 ip_adjacency_t * adj0;
885 ip6_hop_by_hop_header_t *hbh0;
886 u64 * copy_dst0, * copy_src0;
889 /* speculatively enqueue b0 to the current next frame */
897 b0 = vlib_get_buffer (vm, bi0);
899 ip0 = vlib_buffer_get_current (b0);
900 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
901 adj0 = ip_get_adjacency (lm, adj_index0);
903 /* Perfectly normal to end up here w/ out h-b-h header */
904 if (PREDICT_TRUE (ip0->protocol == 0))
906 hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
908 if (vnet_buffer(b0)->l2_classify.opaque_index == OI_DECAP)
909 { /* First pass. Send to hbyh node. */
910 next0 = IP6_HBYH_INPUT_NEXT_IP6_LOOKUP;
915 /* Collect data from trace via callback */
916 next0 = ioam_end_of_path_cb ?
917 ioam_end_of_path_cb (vm, node, b0, ip0, adj0) :
918 IP6_HBYH_INPUT_NEXT_IP6_REWRITE;
921 /* Pop the trace data */
922 vlib_buffer_advance (b0, (hbh0->length+1)<<3);
923 new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
924 ((hbh0->length+1)<<3);
925 ip0->payload_length = clib_host_to_net_u16 (new_l0);
926 ip0->protocol = hbh0->protocol;
927 copy_src0 = (u64 *)ip0;
928 copy_dst0 = copy_src0 + (hbh0->length+1);
929 copy_dst0 [4] = copy_src0[4];
930 copy_dst0 [3] = copy_src0[3];
931 copy_dst0 [2] = copy_src0[2];
932 copy_dst0 [1] = copy_src0[1];
933 copy_dst0 [0] = copy_src0[0];
938 next0 = IP6_HBYH_INPUT_NEXT_IP6_LOOKUP;
942 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
943 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
945 ip6_pop_hop_by_hop_trace_t *t =
946 vlib_add_trace (vm, node, b0, sizeof (*t));
947 t->next_index = next0;
951 /* verify speculative enqueue, maybe switch current next frame */
952 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
953 to_next, n_left_to_next,
957 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
960 vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index,
961 IP6_POP_HOP_BY_HOP_ERROR_PROCESSED, processed);
962 vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index,
963 IP6_POP_HOP_BY_HOP_ERROR_NO_HOHO, no_header);
964 return frame->n_vectors;
/* Graph-node registration for "ip6-pop-hop-by-hop": binds the node
 * function, the trace formatter and the counter strings, and generates the
 * next-node table from foreach_ip6_hbyh_input_next. */
967 VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) = {
968 .function = ip6_pop_hop_by_hop_node_fn,
969 .name = "ip6-pop-hop-by-hop",
970 .vector_size = sizeof (u32),
971 .format_trace = format_ip6_pop_hop_by_hop_trace,
972 .type = VLIB_NODE_TYPE_INTERNAL,
974 .n_errors = ARRAY_LEN(ip6_pop_hop_by_hop_error_strings),
975 .error_strings = ip6_pop_hop_by_hop_error_strings,
977 /* See ip/lookup.h */
978 .n_next_nodes = IP6_HBYH_INPUT_N_NEXT,
980 #define _(s,n) [IP6_HBYH_INPUT_NEXT_##s] = n,
981 foreach_ip6_hbyh_input_next
/*
 * Init function for the iOAM hop-by-hop state, registered through
 * VLIB_INIT_FUNCTION. Records the vnet main pointer, the wall-clock time
 * (truncated to u32) and the vlib time at start-up - the pair the
 * ip6-hop-by-hop node uses to derive timestamps - and sets the defaults:
 * flow operation IOAM_HBYH_MOD and microsecond timestamp precision.
 */
987 static clib_error_t *
988 ip6_hop_by_hop_init (vlib_main_t * vm)
990 ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
993 hm->vnet_main = vnet_get_main();
994 hm->unix_time_0 = (u32) time (0); /* Store starting time */
995 hm->vlib_time_0 = vlib_time_now (vm);
996 hm->ioam_flag = IOAM_HBYH_MOD;
997 hm->trace_tsp = TSP_MICROSECONDS; /* Micro seconds */
1002 VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
/*
 * Build the h-b-h rewrite string that ip6-add-hop-by-hop inserts.
 *
 * rwp               - rewrite vector to produce (handling is on lines not
 *                     visible in this view)
 * trace_type        - iOAM trace type; selects the per-node element size
 * trace_option_elts - number of trace elements to reserve; 0 = no trace option
 * has_pow_option    - non-zero to append a proof-of-work option
 * has_ppc_option    - not referenced in the visible body
 *
 * Layout: h-b-h header, then (optionally) the trace option sized for
 * trace_option_elts elements, then (optionally) the proof-of-work option;
 * the total is rounded up to a multiple of 8 octets and the vector is
 * zero-filled, so the tail acts as padding.
 *
 * Returns VNET_API_ERROR_INVALID_VALUE when the trace type has no known
 * element size or when the trace data would exceed 254 bytes.
 *
 * NOTE(review): trace_option_elts * trace_data_size is computed in u32; a
 * very large element count could presumably wrap and slip past the 254-byte
 * check - confirm callers bound it or compare by division instead.
 */
1004 int ip6_ioam_set_rewrite (u8 **rwp, u32 trace_type, u32 trace_option_elts,
1005 int has_pow_option, int has_ppc_option)
1009 ip6_hop_by_hop_header_t *hbh;
1010 ioam_trace_option_t * trace_option;
1011 ioam_pow_option_t * pow_option;
1013 u8 trace_data_size = 0;
1017 if (trace_option_elts == 0 && has_pow_option == 0)
1020 /* Work out how much space we need */
1021 size = sizeof (ip6_hop_by_hop_header_t);
1023 if (trace_option_elts)
1025 size += sizeof (ip6_hop_by_hop_option_t);
1027 trace_data_size = fetch_trace_data_size(trace_type);
1028 if (trace_data_size == 0)
1029 return VNET_API_ERROR_INVALID_VALUE;
1031 if (trace_option_elts * trace_data_size > 254)
1032 return VNET_API_ERROR_INVALID_VALUE;
1034 size += trace_option_elts * trace_data_size;
1038 size += sizeof (ip6_hop_by_hop_option_t);
1039 size += sizeof (ioam_pow_option_t);
1042 /* Round to a multiple of 8 octets */
1043 rnd_size = (size + 7) & ~7;
1045 /* allocate it, zero-fill / pad by construction */
1046 vec_validate (rewrite, rnd_size-1);
1048 hbh = (ip6_hop_by_hop_header_t *) rewrite;
1049 /* Length of header in 8 octet units, not incl first 8 octets */
1050 hbh->length = (rnd_size>>3) - 1;
1051 current = (u8 *)(hbh+1);
1053 if (trace_option_elts)
1055 trace_option = (ioam_trace_option_t *)current;
1056 trace_option->hdr.type = HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST
1057 | HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE;
1058 trace_option->hdr.length =
1059 2 /*ioam_trace_type,data_list_elts_left */ +
1060 trace_option_elts * trace_data_size;
1061 trace_option->ioam_trace_type = trace_type & TRACE_TYPE_MASK;
1062 trace_option->data_list_elts_left = trace_option_elts;
1063 current += sizeof (ioam_trace_option_t) +
1064 trace_option_elts * trace_data_size;
1068 pow_option = (ioam_pow_option_t *)current;
1069 pow_option->hdr.type = HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK
1070 | HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE;
1071 pow_option->hdr.length = sizeof (ioam_pow_option_t) -
1072 sizeof (ip6_hop_by_hop_option_t);
1073 current += sizeof (ioam_pow_option_t);
/* Drop the configured iOAM rewrite: frees hm->rewrite and resets the trace
 * element count, the proof-of-work and PPC option flags, and the timestamp
 * precision (back to microseconds). */
1081 clear_ioam_rewrite_fn(void)
1083 ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
1085 vec_free(hm->rewrite);
1090 hm->trace_option_elts = 0;
1091 hm->has_pow_option = 0;
1092 hm->has_ppc_option = 0;
1093 hm->trace_tsp = TSP_MICROSECONDS;
/* CLI handler for "clear ioam rewrite": ignores its arguments and returns
 * whatever clear_ioam_rewrite_fn returns. */
1098 clib_error_t * clear_ioam_rewrite_command_fn (vlib_main_t * vm,
1099 unformat_input_t * input,
1100 vlib_cli_command_t * cmd)
1102 return(clear_ioam_rewrite_fn());
/* Registers the "clear ioam rewrite" CLI command. */
1105 VLIB_CLI_COMMAND (ip6_clear_ioam_trace_cmd, static) = {
1106 .path = "clear ioam rewrite",
1107 .short_help = "clear ioam rewrite",
1108 .function = clear_ioam_rewrite_command_fn,
1112 ip6_ioam_trace_profile_set(u32 trace_option_elts, u32 trace_type, u32 node_id,
1113 u32 app_data, int has_pow_option, u32 trace_tsp,
1117 ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
1118 rv = ip6_ioam_set_rewrite (&hm->rewrite, trace_type, trace_option_elts,
1119 has_pow_option, has_ppc_option);
1124 hm->node_id = node_id;
1125 hm->app_data = app_data;
1126 hm->trace_type = trace_type;
1127 hm->trace_option_elts = trace_option_elts;
1128 hm->has_pow_option = has_pow_option;
1129 hm->has_ppc_option = has_ppc_option;
1130 hm->trace_tsp = trace_tsp;
1134 return clib_error_return_code(0, rv, 0, "ip6_ioam_set_rewrite returned %d", rv);
/*
 * CLI handler for "set ioam rewrite". Parses the trace parameters
 * (trace-type, trace-elts, trace-tsp, node-id, app-data - matched as one
 * combined pattern), the optional "pow" keyword and the optional
 * "ppc encap|decap|none" keyword, then hands everything to
 * ip6_ioam_trace_profile_set. Unparsed values keep their defaults: zero,
 * with microsecond timestamp precision.
 */
1141 static clib_error_t *
1142 ip6_set_ioam_rewrite_command_fn (vlib_main_t * vm,
1143 unformat_input_t * input,
1144 vlib_cli_command_t * cmd)
1146 u32 trace_option_elts = 0;
1147 u32 trace_type = 0, node_id = 0;
1148 u32 app_data = 0, trace_tsp = TSP_MICROSECONDS;
1149 int has_pow_option = 0;
1150 int has_ppc_option = 0;
1151 clib_error_t * rv = 0;
1153 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1155 if (unformat (input, "trace-type 0x%x trace-elts %d "
1156 "trace-tsp %d node-id 0x%x app-data 0x%x",
1157 &trace_type, &trace_option_elts, &trace_tsp,
1158 &node_id, &app_data))
1160 else if (unformat (input, "pow"))
1162 else if (unformat (input, "ppc encap"))
1163 has_ppc_option = PPC_ENCAP;
1164 else if (unformat (input, "ppc decap"))
1165 has_ppc_option = PPC_DECAP;
1166 else if (unformat (input, "ppc none"))
1167 has_ppc_option = PPC_NONE;
1173 rv = ip6_ioam_trace_profile_set(trace_option_elts, trace_type, node_id,
1174 app_data, has_pow_option, trace_tsp, has_ppc_option);
/* Registers the "set ioam rewrite" CLI command. */
1180 VLIB_CLI_COMMAND (ip6_set_ioam_rewrite_cmd, static) = {
1181 .path = "set ioam rewrite",
1182 .short_help = "set ioam rewrite trace-type <0x1f|0x3|0x9|0x11|0x19> trace-elts <nn> trace-tsp <0|1|2|3> node-id <node id in hex> app-data <app_data in hex> [pow] [ppc <encap|decap>]",
1183 .function = ip6_set_ioam_rewrite_command_fn,
/*
 * CLI handler for "show ioam summary". Builds a text summary of the iOAM
 * configuration and prints it with vlib_cli_output:
 *  - the rewrite flow destination and operation (Add / Mod / Pop) when
 *    hm->adj is a non-zero address, otherwise "Not configured";
 *  - the trace profile (type, timestamp precision, element count, node id,
 *    app data) when trace elements are configured;
 *  - the proof-of-work and PPC option states.
 *
 * NOTE(review): ppc_state[] has three entries and is indexed directly by
 * hm->has_ppc_option - confirm every setter keeps the value within 0..2.
 */
1186 static clib_error_t *
1187 ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm,
1188 unformat_input_t * input,
1189 vlib_cli_command_t * cmd)
1191 ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
1195 if (!is_zero_ip6_address(&hm->adj))
1197 s = format(s, " REWRITE FLOW CONFIGS - \n");
1198 s = format(s, " Destination Address : %U\n",
1199 format_ip6_address, &hm->adj, sizeof(ip6_address_t));
1200 s = format(s, " Flow operation : %d (%s)\n", hm->ioam_flag,
1201 (hm->ioam_flag == IOAM_HBYH_ADD) ? "Add" :
1202 ((hm->ioam_flag == IOAM_HBYH_MOD) ? "Mod" : "Pop"));
1206 s = format(s, " REWRITE FLOW CONFIGS - Not configured\n");
1209 if (hm->trace_option_elts)
1211 s = format(s, " HOP BY HOP OPTIONS - TRACE CONFIG - \n");
1212 s = format(s, " Trace Type : 0x%x (%d)\n",
1213 hm->trace_type, hm->trace_type);
1214 s = format(s, " Trace timestamp precision : %d (%s)\n", hm->trace_tsp,
1215 (hm->trace_tsp == TSP_SECONDS) ? "Seconds" :
1216 ((hm->trace_tsp == TSP_MILLISECONDS) ? "Milliseconds" :
1217 (((hm->trace_tsp == TSP_MICROSECONDS) ? "Microseconds" : "Nanoseconds"))));
1218 s = format(s, " Num of trace nodes : %d\n",
1219 hm->trace_option_elts);
1220 s = format(s, " Node-id : 0x%x (%d)\n",
1221 hm->node_id, hm->node_id);
1222 s = format(s, " App Data : 0x%x (%d)\n",
1223 hm->app_data, hm->app_data);
1227 s = format(s, " HOP BY HOP OPTIONS - TRACE CONFIG - Not configured\n");
1230 s = format(s, " POW OPTION - %d (%s)\n",
1231 hm->has_pow_option, (hm->has_pow_option?"Enabled":"Disabled"));
1232 if (hm->has_pow_option)
1233 s = format(s, "Try 'show ioam sc-profile' for more information\n");
1235 s = format(s, " EDGE TO EDGE - PPC OPTION - %d (%s)\n",
1236 hm->has_ppc_option, ppc_state[hm->has_ppc_option]);
1237 if (hm->has_ppc_option)
1238 s = format(s, "Try 'show ioam ppc' for more information\n");
1240 vlib_cli_output(vm, "%v", s);
/* Registers the "show ioam summary" CLI command. */
1245 VLIB_CLI_COMMAND (ip6_show_ioam_run_cmd, static) = {
1246 .path = "show ioam summary",
1247 .short_help = "Summary of IOAM configuration",
1248 .function = ip6_show_ioam_summary_cmd_fn,
/*
 * Attach or detach iOAM h-b-h processing on the FIB entry for
 * addr/mask_width in the table identified by vrf_id.
 *
 * Exactly one of is_add, is_pop, is_none must be set, otherwise
 * VNET_API_ERROR_INVALID_VALUE_2 is returned. The FIB index is looked up by
 * table id (VNET_API_ERROR_NO_SUCH_FIB is one of the visible failure codes)
 * and the masked prefix is searched in the ip6 lookup table
 * (VNET_API_ERROR_NO_SUCH_ENTRY likewise).
 *
 * On the matching adjacency any previously saved lookup-next index is
 * restored first; for add / pop the current index is then saved and
 * replaced by the add- or pop-hop-by-hop lookup-next value. hm->ioam_flag
 * records the operation (Add, Pop, or Mod for "none").
 * NOTE(review): the conditions guarding the two lookup_next_index
 * assignments are on lines not visible in this view - presumably is_add and
 * is_pop respectively; confirm.
 */
1251 int ip6_ioam_set_destination (ip6_address_t *addr, u32 mask_width, u32 vrf_id,
1252 int is_add, int is_pop, int is_none)
1254 ip6_main_t * im = &ip6_main;
1255 ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
1256 ip_lookup_main_t * lm = &im->lookup_main;
1257 ip_adjacency_t * adj;
1262 BVT(clib_bihash_kv) kv, value;
1264 if ((is_add + is_pop + is_none) != 1)
1265 return VNET_API_ERROR_INVALID_VALUE_2;
1267 /* Go find the adjacency we're supposed to tickle */
1268 p = hash_get (im->fib_index_by_table_id, vrf_id);
1271 return VNET_API_ERROR_NO_SUCH_FIB;
1275 len = vec_len (im->prefix_lengths_in_search_order);
1277 for (i = 0; i < len; i++)
1279 int dst_address_length = im->prefix_lengths_in_search_order[i];
1280 ip6_address_t * mask = &im->fib_masks[dst_address_length];
1282 if (dst_address_length != mask_width)
1285 kv.key[0] = addr->as_u64[0] & mask->as_u64[0];
1286 kv.key[1] = addr->as_u64[1] & mask->as_u64[1];
1287 kv.key[2] = ((u64)((fib_index))<<32) | dst_address_length;
1289 rv = BV(clib_bihash_search_inline_2)(&im->ip6_lookup_table, &kv, &value);
1294 return VNET_API_ERROR_NO_SUCH_ENTRY;
1298 /* Got it, modify as directed... */
1299 adj_index = value.value;
1300 adj = ip_get_adjacency (lm, adj_index);
1302 /* Restore original lookup-next action */
1303 if (adj->saved_lookup_next_index)
1305 adj->lookup_next_index = adj->saved_lookup_next_index;
1306 adj->saved_lookup_next_index = 0;
1309 /* Save current action */
1310 if (is_add || is_pop)
1311 adj->saved_lookup_next_index = adj->lookup_next_index;
1314 adj->lookup_next_index = IP_LOOKUP_NEXT_ADD_HOP_BY_HOP;
1317 adj->lookup_next_index = IP_LOOKUP_NEXT_POP_HOP_BY_HOP;
1320 hm->ioam_flag = (is_add ? IOAM_HBYH_ADD :
1321 (is_pop ? IOAM_HBYH_POP : IOAM_HBYH_MOD));
/*
 * CLI handler for "set ioam destination". Parses <ip6-address>/<width>, an
 * optional "vrf-id <n>" and one of the keywords add / pop / none, then calls
 * ip6_ioam_set_destination. Fails early when the keyword count is not
 * exactly one or when no prefix was given (mask_width still ~0); a failure
 * code from ip6_ioam_set_destination is reported as a CLI error.
 */
1325 static clib_error_t *
1326 ip6_set_ioam_destination_command_fn (vlib_main_t * vm,
1327 unformat_input_t * input,
1328 vlib_cli_command_t * cmd)
1331 u32 mask_width = ~0;
1338 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1340 if (unformat (input, "%U/%d",
1341 unformat_ip6_address, &addr, &mask_width))
1343 else if (unformat (input, "vrf-id %d", &vrf_id))
1345 else if (unformat (input, "add"))
1347 else if (unformat (input, "pop"))
1349 else if (unformat (input, "none"))
1355 if ((is_add + is_pop + is_none) != 1)
1356 return clib_error_return (0, "One of (add, pop, none) required");
1357 if (mask_width == ~0)
1358 return clib_error_return (0, "<address>/<mask-width> required");
1360 rv = ip6_ioam_set_destination (&addr, mask_width, vrf_id,
1361 is_add, is_pop, is_none);
1368 return clib_error_return (0, "ip6_ioam_set_destination returned %d", rv);
/* Registers the "set ioam destination" CLI command. */
1374 VLIB_CLI_COMMAND (ip6_set_ioam_destination_cmd, static) = {
1375 .path = "set ioam destination",
1376 .short_help = "set ioam destination <ip6-address>/<width> add | pop | none",
1377 .function = ip6_set_ioam_destination_command_fn,
1380 void vnet_register_ioam_end_of_path_callback (void *cb)
1382 ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
1384 hm->ioam_end_of_path_cb = cb;