2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
15 #include <vlib/vlib.h>
16 #include <vnet/vnet.h>
17 #include <vnet/pg/pg.h>
18 #include <vppinfra/error.h>
20 #include <vnet/ip/ip.h>
22 #include <vppinfra/hash.h>
23 #include <vppinfra/error.h>
24 #include <vppinfra/elog.h>
26 #include <vnet/ip/ip6_hop_by_hop.h>
/*
 * File-scope state and the next-node enumeration shared by the nodes
 * in this file.
 *
 *   trace_tsp_mul       - four multipliers (1, 1e3, 1e6, 1e9), indexed by
 *                         the configured timestamp precision.
 *   ppc_state           - display names "None", "Encap", "Decap".
 *   ip6_hop_by_hop_main - the single ip6_hop_by_hop_main_t instance.
 *   foreach_ip6_hbyh_input_next
 *                       - X-macro of (symbol, node name) pairs; expanded
 *                         with _(s,n) to produce the
 *                         IP6_HBYH_INPUT_NEXT_<symbol> enumerators, which
 *                         are followed by IP6_HBYH_INPUT_N_NEXT.
 */
28 /* Timestamp precision multipliers for seconds, milliseconds, microseconds
29 * and nanoseconds respectively.
31 static f64 trace_tsp_mul[4] = {1, 1e3, 1e6, 1e9};
33 char *ppc_state[] = {"None", "Encap", "Decap"};
35 ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
39 #define foreach_ip6_hbyh_input_next \
40 _(IP6_REWRITE, "ip6-rewrite") \
41 _(IP6_LOOKUP, "ip6-lookup") \
42 _(IP6_HBYH, "ip6-hop-by-hop")\
43 _(IP6_POP_HBYH, "ip6-pop-hop-by-hop")\
47 #define _(s,n) IP6_HBYH_INPUT_NEXT_##s,
48 foreach_ip6_hbyh_input_next
50 IP6_HBYH_INPUT_N_NEXT,
51 } ip6_hbyh_input_next_t;
54 * ip6 hop-by-hop option handling. We push pkts with h-b-h options to
55 * ip6_hop_by_hop_node_fn from ip6-lookup at a cost of ~2 clocks/pkt in
58 * We parse through the h-b-h option TLVs, specifically looking for
59 * HBH_OPTION_TYPE_IOAM_DATA_LIST. [Someone needs to get bananas from
60 * IANA, aka to actually allocate the option TLV codes.]
62 * If we find the indicated option type, and we have remaining list
63 * elements in the trace list, allocate and populate the trace list
66 * At the ingress edge: punch in the h-b-h rewrite, then visit the
67 * standard h-b-h option handler. We have to be careful in the standard
68 * h-b-h handler, to avoid looping until we run out of rewrite space.
69 * Ask me how I know that.
72 * decide on egress point "pop and count" scheme
73 * time stamp handling: usec since the top of the hour?
74 * configure the node id
75 * trace list application data support
76 * cons up analysis / steering plug-in(s)
77 * add configuration binary APIs, vpp_api_test_support, yang models and
79 * perf tune: dual loop, replace memcpy w/ N x 8-byte load/stores
84 * primary h-b-h handler trace support
85 * We work pretty hard on the problem for obvious reasons
/*
 * Tail of the per-packet trace record used by the ip6-hop-by-hop node.
 * Elsewhere in this file the record is also accessed through the fields
 * next_index, trace_len and option_data.
 */
90 u32 timestamp_msbs; /* Store the top set of bits of timestamp */
92 } ip6_hop_by_hop_trace_t;
100 fetch_trace_data_size(u8 trace_type)
102 u8 trace_data_size = 0;
104 if (trace_type == TRACE_TYPE_IF_TS_APP)
105 trace_data_size = sizeof(ioam_trace_if_ts_app_t);
106 else if(trace_type == TRACE_TYPE_IF)
107 trace_data_size = sizeof(ioam_trace_if_t);
108 else if(trace_type == TRACE_TYPE_TS)
109 trace_data_size = sizeof(ioam_trace_ts_t);
110 else if(trace_type == TRACE_TYPE_APP)
111 trace_data_size = sizeof(ioam_trace_app_t);
112 else if(trace_type == TRACE_TYPE_TS_APP)
113 trace_data_size = sizeof(ioam_trace_ts_app_t);
115 return trace_data_size;
118 static u8 * format_ioam_data_list_element (u8 * s, va_list * args)
120 u32 *elt = va_arg (*args, u32 *);
121 u8 *trace_type_p = va_arg (*args, u8 *);
122 u8 trace_type = *trace_type_p;
125 if (trace_type & BIT_TTL_NODEID)
127 u32 ttl_node_id_host_byte_order = clib_net_to_host_u32 (*elt);
128 s = format (s, "ttl 0x%x node id 0x%x ",
129 ttl_node_id_host_byte_order>>24,
130 ttl_node_id_host_byte_order & 0x00FFFFFF);
135 if (trace_type & BIT_ING_INTERFACE && trace_type & BIT_ING_INTERFACE)
137 u32 ingress_host_byte_order = clib_net_to_host_u32(*elt);
138 s = format (s, "ingress 0x%x egress 0x%x ",
139 ingress_host_byte_order >> 16,
140 ingress_host_byte_order &0xFFFF);
144 if (trace_type & BIT_TIMESTAMP)
146 u32 ts_in_host_byte_order = clib_net_to_host_u32 (*elt);
147 s = format (s, "ts 0x%x \n", ts_in_host_byte_order);
151 if (trace_type & BIT_APPDATA)
153 u32 appdata_in_host_byte_order = clib_net_to_host_u32 (*elt);
154 s = format (s, "app 0x%x ", appdata_in_host_byte_order);
161 static u8 * format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
163 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
164 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
165 ip6_hop_by_hop_trace_t * t = va_arg (*args, ip6_hop_by_hop_trace_t *);
166 ip6_hop_by_hop_header_t *hbh0;
167 ip6_hop_by_hop_option_t *opt0, *limit0;
168 ioam_trace_option_t * trace0;
169 u8 trace_data_size_in_words = 0;
174 hbh0 = (ip6_hop_by_hop_header_t *)t->option_data;
176 s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d\n",
177 t->next_index, (hbh0->length+1)<<3, t->trace_len);
179 opt0 = (ip6_hop_by_hop_option_t *) (hbh0+1);
180 limit0 = (ip6_hop_by_hop_option_t *) ((u8 *)hbh0) + t->trace_len;
182 while (opt0 < limit0)
184 type0 = opt0->type & HBH_OPTION_TYPE_MASK;
188 case HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST:
189 trace0 = (ioam_trace_option_t *)opt0;
190 s = format (s, " Trace Type 0x%x , %d elts left ts msb(s) 0x%x\n",
191 trace0->ioam_trace_type, trace0->data_list_elts_left,
193 trace_data_size_in_words =
194 fetch_trace_data_size(trace0->ioam_trace_type)/4;
195 elt0 = &trace0->elts[0];
197 ((u8 *)(&trace0->elts[0]) + trace0->hdr.length - 2
198 /* -2 accounts for ioam_trace_type,elts_left */))
200 s = format (s, " [%d] %U\n",elt_index,
201 format_ioam_data_list_element,
202 elt0, &trace0->ioam_trace_type);
204 elt0 += trace_data_size_in_words;
207 opt0 = (ip6_hop_by_hop_option_t *)
208 (((u8 *)opt0) + opt0->length
209 + sizeof (ip6_hop_by_hop_option_t));
212 case HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK:
213 s = format (s, " POW opt present\n");
214 opt0 = (ip6_hop_by_hop_option_t *)
215 (((u8 *)opt0) + sizeof (ioam_pow_option_t));
218 case 0: /* Pad, just stop */
219 opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1;
223 s = format (s, "Unknown %d", type0);
224 opt0 = (ip6_hop_by_hop_option_t *)
225 (((u8 *)opt0) + opt0->length
226 + sizeof (ip6_hop_by_hop_option_t));
/* Node registration object; its .index is used when bumping counters. */
233 vlib_node_registration_t ip6_hop_by_hop_node;
/*
 * X-macro listing the counters of the ip6-hop-by-hop node as
 * (symbol, description) pairs.
 */
235 #define foreach_ip6_hop_by_hop_error \
236 _(PROCESSED, "Pkts with ip6 hop-by-hop options") \
237 _(UNKNOWN_OPTION, "Unknown ip6 hop-by-hop options")
/* Expansion 1: IP6_HOP_BY_HOP_ERROR_<symbol> enumerators. */
240 #define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
241 foreach_ip6_hop_by_hop_error
243 IP6_HOP_BY_HOP_N_ERROR,
244 } ip6_hop_by_hop_error_t;
/* Expansion 2: the description strings, in the same order. */
246 static char * ip6_hop_by_hop_error_strings[] = {
247 #define _(sym,string) string,
248 foreach_ip6_hop_by_hop_error
/*
 * Node function registered for the "ip6-hop-by-hop" graph node.
 *
 * For every buffer in the frame the single-packet loop:
 *   - locates the hop-by-hop header directly after the ip6 header and
 *     walks its options up to (hbh0->length+1)*8 bytes;
 *   - for an iOAM trace-data-list option that still has elements left,
 *     decrements data_list_elts_left and fills the element at that slot
 *     according to the bits set in ioam_trace_type;
 *   - picks the next node from l2_classify.opaque_index (OI_DECAP goes to
 *     the pop node, anything else to ip6-rewrite) and resets that field
 *     to ~0;
 *   - when the buffer is traced, copies the h-b-h header (clipped to the
 *     size of option_data) into the trace record.
 * The dual-loop variant is compiled out with #if 0.
 *
 * vm    - vlib main
 * node  - this node's runtime
 * frame - frame of buffer indices to process
 *
 * Returns frame->n_vectors.
 */
253 ip6_hop_by_hop_node_fn (vlib_main_t * vm,
254 vlib_node_runtime_t * node,
255 vlib_frame_t * frame)
257 ip6_main_t * im = &ip6_main;
258 ip_lookup_main_t * lm = &im->lookup_main;
259 ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
260 u32 n_left_from, * from, * to_next;
261 ip_lookup_next_t next_index;
262 u32 processed = 0, unknown_opts = 0;
267 from = vlib_frame_vector_args (frame);
268 n_left_from = frame->n_vectors;
269 next_index = node->cached_next_index;
271 while (n_left_from > 0)
275 vlib_get_next_frame (vm, node, next_index,
276 to_next, n_left_to_next);
/* Disabled template for a two-packets-per-iteration loop. */
278 #if 0 /* $$$ DUAL-LOOP ME */
279 while (n_left_from >= 4 && n_left_to_next >= 2)
281 u32 next0 = IP6_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
282 u32 next1 = IP6_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
283 u32 sw_if_index0, sw_if_index1;
285 ethernet_header_t *en0, *en1;
287 vlib_buffer_t * b0, * b1;
289 /* Prefetch next iteration. */
291 vlib_buffer_t * p2, * p3;
293 p2 = vlib_get_buffer (vm, from[2]);
294 p3 = vlib_get_buffer (vm, from[3]);
296 vlib_prefetch_buffer_header (p2, LOAD);
297 vlib_prefetch_buffer_header (p3, LOAD);
299 CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
300 CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
303 /* speculatively enqueue b0 and b1 to the current next frame */
304 to_next[0] = bi0 = from[0];
305 to_next[1] = bi1 = from[1];
311 b0 = vlib_get_buffer (vm, bi0);
312 b1 = vlib_get_buffer (vm, bi1);
314 /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
315 ASSERT (b0->current_data == 0);
316 ASSERT (b1->current_data == 0);
318 ip0 = vlib_buffer_get_current (b0);
/* NOTE(review): ip1 is read from b0, not b1 -- fix before enabling the dual loop. */
319 ip1 = vlib_buffer_get_current (b0);
321 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
322 sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
324 /* $$$$$ End of processing 2 x packets $$$$$ */
326 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
328 if (b0->flags & VLIB_BUFFER_IS_TRACED)
330 ip6_hop_by_hop_trace_t *t =
331 vlib_add_trace (vm, node, b0, sizeof (*t));
332 t->sw_if_index = sw_if_index0;
333 t->next_index = next0;
335 if (b1->flags & VLIB_BUFFER_IS_TRACED)
337 ip6_hop_by_hop_trace_t *t =
338 vlib_add_trace (vm, node, b1, sizeof (*t));
339 t->sw_if_index = sw_if_index1;
340 t->next_index = next1;
344 /* verify speculative enqueues, maybe switch current next frame */
345 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
346 to_next, n_left_to_next,
347 bi0, bi1, next0, next1);
/* Single-packet loop: this is the code path that actually runs. */
351 while (n_left_from > 0 && n_left_to_next > 0)
358 ip_adjacency_t * adj0;
359 ip6_hop_by_hop_header_t *hbh0;
360 ip6_hop_by_hop_option_t *opt0, *limit0;
361 ioam_trace_option_t * trace0;
365 /* speculatively enqueue b0 to the current next frame */
373 b0 = vlib_get_buffer (vm, bi0);
375 ip0 = vlib_buffer_get_current (b0);
376 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
377 adj0 = ip_get_adjacency (lm, adj_index0);
/* The h-b-h header is taken to start right after the ip6 header. */
378 hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
379 opt0 = (ip6_hop_by_hop_option_t *)(hbh0+1);
/* limit0: first byte past the h-b-h header, (length+1) * 8 bytes in. */
380 limit0 = (ip6_hop_by_hop_option_t *)
381 ((u8 *)hbh0 + ((hbh0->length+1)<<3));
383 /* Scan the set of h-b-h options, process ones that we understand */
384 while (opt0 < limit0)
386 type0 = opt0->type & HBH_OPTION_TYPE_MASK;
389 case HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST:
390 trace0 = (ioam_trace_option_t *)opt0;
391 if (PREDICT_TRUE (trace0->data_list_elts_left))
393 trace0->data_list_elts_left--;
394 /* fetch_trace_data_size returns in bytes. Convert it to 4-bytes
395 * to skip to this node's location.
397 elt_index = trace0->data_list_elts_left *
398 fetch_trace_data_size(trace0->ioam_trace_type)/4;
399 elt0 = &trace0->elts[elt_index];
400 if (trace0->ioam_trace_type & BIT_TTL_NODEID)
403 clib_host_to_net_u32 ((ip0->hop_limit<<24)
408 if (trace0->ioam_trace_type & BIT_ING_INTERFACE)
411 (vnet_buffer(b0)->sw_if_index[VLIB_RX]&0xFFFF) << 16 | (adj0->rewrite_header.sw_if_index & 0xFFFF);
412 *elt0 = clib_host_to_net_u32(*elt0);
416 if (trace0->ioam_trace_type & BIT_TIMESTAMP)
418 /* Send least significant 32 bits */
/* Wall-clock estimate: start-up unix time plus vlib time elapsed since init. */
419 f64 time_f64 = (f64)(((f64)hm->unix_time_0) +
420 (vlib_time_now(hm->vlib_main) - hm->vlib_time_0));
423 time_f64 * trace_tsp_mul[hm->trace_tsp];
/* NOTE(review): as_u32[0] is the low half only on little-endian hosts -- confirm. */
424 *elt0 = clib_host_to_net_u32(time_u64.as_u32[0]);
428 if (trace0->ioam_trace_type & BIT_APPDATA)
430 /* $$$ set elt0->app_data */
431 *elt0 = clib_host_to_net_u32(hm->app_data);
/* Skip to the next option: option header plus opt0->length data bytes. */
436 opt0 = (ip6_hop_by_hop_option_t *)
437 (((u8 *)opt0) + opt0->length
438 + sizeof (ip6_hop_by_hop_option_t));
/* The POW option is skipped using its fixed structure size. */
441 case HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK:
442 opt0 = (ip6_hop_by_hop_option_t *)
443 (((u8 *)opt0) + sizeof (ioam_pow_option_t));
/*
 * NOTE(review): the cast binds tighter than '+', so this steps by one
 * ip6_hop_by_hop_option_t, not by one byte. If this is the pad case
 * (its label is not visible here) a one-byte step was probably meant:
 * (ip6_hop_by_hop_option_t *) (((u8 *)opt0) + 1).
 */
447 opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1;
451 opt0 = (ip6_hop_by_hop_option_t *)
452 (((u8 *)opt0) + opt0->length
453 + sizeof (ip6_hop_by_hop_option_t));
/* OI_DECAP buffers go on to the pop node, all others to ip6-rewrite. */
460 next0 = (vnet_buffer(b0)->l2_classify.opaque_index == OI_DECAP) ?
461 IP6_HBYH_INPUT_NEXT_IP6_POP_HBYH : IP6_HBYH_INPUT_NEXT_IP6_REWRITE;
462 vnet_buffer(b0)->l2_classify.opaque_index = ~0;
464 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
465 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
467 ip6_hop_by_hop_trace_t *t =
468 vlib_add_trace (vm, node, b0, sizeof (*t));
469 u32 trace_len = (hbh0->length+1)<<3;
470 t->next_index = next0;
471 /* Capture the h-b-h option verbatim */
/* Clip the copy so it never exceeds the option_data array. */
472 trace_len = trace_len < ARRAY_LEN(t->option_data) ?
473 trace_len : ARRAY_LEN(t->option_data);
474 t->trace_len = trace_len;
/*
 * NOTE(review): in the lines visible here time_u64 is only written in the
 * BIT_TIMESTAMP branch; confirm it is initialised for packets that take
 * no timestamp.
 */
475 t->timestamp_msbs = time_u64.as_u32[1];
476 memcpy (t->option_data, hbh0, trace_len);
481 /* verify speculative enqueue, maybe switch current next frame */
482 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
483 to_next, n_left_to_next,
487 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
/* Per-frame counter updates. */
490 if (PREDICT_FALSE(unknown_opts > 0)) {
491 vlib_node_increment_counter (vm, ip6_hop_by_hop_node.index,
492 IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION, unknown_opts);
495 vlib_node_increment_counter (vm, ip6_hop_by_hop_node.index,
496 IP6_HOP_BY_HOP_ERROR_PROCESSED, processed);
497 return frame->n_vectors;
/*
 * Registration of the "ip6-hop-by-hop" internal node: node function,
 * trace formatter, counter strings, and the next-node table generated
 * from foreach_ip6_hbyh_input_next.
 */
500 VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = {
501 .function = ip6_hop_by_hop_node_fn,
502 .name = "ip6-hop-by-hop",
503 .vector_size = sizeof (u32),
504 .format_trace = format_ip6_hop_by_hop_trace,
505 .type = VLIB_NODE_TYPE_INTERNAL,
507 .n_errors = ARRAY_LEN(ip6_hop_by_hop_error_strings),
508 .error_strings = ip6_hop_by_hop_error_strings,
510 .n_next_nodes = IP6_HBYH_INPUT_N_NEXT,
/* Each (symbol, name) pair becomes [IP6_HBYH_INPUT_NEXT_<symbol>] = name. */
512 #define _(s,n) [IP6_HBYH_INPUT_NEXT_##s] = n,
513 foreach_ip6_hbyh_input_next
/* Trace record of the ip6-add-hop-by-hop node (only its tail is visible here). */
518 /* The main h-b-h tracer will be invoked, no need to do much here */
521 } ip6_add_hop_by_hop_trace_t;
523 /* packet trace format function */
524 static u8 * format_ip6_add_hop_by_hop_trace (u8 * s, va_list * args)
526 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
527 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
528 ip6_add_hop_by_hop_trace_t * t = va_arg (*args,
529 ip6_add_hop_by_hop_trace_t *);
531 s = format (s, "IP6_ADD_HOP_BY_HOP: next index %d",
/* Node registration object; its .index is used when bumping counters. */
536 vlib_node_registration_t ip6_add_hop_by_hop_node;
/* X-macro listing the counters of the ip6-add-hop-by-hop node. */
538 #define foreach_ip6_add_hop_by_hop_error \
539 _(PROCESSED, "Pkts w/ added ip6 hop-by-hop options")
/* Expansion 1: IP6_ADD_HOP_BY_HOP_ERROR_<symbol> enumerators. */
542 #define _(sym,str) IP6_ADD_HOP_BY_HOP_ERROR_##sym,
543 foreach_ip6_add_hop_by_hop_error
545 IP6_ADD_HOP_BY_HOP_N_ERROR,
546 } ip6_add_hop_by_hop_error_t;
/* Expansion 2: the description strings, in the same order. */
548 static char * ip6_add_hop_by_hop_error_strings[] = {
549 #define _(sym,string) string,
550 foreach_ip6_add_hop_by_hop_error
/*
 * Node function registered for the "ip6-add-hop-by-hop" graph node.
 *
 * For every buffer in the frame the single-packet loop:
 *   - copies the ip6 header (five 64-bit words) rewrite_length bytes
 *     towards the start of the buffer and moves the buffer's current
 *     position back by the same amount;
 *   - copies the pre-built rewrite (hm->rewrite) into the gap right
 *     after the relocated ip6 header;
 *   - stores ip0->protocol into the new header's protocol field and adds
 *     rewrite_length to the ip6 payload length;
 *   - sends the buffer to the ip6-lookup next node.
 *
 * vm    - vlib main
 * node  - this node's runtime
 * frame - frame of buffer indices to process
 *
 * Returns frame->n_vectors.
 */
555 ip6_add_hop_by_hop_node_fn (vlib_main_t * vm,
556 vlib_node_runtime_t * node,
557 vlib_frame_t * frame)
559 ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
560 u32 n_left_from, * from, * to_next;
561 ip_lookup_next_t next_index;
/* The rewrite and its length are read once per frame. */
563 u8 * rewrite = hm->rewrite;
564 u32 rewrite_length = vec_len (rewrite);
566 from = vlib_frame_vector_args (frame);
567 n_left_from = frame->n_vectors;
568 next_index = node->cached_next_index;
570 while (n_left_from > 0)
574 vlib_get_next_frame (vm, node, next_index,
575 to_next, n_left_to_next);
/*
 * NOTE(review): the dual-loop section below is the same template that
 * ip6_hop_by_hop_node_fn keeps under "#if 0"; presumably it is compiled
 * out here as well (the guard is not visible) -- confirm.
 */
578 while (n_left_from >= 4 && n_left_to_next >= 2)
580 u32 next0 = IP6_ADD_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
581 u32 next1 = IP6_ADD_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
582 u32 sw_if_index0, sw_if_index1;
584 ethernet_header_t *en0, *en1;
586 vlib_buffer_t * b0, * b1;
588 /* Prefetch next iteration. */
590 vlib_buffer_t * p2, * p3;
592 p2 = vlib_get_buffer (vm, from[2]);
593 p3 = vlib_get_buffer (vm, from[3]);
595 vlib_prefetch_buffer_header (p2, LOAD);
596 vlib_prefetch_buffer_header (p3, LOAD);
598 CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
599 CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
602 /* speculatively enqueue b0 and b1 to the current next frame */
603 to_next[0] = bi0 = from[0];
604 to_next[1] = bi1 = from[1];
610 b0 = vlib_get_buffer (vm, bi0);
611 b1 = vlib_get_buffer (vm, bi1);
613 /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
614 ASSERT (b0->current_data == 0);
615 ASSERT (b1->current_data == 0);
617 ip0 = vlib_buffer_get_current (b0);
/* NOTE(review): ip1 is read from b0, not b1. */
618 ip1 = vlib_buffer_get_current (b0);
620 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
621 sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
623 /* $$$$$ End of processing 2 x packets $$$$$ */
625 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
627 if (b0->flags & VLIB_BUFFER_IS_TRACED)
629 ip6_add_hop_by_hop_trace_t *t =
630 vlib_add_trace (vm, node, b0, sizeof (*t));
631 t->sw_if_index = sw_if_index0;
632 t->next_index = next0;
634 if (b1->flags & VLIB_BUFFER_IS_TRACED)
636 ip6_add_hop_by_hop_trace_t *t =
637 vlib_add_trace (vm, node, b1, sizeof (*t));
638 t->sw_if_index = sw_if_index1;
639 t->next_index = next1;
643 /* verify speculative enqueues, maybe switch current next frame */
644 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
645 to_next, n_left_to_next,
646 bi0, bi1, next0, next1);
/* Single-packet loop. */
650 while (n_left_from > 0 && n_left_to_next > 0)
656 ip6_hop_by_hop_header_t * hbh0;
657 u64 * copy_src0, * copy_dst0;
660 /* speculatively enqueue b0 to the current next frame */
668 b0 = vlib_get_buffer (vm, bi0);
670 ip0 = vlib_buffer_get_current (b0);
672 /* Copy the ip header left by the required amount */
673 copy_dst0 = (u64 *)(((u8 *)ip0) - rewrite_length);
674 copy_src0 = (u64 *) ip0;
/* Five 64-bit words are moved, lowest word first. */
676 copy_dst0 [0] = copy_src0 [0];
677 copy_dst0 [1] = copy_src0 [1];
678 copy_dst0 [2] = copy_src0 [2];
679 copy_dst0 [3] = copy_src0 [3];
680 copy_dst0 [4] = copy_src0 [4];
/* Negative advance: the buffer now starts at the relocated ip6 header. */
681 vlib_buffer_advance (b0, - (word)rewrite_length);
682 ip0 = vlib_buffer_get_current (b0);
684 hbh0 = (ip6_hop_by_hop_header_t *)(ip0 + 1);
685 /* $$$ tune, rewrite_length is a multiple of 8 */
686 memcpy (hbh0, rewrite, rewrite_length);
687 /* Patch the protocol chain, insert the h-b-h (type 0) header */
688 hbh0->protocol = ip0->protocol;
/* The payload length grows by the size of the inserted header. */
690 new_l0 = clib_net_to_host_u16 (ip0->payload_length) + rewrite_length;
691 ip0->payload_length = clib_host_to_net_u16 (new_l0);
693 /* Populate the (first) h-b-h list elt */
694 next0 = IP6_HBYH_INPUT_NEXT_IP6_LOOKUP;
696 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
697 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
699 ip6_add_hop_by_hop_trace_t *t =
700 vlib_add_trace (vm, node, b0, sizeof (*t));
701 t->next_index = next0;
706 /* verify speculative enqueue, maybe switch current next frame */
707 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
708 to_next, n_left_to_next,
712 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
715 vlib_node_increment_counter (vm, ip6_add_hop_by_hop_node.index,
716 IP6_ADD_HOP_BY_HOP_ERROR_PROCESSED, processed);
717 return frame->n_vectors;
/*
 * Registration of the "ip6-add-hop-by-hop" internal node: node function,
 * trace formatter, counter strings, and the next-node table generated
 * from foreach_ip6_hbyh_input_next.
 */
720 VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = {
721 .function = ip6_add_hop_by_hop_node_fn,
722 .name = "ip6-add-hop-by-hop",
723 .vector_size = sizeof (u32),
724 .format_trace = format_ip6_add_hop_by_hop_trace,
725 .type = VLIB_NODE_TYPE_INTERNAL,
727 .n_errors = ARRAY_LEN(ip6_add_hop_by_hop_error_strings),
728 .error_strings = ip6_add_hop_by_hop_error_strings,
730 /* See ip/lookup.h */
731 .n_next_nodes = IP6_HBYH_INPUT_N_NEXT,
/* Each (symbol, name) pair becomes [IP6_HBYH_INPUT_NEXT_<symbol>] = name. */
733 #define _(s,n) [IP6_HBYH_INPUT_NEXT_##s] = n,
734 foreach_ip6_hbyh_input_next
/* Trace record of the ip6-pop-hop-by-hop node (only its tail is visible here). */
740 /* The main h-b-h tracer was already invoked, no need to do much here */
743 } ip6_pop_hop_by_hop_trace_t;
745 /* packet trace format function */
746 static u8 * format_ip6_pop_hop_by_hop_trace (u8 * s, va_list * args)
748 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
749 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
750 ip6_pop_hop_by_hop_trace_t * t = va_arg (*args, ip6_pop_hop_by_hop_trace_t *);
752 s = format (s, "IP6_POP_HOP_BY_HOP: next index %d",
/* Node registration object; its .index is used when bumping counters. */
757 vlib_node_registration_t ip6_pop_hop_by_hop_node;
/* X-macro listing the counters of the ip6-pop-hop-by-hop node. */
759 #define foreach_ip6_pop_hop_by_hop_error \
760 _(PROCESSED, "Pkts w/ removed ip6 hop-by-hop options") \
761 _(NO_HOHO, "Pkts w/ no ip6 hop-by-hop options")
/* Expansion 1: IP6_POP_HOP_BY_HOP_ERROR_<symbol> enumerators. */
764 #define _(sym,str) IP6_POP_HOP_BY_HOP_ERROR_##sym,
765 foreach_ip6_pop_hop_by_hop_error
767 IP6_POP_HOP_BY_HOP_N_ERROR,
768 } ip6_pop_hop_by_hop_error_t;
/* Expansion 2: the description strings, in the same order. */
770 static char * ip6_pop_hop_by_hop_error_strings[] = {
771 #define _(sym,string) string,
772 foreach_ip6_pop_hop_by_hop_error
/*
 * Node function registered for the "ip6-pop-hop-by-hop" graph node.
 *
 * For every buffer whose ip6 next-header is 0 (hop-by-hop):
 *   - if l2_classify.opaque_index is OI_DECAP the buffer is on its first
 *     pass and the next node is set to ip6-lookup;
 *   - the next node is taken from the registered end-of-path callback
 *     when one is set, or ip6-rewrite when it is not;
 *   - the h-b-h header is removed: the buffer is advanced past it, the
 *     payload length is reduced, ip0->protocol takes the value the h-b-h
 *     header carried, and the ip6 header is copied forward so that it
 *     sits directly in front of the remaining payload.
 * How those steps are nested is not fully visible in this listing.
 * The other visible assignment sends the buffer to ip6-lookup.
 *
 * vm    - vlib main
 * node  - this node's runtime
 * frame - frame of buffer indices to process
 *
 * Returns frame->n_vectors.
 */
777 ip6_pop_hop_by_hop_node_fn (vlib_main_t * vm,
778 vlib_node_runtime_t * node,
779 vlib_frame_t * frame)
781 ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
782 ip6_main_t * im = &ip6_main;
783 ip_lookup_main_t * lm = &im->lookup_main;
784 u32 n_left_from, * from, * to_next;
785 ip_lookup_next_t next_index;
/* Local copy of the optional end-of-path callback; may be null. */
788 u32 (*ioam_end_of_path_cb) (vlib_main_t *, vlib_node_runtime_t *,
789 vlib_buffer_t *, ip6_header_t *,
792 ioam_end_of_path_cb = hm->ioam_end_of_path_cb;
794 from = vlib_frame_vector_args (frame);
795 n_left_from = frame->n_vectors;
796 next_index = node->cached_next_index;
798 while (n_left_from > 0)
802 vlib_get_next_frame (vm, node, next_index,
803 to_next, n_left_to_next);
/*
 * NOTE(review): the dual-loop section below is the same template that
 * ip6_hop_by_hop_node_fn keeps under "#if 0"; presumably it is compiled
 * out here as well (the guard is not visible) -- confirm.
 */
806 while (n_left_from >= 4 && n_left_to_next >= 2)
808 u32 next0 = IP6_POP_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
809 u32 next1 = IP6_POP_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
810 u32 sw_if_index0, sw_if_index1;
812 ethernet_header_t *en0, *en1;
814 vlib_buffer_t * b0, * b1;
816 /* Prefetch next iteration. */
818 vlib_buffer_t * p2, * p3;
820 p2 = vlib_get_buffer (vm, from[2]);
821 p3 = vlib_get_buffer (vm, from[3]);
823 vlib_prefetch_buffer_header (p2, LOAD);
824 vlib_prefetch_buffer_header (p3, LOAD);
826 CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
827 CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
830 /* speculatively enqueue b0 and b1 to the current next frame */
831 to_next[0] = bi0 = from[0];
832 to_next[1] = bi1 = from[1];
838 b0 = vlib_get_buffer (vm, bi0);
839 b1 = vlib_get_buffer (vm, bi1);
841 /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
842 ASSERT (b0->current_data == 0);
843 ASSERT (b1->current_data == 0);
845 ip0 = vlib_buffer_get_current (b0);
/* NOTE(review): ip1 is read from b0, not b1. */
846 ip1 = vlib_buffer_get_current (b0);
848 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
849 sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
851 /* $$$$$ End of processing 2 x packets $$$$$ */
853 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
855 if (b0->flags & VLIB_BUFFER_IS_TRACED)
857 ip6_pop_hop_by_hop_trace_t *t =
858 vlib_add_trace (vm, node, b0, sizeof (*t));
859 t->sw_if_index = sw_if_index0;
860 t->next_index = next0;
862 if (b1->flags & VLIB_BUFFER_IS_TRACED)
864 ip6_pop_hop_by_hop_trace_t *t =
865 vlib_add_trace (vm, node, b1, sizeof (*t));
866 t->sw_if_index = sw_if_index1;
867 t->next_index = next1;
871 /* verify speculative enqueues, maybe switch current next frame */
872 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
873 to_next, n_left_to_next,
874 bi0, bi1, next0, next1);
/* Single-packet loop. */
878 while (n_left_from > 0 && n_left_to_next > 0)
885 ip_adjacency_t * adj0;
886 ip6_hop_by_hop_header_t *hbh0;
887 u64 * copy_dst0, * copy_src0;
890 /* speculatively enqueue b0 to the current next frame */
898 b0 = vlib_get_buffer (vm, bi0);
900 ip0 = vlib_buffer_get_current (b0);
901 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
902 adj0 = ip_get_adjacency (lm, adj_index0);
904 /* Perfectly normal to end up here w/ out h-b-h header */
/* Next-header value 0 identifies a hop-by-hop options header. */
905 if (PREDICT_TRUE (ip0->protocol == 0))
907 hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
909 if (vnet_buffer(b0)->l2_classify.opaque_index == OI_DECAP)
910 { /* First pass. Send to hbyh node. */
911 next0 = IP6_HBYH_INPUT_NEXT_IP6_LOOKUP;
916 /* Collect data from trace via callback */
/* Without a registered callback the buffer goes to ip6-rewrite. */
917 next0 = ioam_end_of_path_cb ?
918 ioam_end_of_path_cb (vm, node, b0, ip0, adj0) :
919 IP6_HBYH_INPUT_NEXT_IP6_REWRITE;
922 /* Pop the trace data */
923 vlib_buffer_advance (b0, (hbh0->length+1)<<3);
924 new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
925 ((hbh0->length+1)<<3);
926 ip0->payload_length = clib_host_to_net_u16 (new_l0);
927 ip0->protocol = hbh0->protocol;
/*
 * Slide the ip6 header forward by (length+1) 64-bit words. The
 * destination lies above the source and the two ranges can overlap, so
 * the words are copied highest index first.
 */
928 copy_src0 = (u64 *)ip0;
929 copy_dst0 = copy_src0 + (hbh0->length+1);
930 copy_dst0 [4] = copy_src0[4];
931 copy_dst0 [3] = copy_src0[3];
932 copy_dst0 [2] = copy_src0[2];
933 copy_dst0 [1] = copy_src0[1];
934 copy_dst0 [0] = copy_src0[0];
/* NOTE(review): presumably the branch for packets without an h-b-h header -- confirm. */
939 next0 = IP6_HBYH_INPUT_NEXT_IP6_LOOKUP;
943 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
944 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
946 ip6_pop_hop_by_hop_trace_t *t =
947 vlib_add_trace (vm, node, b0, sizeof (*t));
948 t->next_index = next0;
952 /* verify speculative enqueue, maybe switch current next frame */
953 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
954 to_next, n_left_to_next,
958 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
/* Per-frame counter updates. */
961 vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index,
962 IP6_POP_HOP_BY_HOP_ERROR_PROCESSED, processed);
963 vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index,
964 IP6_POP_HOP_BY_HOP_ERROR_NO_HOHO, no_header);
965 return frame->n_vectors;
/*
 * Registration of the "ip6-pop-hop-by-hop" internal node: node function,
 * trace formatter, counter strings, and the next-node table generated
 * from foreach_ip6_hbyh_input_next.
 */
968 VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) = {
969 .function = ip6_pop_hop_by_hop_node_fn,
970 .name = "ip6-pop-hop-by-hop",
971 .vector_size = sizeof (u32),
972 .format_trace = format_ip6_pop_hop_by_hop_trace,
973 .type = VLIB_NODE_TYPE_INTERNAL,
975 .n_errors = ARRAY_LEN(ip6_pop_hop_by_hop_error_strings),
976 .error_strings = ip6_pop_hop_by_hop_error_strings,
978 /* See ip/lookup.h */
979 .n_next_nodes = IP6_HBYH_INPUT_N_NEXT,
/* Each (symbol, name) pair becomes [IP6_HBYH_INPUT_NEXT_<symbol>] = name. */
981 #define _(s,n) [IP6_HBYH_INPUT_NEXT_##s] = n,
982 foreach_ip6_hbyh_input_next
/*
 * Init function for the hop-by-hop code, registered through
 * VLIB_INIT_FUNCTION below.
 *
 * Records the vnet main pointer, the unix time and the vlib time at
 * start-up (the node function combines the two into a timestamp),
 * and sets the defaults: flow operation IOAM_HBYH_MOD and microsecond
 * timestamp precision.
 */
988 static clib_error_t *
989 ip6_hop_by_hop_init (vlib_main_t * vm)
991 ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
994 hm->vnet_main = vnet_get_main();
/* time (0) is narrowed to 32 bits here. */
995 hm->unix_time_0 = (u32) time (0); /* Store starting time */
996 hm->vlib_time_0 = vlib_time_now (vm);
997 hm->ioam_flag = IOAM_HBYH_MOD;
998 hm->trace_tsp = TSP_MICROSECONDS; /* Micro seconds */
1003 VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
/*
 * Build the hop-by-hop rewrite string: an ip6_hop_by_hop_header_t
 * followed by an iOAM trace option (when trace_option_elts is non-zero)
 * and a proof-of-work option, rounded up to a multiple of 8 octets.
 *
 * rwp               - where the rewrite vector is kept; NOTE(review): the
 *                     lines that read or store *rwp are not visible here
 * trace_type        - trace type; masked with TRACE_TYPE_MASK before it
 *                     is stored in the option
 * trace_option_elts - number of trace elements to reserve space for
 * has_pow_option    - presumably selects whether the proof-of-work
 *                     option is added (the guard is not visible)
 * has_ppc_option    - not referenced in the lines visible here
 *
 * Returns VNET_API_ERROR_INVALID_VALUE when the trace type has no known
 * element size, or when the trace elements would need more than 254
 * bytes.
 */
1005 int ip6_ioam_set_rewrite (u8 **rwp, u32 trace_type, u32 trace_option_elts,
1006 int has_pow_option, int has_ppc_option)
1010 ip6_hop_by_hop_header_t *hbh;
1011 ioam_trace_option_t * trace_option;
1012 ioam_pow_option_t * pow_option;
1014 u8 trace_data_size = 0;
/* Neither option requested. */
1018 if (trace_option_elts == 0 && has_pow_option == 0)
1021 /* Work out how much space we need */
1022 size = sizeof (ip6_hop_by_hop_header_t);
1024 if (trace_option_elts)
1026 size += sizeof (ip6_hop_by_hop_option_t);
1028 trace_data_size = fetch_trace_data_size(trace_type);
/* A size of 0 means the trace type is not recognised. */
1029 if (trace_data_size == 0)
1030 return VNET_API_ERROR_INVALID_VALUE;
/*
 * NOTE(review): the option length stored further down is
 * 2 + trace_option_elts * trace_data_size. If hdr.length is an 8-bit
 * field, a product of exactly 254 passes this check but needs 256 --
 * confirm whether the limit should be 253.
 */
1032 if (trace_option_elts * trace_data_size > 254)
1033 return VNET_API_ERROR_INVALID_VALUE;
1035 size += trace_option_elts * trace_data_size;
1039 size += sizeof (ip6_hop_by_hop_option_t);
1040 size += sizeof (ioam_pow_option_t);
1043 /* Round to a multiple of 8 octets */
1044 rnd_size = (size + 7) & ~7;
1046 /* allocate it, zero-fill / pad by construction */
1047 vec_validate (rewrite, rnd_size-1);
1049 hbh = (ip6_hop_by_hop_header_t *) rewrite;
1050 /* Length of header in 8 octet units, not incl first 8 octets */
1051 hbh->length = (rnd_size>>3) - 1;
/* current: write cursor, starting right after the h-b-h header. */
1052 current = (u8 *)(hbh+1);
1054 if (trace_option_elts)
1056 trace_option = (ioam_trace_option_t *)current;
1057 trace_option->hdr.type = HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST
1058 | HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE;
1059 trace_option->hdr.length =
1060 2 /*ioam_trace_type,data_list_elts_left */ +
1061 trace_option_elts * trace_data_size;
1062 trace_option->ioam_trace_type = trace_type & TRACE_TYPE_MASK;
/* All elements start out free; the node function counts this down. */
1063 trace_option->data_list_elts_left = trace_option_elts;
1064 current += sizeof (ioam_trace_option_t) +
1065 trace_option_elts * trace_data_size;
1069 pow_option = (ioam_pow_option_t *)current;
1070 pow_option->hdr.type = HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK
1071 | HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE;
/* Option length excludes the option header itself. */
1072 pow_option->hdr.length = sizeof (ioam_pow_option_t) -
1073 sizeof (ip6_hop_by_hop_option_t);
1074 current += sizeof (ioam_pow_option_t);
/*
 * Drop the current iOAM rewrite: frees the rewrite vector and resets
 * the trace element count and the POW / PPC option flags to 0, and the
 * timestamp precision to microseconds.
 */
1082 clear_ioam_rewrite_fn(void)
1084 ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
1086 vec_free(hm->rewrite);
1091 hm->trace_option_elts = 0;
1092 hm->has_pow_option = 0;
1093 hm->has_ppc_option = 0;
1094 hm->trace_tsp = TSP_MICROSECONDS;
/*
 * CLI handler for "clear ioam rewrite". Takes no arguments from the
 * input and simply returns the result of clear_ioam_rewrite_fn().
 */
1099 clib_error_t * clear_ioam_rewrite_command_fn (vlib_main_t * vm,
1100 unformat_input_t * input,
1101 vlib_cli_command_t * cmd)
1103 return(clear_ioam_rewrite_fn());
/* Registers the "clear ioam rewrite" CLI command. */
1106 VLIB_CLI_COMMAND (ip6_clear_ioam_trace_cmd, static) = {
1107 .path = "clear ioam rewrite",
1108 .short_help = "clear ioam rewrite",
1109 .function = clear_ioam_rewrite_command_fn,
/*
 * Apply an iOAM trace profile: rebuilds the rewrite string through
 * ip6_ioam_set_rewrite() and stores the profile values in
 * ip6_hop_by_hop_main.
 *
 * trace_option_elts - number of trace elements
 * trace_type        - trace type
 * node_id           - node id recorded in the main struct
 * app_data          - application data recorded in the main struct
 * has_pow_option    - proof-of-work option flag
 * trace_tsp         - timestamp precision selector
 *
 * The visible error path wraps the return code of ip6_ioam_set_rewrite()
 * in a clib error.
 */
1113 ip6_ioam_trace_profile_set(u32 trace_option_elts, u32 trace_type, u32 node_id,
1114 u32 app_data, int has_pow_option, u32 trace_tsp,
1118 ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
1119 rv = ip6_ioam_set_rewrite (&hm->rewrite, trace_type, trace_option_elts,
1120 has_pow_option, has_ppc_option);
/* Remember the profile that produced the rewrite. */
1125 hm->node_id = node_id;
1126 hm->app_data = app_data;
1127 hm->trace_type = trace_type;
1128 hm->trace_option_elts = trace_option_elts;
1129 hm->has_pow_option = has_pow_option;
1130 hm->has_ppc_option = has_ppc_option;
1131 hm->trace_tsp = trace_tsp;
1135 return clib_error_return_code(0, rv, 0, "ip6_ioam_set_rewrite returned %d", rv);
/*
 * CLI handler for "set ioam rewrite".
 *
 * Parses the trace parameters and the optional "pow" / "ppc ..."
 * keywords, then hands everything to ip6_ioam_trace_profile_set().
 * Values not given on the command line keep the defaults initialised
 * below (all zero, microsecond timestamp precision).
 */
1142 static clib_error_t *
1143 ip6_set_ioam_rewrite_command_fn (vlib_main_t * vm,
1144 unformat_input_t * input,
1145 vlib_cli_command_t * cmd)
1147 u32 trace_option_elts = 0;
1148 u32 trace_type = 0, node_id = 0;
1149 u32 app_data = 0, trace_tsp = TSP_MICROSECONDS;
1150 int has_pow_option = 0;
1151 int has_ppc_option = 0;
1152 clib_error_t * rv = 0;
1154 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
/* The five trace parameters are matched as one unit, in this fixed order. */
1156 if (unformat (input, "trace-type 0x%x trace-elts %d "
1157 "trace-tsp %d node-id 0x%x app-data 0x%x",
1158 &trace_type, &trace_option_elts, &trace_tsp,
1159 &node_id, &app_data))
1161 else if (unformat (input, "pow"))
1163 else if (unformat (input, "ppc encap"))
1164 has_ppc_option = PPC_ENCAP;
1165 else if (unformat (input, "ppc decap"))
1166 has_ppc_option = PPC_DECAP;
1167 else if (unformat (input, "ppc none"))
1168 has_ppc_option = PPC_NONE;
1174 rv = ip6_ioam_trace_profile_set(trace_option_elts, trace_type, node_id,
1175 app_data, has_pow_option, trace_tsp, has_ppc_option);
/* Registers the "set ioam rewrite" CLI command. */
1181 VLIB_CLI_COMMAND (ip6_set_ioam_rewrite_cmd, static) = {
1182 .path = "set ioam rewrite",
1183 .short_help = "set ioam rewrite trace-type <0x1f|0x3|0x9|0x11|0x19> trace-elts <nn> trace-tsp <0|1|2|3> node-id <node id in hex> app-data <app_data in hex> [pow] [ppc <encap|decap>]",
1184 .function = ip6_set_ioam_rewrite_command_fn,
/*
 * CLI handler for "show ioam summary".
 *
 * Formats the current iOAM configuration held in ip6_hop_by_hop_main
 * (rewrite flow destination and operation, trace options, POW option,
 * PPC option) into a vector and prints it with vlib_cli_output().
 */
1187 static clib_error_t *
1188 ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm,
1189 unformat_input_t * input,
1190 vlib_cli_command_t * cmd)
1192 ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
/* A non-zero hm->adj address means a rewrite flow has been configured. */
1196 if (!is_zero_ip6_address(&hm->adj))
1198 s = format(s, " REWRITE FLOW CONFIGS - \n");
1199 s = format(s, " Destination Address : %U\n",
1200 format_ip6_address, &hm->adj, sizeof(ip6_address_t));
1201 s = format(s, " Flow operation : %d (%s)\n", hm->ioam_flag,
1202 (hm->ioam_flag == IOAM_HBYH_ADD) ? "Add" :
1203 ((hm->ioam_flag == IOAM_HBYH_MOD) ? "Mod" : "Pop"));
1207 s = format(s, " REWRITE FLOW CONFIGS - Not configured\n");
/* The trace section is shown only when trace elements are configured. */
1210 if (hm->trace_option_elts)
1212 s = format(s, " HOP BY HOP OPTIONS - TRACE CONFIG - \n");
1213 s = format(s, " Trace Type : 0x%x (%d)\n",
1214 hm->trace_type, hm->trace_type);
/* Any precision other than the three named ones prints as "Nanoseconds". */
1215 s = format(s, " Trace timestamp precision : %d (%s)\n", hm->trace_tsp,
1216 (hm->trace_tsp == TSP_SECONDS) ? "Seconds" :
1217 ((hm->trace_tsp == TSP_MILLISECONDS) ? "Milliseconds" :
1218 (((hm->trace_tsp == TSP_MICROSECONDS) ? "Microseconds" : "Nanoseconds"))));
1219 s = format(s, " Num of trace nodes : %d\n",
1220 hm->trace_option_elts);
1221 s = format(s, " Node-id : 0x%x (%d)\n",
1222 hm->node_id, hm->node_id);
1223 s = format(s, " App Data : 0x%x (%d)\n",
1224 hm->app_data, hm->app_data);
1228 s = format(s, " HOP BY HOP OPTIONS - TRACE CONFIG - Not configured\n");
1231 s = format(s, " POW OPTION - %d (%s)\n",
1232 hm->has_pow_option, (hm->has_pow_option?"Enabled":"Disabled"));
1233 if (hm->has_pow_option)
1234 s = format(s, "Try 'show ioam sc-profile' for more information\n");
/*
 * NOTE(review): ppc_state has three entries and is indexed with
 * has_ppc_option without a range check -- confirm every setter keeps the
 * value within 0..2.
 */
1236 s = format(s, " EDGE TO EDGE - PPC OPTION - %d (%s)\n",
1237 hm->has_ppc_option, ppc_state[hm->has_ppc_option]);
1238 if (hm->has_ppc_option)
1239 s = format(s, "Try 'show ioam ppc' for more information\n");
1241 vlib_cli_output(vm, "%v", s);
/* Registers the "show ioam summary" CLI command. */
1246 VLIB_CLI_COMMAND (ip6_show_ioam_run_cmd, static) = {
1247 .path = "show ioam summary",
1248 .short_help = "Summary of IOAM configuration",
1249 .function = ip6_show_ioam_summary_cmd_fn,
/*
 * Point the adjacency of an existing ip6 FIB entry at the add or pop
 * hop-by-hop node, or restore its saved lookup-next action.
 *
 * addr       - destination prefix address
 * mask_width - prefix length; must be one of the FIB's prefix lengths
 * vrf_id     - table id used to find the fib index
 * is_add     - redirect the entry to the add hop-by-hop action
 * is_pop     - redirect the entry to the pop hop-by-hop action
 * is_none    - only restore the saved action
 * Exactly one of is_add / is_pop / is_none must be set.
 *
 * Returns VNET_API_ERROR_INVALID_VALUE_2 when the add/pop/none flags do
 * not sum to 1. The VNET_API_ERROR_NO_SUCH_FIB and
 * VNET_API_ERROR_NO_SUCH_ENTRY returns belong to the table and entry
 * lookups; the conditions guarding them are not visible here.
 */
1252 int ip6_ioam_set_destination (ip6_address_t *addr, u32 mask_width, u32 vrf_id,
1253 int is_add, int is_pop, int is_none)
1255 ip6_main_t * im = &ip6_main;
1256 ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
1257 ip_lookup_main_t * lm = &im->lookup_main;
1258 ip_adjacency_t * adj;
1263 BVT(clib_bihash_kv) kv, value;
1265 if ((is_add + is_pop + is_none) != 1)
1266 return VNET_API_ERROR_INVALID_VALUE_2;
1268 /* Go find the adjacency we're supposed to tickle */
1269 p = hash_get (im->fib_index_by_table_id, vrf_id);
1272 return VNET_API_ERROR_NO_SUCH_FIB;
1276 len = vec_len (im->prefix_lengths_in_search_order);
/* Scan the FIB's prefix lengths for the one matching mask_width. */
1278 for (i = 0; i < len; i++)
1280 int dst_address_length = im->prefix_lengths_in_search_order[i];
1281 ip6_address_t * mask = &im->fib_masks[dst_address_length];
1283 if (dst_address_length != mask_width)
/* Key: masked address in words 0-1, (fib_index << 32 | prefix length) in word 2. */
1286 kv.key[0] = addr->as_u64[0] & mask->as_u64[0];
1287 kv.key[1] = addr->as_u64[1] & mask->as_u64[1];
1288 kv.key[2] = ((u64)((fib_index))<<32) | dst_address_length;
1290 rv = BV(clib_bihash_search_inline_2)(&im->ip6_lookup_table, &kv, &value);
1295 return VNET_API_ERROR_NO_SUCH_ENTRY;
1299 /* Got it, modify as directed... */
1300 adj_index = value.value;
1301 adj = ip_get_adjacency (lm, adj_index);
1303 /* Restore original lookup-next action */
/* A saved value of 0 is treated as "nothing saved". */
1304 if (adj->saved_lookup_next_index)
1306 adj->lookup_next_index = adj->saved_lookup_next_index;
1307 adj->saved_lookup_next_index = 0;
1310 /* Save current action */
1311 if (is_add || is_pop)
1312 adj->saved_lookup_next_index = adj->lookup_next_index;
1315 adj->lookup_next_index = IP_LOOKUP_NEXT_ADD_HOP_BY_HOP;
1318 adj->lookup_next_index = IP_LOOKUP_NEXT_POP_HOP_BY_HOP;
/* Record the flow operation; "none" maps to IOAM_HBYH_MOD. */
1321 hm->ioam_flag = (is_add ? IOAM_HBYH_ADD :
1322 (is_pop ? IOAM_HBYH_POP : IOAM_HBYH_MOD));
/*
 * CLI handler for "set ioam destination".
 *
 * Parses <ip6-address>/<width>, an optional "vrf-id <n>" and one of
 * add / pop / none, validates the combination and calls
 * ip6_ioam_set_destination(). The visible error path reports a failing
 * return code from that call as a CLI error.
 */
1326 static clib_error_t *
1327 ip6_set_ioam_destination_command_fn (vlib_main_t * vm,
1328 unformat_input_t * input,
1329 vlib_cli_command_t * cmd)
/* ~0 marks "no prefix given"; checked after parsing. */
1332 u32 mask_width = ~0;
1339 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1341 if (unformat (input, "%U/%d",
1342 unformat_ip6_address, &addr, &mask_width))
1344 else if (unformat (input, "vrf-id %d", &vrf_id))
1346 else if (unformat (input, "add"))
1348 else if (unformat (input, "pop"))
1350 else if (unformat (input, "none"))
/* Exactly one action keyword and a prefix are mandatory. */
1356 if ((is_add + is_pop + is_none) != 1)
1357 return clib_error_return (0, "One of (add, pop, none) required");
1358 if (mask_width == ~0)
1359 return clib_error_return (0, "<address>/<mask-width> required");
1361 rv = ip6_ioam_set_destination (&addr, mask_width, vrf_id,
1362 is_add, is_pop, is_none);
1369 return clib_error_return (0, "ip6_ioam_set_destination returned %d", rv);
/* Registers the "set ioam destination" CLI command. */
1375 VLIB_CLI_COMMAND (ip6_set_ioam_destination_cmd, static) = {
1376 .path = "set ioam destination",
1377 .short_help = "set ioam destination <ip6-address>/<width> add | pop | none",
1378 .function = ip6_set_ioam_destination_command_fn,
/*
 * Register the end-of-path callback that ip6_pop_hop_by_hop_node_fn
 * reads from ip6_hop_by_hop_main.
 *
 * cb - callback pointer, stored as given; the pop node treats a null
 *      pointer as "no callback".
 */
1381 void vnet_register_ioam_end_of_path_callback (void *cb)
1383 ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
1385 hm->ioam_end_of_path_cb = cb;