2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
15 #include <vlib/vlib.h>
16 #include <vnet/vnet.h>
17 #include <vnet/pg/pg.h>
18 #include <vppinfra/error.h>
20 #include <vnet/ip/ip.h>
22 #include <vppinfra/hash.h>
23 #include <vppinfra/error.h>
24 #include <vppinfra/elog.h>
26 #include <vnet/ip/ip6_hop_by_hop.h>
/*
 * File-wide state for the ip6 hop-by-hop feature. The functions in this
 * file take its address into a local `hm` pointer and use it to reach
 * hm->rewrite, hm->ioam_end_of_path_cb and hm->vnet_main.
 */
28 ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
31 * ip6 hop-by-hop option handling. We push pkts with h-b-h options to
32 * ip6_hop_by_hop_node_fn from ip6-lookup at a cost of ~2 clocks/pkt in
35 * We parse through the h-b-h option TLVs, specifically looking for
36 * HBH_OPTION_TYPE_IOAM_DATA_LIST. [Someone needs to get bananas from
37 * IANA, aka to actually allocate the option TLV codes.]
39 * If we find the indicated option type, and we have remaining list
40 * elements in the trace list, allocate and populate the trace list
43 * At the ingress edge: punch in the h-b-h rewrite, then visit the
44 * standard h-b-h option handler. We have to be careful in the standard
45 * h-b-h handler, to avoid looping until we run out of rewrite space.
46 * Ask me how I know that.
49 * decide on egress point "pop and count" scheme
50 * time stamp handling: usec since the top of the hour?
51 * configure the node id
52 * trace list application data support
53 * cons up analysis / steering plug-in(s)
54 * add configuration binary APIs, vpp_api_test_support, yang models and
56 * perf tune: dual loop, replace memcpy w/ N x 8-byte load/stores
61 * primary h-b-h handler trace support
62 * We work pretty hard on the problem for obvious reasons
68 } ip6_hop_by_hop_trace_t;
/*
 * format() helper that prints one iOAM data-list element.
 *
 * Takes an ioam_data_list_element_t pointer from the va_list, converts its
 * ttl_node_id word from network to host byte order, and prints the top
 * 8 bits as "ttl" and the low 24 bits as "node id".
 *
 * NOTE(review): the format string also consumes ingress, egress and ts
 * arguments; those argument lines and the return statement are not visible
 * in this view.
 */
70 static u8 * format_ioam_data_list_element (u8 * s, va_list * args)
72 ioam_data_list_element_t *elt = va_arg (*args, ioam_data_list_element_t *);
73 u32 ttl_node_id_host_byte_order =
74 clib_net_to_host_u32 (elt->ttl_node_id);
76 s = format (s, "ttl %d node id %d ingress %d egress %d ts %u",
/* upper byte of the combined word */
77 ttl_node_id_host_byte_order>>24,
/* remaining 24 bits of the combined word */
78 ttl_node_id_host_byte_order & 0x00FFFFFF,
/*
 * Trace formatter registered as .format_trace for the ip6-hop-by-hop node.
 *
 * The trace record holds a verbatim copy of the hop-by-hop header in
 * t->option_data, with t->trace_len giving the number of bytes captured.
 * This function prints a summary line and then walks the captured option
 * TLVs, printing the iOAM data list elements, noting a proof-of-work
 * option, stepping over pad bytes and reporting unknown option types.
 *
 * Arguments are pulled from the va_list: vlib_main_t * (unused),
 * vlib_node_t * (unused), ip6_hop_by_hop_trace_t *.
 */
85 static u8 * format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
87 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
88 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
89 ip6_hop_by_hop_trace_t * t = va_arg (*args, ip6_hop_by_hop_trace_t *);
90 ip6_hop_by_hop_header_t *hbh0;
91 ip6_hop_by_hop_option_t *opt0, *limit0;
92 ioam_trace_option_t * trace0;
93 ioam_data_list_element_t * elt0;
/* The captured bytes start with the hop-by-hop header itself */
97 hbh0 = (ip6_hop_by_hop_header_t *)t->option_data;
/* (length+1)<<3 turns the header's 8-octet-unit length field into bytes */
99 s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d\n",
100 t->next_index, (hbh0->length+1)<<3, t->trace_len);
102 opt0 = (ip6_hop_by_hop_option_t *) (hbh0+1);
/*
 * trace_len is a byte count: do the addition on the u8 pointer and cast
 * afterwards, so the limit is not scaled by the option-header size and
 * the walk stays inside the captured bytes.
 */
103 limit0 = (ip6_hop_by_hop_option_t *) (((u8 *)hbh0) + t->trace_len);
105 while (opt0 < limit0)
107 type0 = opt0->type & HBH_OPTION_TYPE_MASK;
111 case HBH_OPTION_TYPE_IOAM_DATA_LIST:
112 trace0 = (ioam_trace_option_t *)opt0;
113 s = format (s, " Trace %d elts left\n",
114 trace0->data_list_elts_left);
115 elt0 = &trace0->elts[0];
/* End of the element array, derived from the option's own length field */
117 ((u8 *)(&trace0->elts[0]) + trace0->hdr.length - 1
118 /* -1 accounts for elts_left */))
120 s = format (s, " [%d] %U\n",elt_index,
121 format_ioam_data_list_element, elt0);
/* Step over this option: payload length plus the option header */
126 opt0 = (ip6_hop_by_hop_option_t *)
127 (((u8 *)opt0) + opt0->length
128 + sizeof (ip6_hop_by_hop_option_t));
131 case HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK:
132 s = format (s, " POW opt present\n");
133 opt0 = (ip6_hop_by_hop_option_t *)
134 (((u8 *)opt0) + sizeof (ioam_pow_option_t));
/*
 * Pad: advance exactly one byte. As with the other advances in this
 * function, the add is done on the u8 pointer before casting back.
 */
137 case 0: /* Pad, just stop */
138 opt0 = (ip6_hop_by_hop_option_t *) (((u8 *)opt0) + 1);
142 s = format (s, "Unknown %d", type0);
143 opt0 = (ip6_hop_by_hop_option_t *)
144 (((u8 *)opt0) + opt0->length
145 + sizeof (ip6_hop_by_hop_option_t));
/*
 * Node registration object for ip6-hop-by-hop. Declared ahead of the
 * VLIB_REGISTER_NODE initializer so the node function can use
 * ip6_hop_by_hop_node.index when incrementing counters.
 */
152 vlib_node_registration_t ip6_hop_by_hop_node;
/* X-macro list of this node's counters, one _(symbol, description) per entry */
154 #define foreach_ip6_hop_by_hop_error \
155 _(PROCESSED, "Pkts with ip6 hop-by-hop options")
/* First expansion: each entry becomes an IP6_HOP_BY_HOP_ERROR_<symbol> enumerator */
158 #define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
159 foreach_ip6_hop_by_hop_error
161 IP6_HOP_BY_HOP_N_ERROR,
162 } ip6_hop_by_hop_error_t;
/* Second expansion: the description strings, in the same order as the enumerators */
164 static char * ip6_hop_by_hop_error_strings[] = {
165 #define _(sym,string) string,
166 foreach_ip6_hop_by_hop_error
/*
 * Node function for "ip6-hop-by-hop".
 *
 * For each buffer in the frame, walks the option TLVs of the hop-by-hop
 * header that follows the ip6 header. For an iOAM data-list option that
 * still has elements left, it decrements data_list_elts_left and fills in
 * the element at that index (hop limit in the top byte of the ttl/node-id
 * word, the RX sw_if_index, the adjacency's rewrite sw_if_index as egress,
 * and a placeholder timestamp). Proof-of-work options and pad bytes are
 * stepped over.
 *
 * The next node comes from the TX adjacency: its lookup_next_index, unless
 * that is IP_LOOKUP_NEXT_ADD_HOP_BY_HOP, in which case
 * saved_lookup_next_index is used. When tracing is enabled the hop-by-hop
 * header bytes are copied into the trace record, bounded by the size of
 * t->option_data.
 *
 * Returns frame->n_vectors and adds `processed` to the PROCESSED counter.
 */
171 ip6_hop_by_hop_node_fn (vlib_main_t * vm,
172 vlib_node_runtime_t * node,
173 vlib_frame_t * frame)
175 ip6_main_t * im = &ip6_main;
176 ip_lookup_main_t * lm = &im->lookup_main;
177 ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
178 u32 n_left_from, * from, * to_next;
179 ip_lookup_next_t next_index;
182 from = vlib_frame_vector_args (frame);
183 n_left_from = frame->n_vectors;
184 next_index = node->cached_next_index;
186 while (n_left_from > 0)
190 vlib_get_next_frame (vm, node, next_index,
191 to_next, n_left_to_next);
/* The dual-loop section below is compiled out; it is a template, not live code */
193 #if 0 /* $$$ DUAL-LOOP ME */
194 while (n_left_from >= 4 && n_left_to_next >= 2)
196 u32 next0 = IP6_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
197 u32 next1 = IP6_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
198 u32 sw_if_index0, sw_if_index1;
200 ethernet_header_t *en0, *en1;
202 vlib_buffer_t * b0, * b1;
204 /* Prefetch next iteration. */
206 vlib_buffer_t * p2, * p3;
208 p2 = vlib_get_buffer (vm, from[2]);
209 p3 = vlib_get_buffer (vm, from[3]);
211 vlib_prefetch_buffer_header (p2, LOAD);
212 vlib_prefetch_buffer_header (p3, LOAD);
214 CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
215 CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
218 /* speculatively enqueue b0 and b1 to the current next frame */
219 to_next[0] = bi0 = from[0];
220 to_next[1] = bi1 = from[1];
226 b0 = vlib_get_buffer (vm, bi0);
227 b1 = vlib_get_buffer (vm, bi1);
229 /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
230 ASSERT (b0->current_data == 0);
231 ASSERT (b1->current_data == 0);
233 ip0 = vlib_buffer_get_current (b0);
/* ip1 is the second packet's header, so it is read from b1 */
234 ip1 = vlib_buffer_get_current (b1);
236 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
237 sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
239 /* $$$$$ End of processing 2 x packets $$$$$ */
241 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
243 if (b0->flags & VLIB_BUFFER_IS_TRACED)
245 ip6_hop_by_hop_trace_t *t =
246 vlib_add_trace (vm, node, b0, sizeof (*t));
247 t->sw_if_index = sw_if_index0;
248 t->next_index = next0;
250 if (b1->flags & VLIB_BUFFER_IS_TRACED)
252 ip6_hop_by_hop_trace_t *t =
253 vlib_add_trace (vm, node, b1, sizeof (*t));
254 t->sw_if_index = sw_if_index1;
255 t->next_index = next1;
259 /* verify speculative enqueues, maybe switch current next frame */
260 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
261 to_next, n_left_to_next,
262 bi0, bi1, next0, next1);
/* Single-packet loop: this is the path that actually runs */
266 while (n_left_from > 0 && n_left_to_next > 0)
273 ip_adjacency_t * adj0;
274 ip6_hop_by_hop_header_t *hbh0;
275 ip6_hop_by_hop_option_t *opt0, *limit0;
276 ioam_trace_option_t * trace0;
277 ioam_data_list_element_t * elt0;
280 /* speculatively enqueue b0 to the current next frame */
288 b0 = vlib_get_buffer (vm, bi0);
290 ip0 = vlib_buffer_get_current (b0);
291 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
292 adj0 = ip_get_adjacency (lm, adj_index0);
/* Hop-by-hop header sits right after the ip6 header; options follow it */
293 hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
294 opt0 = (ip6_hop_by_hop_option_t *)(hbh0+1);
/* End of the header in bytes: (length+1) units of 8 octets */
295 limit0 = (ip6_hop_by_hop_option_t *)
296 ((u8 *)hbh0 + ((hbh0->length+1)<<3));
298 /* Scan the set of h-b-h options, process ones that we understand */
299 while (opt0 < limit0)
301 type0 = opt0->type & HBH_OPTION_TYPE_MASK;
304 case HBH_OPTION_TYPE_IOAM_DATA_LIST:
305 trace0 = (ioam_trace_option_t *)opt0;
/* Only record when the list still has a free element */
306 if (PREDICT_TRUE (trace0->data_list_elts_left))
/* Elements are consumed from the highest index downwards */
308 trace0->data_list_elts_left--;
309 elt0 = &trace0->elts[trace0->data_list_elts_left];
311 clib_host_to_net_u32 ((ip0->hop_limit<<24)
314 vnet_buffer(b0)->sw_if_index[VLIB_RX];
315 elt0->egress_if = adj0->rewrite_header.sw_if_index;
316 elt0->timestamp = 123; /* $$$$ */
317 /* $$$ set elt0->app_data */
320 opt0 = (ip6_hop_by_hop_option_t *)
321 (((u8 *)opt0) + opt0->length
322 + sizeof (ip6_hop_by_hop_option_t));
325 case HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK:
326 opt0 = (ip6_hop_by_hop_option_t *)
327 (((u8 *)opt0) + sizeof (ioam_pow_option_t));
/*
 * Advance exactly one byte: the add is done on the u8 pointer before
 * casting back, matching the other advances in this loop.
 */
331 opt0 = (ip6_hop_by_hop_option_t *) (((u8 *)opt0) + 1);
339 * Since we push pkts here from the h-b-h header imposition code
340 * we have to be careful what we wish for...
/* Never hand the packet back to the add node: use the saved action instead */
342 next0 = adj0->lookup_next_index != IP_LOOKUP_NEXT_ADD_HOP_BY_HOP ?
343 adj0->lookup_next_index : adj0->saved_lookup_next_index;
345 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
346 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
348 ip6_hop_by_hop_trace_t *t =
349 vlib_add_trace (vm, node, b0, sizeof (*t));
350 u32 trace_len = (hbh0->length+1)<<3;
351 t->next_index = next0;
352 /* Capture the h-b-h option verbatim */
/* Clamp the copy to the capacity of the trace record's buffer */
353 trace_len = trace_len < ARRAY_LEN(t->option_data) ?
354 trace_len : ARRAY_LEN(t->option_data);
355 t->trace_len = trace_len;
356 memcpy (t->option_data, hbh0, trace_len);
361 /* verify speculative enqueue, maybe switch current next frame */
362 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
363 to_next, n_left_to_next,
367 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
370 vlib_node_increment_counter (vm, ip6_hop_by_hop_node.index,
371 IP6_HOP_BY_HOP_ERROR_PROCESSED, processed);
372 return frame->n_vectors;
/*
 * Graph-node registration for "ip6-hop-by-hop": an internal node whose
 * frames carry u32 elements, using ip6_hop_by_hop_node_fn as the worker
 * and format_ip6_hop_by_hop_trace for trace output.
 *
 * The next-node table is indexed by IP_LOOKUP_NEXT_* values, which lets the
 * node function use an adjacency's lookup-next index directly as next0.
 */
375 VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = {
376 .function = ip6_hop_by_hop_node_fn,
377 .name = "ip6-hop-by-hop",
378 .vector_size = sizeof (u32),
379 .format_trace = format_ip6_hop_by_hop_trace,
380 .type = VLIB_NODE_TYPE_INTERNAL,
/* Counter names come from the X-macro string table */
382 .n_errors = ARRAY_LEN(ip6_hop_by_hop_error_strings),
383 .error_strings = ip6_hop_by_hop_error_strings,
385 /* See ip/lookup.h */
386 .n_next_nodes = IP_LOOKUP_N_NEXT,
388 [IP_LOOKUP_NEXT_MISS] = "ip6-miss",
389 [IP_LOOKUP_NEXT_DROP] = "ip6-drop",
390 [IP_LOOKUP_NEXT_PUNT] = "ip6-punt",
391 [IP_LOOKUP_NEXT_LOCAL] = "ip6-local",
392 [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor",
393 [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite",
394 [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify",
395 [IP_LOOKUP_NEXT_MAP] = "ip6-map",
396 [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t",
397 [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd",
398 /* Next 3 arcs probably never used */
399 [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop",
400 [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop",
401 [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop",
405 /* The main h-b-h tracer will be invoked, no need to do much here */
408 } ip6_add_hop_by_hop_trace_t;
/*
 * Trace formatter registered as .format_trace for the ip6-add-hop-by-hop
 * node. Pulls vlib_main_t * (unused), vlib_node_t * (unused) and an
 * ip6_add_hop_by_hop_trace_t * from the va_list and appends an
 * "IP6_ADD_HOP_BY_HOP: next index %d" line to s.
 * NOTE(review): the format argument and the return are not visible here.
 */
410 /* packet trace format function */
411 static u8 * format_ip6_add_hop_by_hop_trace (u8 * s, va_list * args)
413 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
414 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
415 ip6_add_hop_by_hop_trace_t * t = va_arg (*args,
416 ip6_add_hop_by_hop_trace_t *);
418 s = format (s, "IP6_ADD_HOP_BY_HOP: next index %d",
/*
 * Node registration object for ip6-add-hop-by-hop. Declared ahead of the
 * VLIB_REGISTER_NODE initializer so the node function can use
 * ip6_add_hop_by_hop_node.index when incrementing counters.
 */
423 vlib_node_registration_t ip6_add_hop_by_hop_node;
/* X-macro list of this node's counters, one _(symbol, description) per entry */
425 #define foreach_ip6_add_hop_by_hop_error \
426 _(PROCESSED, "Pkts w/ added ip6 hop-by-hop options")
/* First expansion: each entry becomes an IP6_ADD_HOP_BY_HOP_ERROR_<symbol> enumerator */
429 #define _(sym,str) IP6_ADD_HOP_BY_HOP_ERROR_##sym,
430 foreach_ip6_add_hop_by_hop_error
432 IP6_ADD_HOP_BY_HOP_N_ERROR,
433 } ip6_add_hop_by_hop_error_t;
/* Second expansion: the description strings, in the same order as the enumerators */
435 static char * ip6_add_hop_by_hop_error_strings[] = {
436 #define _(sym,string) string,
437 foreach_ip6_add_hop_by_hop_error
442 ip6_add_hop_by_hop_node_fn (vlib_main_t * vm,
443 vlib_node_runtime_t * node,
444 vlib_frame_t * frame)
446 ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
447 u32 n_left_from, * from, * to_next;
448 ip_lookup_next_t next_index;
450 u8 * rewrite = hm->rewrite;
451 u32 rewrite_length = vec_len (rewrite);
453 from = vlib_frame_vector_args (frame);
454 n_left_from = frame->n_vectors;
455 next_index = node->cached_next_index;
457 while (n_left_from > 0)
461 vlib_get_next_frame (vm, node, next_index,
462 to_next, n_left_to_next);
465 while (n_left_from >= 4 && n_left_to_next >= 2)
467 u32 next0 = IP6_ADD_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
468 u32 next1 = IP6_ADD_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
469 u32 sw_if_index0, sw_if_index1;
471 ethernet_header_t *en0, *en1;
473 vlib_buffer_t * b0, * b1;
475 /* Prefetch next iteration. */
477 vlib_buffer_t * p2, * p3;
479 p2 = vlib_get_buffer (vm, from[2]);
480 p3 = vlib_get_buffer (vm, from[3]);
482 vlib_prefetch_buffer_header (p2, LOAD);
483 vlib_prefetch_buffer_header (p3, LOAD);
485 CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
486 CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
489 /* speculatively enqueue b0 and b1 to the current next frame */
490 to_next[0] = bi0 = from[0];
491 to_next[1] = bi1 = from[1];
497 b0 = vlib_get_buffer (vm, bi0);
498 b1 = vlib_get_buffer (vm, bi1);
500 /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
501 ASSERT (b0->current_data == 0);
502 ASSERT (b1->current_data == 0);
504 ip0 = vlib_buffer_get_current (b0);
505 ip1 = vlib_buffer_get_current (b0);
507 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
508 sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
510 /* $$$$$ End of processing 2 x packets $$$$$ */
512 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
514 if (b0->flags & VLIB_BUFFER_IS_TRACED)
516 ip6_add_hop_by_hop_trace_t *t =
517 vlib_add_trace (vm, node, b0, sizeof (*t));
518 t->sw_if_index = sw_if_index0;
519 t->next_index = next0;
521 if (b1->flags & VLIB_BUFFER_IS_TRACED)
523 ip6_add_hop_by_hop_trace_t *t =
524 vlib_add_trace (vm, node, b1, sizeof (*t));
525 t->sw_if_index = sw_if_index1;
526 t->next_index = next1;
530 /* verify speculative enqueues, maybe switch current next frame */
531 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
532 to_next, n_left_to_next,
533 bi0, bi1, next0, next1);
537 while (n_left_from > 0 && n_left_to_next > 0)
543 ip6_hop_by_hop_header_t * hbh0;
544 u64 * copy_src0, * copy_dst0;
547 /* speculatively enqueue b0 to the current next frame */
555 b0 = vlib_get_buffer (vm, bi0);
557 ip0 = vlib_buffer_get_current (b0);
559 /* Copy the ip header left by the required amount */
560 copy_dst0 = (u64 *)(((u8 *)ip0) - rewrite_length);
561 copy_src0 = (u64 *) ip0;
563 copy_dst0 [0] = copy_src0 [0];
564 copy_dst0 [1] = copy_src0 [1];
565 copy_dst0 [2] = copy_src0 [2];
566 copy_dst0 [3] = copy_src0 [3];
567 copy_dst0 [4] = copy_src0 [4];
568 vlib_buffer_advance (b0, - (word)rewrite_length);
569 ip0 = vlib_buffer_get_current (b0);
571 hbh0 = (ip6_hop_by_hop_header_t *)(ip0 + 1);
572 /* $$$ tune, rewrite_length is a multiple of 8 */
573 memcpy (hbh0, rewrite, rewrite_length);
574 /* Patch the protocol chain, insert the h-b-h (type 0) header */
575 hbh0->protocol = ip0->protocol;
577 new_l0 = clib_net_to_host_u16 (ip0->payload_length) + rewrite_length;
578 ip0->payload_length = clib_host_to_net_u16 (new_l0);
580 /* Populate the (first) h-b-h list elt */
581 next0 = IP_LOOKUP_NEXT_HOP_BY_HOP;
583 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
584 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
586 ip6_add_hop_by_hop_trace_t *t =
587 vlib_add_trace (vm, node, b0, sizeof (*t));
588 t->next_index = next0;
593 /* verify speculative enqueue, maybe switch current next frame */
594 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
595 to_next, n_left_to_next,
599 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
602 vlib_node_increment_counter (vm, ip6_add_hop_by_hop_node.index,
603 IP6_ADD_HOP_BY_HOP_ERROR_PROCESSED, processed);
604 return frame->n_vectors;
/*
 * Graph-node registration for "ip6-add-hop-by-hop": an internal node whose
 * frames carry u32 elements, using ip6_add_hop_by_hop_node_fn as the worker
 * and format_ip6_add_hop_by_hop_trace for trace output.
 *
 * The next-node table is indexed by IP_LOOKUP_NEXT_* values; the node
 * function sends packets to the IP_LOOKUP_NEXT_HOP_BY_HOP arc.
 */
607 VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = {
608 .function = ip6_add_hop_by_hop_node_fn,
609 .name = "ip6-add-hop-by-hop",
610 .vector_size = sizeof (u32),
611 .format_trace = format_ip6_add_hop_by_hop_trace,
612 .type = VLIB_NODE_TYPE_INTERNAL,
/* Counter names come from the X-macro string table */
614 .n_errors = ARRAY_LEN(ip6_add_hop_by_hop_error_strings),
615 .error_strings = ip6_add_hop_by_hop_error_strings,
617 /* See ip/lookup.h */
618 .n_next_nodes = IP_LOOKUP_N_NEXT,
620 [IP_LOOKUP_NEXT_MISS] = "ip6-miss",
621 [IP_LOOKUP_NEXT_DROP] = "ip6-drop",
622 [IP_LOOKUP_NEXT_PUNT] = "ip6-punt",
623 [IP_LOOKUP_NEXT_LOCAL] = "ip6-local",
624 [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor",
625 [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite",
626 [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify",
627 [IP_LOOKUP_NEXT_MAP] = "ip6-map",
628 [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t",
629 [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd",
630 /* Next 3 arcs probably never used */
631 [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop",
632 [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop",
633 [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop",
638 /* The main h-b-h tracer was already invoked, no need to do much here */
641 } ip6_pop_hop_by_hop_trace_t;
/*
 * Trace formatter registered as .format_trace for the ip6-pop-hop-by-hop
 * node. Pulls vlib_main_t * (unused), vlib_node_t * (unused) and an
 * ip6_pop_hop_by_hop_trace_t * from the va_list and appends an
 * "IP6_POP_HOP_BY_HOP: next index %d" line to s.
 * NOTE(review): the format argument and the return are not visible here.
 */
643 /* packet trace format function */
644 static u8 * format_ip6_pop_hop_by_hop_trace (u8 * s, va_list * args)
646 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
647 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
648 ip6_pop_hop_by_hop_trace_t * t = va_arg (*args, ip6_pop_hop_by_hop_trace_t *);
650 s = format (s, "IP6_POP_HOP_BY_HOP: next index %d",
/*
 * Node registration object for ip6-pop-hop-by-hop. Declared ahead of the
 * VLIB_REGISTER_NODE initializer so the node function can use
 * ip6_pop_hop_by_hop_node.index when incrementing counters.
 */
655 vlib_node_registration_t ip6_pop_hop_by_hop_node;
/*
 * X-macro list of this node's counters, one _(symbol, description) per
 * entry: packets that had a header removed, and packets that had none.
 */
657 #define foreach_ip6_pop_hop_by_hop_error \
658 _(PROCESSED, "Pkts w/ removed ip6 hop-by-hop options") \
659 _(NO_HOHO, "Pkts w/ no ip6 hop-by-hop options")
/* First expansion: each entry becomes an IP6_POP_HOP_BY_HOP_ERROR_<symbol> enumerator */
662 #define _(sym,str) IP6_POP_HOP_BY_HOP_ERROR_##sym,
663 foreach_ip6_pop_hop_by_hop_error
665 IP6_POP_HOP_BY_HOP_N_ERROR,
666 } ip6_pop_hop_by_hop_error_t;
/* Second expansion: the description strings, in the same order as the enumerators */
668 static char * ip6_pop_hop_by_hop_error_strings[] = {
669 #define _(sym,string) string,
670 foreach_ip6_pop_hop_by_hop_error
/*
 * Node function for "ip6-pop-hop-by-hop": strips the hop-by-hop header
 * from packets that carry one.
 *
 * Per packet, in the single-packet loop: if the ip6 header's protocol field
 * is 0, the header following it is treated as hop-by-hop. The next node is
 * taken from the registered end-of-path callback when one is set, otherwise
 * from the adjacency's saved_lookup_next_index. The buffer is advanced past
 * the hop-by-hop header, the ip6 payload length is reduced by the same
 * number of bytes, the ip6 protocol field takes over the value stored in
 * the hop-by-hop header, and the five 64-bit words of the ip6 header are
 * moved up so they sit directly in front of the remaining payload.
 * Packets without a hop-by-hop header go to saved_lookup_next_index.
 *
 * Returns frame->n_vectors and updates the PROCESSED and NO_HOHO counters
 * from `processed` and `no_header`.
 */
675 ip6_pop_hop_by_hop_node_fn (vlib_main_t * vm,
676 vlib_node_runtime_t * node,
677 vlib_frame_t * frame)
679 ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
680 ip6_main_t * im = &ip6_main;
681 ip_lookup_main_t * lm = &im->lookup_main;
682 u32 n_left_from, * from, * to_next;
683 ip_lookup_next_t next_index;
/* Local copy of the optional end-of-path hook; may be null */
686 u32 (*ioam_end_of_path_cb) (vlib_main_t *, vlib_node_runtime_t *,
687 vlib_buffer_t *, ip6_header_t *,
690 ioam_end_of_path_cb = hm->ioam_end_of_path_cb;
692 from = vlib_frame_vector_args (frame);
693 n_left_from = frame->n_vectors;
694 next_index = node->cached_next_index;
696 while (n_left_from > 0)
700 vlib_get_next_frame (vm, node, next_index,
701 to_next, n_left_to_next);
/* Dual-loop template: no header removal is done in this section */
704 while (n_left_from >= 4 && n_left_to_next >= 2)
706 u32 next0 = IP6_POP_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
707 u32 next1 = IP6_POP_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
708 u32 sw_if_index0, sw_if_index1;
710 ethernet_header_t *en0, *en1;
712 vlib_buffer_t * b0, * b1;
714 /* Prefetch next iteration. */
716 vlib_buffer_t * p2, * p3;
718 p2 = vlib_get_buffer (vm, from[2]);
719 p3 = vlib_get_buffer (vm, from[3]);
721 vlib_prefetch_buffer_header (p2, LOAD);
722 vlib_prefetch_buffer_header (p3, LOAD);
724 CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
725 CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
728 /* speculatively enqueue b0 and b1 to the current next frame */
729 to_next[0] = bi0 = from[0];
730 to_next[1] = bi1 = from[1];
736 b0 = vlib_get_buffer (vm, bi0);
737 b1 = vlib_get_buffer (vm, bi1);
739 /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
740 ASSERT (b0->current_data == 0);
741 ASSERT (b1->current_data == 0);
743 ip0 = vlib_buffer_get_current (b0);
/* ip1 is the second packet's header, so it is read from b1 */
744 ip1 = vlib_buffer_get_current (b1);
746 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
747 sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
749 /* $$$$$ End of processing 2 x packets $$$$$ */
751 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
753 if (b0->flags & VLIB_BUFFER_IS_TRACED)
755 ip6_pop_hop_by_hop_trace_t *t =
756 vlib_add_trace (vm, node, b0, sizeof (*t));
757 t->sw_if_index = sw_if_index0;
758 t->next_index = next0;
760 if (b1->flags & VLIB_BUFFER_IS_TRACED)
762 ip6_pop_hop_by_hop_trace_t *t =
763 vlib_add_trace (vm, node, b1, sizeof (*t));
764 t->sw_if_index = sw_if_index1;
765 t->next_index = next1;
769 /* verify speculative enqueues, maybe switch current next frame */
770 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
771 to_next, n_left_to_next,
772 bi0, bi1, next0, next1);
/* Single-packet loop: performs the actual header removal */
776 while (n_left_from > 0 && n_left_to_next > 0)
783 ip_adjacency_t * adj0;
784 ip6_hop_by_hop_header_t *hbh0;
785 u64 * copy_dst0, * copy_src0;
788 /* speculatively enqueue b0 to the current next frame */
796 b0 = vlib_get_buffer (vm, bi0);
798 ip0 = vlib_buffer_get_current (b0);
799 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
800 adj0 = ip_get_adjacency (lm, adj_index0);
802 /* Perfectly normal to end up here w/ out h-b-h header */
803 if (PREDICT_TRUE (ip0->protocol == 0))
805 hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
807 /* Collect data from trace via callback */
/* The callback, when present, also chooses the next node */
808 next0 = ioam_end_of_path_cb ?
809 ioam_end_of_path_cb (vm, node, b0, ip0, adj0)
810 : adj0->saved_lookup_next_index;
813 /* Pop the trace data */
/* (length+1)<<3 is the hop-by-hop header size in bytes */
814 vlib_buffer_advance (b0, (hbh0->length+1)<<3);
815 new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
816 ((hbh0->length+1)<<3);
817 ip0->payload_length = clib_host_to_net_u16 (new_l0);
818 ip0->protocol = hbh0->protocol;
819 copy_src0 = (u64 *)ip0;
/* Destination is (length+1) 64-bit words above the source */
820 copy_dst0 = copy_src0 + (hbh0->length+1);
/* Highest word first: the destination lies above the source and may overlap it */
821 copy_dst0 [4] = copy_src0[4];
822 copy_dst0 [3] = copy_src0[3];
823 copy_dst0 [2] = copy_src0[2];
824 copy_dst0 [1] = copy_src0[1];
825 copy_dst0 [0] = copy_src0[0];
830 next0 = adj0->saved_lookup_next_index;
834 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
835 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
837 ip6_pop_hop_by_hop_trace_t *t =
838 vlib_add_trace (vm, node, b0, sizeof (*t));
839 t->next_index = next0;
842 /* verify speculative enqueue, maybe switch current next frame */
843 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
844 to_next, n_left_to_next,
848 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
851 vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index,
852 IP6_POP_HOP_BY_HOP_ERROR_PROCESSED, processed);
853 vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index,
854 IP6_POP_HOP_BY_HOP_ERROR_NO_HOHO, no_header);
855 return frame->n_vectors;
/*
 * Graph-node registration for "ip6-pop-hop-by-hop": an internal node whose
 * frames carry u32 elements, using ip6_pop_hop_by_hop_node_fn as the worker
 * and format_ip6_pop_hop_by_hop_trace for trace output.
 *
 * The next-node table is indexed by IP_LOOKUP_NEXT_* values, which lets the
 * node function use an adjacency's saved lookup-next index directly as next0.
 */
858 VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) = {
859 .function = ip6_pop_hop_by_hop_node_fn,
860 .name = "ip6-pop-hop-by-hop",
861 .vector_size = sizeof (u32),
862 .format_trace = format_ip6_pop_hop_by_hop_trace,
863 .type = VLIB_NODE_TYPE_INTERNAL,
/* Counter names come from the X-macro string table */
865 .n_errors = ARRAY_LEN(ip6_pop_hop_by_hop_error_strings),
866 .error_strings = ip6_pop_hop_by_hop_error_strings,
868 /* See ip/lookup.h */
869 .n_next_nodes = IP_LOOKUP_N_NEXT,
871 [IP_LOOKUP_NEXT_MISS] = "ip6-miss",
872 [IP_LOOKUP_NEXT_DROP] = "ip6-drop",
873 [IP_LOOKUP_NEXT_PUNT] = "ip6-punt",
874 [IP_LOOKUP_NEXT_LOCAL] = "ip6-local",
875 [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor",
876 [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite",
877 [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify",
878 [IP_LOOKUP_NEXT_MAP] = "ip6-map",
879 [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t",
880 [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd",
881 /* Next 3 arcs probably never used */
882 [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop",
883 [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop",
884 [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop",
/*
 * Init hook for this file: stores the result of vnet_get_main() in
 * ip6_hop_by_hop_main.vnet_main.
 * NOTE(review): any other initialization and the return value are not
 * visible in this view.
 */
889 static clib_error_t *
890 ip6_hop_by_hop_init (vlib_main_t * vm)
892 ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
895 hm->vnet_main = vnet_get_main();
/* Registers the function above with the VLIB init machinery */
900 VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
/*
 * Build the hop-by-hop header template ("rewrite") that the add node
 * inserts into packets.
 *
 * rwp               - address of the rewrite vector pointer (the CLI handler
 *                     passes &hm->rewrite)
 * trace_option_elts - number of iOAM data-list elements to reserve;
 *                     0 leaves the trace option out
 * has_pow_option    - non-zero to include the proof-of-work option
 *
 * Returns VNET_API_ERROR_INVALID_VALUE when the requested elements would
 * not fit in a single option (element bytes > 254).
 * NOTE(review): the handling of the "nothing requested" case, the final
 * store through rwp and the success return are not visible in this view.
 */
902 int ip6_ioam_set_rewrite (u8 **rwp, u32 trace_option_elts, int has_pow_option)
906 ip6_hop_by_hop_header_t *hbh;
907 ioam_trace_option_t * trace_option;
908 ioam_pow_option_t * pow_option;
913 if (trace_option_elts == 0 && has_pow_option == 0)
/* hdr.length below is 1 + element bytes, so 254 element bytes is the ceiling */
916 if (trace_option_elts * sizeof (ioam_data_list_element_t) > 254)
917 return VNET_API_ERROR_INVALID_VALUE;
919 /* Work out how much space we need */
920 size = sizeof (ip6_hop_by_hop_header_t);
922 if (trace_option_elts)
/*
 * NOTE(review): this counts an option header plus the elements, while the
 * cursor below advances by sizeof (ioam_trace_option_t) plus the elements.
 * Whether the two agree depends on struct layouts not shown here; the
 * round-up to 8 octets may be what absorbs any difference. TODO confirm.
 */
924 size += sizeof (ip6_hop_by_hop_option_t);
925 size += trace_option_elts * (sizeof (ioam_data_list_element_t));
929 size += sizeof (ip6_hop_by_hop_option_t);
930 size += sizeof (ioam_pow_option_t);
933 /* Round to a multiple of 8 octets */
934 rnd_size = (size + 7) & ~7;
936 /* allocate it, zero-fill / pad by construction */
937 vec_validate (rewrite, rnd_size-1);
939 hbh = (ip6_hop_by_hop_header_t *) rewrite;
940 /* Length of header in 8 octet units, not incl first 8 octets */
941 hbh->length = (rnd_size>>3) - 1;
/* Options are laid out back to back, starting right after the header */
942 current = (u8 *)(hbh+1);
944 if (trace_option_elts)
946 trace_option = (ioam_trace_option_t *)current;
947 trace_option->hdr.type = HBH_OPTION_TYPE_IOAM_DATA_LIST
948 | HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE;
949 trace_option->hdr.length = 1 /*data_list_elts_left */ +
950 trace_option_elts * sizeof (ioam_data_list_element_t);
/* Every element starts out unused */
951 trace_option->data_list_elts_left = trace_option_elts;
952 current += sizeof (ioam_trace_option_t) +
953 trace_option_elts * sizeof (ioam_data_list_element_t);
957 pow_option = (ioam_pow_option_t *)current;
958 pow_option->hdr.type = HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK
959 | HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE;
/* Option length excludes the option header itself */
960 pow_option->hdr.length = sizeof (ioam_pow_option_t) -
961 sizeof (ip6_hop_by_hop_option_t);
962 current += sizeof (ioam_pow_option_t);
/*
 * CLI handler for "ioam set rewrite". Parses the optional
 * "trace-elts <n>" and "pow" keywords, then calls ip6_ioam_set_rewrite()
 * on hm->rewrite with the parsed values. A failure is reported as a CLI
 * error that includes the numeric return code.
 * NOTE(review): the branch that selects the error return is not visible.
 */
969 static clib_error_t *
970 ip6_ioam_set_rewrite_command_fn (vlib_main_t * vm,
971 unformat_input_t * input,
972 vlib_cli_command_t * cmd)
974 ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
/* Defaults: no trace elements, no proof-of-work option */
975 u32 trace_option_elts = 0;
976 int has_pow_option = 0;
979 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
981 if (unformat (input, "trace-elts %d", &trace_option_elts))
983 else if (unformat (input, "pow"))
989 rv = ip6_ioam_set_rewrite (&hm->rewrite, trace_option_elts, has_pow_option);
996 return clib_error_return (0, "ip6_ioam_set_rewrite returned %d", rv);
/* CLI registration: binds the "ioam set rewrite" path to its handler */
1002 VLIB_CLI_COMMAND (ip6_ioam_set_rewrite_cmd, static) = {
1003 .path = "ioam set rewrite",
1004 .short_help = "ioam set rewrite [trace-elts <nn>] [pow]",
1005 .function = ip6_ioam_set_rewrite_command_fn,
/*
 * Attach, or detach, hop-by-hop add/pop processing on the adjacency that
 * an ip6 FIB prefix resolves to.
 *
 * addr / mask_width      - the prefix to look up
 * vrf_id                 - table id used to find the FIB index
 * is_add, is_pop, is_none - exactly one must be set
 *
 * Returns VNET_API_ERROR_INVALID_VALUE_2 when the three flags do not sum to
 * one, VNET_API_ERROR_NO_SUCH_FIB or VNET_API_ERROR_NO_SUCH_ENTRY when the
 * lookups fail.
 * NOTE(review): the conditions guarding those two lookup errors, the tests
 * that choose between the add and pop assignments, and the success return
 * are not visible in this view.
 */
1008 int ip6_ioam_set_destination (ip6_address_t *addr, u32 mask_width, u32 vrf_id,
1009 int is_add, int is_pop, int is_none)
1011 ip6_main_t * im = &ip6_main;
1012 ip_lookup_main_t * lm = &im->lookup_main;
1013 ip_adjacency_t * adj;
1018 BVT(clib_bihash_kv) kv, value;
/* The three actions are mutually exclusive and one is mandatory */
1020 if ((is_add + is_pop + is_none) != 1)
1021 return VNET_API_ERROR_INVALID_VALUE_2;
1023 /* Go find the adjacency we're supposed to tickle */
1024 p = hash_get (im->fib_index_by_table_id, vrf_id);
1027 return VNET_API_ERROR_NO_SUCH_FIB;
1031 len = vec_len (im->prefix_lengths_in_search_order);
1033 for (i = 0; i < len; i++)
1035 int dst_address_length = im->prefix_lengths_in_search_order[i];
1036 ip6_address_t * mask = &im->fib_masks[dst_address_length];
/* Only the prefix length equal to mask_width is of interest */
1038 if (dst_address_length != mask_width)
/* Lookup key: masked address words, then FIB index and prefix length packed into one u64 */
1041 kv.key[0] = addr->as_u64[0] & mask->as_u64[0];
1042 kv.key[1] = addr->as_u64[1] & mask->as_u64[1];
1043 kv.key[2] = ((u64)((fib_index))<<32) | dst_address_length;
1045 rv = BV(clib_bihash_search_inline_2)(&im->ip6_lookup_table, &kv, &value);
1050 return VNET_API_ERROR_NO_SUCH_ENTRY;
1054 /* Got it, modify as directed... */
1055 adj_index = value.value;
1056 adj = ip_get_adjacency (lm, adj_index);
1058 /* Restore original lookup-next action */
/* A non-zero saved value means an earlier call redirected this adjacency */
1059 if (adj->saved_lookup_next_index)
1061 adj->lookup_next_index = adj->saved_lookup_next_index;
1062 adj->saved_lookup_next_index = 0;
1065 /* Save current action */
1066 if (is_add || is_pop)
1067 adj->saved_lookup_next_index = adj->lookup_next_index;
/* Redirect the adjacency to the add node or the pop node */
1070 adj->lookup_next_index = IP_LOOKUP_NEXT_ADD_HOP_BY_HOP;
1073 adj->lookup_next_index = IP_LOOKUP_NEXT_POP_HOP_BY_HOP;
/*
 * CLI handler for "ioam set destination". Parses an
 * <ip6-address>/<width> prefix, an optional "vrf-id <n>", and the action
 * keywords "add", "pop" and "none". Rejects the command unless exactly
 * one action was given and a prefix was supplied, then calls
 * ip6_ioam_set_destination(). A failure is reported as a CLI error that
 * includes the numeric return code.
 * NOTE(review): the branch that selects the final error return is not
 * visible in this view.
 */
1078 static clib_error_t *
1079 ip6_ioam_set_destination_command_fn (vlib_main_t * vm,
1080 unformat_input_t * input,
1081 vlib_cli_command_t * cmd)
/* ~0 marks "no prefix parsed yet"; checked after the parse loop */
1084 u32 mask_width = ~0;
1091 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1093 if (unformat (input, "%U/%d",
1094 unformat_ip6_address, &addr, &mask_width))
1096 else if (unformat (input, "vrf-id %d", &vrf_id))
1098 else if (unformat (input, "add"))
1100 else if (unformat (input, "pop"))
1102 else if (unformat (input, "none"))
1108 if ((is_add + is_pop + is_none) != 1)
1109 return clib_error_return (0, "One of (add, pop, none) required");
1110 if (mask_width == ~0)
1111 return clib_error_return (0, "<address>/<mask-width> required");
1113 rv = ip6_ioam_set_destination (&addr, mask_width, vrf_id,
1114 is_add, is_pop, is_none);
1121 return clib_error_return (0, "ip6_ioam_set_destination returned %d", rv);
/*
 * CLI registration: binds the "ioam set destination" path to its handler.
 * The help text lists the optional vrf-id keyword, which the handler
 * parses but does not require.
 */
1127 VLIB_CLI_COMMAND (ip6_ioam_set_destination_cmd, static) = {
1128 .path = "ioam set destination",
1129 .short_help = "ioam set destination <ip6-address>/<width> [vrf-id <id>] add | pop | none",
1130 .function = ip6_ioam_set_destination_command_fn,
/*
 * Install the end-of-path hook. The pointer is stored in
 * ip6_hop_by_hop_main.ioam_end_of_path_cb, which the pop node reads and,
 * when it is non-null, calls with (vm, node, b0, ip0, adj0) for packets
 * that carry a hop-by-hop header, using the u32 result as the next index.
 * cb is taken as void *, so the caller is responsible for passing a
 * function with that signature.
 */
1133 void vnet_register_ioam_end_of_path_callback (void *cb)
1135 ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
1137 hm->ioam_end_of_path_cb = cb;