2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #include <vnet/adj/adj_nbr.h>
17 #include <vnet/adj/adj_internal.h>
18 #include <vnet/adj/adj_l2.h>
19 #include <vnet/adj/adj_nsh.h>
20 #include <vnet/adj/adj_midchain.h>
21 #include <vnet/ethernet/arp_packet.h>
22 #include <vnet/dpo/drop_dpo.h>
23 #include <vnet/dpo/load_balance.h>
24 #include <vnet/fib/fib_walk.h>
25 #include <vnet/fib/fib_entry.h>
26 #include <vnet/ip/ip4_inlines.h>
27 #include <vnet/ip/ip6_inlines.h>
30 * @brief Trace data for packets traversing the midchain tx node
/* Trace record captured for traced packets by the midchain tx nodes.
 * The trace formatter in this file prints its ai member as the index of
 * the adjacency. */
32 typedef struct adj_midchain_tx_trace_t_
35 * @brief the midchain adj we are traversing
38 } adj_midchain_tx_trace_t;
/*
 * Shared worker behind the midchain tx graph nodes.
 * For each buffer of the frame it reads the midchain adjacency index from
 * ip.adj_index[VLIB_TX], follows the DPO that adjacency is stacked on,
 * selects that DPO node as the next node and overwrites
 * ip.adj_index[VLIB_TX] with the DPO index. All buffers are then enqueued
 * and frame->n_vectors is returned.
 * NOTE(review): the remaining parameters and the loop headers are not
 * visible in this view; the two wrappers in this file call it with
 * (vm, node, frame, 1) and (vm, node, frame, 0).
 */
41 adj_midchain_tx_inline (vlib_main_t * vm,
42 vlib_node_runtime_t * node,
46 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
47 u16 nexts[VLIB_FRAME_SIZE], *next;
48 u32 * from, n_left, thread_index;
49 vnet_main_t *vnm = vnet_get_main ();
50 vnet_interface_main_t *im = &vnm->interface_main;
52 thread_index = vm->thread_index;
53 n_left = frame->n_vectors;
54 from = vlib_frame_vector_args (frame);
/* Translate the buffer indices of the frame into buffer pointers. */
56 vlib_get_buffers (vm, from, bufs, n_left);
/* Four-at-a-time path: slots 0-3 are processed while the headers of
 * slots 4-7 are prefetched. */
63 u32 adj_index0, adj_index1, adj_index2, adj_index3;
64 const ip_adjacency_t *adj0, *adj1, *adj2, *adj3;
65 const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
67 /* Prefetch next iteration. */
69 vlib_prefetch_buffer_header (b[4], LOAD);
70 vlib_prefetch_buffer_header (b[5], LOAD);
71 vlib_prefetch_buffer_header (b[6], LOAD);
72 vlib_prefetch_buffer_header (b[7], LOAD);
75 /* Follow the DPO on which the midchain is stacked */
76 adj_index0 = vnet_buffer(b[0])->ip.adj_index[VLIB_TX];
77 adj_index1 = vnet_buffer(b[1])->ip.adj_index[VLIB_TX];
78 adj_index2 = vnet_buffer(b[2])->ip.adj_index[VLIB_TX];
79 adj_index3 = vnet_buffer(b[3])->ip.adj_index[VLIB_TX];
81 adj0 = adj_get(adj_index0);
82 adj1 = adj_get(adj_index1);
83 adj2 = adj_get(adj_index2);
84 adj3 = adj_get(adj_index3);
86 dpo0 = &adj0->sub_type.midchain.next_dpo;
87 dpo1 = &adj1->sub_type.midchain.next_dpo;
88 dpo2 = &adj2->sub_type.midchain.next_dpo;
89 dpo3 = &adj3->sub_type.midchain.next_dpo;
/* The stacked DPO supplies both the next graph node and the object index
 * that node will look up. */
91 next[0] = dpo0->dpoi_next_node;
92 next[1] = dpo1->dpoi_next_node;
93 next[2] = dpo2->dpoi_next_node;
94 next[3] = dpo3->dpoi_next_node;
96 vnet_buffer(b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
97 vnet_buffer(b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
98 vnet_buffer(b[2])->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
99 vnet_buffer(b[3])->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
/* Account each buffer chain length against the TX combined counter of the
 * interface recorded in the rewrite header of its adjacency.
 * NOTE(review): presumably guarded by the fourth argument so that the
 * no-count node skips this; the guard is not visible here - confirm. */
103 vlib_increment_combined_counter (im->combined_sw_if_counters
104 + VNET_INTERFACE_COUNTER_TX,
106 adj0->rewrite_header.sw_if_index,
108 vlib_buffer_length_in_chain (vm, b[0]));
109 vlib_increment_combined_counter (im->combined_sw_if_counters
110 + VNET_INTERFACE_COUNTER_TX,
112 adj1->rewrite_header.sw_if_index,
114 vlib_buffer_length_in_chain (vm, b[1]));
115 vlib_increment_combined_counter (im->combined_sw_if_counters
116 + VNET_INTERFACE_COUNTER_TX,
118 adj2->rewrite_header.sw_if_index,
120 vlib_buffer_length_in_chain (vm, b[2]));
121 vlib_increment_combined_counter (im->combined_sw_if_counters
122 + VNET_INTERFACE_COUNTER_TX,
124 adj3->rewrite_header.sw_if_index,
126 vlib_buffer_length_in_chain (vm, b[3]));
/* Tracing is attempted only when the node has the trace flag set, and then
 * only for buffers individually flagged as traced. */
129 if (PREDICT_FALSE(node->flags & VLIB_NODE_FLAG_TRACE))
131 if (PREDICT_FALSE(b[0]->flags & VLIB_BUFFER_IS_TRACED))
133 adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
137 if (PREDICT_FALSE(b[1]->flags & VLIB_BUFFER_IS_TRACED))
139 adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
143 if (PREDICT_FALSE(b[2]->flags & VLIB_BUFFER_IS_TRACED))
145 adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
149 if (PREDICT_FALSE(b[3]->flags & VLIB_BUFFER_IS_TRACED))
151 adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
/* Single-buffer path: the same steps as above applied to one buffer. */
163 const ip_adjacency_t * adj0;
164 const dpo_id_t *dpo0;
167 /* Follow the DPO on which the midchain is stacked */
168 adj_index0 = vnet_buffer(b[0])->ip.adj_index[VLIB_TX];
169 adj0 = adj_get(adj_index0);
170 dpo0 = &adj0->sub_type.midchain.next_dpo;
171 next[0] = dpo0->dpoi_next_node;
172 vnet_buffer(b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
176 vlib_increment_combined_counter (im->combined_sw_if_counters
177 + VNET_INTERFACE_COUNTER_TX,
179 adj0->rewrite_header.sw_if_index,
181 vlib_buffer_length_in_chain (vm, b[0]));
184 if (PREDICT_FALSE(b[0]->flags & VLIB_BUFFER_IS_TRACED))
186 adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
/* Hand every buffer of the frame to the next node chosen for it above. */
196 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
198 return frame->n_vectors;
/*
 * Trace formatter registered as .format_trace for both midchain tx nodes.
 * Pulls vm, node and the trace record from the va_list (vm and node are
 * marked unused) and appends the adjacency index followed by the adjacency
 * itself, formatted with FORMAT_IP_ADJACENCY_NONE.
 */
202 format_adj_midchain_tx_trace (u8 * s, va_list * args)
204 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
205 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
206 adj_midchain_tx_trace_t *tr = va_arg (*args, adj_midchain_tx_trace_t*);
208 s = format(s, "adj-midchain:[%d]:%U", tr->ai,
209 format_ip_adjacency, tr->ai,
210 FORMAT_IP_ADJACENCY_NONE);
/*
 * Node function of the adj-midchain-tx node: runs the shared inline worker
 * with 1 as its last argument and returns its result.
 * NOTE(review): given the sibling no-count node, the 1 presumably turns on
 * interface counting - confirm against the inline worker signature.
 */
216 adj_midchain_tx (vlib_main_t * vm,
217 vlib_node_runtime_t * node,
218 vlib_frame_t * frame)
220 return (adj_midchain_tx_inline(vm, node, frame, 1));
/* Graph node registration for adj-midchain-tx: u32 vector elements,
 * dispatching to adj_midchain_tx and using the midchain tx trace formatter. */
223 VLIB_REGISTER_NODE (adj_midchain_tx_node) = {
224 .function = adj_midchain_tx,
225 .name = "adj-midchain-tx",
226 .vector_size = sizeof (u32),
228 .format_trace = format_adj_midchain_tx_trace,
/*
 * Node function of the adj-midchain-tx-no-count node: runs the shared
 * inline worker with 0 as its last argument and returns its result.
 * NOTE(review): the 0 presumably disables interface counting - confirm
 * against the inline worker signature.
 */
237 adj_midchain_tx_no_count (vlib_main_t * vm,
238 vlib_node_runtime_t * node,
239 vlib_frame_t * frame)
241 return (adj_midchain_tx_inline(vm, node, frame, 0));
/* Graph node registration for adj-midchain-tx-no-count. It is declared a
 * sibling of adj-midchain-tx and reuses the same trace formatter. */
244 VLIB_REGISTER_NODE (adj_midchain_tx_no_count_node) = {
245 .function = adj_midchain_tx_no_count,
246 .name = "adj-midchain-tx-no-count",
247 .vector_size = sizeof (u32),
249 .format_trace = format_adj_midchain_tx_trace,
250 .sibling_of = "adj-midchain-tx",
253 #ifndef CLIB_MARCH_VARIANT
/*
 * Classify the adjacency with index ai by its lookup_next_index.
 * IP_LOOKUP_NEXT_MIDCHAIN and IP_LOOKUP_NEXT_MCAST_MIDCHAIN form one case
 * group; all remaining lookup-next values form a second group.
 * NOTE(review): the return statements are not visible in this view;
 * presumably the first group yields true and the second false - confirm.
 */
256 adj_is_midchain (adj_index_t ai)
262 switch (adj->lookup_next_index)
264 case IP_LOOKUP_NEXT_MIDCHAIN:
265 case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
267 case IP_LOOKUP_NEXT_ARP:
268 case IP_LOOKUP_NEXT_GLEAN:
269 case IP_LOOKUP_NEXT_BCAST:
270 case IP_LOOKUP_NEXT_MCAST:
271 case IP_LOOKUP_NEXT_DROP:
272 case IP_LOOKUP_NEXT_PUNT:
273 case IP_LOOKUP_NEXT_LOCAL:
274 case IP_LOOKUP_NEXT_REWRITE:
275 case IP_LOOKUP_NEXT_ICMP_ERROR:
276 case IP_LOOKUP_N_NEXT:
/*
 * Map a link type to the index of the graph node that handles midchain
 * adjacencies of that type: the ip4, ip6, mpls, l2 (for
 * VNET_LINK_ETHERNET) or nsh midchain node.
 * NOTE(review): only the ETHERNET case label is visible in this view; the
 * labels selecting the other returns are presumably the matching
 * VNET_LINK_* values - confirm.
 */
284 adj_get_midchain_node (vnet_link_t link)
288 return (ip4_midchain_node.index);
290 return (ip6_midchain_node.index);
292 return (mpls_midchain_node.index);
293 case VNET_LINK_ETHERNET:
294 return (adj_l2_midchain_node.index);
296 return (adj_nsh_midchain_node.index);
/*
 * Select the output feature arc index matching the link type of the adj.
 * The candidates are the output_feature_arc_index fields of the ip4 and
 * ip6 lookup mains, mpls_main, ethernet_main and nsh_main_placeholder.
 * The ASSERT checks that the chosen arc is not the all-ones u8 value.
 * NOTE(review): most case labels and the return are not visible here.
 */
305 adj_midchain_get_feature_arc_index_for_link_type (const ip_adjacency_t *adj)
308 switch (adj->ia_link)
312 arc = ip4_main.lookup_main.output_feature_arc_index;
317 arc = ip6_main.lookup_main.output_feature_arc_index;
322 arc = mpls_main.output_feature_arc_index;
325 case VNET_LINK_ETHERNET:
327 arc = ethernet_main.output_feature_arc_index;
332 arc = nsh_main_placeholder.output_feature_arc_index;
340 ASSERT (arc != (u8) ~0);
/*
 * Return the index of the tx node this adj should use: the no-count node
 * when ADJ_FLAG_MIDCHAIN_NO_COUNT is set in ia_flags, otherwise the
 * regular adj-midchain-tx node.
 */
346 adj_nbr_midchain_get_tx_node (ip_adjacency_t *adj)
348 return ((adj->ia_flags & ADJ_FLAG_MIDCHAIN_NO_COUNT) ?
349 adj_midchain_tx_no_count_node.index :
350 adj_midchain_tx_node.index);
356 * Tear down the mid-chain state of the adj
/*
 * Undo the midchain wiring of an adjacency.
 * The stacked next DPO is reset, then, with the worker threads held at the
 * barrier, the end node of the output feature arc for the adj interface is
 * set to the interface-output node (looked up by name). The config index
 * returned by that call is stored in ia_cfg_index.
 */
359 adj_midchain_teardown (ip_adjacency_t *adj)
361 vlib_main_t *vm = vlib_get_main();
363 dpo_reset(&adj->sub_type.midchain.next_dpo);
365 vlib_worker_thread_barrier_sync(vm);
366 adj->ia_cfg_index = vnet_feature_modify_end_node(
367 adj_midchain_get_feature_arc_index_for_link_type (adj),
368 adj->rewrite_header.sw_if_index,
369 vlib_get_node_by_name (vlib_get_main(),
370 (u8*) "interface-output")->index);
371 vlib_worker_thread_barrier_release(vm);
377 * Setup the adj as a mid-chain
/*
 * Initialise the midchain part of the adjacency at adj_index.
 * Stores the fixup callback and its data, marks the adj as not tracking
 * any FIB entry (fei set to FIB_NODE_INDEX_INVALID) and ORs flags into
 * ia_flags. It then adjusts the rewrite flags, updates the end node of the
 * output feature arc under the worker barrier and finally stacks the adj
 * on the drop DPO for its link type.
 * NOTE(review): the data and flags parameters are used below but their
 * declarations are not visible in this view.
 */
380 adj_midchain_setup (adj_index_t adj_index,
381 adj_midchain_fixup_t fixup,
385 vlib_main_t *vm = vlib_get_main();
389 ASSERT(ADJ_INDEX_INVALID != adj_index);
391 adj = adj_get(adj_index);
393 adj->sub_type.midchain.fixup_func = fixup;
394 adj->sub_type.midchain.fixup_data = data;
395 adj->sub_type.midchain.fei = FIB_NODE_INDEX_INVALID;
396 adj->ia_flags |= flags;
/* Mirror the IP4-over-IP4 fixup request into the rewrite flags: set here
 * when requested, cleared by the following statement (presumably the else
 * branch, which is not visible in this view). */
398 if (flags & ADJ_FLAG_MIDCHAIN_FIXUP_IP4O4_HDR)
400 adj->rewrite_header.flags |= VNET_REWRITE_FIXUP_IP4_O_4;
404 adj->rewrite_header.flags &= ~VNET_REWRITE_FIXUP_IP4_O_4;
/* Without an explicit flow-hash fixup request the rewrite flag is cleared. */
406 if (!(flags & ADJ_FLAG_MIDCHAIN_FIXUP_FLOW_HASH))
408 adj->rewrite_header.flags &= ~VNET_REWRITE_FIXUP_FLOW_HASH;
411 tx_node = adj_nbr_midchain_get_tx_node(adj);
/* The feature arc end node is changed with the workers stopped.
 * NOTE(review): the node argument is not visible here; presumably it is
 * tx_node - confirm. */
413 vlib_worker_thread_barrier_sync(vm);
414 adj->ia_cfg_index = vnet_feature_modify_end_node(
415 adj_midchain_get_feature_arc_index_for_link_type (adj),
416 adj->rewrite_header.sw_if_index,
418 vlib_worker_thread_barrier_release(vm);
421 * stack the midchain on the drop so it's ready to forward in the adj-midchain-tx.
422 * The graph arc used/created here is from the midchain-tx node to the
423 * child's registered node. This is because post adj processing the next
424 * node are any output features, then the midchain-tx. from there we
425 * need to get to the stacked child's node.
427 dpo_stack_from_node(tx_node,
428 &adj->sub_type.midchain.next_dpo,
429 drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
433 * adj_nbr_midchain_update_rewrite
435 * Update the adjacency's rewrite string. A NULL string implies the
436 * rewrite is reset (i.e. when ARP/ND entry is gone).
437 * NB: the adj being updated may be handling traffic in the DP.
/*
 * Convert the adjacency at adj_index into a midchain (first call only) and
 * update its rewrite.
 * If the adj is not yet of a midchain lookup-next type, adj_midchain_setup
 * is run with the supplied fixup, fixup_data and flags. The rewrite is
 * then updated through adj_nbr_update_rewrite_internal using
 * IP_LOOKUP_NEXT_MIDCHAIN, the midchain node for the adj link type and the
 * tx node chosen for the adj.
 * NOTE(review): the trailing parameters (flags and presumably the rewrite
 * string) and the final call argument are not visible in this view.
 */
440 adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
441 adj_midchain_fixup_t fixup,
442 const void *fixup_data,
448 ASSERT(ADJ_INDEX_INVALID != adj_index);
450 adj = adj_get(adj_index);
453 * one time only update. since we don't support changing the tunnel
454 * src,dst, this is all we need.
456 if (adj->lookup_next_index != IP_LOOKUP_NEXT_MIDCHAIN &&
457 adj->lookup_next_index != IP_LOOKUP_NEXT_MCAST_MIDCHAIN)
459 adj_midchain_setup(adj_index, fixup, fixup_data, flags);
463 * update the rewrite with the workers paused.
465 adj_nbr_update_rewrite_internal(adj,
466 IP_LOOKUP_NEXT_MIDCHAIN,
467 adj_get_midchain_node(adj->ia_link),
468 adj_nbr_midchain_get_tx_node(adj),
/*
 * Re-point the node that follows the adjacency at adj_index.
 * With the workers held at the barrier, rewrite_header.next_index is
 * recomputed through vlib_node_add_next and the end node of the output
 * feature arc for the adj interface is modified; the resulting config
 * index is stored in ia_cfg_index.
 * NOTE(review): the second parameter and the node arguments of both calls
 * are not visible here; presumably the caller-supplied next node is used
 * for both - confirm.
 */
473 adj_nbr_midchain_update_next_node (adj_index_t adj_index,
479 ASSERT(ADJ_INDEX_INVALID != adj_index);
481 adj = adj_get(adj_index);
482 vm = vlib_get_main();
484 vlib_worker_thread_barrier_sync(vm);
486 adj->rewrite_header.next_index = vlib_node_add_next(vlib_get_main(),
490 adj->ia_cfg_index = vnet_feature_modify_end_node(
491 adj_midchain_get_feature_arc_index_for_link_type (adj),
492 adj->rewrite_header.sw_if_index,
495 vlib_worker_thread_barrier_release(vm);
/*
 * Restore the midchain tx node as the node following the adjacency at
 * adj_index.
 * With the workers held at the barrier, rewrite_header.next_index is
 * recomputed with the tx node of the adj as the next node, and the end
 * node of the output feature arc for the adj interface is set to that same
 * tx node; the resulting config index is stored in ia_cfg_index.
 * NOTE(review): the node from which the next arc is added is not visible.
 */
499 adj_nbr_midchain_reset_next_node (adj_index_t adj_index)
504 ASSERT(ADJ_INDEX_INVALID != adj_index);
506 adj = adj_get(adj_index);
507 vm = vlib_get_main();
509 vlib_worker_thread_barrier_sync(vm);
511 adj->rewrite_header.next_index =
512 vlib_node_add_next(vlib_get_main(),
514 adj_nbr_midchain_get_tx_node(adj));
516 adj->ia_cfg_index = vnet_feature_modify_end_node(
517 adj_midchain_get_feature_arc_index_for_link_type (adj),
518 adj->rewrite_header.sw_if_index,
519 adj_nbr_midchain_get_tx_node(adj));
521 vlib_worker_thread_barrier_release(vm);
525 * adj_nbr_midchain_unstack
527 * Unstack the adj and re-stack it on the drop DPO.
/*
 * Detach the adjacency at adj_index from whatever it is stacked on.
 * The tracked FIB entry index is saved and then invalidated on the adj.
 * If an entry was being tracked, loop detection is re-run from that entry
 * with an empty visited list, which is freed afterwards. Finally the adj
 * is stacked on the drop DPO for its link type, followed by a memory
 * barrier.
 */
530 adj_nbr_midchain_unstack (adj_index_t adj_index)
532 fib_node_index_t *entry_indicies, tmp;
535 ASSERT(ADJ_INDEX_INVALID != adj_index);
536 adj = adj_get (adj_index);
539 * check to see if this unstacking breaks a recursion loop
541 entry_indicies = NULL;
/* Invalidate fei before the walk so that the walk sees the adj as no
 * longer linked to the entry. */
542 tmp = adj->sub_type.midchain.fei;
543 adj->sub_type.midchain.fei = FIB_NODE_INDEX_INVALID;
545 if (FIB_NODE_INDEX_INVALID != tmp)
547 fib_entry_recursive_loop_detect(tmp, &entry_indicies);
548 vec_free(entry_indicies);
554 dpo_stack(DPO_ADJACENCY_MIDCHAIN,
555 vnet_link_to_dpo_proto(adj->ia_link),
556 &adj->sub_type.midchain.next_dpo,
557 drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
558 CLIB_MEMORY_BARRIER();
/*
 * Stack the midchain adjacency ai on the forwarding contributed by FIB
 * entry fei for chain type fct.
 * The entry index is recorded on the adj and loop detection is run. On a
 * loop the drop DPO is used. The entry forwarding is fetched into tmp and,
 * when it is a load-balance, it is either resolved to a single bucket now
 * or the adj is flagged to compute a flow hash per packet. The result is
 * handed to adj_nbr_midchain_stack and the visited list is freed.
 * NOTE(review): branch keywords and braces are not visible in this view;
 * the forwarding fetch is presumably the no-loop branch - confirm.
 */
562 adj_nbr_midchain_stack_on_fib_entry (adj_index_t ai,
563 fib_node_index_t fei,
564 fib_forward_chain_type_t fct)
566 fib_node_index_t *entry_indicies;
567 dpo_id_t tmp = DPO_INVALID;
573 * check to see if this stacking will form a recursion loop
575 entry_indicies = NULL;
576 adj->sub_type.midchain.fei = fei;
578 if (fib_entry_recursive_loop_detect(adj->sub_type.midchain.fei, &entry_indicies))
581 * loop formed, stack on the drop.
583 dpo_copy(&tmp, drop_dpo_get(fib_forw_chain_type_to_dpo_proto(fct)));
587 fib_entry_contribute_forwarding (fei, fct, &tmp);
589 if (DPO_LOAD_BALANCE == tmp.dpoi_type)
593 lb = load_balance_get (tmp.dpoi_index);
/* Resolve to one bucket now when the adj asks for IP stacking or when
 * there is only one bucket to choose from. */
595 if ((adj->ia_flags & ADJ_FLAG_MIDCHAIN_IP_STACK) ||
596 lb->lb_n_buckets == 1)
599 * do that hash now and stack on the choice.
600 * If the choice is an incomplete adj then we will need a poke when
601 * it becomes complete. This happens since the adj update walk propagates
602 * as far a recursive paths.
604 const dpo_id_t *choice;
/* The flow hash is computed over the rewrite of the adj, read as an IPv4
 * or IPv6 header according to the chain type. */
607 if (FIB_FORW_CHAIN_TYPE_UNICAST_IP4 == fct)
609 hash = ip4_compute_flow_hash ((ip4_header_t *) adj_get_rewrite (ai),
612 else if (FIB_FORW_CHAIN_TYPE_UNICAST_IP6 == fct)
614 hash = ip6_compute_flow_hash ((ip6_header_t *) adj_get_rewrite (ai),
/* Pick the bucket by masking the hash with lb_n_buckets_minus_1 and stack
 * on that bucket instead of on the load-balance. */
623 choice = load_balance_get_bucket_i (lb, hash & lb->lb_n_buckets_minus_1);
624 dpo_copy (&tmp, choice);
626 else if (lb->lb_n_buckets > 1)
629 * the client has chosen not to use the stacking to select a
630 * bucket, and there are more than one buckets. there's no
631 * value in using the midchain's fixed rewrite string to select
632 * the path, so force a flow hash on the inner.
634 adj->rewrite_header.flags |= VNET_REWRITE_FIXUP_FLOW_HASH;
637 if (adj->ia_flags & ADJ_FLAG_MIDCHAIN_FIXUP_FLOW_HASH)
640 * The client, for reasons unbeknownst to adj, wants to force
641 * a flow hash on the inner, we will oblige.
643 adj->rewrite_header.flags |= VNET_REWRITE_FIXUP_FLOW_HASH;
647 adj_nbr_midchain_stack (ai, &tmp);
649 vec_free(entry_indicies);
653 * adj_nbr_midchain_stack
/*
 * Stack the midchain adjacency at adj_index on the DPO given by next.
 * Asserts that the index is valid and that the adj is of a midchain
 * lookup-next type, then stacks its next_dpo from the tx node chosen for
 * the adj.
 * NOTE(review): the last argument of the stacking call is not visible in
 * this view; presumably it is next - confirm.
 */
656 adj_nbr_midchain_stack (adj_index_t adj_index,
657 const dpo_id_t *next)
661 ASSERT(ADJ_INDEX_INVALID != adj_index);
663 adj = adj_get(adj_index);
665 ASSERT((IP_LOOKUP_NEXT_MIDCHAIN == adj->lookup_next_index) ||
666 (IP_LOOKUP_NEXT_MCAST_MIDCHAIN == adj->lookup_next_index));
668 dpo_stack_from_node(adj_nbr_midchain_get_tx_node(adj),
669 &adj->sub_type.midchain.next_dpo,
/*
 * Loop-detection step for the midchain adjacency ai.
 * Scans the vector of already visited FIB entry indices; when one of them
 * equals the entry this adj is linked to (midchain.fei), the adj is
 * flagged ADJ_FLAG_MIDCHAIN_LOOPED. The flag is cleared by the last
 * statement.
 * NOTE(review): the return statements and the lookup of adj are not
 * visible in this view; presumably a match returns early with a loop
 * indication and the flag is cleared only when no match is found -
 * confirm.
 */
674 adj_ndr_midchain_recursive_loop_detect (adj_index_t ai,
675 fib_node_index_t **entry_indicies)
677 fib_node_index_t *entry_index, *entries;
678 ip_adjacency_t * adj;
681 entries = *entry_indicies;
683 vec_foreach(entry_index, entries)
685 if (*entry_index == adj->sub_type.midchain.fei)
688 * The entry this midchain links to is already in the set
689 * of visited entries, this is a loop
691 adj->ia_flags |= ADJ_FLAG_MIDCHAIN_LOOPED;
696 adj->ia_flags &= ~ADJ_FLAG_MIDCHAIN_LOOPED;
/*
 * Format callback for a midchain adjacency (registered as .dv_format).
 * Takes an adjacency index and an indent from the va_list and appends:
 * the link type, a features marker when VNET_REWRITE_HAS_FEATURES is set,
 * the next-hop address, the rewrite, and then on a new indented line the
 * object the adj is stacked on, including the FIB entry index when one is
 * tracked.
 * NOTE(review): the format function used for the rewrite is not visible in
 * this view.
 */
701 format_adj_midchain (u8* s, va_list *ap)
703 index_t index = va_arg(*ap, index_t);
704 u32 indent = va_arg(*ap, u32);
705 ip_adjacency_t * adj = adj_get(index);
707 s = format (s, "%U", format_vnet_link, adj->ia_link);
708 if (adj->rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)
709 s = format(s, " [features]");
710 s = format (s, " via %U",
711 format_ip46_address, &adj->sub_type.nbr.next_hop,
712 adj_proto_to_46(adj->ia_nh_proto));
713 s = format (s, " %U",
715 &adj->rewrite_header, sizeof (adj->rewrite_data), indent);
716 s = format (s, "\n%Ustacked-on",
717 format_white_space, indent);
719 if (FIB_NODE_INDEX_INVALID != adj->sub_type.midchain.fei)
721 s = format (s, " entry:%d", adj->sub_type.midchain.fei);
/* The stacked DPO is printed two columns deeper than the adj itself. */
724 s = format (s, ":\n%U%U",
725 format_white_space, indent+2,
726 format_dpo_id, &adj->sub_type.midchain.next_dpo, indent+2);
/* DPO lock callback: takes a lock on the adjacency the DPO refers to. */
732 adj_dpo_lock (dpo_id_t *dpo)
734 adj_lock(dpo->dpoi_index);
/* DPO unlock callback: releases a lock on the adjacency the DPO refers to. */
737 adj_dpo_unlock (dpo_id_t *dpo)
739 adj_unlock(dpo->dpoi_index);
/* Virtual function table registered for the DPO_ADJACENCY_MIDCHAIN type:
 * lock and unlock map onto the adjacency lock count, formatting uses
 * format_adj_midchain, and the uRPF and MTU getters are the adj DPO ones. */
742 const static dpo_vft_t adj_midchain_dpo_vft = {
743 .dv_lock = adj_dpo_lock,
744 .dv_unlock = adj_dpo_unlock,
745 .dv_format = format_adj_midchain,
746 .dv_get_urpf = adj_dpo_get_urpf,
747 .dv_get_mtu = adj_dpo_get_mtu,
751 * @brief The per-protocol VLIB graph nodes that are assigned to a midchain
754 * this means that these graph nodes are ones from which a midchain is the
755 * parent object in the DPO-graph.
/* One list of graph-node names per payload protocol, gathered into
 * midchain_nodes, which is indexed by DPO protocol and passed to
 * dpo_register by the module init function.
 * NOTE(review): the contents of the per-protocol lists are not visible in
 * this view. */
757 const static char* const midchain_ip4_nodes[] =
762 const static char* const midchain_ip6_nodes[] =
767 const static char* const midchain_mpls_nodes[] =
772 const static char* const midchain_ethernet_nodes[] =
777 const static char* const midchain_nsh_nodes[] =
783 const static char* const * const midchain_nodes[DPO_PROTO_NUM] =
785 [DPO_PROTO_IP4] = midchain_ip4_nodes,
786 [DPO_PROTO_IP6] = midchain_ip6_nodes,
787 [DPO_PROTO_MPLS] = midchain_mpls_nodes,
788 [DPO_PROTO_ETHERNET] = midchain_ethernet_nodes,
789 [DPO_PROTO_NSH] = midchain_nsh_nodes,
/* Module initialisation: registers the DPO_ADJACENCY_MIDCHAIN type with
 * its virtual function table and per-protocol node lists. */
793 adj_midchain_module_init (void)
795 dpo_register(DPO_ADJACENCY_MIDCHAIN, &adj_midchain_dpo_vft, midchain_nodes);