2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * An adjacency is a representation of an attached L3 peer.
18 * Adjacency Sub-types:
19 * - neighbour: a representation of an attached L3 peer.
20 * Key:{addr,interface,link/ether-type}
22 * - glean: used to drive ARP/ND for packets destined to a local sub-net.
23 * 'glean' means: use the packet's destination address as the target
24 * address in the ARP packet.
25 * UNSHARED. Only one per-interface.
26 * - midchain: a neighbour adj on a virtual/tunnel interface.
28 * The API to create and update the adjacency is very sub-type specific. This
29 * is intentional as it encourages the user to carefully consider which adjacency
30 * sub-type they are really using, and hence assign it data in the appropriate
31 * sub-type space in the union of sub-types. This prevents the adj becoming a
32 * disorganised dumping ground for 'my feature needs a u16 somewhere' data. It
33 * is important to enforce this approach as space in the adjacency is at a premium,
34 * as we need it to fit in 1 cache line.
36 * The API is also based around an index to an adjacency, not a raw pointer. This
37 * is so the user doesn't suffer the same limp inducing firearm injuries that
38 * the author suffered as the adjacencies can realloc.
44 #include <vnet/adj/adj_types.h>
45 #include <vnet/adj/adj_nbr.h>
46 #include <vnet/adj/adj_glean.h>
47 #include <vnet/adj/rewrite.h>
49 /** @brief Common (IP4/IP6) next index stored in adjacency. */
52 /** Adjacency to drop this packet. */
54 /** Adjacency to punt this packet. */
57 /** This packet is for one of our own IP addresses. */
60 /** This packet matches an "incomplete adjacency" and packets
61 need to be passed to ARP to find rewrite string for
65 /** This packet matches an "interface route" and packets
66 need to be passed to ARP to find rewrite string for
70 /** This packet is to be rewritten and forwarded to the next
71 processing node. This is typically the output interface but
72 might be another node for further output processing. */
73 IP_LOOKUP_NEXT_REWRITE,
75 /** This packet follows a mid-chain adjacency */
76 IP_LOOKUP_NEXT_MIDCHAIN,
78 /** This packet needs to go to ICMP error */
79 IP_LOOKUP_NEXT_ICMP_ERROR,
81 /** Multicast Adjacency. */
84 /** Broadcast Adjacency. */
87 /** Multicast Midchain Adjacency. An Adjacency for sending multicast packets
88 * on a tunnel/virtual interface */
89 IP_LOOKUP_NEXT_MCAST_MIDCHAIN,
92 } __attribute__ ((packed)) ip_lookup_next_t;
96 IP4_LOOKUP_N_NEXT = IP_LOOKUP_N_NEXT,
101 /* Hop-by-hop header handling */
102 IP6_LOOKUP_NEXT_HOP_BY_HOP = IP_LOOKUP_N_NEXT,
103 IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP,
104 IP6_LOOKUP_NEXT_POP_HOP_BY_HOP,
108 #define IP4_LOOKUP_NEXT_NODES { \
109 [IP_LOOKUP_NEXT_DROP] = "ip4-drop", \
110 [IP_LOOKUP_NEXT_PUNT] = "ip4-punt", \
111 [IP_LOOKUP_NEXT_LOCAL] = "ip4-local", \
112 [IP_LOOKUP_NEXT_ARP] = "ip4-arp", \
113 [IP_LOOKUP_NEXT_GLEAN] = "ip4-glean", \
114 [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite", \
115 [IP_LOOKUP_NEXT_MCAST] = "ip4-rewrite-mcast", \
116 [IP_LOOKUP_NEXT_BCAST] = "ip4-rewrite-bcast", \
117 [IP_LOOKUP_NEXT_MIDCHAIN] = "ip4-midchain", \
118 [IP_LOOKUP_NEXT_MCAST_MIDCHAIN] = "ip4-mcast-midchain", \
119 [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error", \
122 #define IP6_LOOKUP_NEXT_NODES { \
123 [IP_LOOKUP_NEXT_DROP] = "ip6-drop", \
124 [IP_LOOKUP_NEXT_PUNT] = "ip6-punt", \
125 [IP_LOOKUP_NEXT_LOCAL] = "ip6-local", \
126 [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", \
127 [IP_LOOKUP_NEXT_GLEAN] = "ip6-glean", \
128 [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", \
129 [IP_LOOKUP_NEXT_BCAST] = "ip6-rewrite-bcast", \
130 [IP_LOOKUP_NEXT_MCAST] = "ip6-rewrite-mcast", \
131 [IP_LOOKUP_NEXT_MIDCHAIN] = "ip6-midchain", \
132 [IP_LOOKUP_NEXT_MCAST_MIDCHAIN] = "ip6-mcast-midchain", \
133 [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip6-icmp-error", \
134 [IP6_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", \
135 [IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", \
136 [IP6_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", \
140 * The special broadcast address (used to construct a broadcast adjacency)
142 extern const ip46_address_t ADJ_BCAST_ADDR;
145 * Forward declaration
147 struct ip_adjacency_t_;
150 * @brief A function type for post-rewrite fixups on midchain adjacency
152 typedef void (*adj_midchain_fixup_t) (vlib_main_t * vm,
153 const struct ip_adjacency_t_ * adj,
158 * @brief Attributes of an IP adjacency. Each value is the bit position of the matching ADJ_FLAG_* in adj_flags_t
160 typedef enum adj_attr_t_
163 * Currently a sync walk is active. Used to prevent re-entrant walking
165 ADJ_ATTR_SYNC_WALK_ACTIVE = 0,
168 * Packets TX through the midchain do not increment the interface
169 * counters. This should be used when the adj is associated with an L2
170 * interface and that L2 interface is in a bridge domain. In that case
171 * the packet will have traversed the interface's TX node, and hence have
172 * been counted, before it traverses this midchain
174 ADJ_ATTR_MIDCHAIN_NO_COUNT,
176 * When stacking midchains on a fib-entry extract the choice from the
177 * load-balance returned based on an IP hash of the adj's rewrite
179 ADJ_ATTR_MIDCHAIN_IP_STACK,
181 * If the midchain were to stack on its FIB entry a loop would form.
183 ADJ_ATTR_MIDCHAIN_LOOPED,
185 * the fixup function is standard IP4o4 header
187 ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR,
189 * the fixup function performs the flow hash
190 * this means the flow hash is performed on the inner
191 * header, where the entropy is higher.
193 ADJ_ATTR_MIDCHAIN_FIXUP_FLOW_HASH,
196 #define ADJ_ATTR_NAMES { \
197 [ADJ_ATTR_SYNC_WALK_ACTIVE] = "walk-active", \
198 [ADJ_ATTR_MIDCHAIN_NO_COUNT] = "midchain-no-count", \
199 [ADJ_ATTR_MIDCHAIN_IP_STACK] = "midchain-ip-stack", \
200 [ADJ_ATTR_MIDCHAIN_LOOPED] = "midchain-looped", \
201 [ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR] = "midchain-ip4o4-hdr-fixup", \
202 [ADJ_ATTR_MIDCHAIN_FIXUP_FLOW_HASH] = "midchain-flow-hash", \
205 #define FOR_EACH_ADJ_ATTR(_attr) \
206 for (_attr = ADJ_ATTR_SYNC_WALK_ACTIVE; \
207 _attr <= ADJ_ATTR_MIDCHAIN_FIXUP_FLOW_HASH; \
211 * @brief Flags on an IP adjacency
213 typedef enum adj_flags_t_
216 ADJ_FLAG_SYNC_WALK_ACTIVE = (1 << ADJ_ATTR_SYNC_WALK_ACTIVE),
217 ADJ_FLAG_MIDCHAIN_NO_COUNT = (1 << ADJ_ATTR_MIDCHAIN_NO_COUNT),
218 ADJ_FLAG_MIDCHAIN_IP_STACK = (1 << ADJ_ATTR_MIDCHAIN_IP_STACK),
219 ADJ_FLAG_MIDCHAIN_LOOPED = (1 << ADJ_ATTR_MIDCHAIN_LOOPED),
220 ADJ_FLAG_MIDCHAIN_FIXUP_IP4O4_HDR = (1 << ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR),
221 ADJ_FLAG_MIDCHAIN_FIXUP_FLOW_HASH = (1 << ADJ_ATTR_MIDCHAIN_FIXUP_FLOW_HASH),
222 } __attribute__ ((packed)) adj_flags_t;
225 * @brief Format adjacency flags
227 extern u8* format_adj_flags(u8 * s, va_list * args);
230 * @brief IP unicast adjacency.
231 * @note cache aligned.
233 * An adjacency is a representation of a peer on a particular link.
235 typedef struct ip_adjacency_t_
237 CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
240 * Linkage into the FIB node graph. First member since this type
241 * has 8 byte alignment requirements.
245 * feature [arc] config index
252 * IP_LOOKUP_NEXT_ARP/IP_LOOKUP_NEXT_REWRITE
254 * neighbour adjacency sub-type;
258 ip46_address_t next_hop;
261 * IP_LOOKUP_NEXT_MIDCHAIN
263 * A nbr adj that is also recursive. Think tunnels.
264 * A nbr adj can transition to be of type MIDCHAIN
265 * so be sure to leave the two structs with the next_hop
271 * The recursive next-hop.
272 * This field MUST be at the same memory location as
273 * sub_type.nbr.next_hop
275 ip46_address_t next_hop;
277 * The next DPO to use
281 * A function to perform the post-rewrite fixup
283 adj_midchain_fixup_t fixup_func;
285 * Fixup data passed back to the client in the fixup function
287 const void *fixup_data;
289 * the FIB entry this midchain resolves through. required for recursive
292 fib_node_index_t fei;
295 u8 __ia_midchain_pad[4];
299 * IP_LOOKUP_NEXT_GLEAN
301 * Glean the address to ARP for from the packet's destination.
302 * Technically these aren't adjacencies, i.e. they are not a
303 * representation of a peer. One day we might untangle this coupling
304 * and use a new Glean DPO.
312 CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
314 /** Rewrite in second and third cache lines */
315 VNET_DECLARE_REWRITE;
318 * more control plane members that do not fit on the first cacheline
320 CLIB_CACHE_LINE_ALIGN_MARK (cacheline3);
323 * A sorted vector of delegates
325 struct adj_delegate_t_ *ia_delegates;
328 * The VLIB node in which this adj is used to forward packets
333 * Next hop after ip4-lookup.
334 * This is not accessed in the rewrite nodes.
337 ip_lookup_next_t lookup_next_index;
346 * The protocol of the neighbor/peer. i.e. the protocol with
347 * which to interpret the 'next-hop' attributes of the sub-types.
350 fib_protocol_t ia_nh_proto;
353 * Flags on the adjacency
356 adj_flags_t ia_flags;
359 * Free space on the fourth cacheline (not used in the DP)
364 STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline0) == 0),
365 "IP adjacency cacheline 0 is not offset");
366 STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline1) ==
367 CLIB_CACHE_LINE_BYTES),
368 "IP adjacency cacheline 1 is more than one cacheline size offset");
369 #if defined __x86_64__
370 STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline3) ==
371 3 * CLIB_CACHE_LINE_BYTES),
372 "IP adjacency cacheline 3 is more than one cacheline size offset");
373 /* An adj fits into 4 cachelines on your average machine */
374 STATIC_ASSERT_SIZEOF (ip_adjacency_t, 4 * 64);
379 * Take a reference counting lock on the adjacency
381 extern void adj_lock(adj_index_t adj_index);
384 * Release a reference counting lock on the adjacency
386 extern void adj_unlock(adj_index_t adj_index);
390 * Add a child dependent to an adjacency. The child will
391 * thus be informed via its registered back-walk function
392 * when the adjacency state changes.
394 extern u32 adj_child_add(adj_index_t adj_index,
395 fib_node_type_t type,
396 fib_node_index_t child_index);
399 * Remove a child dependent
401 extern void adj_child_remove(adj_index_t adj_index,
405 * @brief Walk the Adjacencies on a given interface
407 extern void adj_walk (u32 sw_if_index,
412 * @brief Return the link type of the adjacency
414 extern vnet_link_t adj_get_link_type (adj_index_t ai);
417 * @brief Return the sw interface index of the adjacency.
419 extern u32 adj_get_sw_if_index (adj_index_t ai);
422 * @brief Return true if the adjacency is 'UP', i.e. can be used for forwarding.
423 * 0 is down, !0 is up.
425 extern int adj_is_up (adj_index_t ai);
428 * @brief Return the rewrite string of the adjacency
430 extern const u8* adj_get_rewrite (adj_index_t ai);
433 * @brief descend the FIB graph looking for loops
436 * The adj index to traverse
438 * @param entry_indicies
439 * A pointer to a vector of FIB entries already visited.
441 extern int adj_recursive_loop_detect (adj_index_t ai,
442 fib_node_index_t **entry_indicies);
446 * The global adjacency pool. Exposed for fast/inline data-plane access
448 extern ip_adjacency_t *adj_pool;
452 * Adjacency packet counters
454 extern vlib_combined_counter_main_t adjacency_counters;
457 * @brief Global Config for enabling per-adjacency counters
458 * This is configurable because it comes with a non-negligible
459 * performance cost. */
460 extern int adj_per_adj_counters;
464 * Get a pointer to an adjacency object from its index
466 static inline ip_adjacency_t *
467 adj_get (adj_index_t adj_index)
469 return (pool_elt_at_index(adj_pool, adj_index));
473 adj_is_valid(adj_index_t adj_index)
475 return !(pool_is_free_index(adj_pool, adj_index));
479 * @brief Get the global configuration option for enabling per-adj counters
482 adj_are_counters_enabled (void)
484 return (adj_per_adj_counters);