2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip_lookup.h: ip (4 or 6) lookup structures, adjacencies, ...
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
42 * Definitions for all things IP (v4|v6) unicast and multicast lookup related.
44 * - Adjacency definitions and registration.
45 * - Callbacks on route add.
46 * - Callbacks on interface address change.
49 #ifndef included_ip_lookup_h
50 #define included_ip_lookup_h
52 #include <vnet/vnet.h>
53 #include <vlib/buffer.h>
54 #include <vnet/ip/ip4_packet.h>
55 #include <vnet/ip/ip6_packet.h>
57 /** @brief Common (IP4/IP6) next index stored in adjacency. */
59 /** Packet does not match any route in table. */
62 /** Adjacency to drop this packet. */
64 /** Adjacency to punt this packet. */
67 /** This packet is for one of our own IP addresses. */
70 /** This packet matches an "interface route" and packets
71 need to be passed to ARP to find rewrite string for
75 /** This packet is to be rewritten and forwarded to the next
76 processing node. This is typically the output interface but
77 might be another node for further output processing. */
78 IP_LOOKUP_NEXT_REWRITE,
80 /** This packet needs to be classified */
81 IP_LOOKUP_NEXT_CLASSIFY,
83 /** This packet needs to go to MAP - RFC7596, RFC7597 */
86 /** This packet needs to go to MAP with Translation - RFC7599 */
89 /** This packet needs to go to an indirect next hop */
90 IP_LOOKUP_NEXT_INDIRECT,
92 /** This packet needs to go to ICMP error */
93 IP_LOOKUP_NEXT_ICMP_ERROR,
99 IP4_LOOKUP_N_NEXT = IP_LOOKUP_N_NEXT,
103 /** Hop-by-hop header handling */
104 IP6_LOOKUP_NEXT_HOP_BY_HOP = IP_LOOKUP_N_NEXT,
105 IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP,
106 IP6_LOOKUP_NEXT_POP_HOP_BY_HOP,
110 #define IP4_LOOKUP_NEXT_NODES { \
111 [IP_LOOKUP_NEXT_MISS] = "ip4-miss", \
112 [IP_LOOKUP_NEXT_DROP] = "ip4-drop", \
113 [IP_LOOKUP_NEXT_PUNT] = "ip4-punt", \
114 [IP_LOOKUP_NEXT_LOCAL] = "ip4-local", \
115 [IP_LOOKUP_NEXT_ARP] = "ip4-arp", \
116 [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit", \
117 [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify", \
118 [IP_LOOKUP_NEXT_MAP] = "ip4-map", \
119 [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t", \
120 [IP_LOOKUP_NEXT_INDIRECT] = "ip4-indirect", \
121 [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error", \
124 #define IP6_LOOKUP_NEXT_NODES { \
125 [IP_LOOKUP_NEXT_MISS] = "ip6-miss", \
126 [IP_LOOKUP_NEXT_DROP] = "ip6-drop", \
127 [IP_LOOKUP_NEXT_PUNT] = "ip6-punt", \
128 [IP_LOOKUP_NEXT_LOCAL] = "ip6-local", \
129 [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", \
130 [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", \
131 [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify", \
132 [IP_LOOKUP_NEXT_MAP] = "ip6-map", \
133 [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t", \
134 [IP_LOOKUP_NEXT_INDIRECT] = "ip6-indirect", \
135 [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip6-icmp-error", \
136 [IP6_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", \
137 [IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", \
138 [IP6_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", \
141 /** Flow hash configuration flags. Each flag occupies its own bit (0..5), so any subset can be OR-ed into a single mask. */
142 #define IP_FLOW_HASH_SRC_ADDR (1<<0)
143 #define IP_FLOW_HASH_DST_ADDR (1<<1)
144 #define IP_FLOW_HASH_PROTO (1<<2)
145 #define IP_FLOW_HASH_SRC_PORT (1<<3)
146 #define IP_FLOW_HASH_DST_PORT (1<<4)
/* Bit 5 is the only flag that is not part of IP_FLOW_HASH_DEFAULT. */
147 #define IP_FLOW_HASH_REVERSE_SRC_DST (1<<5)
149 /** Default: 5-tuple without the "reverse" bit. 0x1F sets bits 0..4, i.e. SRC_ADDR | DST_ADDR | PROTO | SRC_PORT | DST_PORT. */
150 #define IP_FLOW_HASH_DEFAULT (0x1F)
/* X-macro listing every flow hash flag as _(name, flag).
   The expansion site must define `_` (taking two arguments) before using
   this list. Entries are listed as src, dst, sport, dport, proto, reverse,
   which is not the numeric order of the flag bits. */
152 #define foreach_flow_hash_bit \
153 _(src, IP_FLOW_HASH_SRC_ADDR) \
154 _(dst, IP_FLOW_HASH_DST_ADDR) \
155 _(sport, IP_FLOW_HASH_SRC_PORT) \
156 _(dport, IP_FLOW_HASH_DST_PORT) \
157 _(proto, IP_FLOW_HASH_PROTO) \
158 _(reverse, IP_FLOW_HASH_REVERSE_SRC_DST)
/* Element count of the `u8 opaque[]` array declared further down in this file, i.e. its size in bytes. */
160 #define IP_ADJACENCY_OPAQUE_SZ 16
161 /** @brief IP unicast adjacency.
165 CLIB_CACHE_LINE_ALIGN_MARK(cacheline0);
166 /** Handle for this adjacency in adjacency heap. */
169 STRUCT_MARK(signature_start);
171 /** Interface address index for this local/arp adjacency. */
172 u32 if_address_index;
174 /** Number of adjacencies in block. Greater than 1 means multipath;
175 otherwise equal to 1. */
178 /** Next hop after ip4-lookup. */
180 ip_lookup_next_t lookup_next_index : 16;
181 u16 lookup_next_index_as_int;
184 /** Force re-lookup in a different FIB. ~0 => normal behavior */
185 i16 explicit_fib_index;
186 u16 mcast_group_index;
188 /** Highest possible perf subgraph arc interposition, e.g. for ip6 ioam */
189 u16 saved_lookup_next_index;
192 /** IP_LOOKUP_NEXT_ARP only */
194 ip46_address_t next_hop;
196 /** IP_LOOKUP_NEXT_CLASSIFY only */
200 /** IP_LOOKUP_NEXT_INDIRECT only */
202 ip46_address_t next_hop;
204 u8 opaque[IP_ADJACENCY_OPAQUE_SZ];
207 /** @brief Special format function for this adjacency.
208 * Specifically good for cases which use the entire rewrite
209 * for their own purposes. Can easily reduce to a u16 or a u8 if/when
210 * the first cache line reads "full" on the free space gas gauge.
212 u32 special_adjacency_format_function_index; /* 0 is invalid */
213 STRUCT_MARK(signature_end);
215 /** Number of FIB entries sharing this adjacency */
217 /** Use this adjacency instead */
218 u32 next_adj_with_signature;
220 CLIB_CACHE_LINE_ALIGN_MARK(cacheline1);
222 /** Rewrite in second/third cache lines */
223 vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE);
227 vnet_ip_adjacency_signature (ip_adjacency_t * adj)
229 uword signature = 0xfeedfaceULL;
231 /* Skip heap handle, sum everything up to but not including share_count */
232 signature = hash_memory
233 (STRUCT_MARK_PTR(adj, signature_start),
234 STRUCT_OFFSET_OF(ip_adjacency_t, signature_end)
235 - STRUCT_OFFSET_OF(ip_adjacency_t, signature_start),
238 /* and the rewrite */
239 signature = hash_memory (&adj->rewrite_header, VLIB_BUFFER_PRE_DATA_SIZE,
245 vnet_ip_adjacency_share_compare (ip_adjacency_t * a1, ip_adjacency_t *a2)
247 if (memcmp (STRUCT_MARK_PTR(a1, signature_start),
248 STRUCT_MARK_PTR(a2, signature_start),
249 STRUCT_OFFSET_OF(ip_adjacency_t, signature_end)
250 - STRUCT_OFFSET_OF(ip_adjacency_t, signature_start)))
252 if (memcmp (&a1->rewrite_header, &a2->rewrite_header,
253 VLIB_BUFFER_PRE_DATA_SIZE))
258 /* Index into adjacency table. */
259 typedef u32 ip_adjacency_index_t;
262 /* Directly connected next-hop adjacency index. */
263 u32 next_hop_adj_index;
265 /* Path weight for this adjacency. */
267 } ip_multipath_next_hop_t;
270 /* Adjacency index of first index in block. */
273 /* Power of 2 size of adjacency block. */
276 /* Number of prefixes that point to this adjacency. */
279 /* Normalized next hops are used as hash keys: they are sorted by weight
280 and weights are chosen so they add up to 1 << log2_n_adj_in_block (with
281 zero-weighted next hops being deleted).
282 Unnormalized next hops are saved so that control plane has a record of exactly
283 what the RIB told it. */
285 /* Number of hops in the multipath. */
288 /* Offset into next hop heap for this block. */
291 /* Heap handle used, for example, to free the block when we're done with it. */
293 } normalized_next_hops, unnormalized_next_hops;
294 } ip_multipath_adjacency_t;
296 /* IP multicast adjacency. */
298 /* Handle for this adjacency in adjacency heap. */
301 /* Number of adjacencies in block. */
304 /* Rewrite string. */
305 vnet_declare_rewrite (64 - 2*sizeof(u32));
306 } ip_multicast_rewrite_t;
309 /* ip4-multicast-rewrite next index. */
314 u8 rewrite_string[64 - 1*sizeof(u32) - 1*sizeof(u8)];
315 } ip_multicast_rewrite_string_t;
318 ip_multicast_rewrite_t * rewrite_heap;
320 ip_multicast_rewrite_string_t * rewrite_strings;
322 /* Negative rewrite string index; >= 0 sw_if_index.
323 Sorted. Used to hash. */
324 i32 ** adjacency_id_vector;
326 uword * adjacency_by_id_vector;
327 } ip_multicast_lookup_main_t;
330 /* Key for mhash; in fact, just a byte offset into mhash key vector. */
333 /* Interface which has this address. */
336 /* Adjacency for neighbor probe (ARP) for this interface address. */
337 u32 neighbor_probe_adj_index;
339 /* Address (prefix) length for this interface. */
342 /* Will be used for something eventually. Primary vs. secondary? */
345 /* Next and previous pointers for doubly linked list of
346 addresses per software interface. */
347 u32 next_this_sw_interface;
348 u32 prev_this_sw_interface;
349 } ip_interface_address_t;
354 IP_LOCAL_NEXT_UDP_LOOKUP,
359 struct ip_lookup_main_t;
361 typedef void (* ip_add_del_adjacency_callback_t) (struct ip_lookup_main_t * lm,
363 ip_adjacency_t * adj,
367 vnet_config_main_t config_main;
369 u32 * config_index_by_sw_if_index;
373 * This structure is used to dynamically register a custom adjacency
375 * Typically used with
376 * VNET_IP4_REGISTER_ADJACENCY or
377 * VNET_IP6_REGISTER_ADJACENCY macros.
379 typedef struct ip_adj_register_struct {
380 /** Name of the node for this registered adjacency. */
383 /** Formatting function for the adjacency.
384 * Variadic arguments given to the function are:
385 * - struct ip_lookup_main_t *
386 * - ip_adjacency_t *adj
388 format_function_t *fn;
391 * When the adjacency is registered, the ip-lookup next index will
392 * be written where this pointer points.
396 struct ip_adj_register_struct *next;
399 typedef struct ip_lookup_main_t {
400 /** Adjacency heap. */
401 ip_adjacency_t * adjacency_heap;
403 /** Adjacency packet/byte counters indexed by adjacency index. */
404 vlib_combined_counter_main_t adjacency_counters;
406 /** Heap of (next hop, weight) blocks. Sorted by next hop. */
407 ip_multipath_next_hop_t * next_hop_heap;
409 /** Indexed by heap_handle from ip_adjacency_t. */
410 ip_multipath_adjacency_t * multipath_adjacencies;
412 /** Adjacency by signature hash */
413 uword * adj_index_by_signature;
415 /** any-tx-feature-enabled interface bitmap */
416 uword * tx_sw_if_has_ip_output_features;
418 /** count of enabled features, per sw_if_index, to maintain bitmap */
419 i16 * tx_feature_count_by_sw_if_index;
421 /** Temporary vectors for looking up next hops in hash. */
422 ip_multipath_next_hop_t * next_hop_hash_lookup_key;
423 ip_multipath_next_hop_t * next_hop_hash_lookup_key_normalized;
425 /** Hash table mapping normalized next hops and weights
426 to multipath adjacency index. */
427 uword * multipath_adjacency_by_next_hops;
429 u32 * adjacency_remap_table;
430 u32 n_adjacency_remaps;
432 /** If average error per adjacency is less than this threshold adjacency block
434 f64 multipath_next_hop_error_tolerance;
436 /** Adjacency index for routing table misses, local punts, and drops. */
437 u32 miss_adj_index, drop_adj_index, local_adj_index;
439 /** Miss adjacency is always first in adjacency table. */
440 #define IP_LOOKUP_MISS_ADJ_INDEX 0
442 ip_add_del_adjacency_callback_t * add_del_adjacency_callbacks;
444 /** Pool of addresses that are assigned to interfaces. */
445 ip_interface_address_t * if_address_pool;
447 /** Hash table mapping address to index in interface address pool. */
448 mhash_t address_to_if_address_index;
450 /** Head of doubly linked list of interface addresses for each software interface.
451 ~0 means this interface has no address. */
452 u32 * if_address_pool_index_by_sw_if_index;
454 /** First table index to use for this interface, ~0 => none */
455 u32 * classify_table_index_by_sw_if_index;
457 /** rx unicast, multicast, tx interface/feature configuration. */
458 ip_config_main_t feature_config_mains[VNET_N_IP_FEAT];
460 /** Number of bytes in a fib result. Must be at least
461 sizeof (uword). First word is always adjacency index. */
462 u32 fib_result_n_bytes, fib_result_n_words;
464 format_function_t * format_fib_result;
466 /** 1 for ip6; 0 for ip4. */
469 /** Either format_ip4_address_and_length or format_ip6_address_and_length. */
470 format_function_t * format_address_and_length;
472 /** Special adjacency format functions */
473 format_function_t ** special_adjacency_format_functions;
475 /** Table mapping ip protocol to ip[46]-local node next index. */
476 u8 local_next_by_ip_protocol[256];
478 /** IP_BUILTIN_PROTOCOL_{TCP,UDP,ICMP,OTHER} by protocol in IP header. */
479 u8 builtin_protocol_by_ip_protocol[256];
481 /** Registered adjacencies */
482 ip_adj_register_t *registered_adjacencies;
/* Look up the adjacency stored at index adj_index of lm->adjacency_heap.
   The element is fetched with vec_elt_at_index and its heap_handle is
   ASSERT-ed to differ from ~0.
   NOTE(review): ~0 presumably marks a slot that is not in use, and the
   function presumably returns the fetched pointer - the return statement
   is not visible here, confirm. */
485 always_inline ip_adjacency_t *
486 ip_get_adjacency (ip_lookup_main_t * lm,
489 ip_adjacency_t * adj;
491 adj = vec_elt_at_index (lm->adjacency_heap, adj_index);
/* Debug check only: rejects an element whose heap_handle is ~0. */
493 ASSERT (adj->heap_handle != ~0);
498 #define ip_prefetch_adjacency(lm,adj_index,type) \
500 ip_adjacency_t * _adj = (lm)->adjacency_heap + (adj_index); \
501 CLIB_PREFETCH (_adj, sizeof (_adj[0]), type); \
504 /* Adds a next node to the ip4 or ip6 lookup node, which can then be used in adjacencies.
505 * @param vm vlib_main pointer
506 * @param is_ip4 selects the lookup node (presumably non-zero for ip4, zero for ip6 - confirm)
507 * @param reg registration structure; the resulting ip-lookup next index is
508 * written through the next-index pointer carried by this structure
509 * @return 0 on success. -1 on failure.
511 int ip_register_adjacency(vlib_main_t *vm, u8 is_ip4,
512 ip_adj_register_t *reg);
515 * Construction helpers to add IP adjacency at init.
517 #define VNET_IP_REGISTER_ADJACENCY(ip,x,...) \
518 __VA_ARGS__ ip_adj_register_t ip##adj_##x; \
519 static void __vnet_##ip##_register_adjacency_##x (void) \
520 __attribute__((__constructor__)) ; \
521 static void __vnet_##ip##_register_adjacency_##x (void) \
523 ip_lookup_main_t *lm = &ip##_main.lookup_main; \
524 ip##adj_##x.next = lm->registered_adjacencies; \
525 lm->registered_adjacencies = &ip##adj_##x; \
527 __VA_ARGS__ ip_adj_register_t ip##adj_##x
/* IPv4 convenience wrapper: forwards x and any extra arguments to
   VNET_IP_REGISTER_ADJACENCY with `ip4` as the ip prefix, so the generated
   constructor links the registration into ip4_main.lookup_main. */
529 #define VNET_IP4_REGISTER_ADJACENCY(x,...) \
530 VNET_IP_REGISTER_ADJACENCY(ip4, x, __VA_ARGS__)
/* IPv6 convenience wrapper: forwards x and any extra arguments to
   VNET_IP_REGISTER_ADJACENCY with `ip6` as the ip prefix, so the generated
   constructor links the registration into ip6_main.lookup_main. */
532 #define VNET_IP6_REGISTER_ADJACENCY(x,...) \
533 VNET_IP_REGISTER_ADJACENCY(ip6, x, __VA_ARGS__)
/* Register cb as an adjacency add/delete callback for lm.
   cb is appended to the lm->add_del_adjacency_callbacks vector, which
   ip_call_add_del_adjacency_callbacks iterates over. No duplicate check is
   made here. */
536 ip_register_add_del_adjacency_callback(ip_lookup_main_t * lm,
537 ip_add_del_adjacency_callback_t cb)
539 vec_add1(lm->add_del_adjacency_callbacks, cb);
/* Notify every registered add/delete callback about adjacency adj_index.
   The adjacency pointer is resolved once through ip_get_adjacency, then each
   entry of lm->add_del_adjacency_callbacks is called in vector index order
   with (lm, adj_index, adj, is_del). */
543 ip_call_add_del_adjacency_callbacks (ip_lookup_main_t * lm, u32 adj_index, u32 is_del)
545 ip_adjacency_t * adj;
547 adj = ip_get_adjacency (lm, adj_index);
/* vec_len is re-evaluated on every iteration of the loop condition. */
548 for (i = 0; i < vec_len (lm->add_del_adjacency_callbacks); i++)
549 lm->add_del_adjacency_callbacks[i] (lm, adj_index, adj, is_del);
552 /* Create new block of given number of contiguous adjacencies. */
554 ip_add_adjacency (ip_lookup_main_t * lm,
555 ip_adjacency_t * adj,
557 u32 * adj_index_result);
559 void ip_del_adjacency (ip_lookup_main_t * lm, u32 adj_index);
561 ip_update_adjacency (ip_lookup_main_t * lm,
563 ip_adjacency_t * copy_adj);
/* Tell whether adj_index refers to a multipath adjacency.
   The final expression is true when the entry of lm->multipath_adjacencies
   at adj_index records adj_index as its own adj_index and has
   n_adj_in_block > 0. The two `if` guards before it presumably return 0
   early - their bodies are not visible here, confirm. */
566 ip_adjacency_is_multipath(ip_lookup_main_t * lm, u32 adj_index)
/* Guard for an empty multipath vector. */
568 if (!vec_len(lm->multipath_adjacencies))
/* NOTE(review): this guard only triggers when vec_len < adj_index - 1, yet
   the expression below reads multipath_adjacencies[adj_index]. An adj_index
   equal to vec_len or vec_len + 1 passes the guard and indexes past the end
   of the vector. adj_index is a u32, so adj_index - 1 also wraps around for
   adj_index == 0. A bound of the form `adj_index >= vec_len (...)` looks
   like what was intended - confirm and fix. */
571 if (vec_len(lm->multipath_adjacencies) < adj_index - 1)
575 return (lm->multipath_adjacencies[adj_index].adj_index == adj_index &&
576 lm->multipath_adjacencies[adj_index].n_adj_in_block > 0);
580 ip_multipath_adjacency_free (ip_lookup_main_t * lm,
581 ip_multipath_adjacency_t * a);
584 ip_multipath_adjacency_add_del_next_hop (ip_lookup_main_t * lm,
586 u32 old_mp_adj_index,
587 u32 next_hop_adj_index,
589 u32 * new_mp_adj_index);
592 ip_interface_address_add_del (ip_lookup_main_t * lm,
/* Find the interface address entry registered under the key addr_fib.
   The key is looked up in lm->address_to_if_address_index with mhash_get.
   On a hit, the first word of the result (p[0]) is used as an index into
   lm->if_address_pool and that pool element is returned. Returns 0 when
   mhash_get yields a null pointer. */
599 always_inline ip_interface_address_t *
600 ip_get_interface_address (ip_lookup_main_t * lm, void * addr_fib)
602 uword * p = mhash_get (&lm->address_to_if_address_index, addr_fib);
603 return p ? pool_elt_at_index (lm->if_address_pool, p[0]) : 0;
/* Translate a->address_key back into memory through mhash_key_to_mem on
   lm->address_to_if_address_index and return the result.
   NOTE(review): presumably this is a pointer to the address bytes that were
   used as the mhash key when the entry was added - confirm against
   mhash_key_to_mem. */
607 ip_interface_address_get_address (ip_lookup_main_t * lm, ip_interface_address_t * a)
608 { return mhash_key_to_mem (&lm->address_to_if_address_index, a->address_key); }
/* Resolve the interface address entry to use for buffer b.
   The adjacency is taken from the buffer's TX adjacency index and must be an
   ARP or LOCAL adjacency (checked with ASSERT). Its if_address_index is
   used, unless it is ~0, in which case the entry recorded for sw_if_index in
   lm->if_address_pool_index_by_sw_if_index is used instead.
   Returns the matching element of lm->if_address_pool, or NULL when the
   resulting index is still ~0. */
610 always_inline ip_interface_address_t *
611 ip_interface_address_for_packet (ip_lookup_main_t * lm, vlib_buffer_t * b, u32 sw_if_index)
613 ip_adjacency_t * adj;
614 u32 if_address_index;
616 adj = ip_get_adjacency (lm, vnet_buffer (b)->ip.adj_index[VLIB_TX]);
/* Only ARP and LOCAL adjacencies are accepted by this check. */
618 ASSERT (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP
619 || adj->lookup_next_index == IP_LOOKUP_NEXT_LOCAL);
620 if_address_index = adj->if_address_index;
/* ~0 in the adjacency selects the per-interface fallback. The other arm of
   this conditional is not visible here; presumably it keeps the adjacency's
   own index - confirm. */
621 if_address_index = (if_address_index == ~0 ?
622 vec_elt (lm->if_address_pool_index_by_sw_if_index, sw_if_index)
/* ~0 after the fallback means there is no address to return. */
625 return (if_address_index != ~0)?pool_elt_at_index (lm->if_address_pool, if_address_index):NULL;
628 #define foreach_ip_interface_address(lm,a,sw_if_index,loop,body) \
630 vnet_main_t *_vnm = vnet_get_main(); \
631 u32 _sw_if_index = sw_if_index; \
632 vnet_sw_interface_t *_swif; \
633 _swif = vnet_get_sw_interface (_vnm, _sw_if_index); \
636 * Loop => honor unnumbered interface addressing. \
638 if (loop && _swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) \
639 _sw_if_index = _swif->unnumbered_sw_if_index; \
641 (vec_len((lm)->if_address_pool_index_by_sw_if_index) \
643 ? vec_elt ((lm)->if_address_pool_index_by_sw_if_index, \
644 (_sw_if_index)) : (u32)~0; \
645 ip_interface_address_t * _a; \
648 _a = pool_elt_at_index ((lm)->if_address_pool, _ia); \
649 _ia = _a->next_this_sw_interface; \
655 void ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index);
656 u32 vnet_register_special_adjacency_format_function
657 (ip_lookup_main_t * lm, format_function_t * fp);
659 #endif /* included_ip_lookup_h */