2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip_lookup.h: ip (4 or 6) lookup structures, adjacencies, ...
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
42 * Definitions for all things IP (v4|v6) unicast and multicast lookup related.
44 * - Adjacency definitions and registration.
45 * - Callbacks on route add.
46 * - Callbacks on interface address change.
48 #ifndef included_ip_lookup_h
49 #define included_ip_lookup_h
51 #include <vnet/vnet.h>
52 #include <vlib/buffer.h>
53 #include <vnet/ip/ip4_packet.h>
54 #include <vnet/ip/ip6_packet.h>
55 #include <vnet/fib/fib_node.h>
56 #include <vnet/dpo/dpo.h>
58 /** @brief Common (IP4/IP6) next index stored in adjacency. */
60 /** Adjacency to drop this packet. */
62 /** Adjacency to punt this packet. */
65 /** This packet is for one of our own IP addresses. */
68 /** This packet matches an "incomplete adjacency" and packets
69 need to be passed to ARP to find rewrite string for
73 /** This packet matches an "interface route" and packets
74 need to be passed to ARP to find rewrite string for
78 /** This packet is to be rewritten and forwarded to the next
79 processing node. This is typically the output interface but
80 might be another node for further output processing. */
81 IP_LOOKUP_NEXT_REWRITE,
83 /** This packet follows a load-balance */
84 IP_LOOKUP_NEXT_LOAD_BALANCE,
86 /** This packet follows a mid-chain adjacency */
87 IP_LOOKUP_NEXT_MIDCHAIN,
89 /** This packet needs to go to ICMP error */
90 IP_LOOKUP_NEXT_ICMP_ERROR,
96 IP4_LOOKUP_N_NEXT = IP_LOOKUP_N_NEXT,
100 /* Hop-by-hop header handling */
101 IP6_LOOKUP_NEXT_HOP_BY_HOP = IP_LOOKUP_N_NEXT,
102 IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP,
103 IP6_LOOKUP_NEXT_POP_HOP_BY_HOP,
107 #define IP4_LOOKUP_NEXT_NODES { \
108 [IP_LOOKUP_NEXT_DROP] = "ip4-drop", \
109 [IP_LOOKUP_NEXT_PUNT] = "ip4-punt", \
110 [IP_LOOKUP_NEXT_LOCAL] = "ip4-local", \
111 [IP_LOOKUP_NEXT_ARP] = "ip4-arp", \
112 [IP_LOOKUP_NEXT_GLEAN] = "ip4-glean", \
113 [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit", \
114 [IP_LOOKUP_NEXT_MIDCHAIN] = "ip4-midchain", \
115 [IP_LOOKUP_NEXT_LOAD_BALANCE] = "ip4-load-balance", \
116 [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error", \
119 #define IP6_LOOKUP_NEXT_NODES { \
120 [IP_LOOKUP_NEXT_DROP] = "ip6-drop", \
121 [IP_LOOKUP_NEXT_PUNT] = "ip6-punt", \
122 [IP_LOOKUP_NEXT_LOCAL] = "ip6-local", \
123 [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", \
124 [IP_LOOKUP_NEXT_GLEAN] = "ip6-glean", \
125 [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", \
126 [IP_LOOKUP_NEXT_MIDCHAIN] = "ip6-midchain", \
127 [IP_LOOKUP_NEXT_LOAD_BALANCE] = "ip6-load-balance", \
128 [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip6-icmp-error", \
129 [IP6_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", \
130 [IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", \
131 [IP6_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", \
134 /** Flow hash configuration
 *
 * One single-bit flag per flow hash option. The six flags occupy bits 0..5
 * and are OR-ed together to form a flow hash configuration mask.
 */
135 #define IP_FLOW_HASH_SRC_ADDR (1<<0)
136 #define IP_FLOW_HASH_DST_ADDR (1<<1)
137 #define IP_FLOW_HASH_PROTO (1<<2)
138 #define IP_FLOW_HASH_SRC_PORT (1<<3)
139 #define IP_FLOW_HASH_DST_PORT (1<<4)
140 #define IP_FLOW_HASH_REVERSE_SRC_DST (1<<5)
142 /** Default: 5-tuple without the "reverse" bit
 *
 * 0x1F sets bits 0..4, i.e. SRC_ADDR | DST_ADDR | PROTO | SRC_PORT |
 * DST_PORT; bit 5 (REVERSE_SRC_DST) is left clear.
 */
143 #define IP_FLOW_HASH_DEFAULT (0x1F)
/**
 * X-macro listing every flow hash option as a (name, flag) pair.
 *
 * The expansion site must define _(name, flag) before using this macro.
 * Note that the listing order (src, dst, sport, dport, proto, reverse)
 * is not the bit order of the IP_FLOW_HASH_* flags: proto is bit 2 but
 * is listed after the port flags.
 */
145 #define foreach_flow_hash_bit \
146 _(src, IP_FLOW_HASH_SRC_ADDR) \
147 _(dst, IP_FLOW_HASH_DST_ADDR) \
148 _(sport, IP_FLOW_HASH_SRC_PORT) \
149 _(dport, IP_FLOW_HASH_DST_PORT) \
150 _(proto, IP_FLOW_HASH_PROTO) \
151 _(reverse, IP_FLOW_HASH_REVERSE_SRC_DST)
154 * A flow hash configuration is a mask of the flow hash options
156 typedef u32 flow_hash_config_t;
/** Element count of the u16 opaque[] array declared later in this header. */
158 #define IP_ADJACENCY_OPAQUE_SZ 16
159 /** @brief IP unicast adjacency.
163 CLIB_CACHE_LINE_ALIGN_MARK(cacheline0);
164 /* Handle for this adjacency in adjacency heap. */
167 /** Number of adjacencies in block. Greater than 1 means multipath;
168 otherwise equal to 1. */
171 /** Next hop after ip4-lookup. */
173 ip_lookup_next_t lookup_next_index : 16;
174 u16 lookup_next_index_as_int;
177 /** Interface address index for this local/arp adjacency. */
178 u32 if_address_index;
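180 /** Force re-lookup in a different FIB. ~0 => normal behavior.
 NOTE(review): this description does not obviously match the
 mcast_group_index member declared next -- confirm or reword. */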
181 u16 mcast_group_index;
183 /** Highest possible perf subgraph arc interposition, e.g. for ip6 ioam */
184 u16 saved_lookup_next_index;
195 * IP_LOOKUP_NEXT_ARP/IP_LOOKUP_NEXT_REWRITE
197 * neighbour adjacency sub-type;
200 ip46_address_t next_hop;
203 * IP_LOOKUP_NEXT_MIDCHAIN
205 * A nbr adj that is also recursive. Think tunnels.
206 * A nbr adj can transition to be of type MIDCHAIN
207 * so be sure to leave the two structs with the next_hop
212 * The recursive next-hop
214 ip46_address_t next_hop;
216 * The node index of the tunnel's post rewrite/TX function.
218 u32 tx_function_node;
220 * The next DPO to use
225 * IP_LOOKUP_NEXT_GLEAN
227 * Glean the address to ARP for from the packet's destination
230 ip46_address_t receive_addr;
233 u16 opaque[IP_ADJACENCY_OPAQUE_SZ];
236 /** @brief Special format function for this adjacency.
237 * Specifically good for cases which use the entire rewrite
238 * for their own purposes. Can easily reduce to a u16 or a u8 if/when
239 * the first cache line reads "full" on the free space gas gauge.
241 u32 special_adjacency_format_function_index; /* 0 is invalid */
243 CLIB_CACHE_LINE_ALIGN_MARK(cacheline1);
245 /* Rewrite in second/third cache lines */
246 vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE);
249 * Members not accessed in the data plane are relegated to the
250 * remaining cachelines
/*
 * Compile-time layout checks for ip_adjacency_t: the cacheline0 marker
 * must sit at offset 0 and the cacheline1 marker at exactly
 * CLIB_CACHE_LINE_BYTES. Both checks are equalities, so the diagnostics
 * say "not at" rather than "more than".
 */
255 _Static_assert((STRUCT_OFFSET_OF(ip_adjacency_t, cacheline0) == 0),
256 "IP adjacency cacheline 0 is not at offset 0");
257 _Static_assert((STRUCT_OFFSET_OF(ip_adjacency_t, cacheline1) ==
258 CLIB_CACHE_LINE_BYTES),
259 "IP adjacency cacheline 1 is not at an offset of one cacheline size");
261 /* An all zeros address */
262 extern const ip46_address_t zero_addr;
264 /* Index into adjacency table. */
265 typedef u32 ip_adjacency_index_t;
268 /* Adjacency index of first index in block. */
271 /* Power of 2 size of adjacency block. */
274 /* Number of prefixes that point to this adjacency. */
277 /* Normalized next hops are saved for stats/display purposes */
279 /* Number of hops in the multipath. */
282 /* Offset into next hop heap for this block. */
285 /* Heap handle used, for example, to free the block when we're done with it. */
287 } normalized_next_hops;
288 } ip_multipath_adjacency_t;
290 /* IP multicast adjacency. */
292 /* Handle for this adjacency in adjacency heap. */
295 /* Number of adjacencies in block. */
298 /* Rewrite string. */
299 vnet_declare_rewrite (64 - 2*sizeof(u32));
300 } ip_multicast_rewrite_t;
303 /* ip4-multicast-rewrite next index. */
308 u8 rewrite_string[64 - 1*sizeof(u32) - 1*sizeof(u8)];
309 } ip_multicast_rewrite_string_t;
312 ip_multicast_rewrite_t * rewrite_heap;
314 ip_multicast_rewrite_string_t * rewrite_strings;
316 /* Negative rewrite string index; >= 0 sw_if_index.
317 Sorted. Used to hash. */
318 i32 ** adjacency_id_vector;
320 uword * adjacency_by_id_vector;
321 } ip_multicast_lookup_main_t;
324 /* Key for mhash; in fact, just a byte offset into mhash key vector. */
327 /* Interface which has this address. */
330 /* Adjacency for neighbor probe (ARP) for this interface address. */
331 u32 neighbor_probe_adj_index;
333 /* Address (prefix) length for this interface. */
336 /* Will be used for something eventually. Primary vs. secondary? */
339 /* Next and previous pointers for doubly linked list of
340 addresses per software interface. */
341 u32 next_this_sw_interface;
342 u32 prev_this_sw_interface;
343 } ip_interface_address_t;
348 IP_LOCAL_NEXT_UDP_LOOKUP,
353 struct ip_lookup_main_t;
355 typedef void (* ip_add_del_adjacency_callback_t) (struct ip_lookup_main_t * lm,
357 ip_adjacency_t * adj,
361 vnet_config_main_t config_main;
363 u32 * config_index_by_sw_if_index;
367 * This structure is used to dynamically register a custom adjacency
369 * Typically used with
370 * VNET_IP4_REGISTER_ADJACENCY or
371 * VNET_IP6_REGISTER_ADJACENCY macros.
373 typedef struct ip_adj_register_struct {
374 /** Name of the node for this registered adjacency. */
377 /** Formatting function for the adjacency.
378 * Variadic arguments given to the function are:
379 * - struct ip_lookup_main_t *
380 * - ip_adjacency_t *adj
382 format_function_t *fn;
385 * When the adjacency is registered, the ip-lookup next index will
386 * be written where this pointer points.
390 struct ip_adj_register_struct *next;
393 typedef struct ip_lookup_main_t {
394 /* Adjacency heap. */
395 ip_adjacency_t * adjacency_heap;
397 /** load-balance packet/byte counters indexed by LB index. */
398 vlib_combined_counter_main_t load_balance_counters;
400 /** any-tx-feature-enabled interface bitmap */
401 uword * tx_sw_if_has_ip_output_features;
403 /** count of enabled features, per sw_if_index, to maintain bitmap */
404 i16 * tx_feature_count_by_sw_if_index;
406 /** Pool of addresses that are assigned to interfaces. */
407 ip_interface_address_t * if_address_pool;
409 /** Hash table mapping address to index in interface address pool. */
410 mhash_t address_to_if_address_index;
412 /** Head of doubly linked list of interface addresses for each software interface.
413 ~0 means this interface has no address. */
414 u32 * if_address_pool_index_by_sw_if_index;
416 /** First table index to use for this interface, ~0 => none */
417 u32 * classify_table_index_by_sw_if_index;
419 /** rx unicast, multicast, tx interface/feature configuration. */
420 ip_config_main_t feature_config_mains[VNET_N_IP_FEAT];
422 /** Number of bytes in a fib result. Must be at least
423 sizeof (uword). First word is always adjacency index. */
424 u32 fib_result_n_bytes, fib_result_n_words;
426 format_function_t * format_fib_result;
428 /** 1 for ip6; 0 for ip4. */
431 /** Either format_ip4_address_and_length or format_ip6_address_and_length. */
432 format_function_t * format_address_and_length;
434 /** Special adjacency format functions */
435 format_function_t ** special_adjacency_format_functions;
437 /** Table mapping ip protocol to ip[46]-local node next index. */
438 u8 local_next_by_ip_protocol[256];
440 /** IP_BUILTIN_PROTOCOL_{TCP,UDP,ICMP,OTHER} by protocol in IP header. */
441 u8 builtin_protocol_by_ip_protocol[256];
443 /** Registered adjacencies */
444 ip_adj_register_t *registered_adjacencies;
447 always_inline ip_adjacency_t *
448 ip_get_adjacency (ip_lookup_main_t * lm,
451 ip_adjacency_t * adj;
453 adj = vec_elt_at_index (lm->adjacency_heap, adj_index);
455 ASSERT (adj->heap_handle != ~0);
460 #define ip_prefetch_adjacency(lm,adj_index,type) \
462 ip_adjacency_t * _adj = (lm)->adjacency_heap + (adj_index); \
463 CLIB_PREFETCH (_adj, sizeof (_adj[0]), type); \
466 /* Create new block of given number of contiguous adjacencies. */
468 ip_add_adjacency (ip_lookup_main_t * lm,
469 ip_adjacency_t * adj,
471 u32 * adj_index_result);
474 ip_interface_address_add_del (ip_lookup_main_t * lm,
482 format_ip_flow_hash_config (u8 * s, va_list * args);
484 always_inline ip_interface_address_t *
485 ip_get_interface_address (ip_lookup_main_t * lm, void * addr_fib)
487 uword * p = mhash_get (&lm->address_to_if_address_index, addr_fib);
488 return p ? pool_elt_at_index (lm->if_address_pool, p[0]) : 0;
492 fib_table_id_find_fib_index (fib_protocol_t proto,
496 ip_interface_address_get_address (ip_lookup_main_t * lm, ip_interface_address_t * a)
497 { return mhash_key_to_mem (&lm->address_to_if_address_index, a->address_key); }
499 #define foreach_ip_interface_address(lm,a,sw_if_index,loop,body) \
501 vnet_main_t *_vnm = vnet_get_main(); \
502 u32 _sw_if_index = sw_if_index; \
503 vnet_sw_interface_t *_swif; \
504 _swif = vnet_get_sw_interface (_vnm, _sw_if_index); \
507 * Loop => honor unnumbered interface addressing. \
509 if (loop && _swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) \
510 _sw_if_index = _swif->unnumbered_sw_if_index; \
512 (vec_len((lm)->if_address_pool_index_by_sw_if_index) \
514 ? vec_elt ((lm)->if_address_pool_index_by_sw_if_index, \
515 (_sw_if_index)) : (u32)~0; \
516 ip_interface_address_t * _a; \
519 _a = pool_elt_at_index ((lm)->if_address_pool, _ia); \
520 _ia = _a->next_this_sw_interface; \
/**
 * Declaration only; the definition is not part of this header.
 * NOTE(review): presumably initializes @a lm for the lookup node identified
 * by @a ip_lookup_node_index -- confirm against the implementation.
 */
526 void ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index);
528 #endif /* included_ip_lookup_h */