2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip_lookup.h: ip (4 or 6) lookup structures, adjacencies, ...
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #ifndef included_ip_lookup_h
41 #define included_ip_lookup_h
43 #include <vnet/vnet.h>
44 #include <vlib/buffer.h>
46 /* Next index stored in adjacency. */
48 /* Packet does not match any route in table. */
51 /* Adjacency says to drop or punt this packet. */
55 /* This packet is for one of our own IP addresses. */
58 /* This packet matches an "interface route" and packets
59 need to be passed to ARP to find rewrite string for
63 /* This packet is to be rewritten and forwarded to the next
64 processing node. This is typically the output interface but
65 might be another node for further output processing. */
66 IP_LOOKUP_NEXT_REWRITE,
68 /* This packet needs to be classified */
69 IP_LOOKUP_NEXT_CLASSIFY,
71 /* This packet needs to go to MAP - RFC7596, RFC7597 */
74 /* This packet needs to go to MAP with Translation - RFC7599 */
77 /* This packet needs to go to 6RD (RFC5969) */
80 /* Hop-by-hop header handling */
81 IP_LOOKUP_NEXT_HOP_BY_HOP,
82 IP_LOOKUP_NEXT_ADD_HOP_BY_HOP,
83 IP_LOOKUP_NEXT_POP_HOP_BY_HOP,
/*
 * Flow hash configuration.
 *
 * Each IP_FLOW_HASH_* value below is a distinct single-bit flag; flags are
 * OR-ed together to form a flow hash configuration mask.
 */
#define IP_FLOW_HASH_SRC_ADDR (1<<0)
#define IP_FLOW_HASH_DST_ADDR (1<<1)
#define IP_FLOW_HASH_PROTO (1<<2)
#define IP_FLOW_HASH_SRC_PORT (1<<3)
#define IP_FLOW_HASH_DST_PORT (1<<4)
#define IP_FLOW_HASH_REVERSE_SRC_DST (1<<5)

/* Default: 5-tuple without the "reverse" bit.
   Composed from the named flags rather than spelled as a literal; the
   value is still 0x1F, but it can no longer drift from the flag
   definitions above. */
#define IP_FLOW_HASH_DEFAULT \
  (IP_FLOW_HASH_SRC_ADDR | IP_FLOW_HASH_DST_ADDR | IP_FLOW_HASH_PROTO \
   | IP_FLOW_HASH_SRC_PORT | IP_FLOW_HASH_DST_PORT)

/* X-macro list of every flow hash flag, one _(name, bit) entry per flag.
   The includer defines `_` before expanding foreach_flow_hash_bit and
   undefines it afterwards; `name` is a bare identifier token and `bit`
   is the matching IP_FLOW_HASH_* flag.  Entry order is part of the
   expansion, so keep it stable. */
#define foreach_flow_hash_bit \
_(src, IP_FLOW_HASH_SRC_ADDR) \
_(dst, IP_FLOW_HASH_DST_ADDR) \
_(sport, IP_FLOW_HASH_SRC_PORT) \
_(dport, IP_FLOW_HASH_DST_PORT) \
_(proto, IP_FLOW_HASH_PROTO) \
_(reverse, IP_FLOW_HASH_REVERSE_SRC_DST)
107 /* IP unicast adjacency. */
109 CLIB_CACHE_LINE_ALIGN_MARK(cacheline0);
110 /* Handle for this adjacency in adjacency heap. */
113 STRUCT_MARK(signature_start);
115 /* Interface address index for this local/arp adjacency. */
116 u32 if_address_index;
118 /* Number of adjacencies in block. Greater than 1 means multipath;
119 otherwise equal to 1. */
122 /* Next hop after ip4-lookup. */
124 ip_lookup_next_t lookup_next_index : 16;
125 u16 lookup_next_index_as_int;
128 /* Force re-lookup in a different FIB. ~0 => normal behavior */
129 i16 explicit_fib_index;
130 u16 mcast_group_index;
132 /* Highest possible perf subgraph arc interposition, e.g. for ip6 ioam */
133 u16 saved_lookup_next_index;
136 /* IP_LOOKUP_NEXT_CLASSIFY only */
142 STRUCT_MARK(signature_end);
144 /* Number of FIB entries sharing this adjacency */
146 /* Use this adjacency instead */
147 u32 next_adj_with_signature;
149 CLIB_CACHE_LINE_ALIGN_MARK(cacheline1);
151 /* Rewrite in second/third cache lines */
152 vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE);
156 vnet_ip_adjacency_signature (ip_adjacency_t * adj)
158 uword signature = 0xfeedfaceULL;
160 /* Skip heap handle, sum everything up to but not including share_count */
161 signature = hash_memory64
162 (STRUCT_MARK_PTR(adj, signature_start),
163 STRUCT_OFFSET_OF(ip_adjacency_t, signature_end)
164 - STRUCT_OFFSET_OF(ip_adjacency_t, signature_start),
167 /* and the rewrite */
168 signature = hash_memory64 (&adj->rewrite_header, VLIB_BUFFER_PRE_DATA_SIZE,
174 vnet_ip_adjacency_share_compare (ip_adjacency_t * a1, ip_adjacency_t *a2)
176 if (memcmp (STRUCT_MARK_PTR(a1, signature_start),
177 STRUCT_MARK_PTR(a2, signature_start),
178 STRUCT_OFFSET_OF(ip_adjacency_t, signature_end)
179 - STRUCT_OFFSET_OF(ip_adjacency_t, signature_start)))
181 if (memcmp (&a1->rewrite_header, &a2->rewrite_header,
182 VLIB_BUFFER_PRE_DATA_SIZE))
187 /* Index into adjacency table. */
188 typedef u32 ip_adjacency_index_t;
191 /* Directly connected next-hop adjacency index. */
192 u32 next_hop_adj_index;
194 /* Path weight for this adjacency. */
196 } ip_multipath_next_hop_t;
199 /* Adjacency index of first index in block. */
202 /* Power of 2 size of adjacency block. */
205 /* Number of prefixes that point to this adjacency. */
208 /* Normalized next hops are used as hash keys: they are sorted by weight
209 and weights are chosen so they add up to 1 << log2_n_adj_in_block (with
210 zero-weighted next hops being deleted).
211 Unnormalized next hops are saved so that control plane has a record of exactly
212 what the RIB told it. */
214 /* Number of hops in the multipath. */
217 /* Offset into next hop heap for this block. */
220 /* Heap handle used, for example, to free the block when we're done with it. */
222 } normalized_next_hops, unnormalized_next_hops;
223 } ip_multipath_adjacency_t;
225 /* IP multicast adjacency. */
227 /* Handle for this adjacency in adjacency heap. */
230 /* Number of adjacencies in block. */
233 /* Rewrite string. */
234 vnet_declare_rewrite (64 - 2*sizeof(u32));
235 } ip_multicast_rewrite_t;
238 /* ip4-multicast-rewrite next index. */
243 u8 rewrite_string[64 - 1*sizeof(u32) - 1*sizeof(u8)];
244 } ip_multicast_rewrite_string_t;
247 ip_multicast_rewrite_t * rewrite_heap;
249 ip_multicast_rewrite_string_t * rewrite_strings;
251 /* Negative rewrite string index; >= 0 sw_if_index.
252 Sorted. Used to hash. */
253 i32 ** adjacency_id_vector;
255 uword * adjacency_by_id_vector;
256 } ip_multicast_lookup_main_t;
259 /* Key for mhash; in fact, just a byte offset into mhash key vector. */
262 /* Interface which has this address. */
265 /* Adjacency for neighbor probe (ARP) for this interface address. */
266 u32 neighbor_probe_adj_index;
268 /* Address (prefix) length for this interface. */
271 /* Will be used for something eventually. Primary vs. secondary? */
274 /* Next and previous pointers for doubly linked list of
275 addresses per software interface. */
276 u32 next_this_sw_interface;
277 u32 prev_this_sw_interface;
278 } ip_interface_address_t;
283 IP_LOCAL_NEXT_UDP_LOOKUP,
288 struct ip_lookup_main_t;
290 typedef void (* ip_add_del_adjacency_callback_t) (struct ip_lookup_main_t * lm,
292 ip_adjacency_t * adj,
296 vnet_config_main_t config_main;
298 u32 * config_index_by_sw_if_index;
301 typedef struct ip_lookup_main_t {
302 /* Adjacency heap. */
303 ip_adjacency_t * adjacency_heap;
305 /* Adjacency packet/byte counters indexed by adjacency index. */
306 vlib_combined_counter_main_t adjacency_counters;
308 /* Heap of (next hop, weight) blocks. Sorted by next hop. */
309 ip_multipath_next_hop_t * next_hop_heap;
311 /* Indexed by heap_handle from ip_adjacency_t. */
312 ip_multipath_adjacency_t * multipath_adjacencies;
314 /* Adjacency by signature hash */
315 uword * adj_index_by_signature;
317 /* Temporary vectors for looking up next hops in hash. */
318 ip_multipath_next_hop_t * next_hop_hash_lookup_key;
319 ip_multipath_next_hop_t * next_hop_hash_lookup_key_normalized;
321 /* Hash table mapping normalized next hops and weights
322 to multipath adjacency index. */
323 uword * multipath_adjacency_by_next_hops;
325 u32 * adjacency_remap_table;
326 u32 n_adjacency_remaps;
328 /* If average error per adjacency is less than this threshold adjacency block
330 f64 multipath_next_hop_error_tolerance;
332 /* Adjacency index for routing table misses, local punts, and drops. */
333 u32 miss_adj_index, drop_adj_index, local_adj_index;
335 /* Miss adjacency is always first in adjacency table. */
336 #define IP_LOOKUP_MISS_ADJ_INDEX 0
338 ip_add_del_adjacency_callback_t * add_del_adjacency_callbacks;
340 /* Pool of addresses that are assigned to interfaces. */
341 ip_interface_address_t * if_address_pool;
343 /* Hash table mapping address to index in interface address pool. */
344 mhash_t address_to_if_address_index;
346 /* Head of doubly linked list of interface addresses for each software interface.
347 ~0 means this interface has no address. */
348 u32 * if_address_pool_index_by_sw_if_index;
350 /* First table index to use for this interface, ~0 => none */
351 u32 * classify_table_index_by_sw_if_index;
353 /* rx/tx interface/feature configuration. */
354 ip_config_main_t rx_config_mains[VNET_N_CAST], tx_config_main;
356 /* Number of bytes in a fib result. Must be at least
357 sizeof (uword). First word is always adjacency index. */
358 u32 fib_result_n_bytes, fib_result_n_words;
360 format_function_t * format_fib_result;
362 /* 1 for ip6; 0 for ip4. */
365 /* Either format_ip4_address_and_length or format_ip6_address_and_length. */
366 format_function_t * format_address_and_length;
368 /* Table mapping ip protocol to ip[46]-local node next index. */
369 u8 local_next_by_ip_protocol[256];
371 /* IP_BUILTIN_PROTOCOL_{TCP,UDP,ICMP,OTHER} by protocol in IP header. */
372 u8 builtin_protocol_by_ip_protocol[256];
375 always_inline ip_adjacency_t *
376 ip_get_adjacency (ip_lookup_main_t * lm,
379 ip_adjacency_t * adj;
381 adj = vec_elt_at_index (lm->adjacency_heap, adj_index);
383 ASSERT (adj->heap_handle != ~0);
388 #define ip_prefetch_adjacency(lm,adj_index,type) \
390 ip_adjacency_t * _adj = (lm)->adjacency_heap + (adj_index); \
391 CLIB_PREFETCH (_adj, sizeof (_adj[0]), type); \
395 ip_call_add_del_adjacency_callbacks (ip_lookup_main_t * lm, u32 adj_index, u32 is_del)
397 ip_adjacency_t * adj;
399 adj = ip_get_adjacency (lm, adj_index);
400 for (i = 0; i < vec_len (lm->add_del_adjacency_callbacks); i++)
401 lm->add_del_adjacency_callbacks[i] (lm, adj_index, adj, is_del);
404 /* Create new block of given number of contiguous adjacencies. */
406 ip_add_adjacency (ip_lookup_main_t * lm,
407 ip_adjacency_t * adj,
409 u32 * adj_index_result);
411 void ip_del_adjacency (ip_lookup_main_t * lm, u32 adj_index);
414 ip_multipath_adjacency_free (ip_lookup_main_t * lm,
415 ip_multipath_adjacency_t * a);
418 ip_multipath_adjacency_add_del_next_hop (ip_lookup_main_t * lm,
420 u32 old_mp_adj_index,
421 u32 next_hop_adj_index,
423 u32 * new_mp_adj_index);
426 ip_interface_address_add_del (ip_lookup_main_t * lm,
433 always_inline ip_interface_address_t *
434 ip_get_interface_address (ip_lookup_main_t * lm, void * addr_fib)
436 uword * p = mhash_get (&lm->address_to_if_address_index, addr_fib);
437 return p ? pool_elt_at_index (lm->if_address_pool, p[0]) : 0;
441 ip_interface_address_get_address (ip_lookup_main_t * lm, ip_interface_address_t * a)
442 { return mhash_key_to_mem (&lm->address_to_if_address_index, a->address_key); }
444 always_inline ip_interface_address_t *
445 ip_interface_address_for_packet (ip_lookup_main_t * lm, vlib_buffer_t * b, u32 sw_if_index)
447 ip_adjacency_t * adj;
448 u32 if_address_index;
450 adj = ip_get_adjacency (lm, vnet_buffer (b)->ip.adj_index[VLIB_TX]);
452 ASSERT (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP
453 || adj->lookup_next_index == IP_LOOKUP_NEXT_LOCAL);
454 if_address_index = adj->if_address_index;
455 if_address_index = (if_address_index == ~0 ?
456 vec_elt (lm->if_address_pool_index_by_sw_if_index, sw_if_index)
459 return pool_elt_at_index (lm->if_address_pool, if_address_index);
462 #define foreach_ip_interface_address(lm,a,sw_if_index,loop,body) \
464 vnet_main_t *_vnm = vnet_get_main(); \
465 u32 _sw_if_index = sw_if_index; \
466 vnet_sw_interface_t *_swif; \
467 _swif = vnet_get_sw_interface (_vnm, _sw_if_index); \
470 * Loop => honor unnumbered interface addressing. \
472 if (loop && _swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) \
473 _sw_if_index = _swif->unnumbered_sw_if_index; \
475 (vec_len((lm)->if_address_pool_index_by_sw_if_index) \
477 ? vec_elt ((lm)->if_address_pool_index_by_sw_if_index, \
478 (_sw_if_index)) : (u32)~0; \
479 ip_interface_address_t * _a; \
482 _a = pool_elt_at_index ((lm)->if_address_pool, _ia); \
483 _ia = _a->next_this_sw_interface; \
489 void ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index);
491 #endif /* included_ip_lookup_h */