a7aef9f52a890b10db8749c272ce96ba2c507ba5
[vpp.git] / vnet / vnet / ip / lookup.h
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip_lookup.h: ip (4 or 6) lookup structures, adjacencies, ...
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #ifndef included_ip_lookup_h
41 #define included_ip_lookup_h
42
43 #include <vnet/vnet.h>
44 #include <vlib/buffer.h>
45 #include <vnet/ip/ip4_packet.h>
46 #include <vnet/ip/ip6_packet.h>
47
/*
 * Next indices shared by the IP4 and IP6 lookup nodes.  The value chosen
 * for a route is stored in its adjacency.
 */
typedef enum {
  /* No route in the table matched this packet. */
  IP_LOOKUP_NEXT_MISS = 0,

  /* The adjacency asks for the packet to be dropped ... */
  IP_LOOKUP_NEXT_DROP,
  /* ... or punted. */
  IP_LOOKUP_NEXT_PUNT,

  /* The packet is addressed to one of our own IP addresses. */
  IP_LOOKUP_NEXT_LOCAL,

  /* The packet matched an "interface route": it must go through ARP
     so that a rewrite string can be found for the destination. */
  IP_LOOKUP_NEXT_ARP,

  /* Rewrite the packet and forward it to the next processing node.
     Usually that is the output interface, but it may be another node
     doing further output processing. */
  IP_LOOKUP_NEXT_REWRITE,

  /* The packet needs to be classified. */
  IP_LOOKUP_NEXT_CLASSIFY,

  /* The packet needs to go to MAP - RFC7596, RFC7597. */
  IP_LOOKUP_NEXT_MAP,

  /* The packet needs to go to MAP with Translation - RFC7599. */
  IP_LOOKUP_NEXT_MAP_T,

  /* The packet needs to go to 6RD (RFC5969). */
  IP_LOOKUP_NEXT_SIXRD,

  /* The packet needs to go to an indirect next hop. */
  IP_LOOKUP_NEXT_INDIRECT,

  /* Mapped to the "ip4-icmp-error" / "ip6-icmp-error" nodes by the
     IP4/IP6_LOOKUP_NEXT_NODES tables. */
  IP_LOOKUP_NEXT_ICMP_ERROR,

  /* Count of common next indices; protocol-specific ones start here. */
  IP_LOOKUP_N_NEXT,
} ip_lookup_next_t;
89
90 typedef enum {
91   IP4_LOOKUP_N_NEXT = IP_LOOKUP_N_NEXT,
92 } ip4_lookup_next_t;
93
94 typedef enum {
95   /* Hop-by-hop header handling */
96   IP6_LOOKUP_NEXT_HOP_BY_HOP = IP_LOOKUP_N_NEXT,
97   IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP,
98   IP6_LOOKUP_NEXT_POP_HOP_BY_HOP,
99   IP6_LOOKUP_N_NEXT,
100 } ip6_lookup_next_t;
101
/*
 * Designated initializer giving, for each common lookup next index, the
 * name of the IP4 node that handles it.
 */
#define IP4_LOOKUP_NEXT_NODES {                           \
    [IP_LOOKUP_NEXT_MISS]       = "ip4-miss",             \
    [IP_LOOKUP_NEXT_DROP]       = "ip4-drop",             \
    [IP_LOOKUP_NEXT_PUNT]       = "ip4-punt",             \
    [IP_LOOKUP_NEXT_LOCAL]      = "ip4-local",            \
    [IP_LOOKUP_NEXT_ARP]        = "ip4-arp",              \
    [IP_LOOKUP_NEXT_REWRITE]    = "ip4-rewrite-transit",  \
    [IP_LOOKUP_NEXT_CLASSIFY]   = "ip4-classify",         \
    [IP_LOOKUP_NEXT_MAP]        = "ip4-map",              \
    [IP_LOOKUP_NEXT_MAP_T]      = "ip4-map-t",            \
    [IP_LOOKUP_NEXT_SIXRD]      = "ip4-sixrd",            \
    [IP_LOOKUP_NEXT_INDIRECT]   = "ip4-indirect",         \
    [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error",       \
}
116
/*
 * Designated initializer giving, for each common and IP6-only lookup next
 * index, the name of the IP6 node that handles it.
 */
#define IP6_LOOKUP_NEXT_NODES {                                     \
    [IP_LOOKUP_NEXT_MISS]            = "ip6-miss",                  \
    [IP_LOOKUP_NEXT_DROP]            = "ip6-drop",                  \
    [IP_LOOKUP_NEXT_PUNT]            = "ip6-punt",                  \
    [IP_LOOKUP_NEXT_LOCAL]           = "ip6-local",                 \
    [IP_LOOKUP_NEXT_ARP]             = "ip6-discover-neighbor",     \
    [IP_LOOKUP_NEXT_REWRITE]         = "ip6-rewrite",               \
    [IP_LOOKUP_NEXT_CLASSIFY]        = "ip6-classify",              \
    [IP_LOOKUP_NEXT_MAP]             = "ip6-map",                   \
    [IP_LOOKUP_NEXT_MAP_T]           = "ip6-map-t",                 \
    [IP_LOOKUP_NEXT_SIXRD]           = "ip6-sixrd",                 \
    [IP_LOOKUP_NEXT_INDIRECT]        = "ip6-indirect",              \
    [IP_LOOKUP_NEXT_ICMP_ERROR]      = "ip6-icmp-error",            \
    [IP6_LOOKUP_NEXT_HOP_BY_HOP]     = "ip6-hop-by-hop",            \
    [IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop",        \
    [IP6_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop",        \
}
134
/*
 * Flow hash configuration: one bit per packet field that may take part in
 * the flow hash, plus a bit that requests the "reverse" src/dst variant.
 */
#define IP_FLOW_HASH_SRC_ADDR        (1<<0)
#define IP_FLOW_HASH_DST_ADDR        (1<<1)
#define IP_FLOW_HASH_PROTO           (1<<2)
#define IP_FLOW_HASH_SRC_PORT        (1<<3)
#define IP_FLOW_HASH_DST_PORT        (1<<4)
#define IP_FLOW_HASH_REVERSE_SRC_DST (1<<5)

/* Default: the full 5-tuple, "reverse" bit clear (evaluates to 0x1F). */
#define IP_FLOW_HASH_DEFAULT                            \
  (IP_FLOW_HASH_SRC_ADDR | IP_FLOW_HASH_DST_ADDR        \
   | IP_FLOW_HASH_PROTO                                 \
   | IP_FLOW_HASH_SRC_PORT | IP_FLOW_HASH_DST_PORT)

/*
 * X-macro over every flow hash bit.  The user defines _(name, bit) before
 * expanding this list.
 */
#define foreach_flow_hash_bit                   \
_(src, IP_FLOW_HASH_SRC_ADDR)                   \
_(dst, IP_FLOW_HASH_DST_ADDR)                   \
_(sport, IP_FLOW_HASH_SRC_PORT)                 \
_(dport, IP_FLOW_HASH_DST_PORT)                 \
_(proto, IP_FLOW_HASH_PROTO)                    \
_(reverse, IP_FLOW_HASH_REVERSE_SRC_DST)
153
154 /* IP unicast adjacency. */
155 typedef struct {
156   CLIB_CACHE_LINE_ALIGN_MARK(cacheline0);
157   /* Handle for this adjacency in adjacency heap. */
158   u32 heap_handle;
159
160   STRUCT_MARK(signature_start);
161
162   /* Interface address index for this local/arp adjacency. */
163   u32 if_address_index;
164
165   /* Number of adjecencies in block.  Greater than 1 means multipath;
166      otherwise equal to 1. */
167   u16 n_adj;
168
169   /* Next hop after ip4-lookup. */
170   union {
171     ip_lookup_next_t lookup_next_index : 16;
172     u16 lookup_next_index_as_int;
173   };
174
175   /* Force re-lookup in a different FIB. ~0 => normal behavior */
176   i16 explicit_fib_index;
177   u16 mcast_group_index;  
178
179   /* Highest possible perf subgraph arc interposition, e.g. for ip6 ioam */
180   u16 saved_lookup_next_index;
181
182   union {
183     /* IP_LOOKUP_NEXT_ARP only */
184     struct {
185       ip46_address_t next_hop;
186     } arp;
187     /* IP_LOOKUP_NEXT_CLASSIFY only */
188     struct {
189       u16 table_index;
190     } classify;
191     /* IP_LOOKUP_NEXT_INDIRECT only */
192     struct {
193         ip46_address_t next_hop;
194     } indirect;
195   };
196
197   STRUCT_MARK(signature_end);
198
199   /* Number of FIB entries sharing this adjacency */
200   u32 share_count;
201   /* Use this adjacency instead */
202   u32 next_adj_with_signature;
203
204   CLIB_CACHE_LINE_ALIGN_MARK(cacheline1);
205
206   /* Rewrite in second/third cache lines */
207   vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE);
208 } ip_adjacency_t;
209
210 static inline uword
211 vnet_ip_adjacency_signature (ip_adjacency_t * adj)
212 {
213   uword signature = 0xfeedfaceULL;
214
215   /* Skip heap handle, sum everything up to but not including share_count */
216   signature = hash_memory
217       (STRUCT_MARK_PTR(adj, signature_start),
218        STRUCT_OFFSET_OF(ip_adjacency_t, signature_end)
219        - STRUCT_OFFSET_OF(ip_adjacency_t, signature_start),
220        signature);
221
222   /* and the rewrite */
223   signature = hash_memory (&adj->rewrite_header, VLIB_BUFFER_PRE_DATA_SIZE,
224                              signature);
225   return signature;
226 }
227
228 static inline int
229 vnet_ip_adjacency_share_compare (ip_adjacency_t * a1, ip_adjacency_t *a2)
230 {
231   if (memcmp (STRUCT_MARK_PTR(a1, signature_start),
232               STRUCT_MARK_PTR(a2, signature_start),
233               STRUCT_OFFSET_OF(ip_adjacency_t, signature_end)
234               - STRUCT_OFFSET_OF(ip_adjacency_t, signature_start)))
235     return 0;
236   if (memcmp (&a1->rewrite_header, &a2->rewrite_header,
237               VLIB_BUFFER_PRE_DATA_SIZE))
238     return 0;
239   return 1;
240 }
241
242 /* Index into adjacency table. */
243 typedef u32 ip_adjacency_index_t;
244
245 typedef struct {
246   /* Directly connected next-hop adjacency index. */
247   u32 next_hop_adj_index;
248
249   /* Path weight for this adjacency. */
250   u32 weight;
251 } ip_multipath_next_hop_t;
252
253 typedef struct {
254   /* Adjacency index of first index in block. */
255   u32 adj_index;
256   
257   /* Power of 2 size of adjacency block. */
258   u32 n_adj_in_block;
259
260   /* Number of prefixes that point to this adjacency. */
261   u32 reference_count;
262
263   /* Normalized next hops are used as hash keys: they are sorted by weight
264      and weights are chosen so they add up to 1 << log2_n_adj_in_block (with
265      zero-weighted next hops being deleted).
266      Unnormalized next hops are saved so that control plane has a record of exactly
267      what the RIB told it. */
268   struct {
269     /* Number of hops in the multipath. */
270     u32 count;
271
272     /* Offset into next hop heap for this block. */
273     u32 heap_offset;
274
275     /* Heap handle used to for example free block when we're done with it. */
276     u32 heap_handle;
277   } normalized_next_hops, unnormalized_next_hops;
278 } ip_multipath_adjacency_t;
279
280 /* IP multicast adjacency. */
281 typedef struct {
282   /* Handle for this adjacency in adjacency heap. */
283   u32 heap_handle;
284
285   /* Number of adjecencies in block. */
286   u32 n_adj;
287
288   /* Rewrite string. */
289   vnet_declare_rewrite (64 - 2*sizeof(u32));
290 } ip_multicast_rewrite_t;
291
292 typedef struct {
293   /* ip4-multicast-rewrite next index. */
294   u32 next_index;
295
296   u8 n_rewrite_bytes;
297
298   u8 rewrite_string[64 - 1*sizeof(u32) - 1*sizeof(u8)];
299 } ip_multicast_rewrite_string_t;
300
301 typedef struct {
302   ip_multicast_rewrite_t * rewrite_heap;
303
304   ip_multicast_rewrite_string_t * rewrite_strings;
305
306   /* Negative rewrite string index; >= 0 sw_if_index.
307      Sorted.  Used to hash. */
308   i32 ** adjacency_id_vector;
309
310   uword * adjacency_by_id_vector;
311 } ip_multicast_lookup_main_t;
312
313 typedef struct {
314   /* Key for mhash; in fact, just a byte offset into mhash key vector. */
315   u32 address_key;
316
317   /* Interface which has this address. */
318   u32 sw_if_index;
319
320   /* Adjacency for neighbor probe (ARP) for this interface address. */
321   u32 neighbor_probe_adj_index;
322
323   /* Address (prefix) length for this interface. */
324   u16 address_length;
325
326   /* Will be used for something eventually.  Primary vs. secondary? */
327   u16 flags;
328
329   /* Next and previous pointers for doubly linked list of
330      addresses per software interface. */
331   u32 next_this_sw_interface;
332   u32 prev_this_sw_interface;
333 } ip_interface_address_t;
334
/* Next indices of the ip[46]-local node. */
typedef enum {
  IP_LOCAL_NEXT_DROP = 0,
  IP_LOCAL_NEXT_PUNT,
  IP_LOCAL_NEXT_UDP_LOOKUP,
  IP_LOCAL_NEXT_ICMP,

  /* Number of local next indices. */
  IP_LOCAL_N_NEXT,
} ip_local_next_t;
342
343 struct ip_lookup_main_t;
344
345 typedef void (* ip_add_del_adjacency_callback_t) (struct ip_lookup_main_t * lm,
346                                                   u32 adj_index,
347                                                   ip_adjacency_t * adj,
348                                                   u32 is_del);
349
350 typedef struct {
351   vnet_config_main_t config_main;
352
353   u32 * config_index_by_sw_if_index;
354 } ip_config_main_t;
355
356 typedef struct ip_lookup_main_t {
357   /* Adjacency heap. */
358   ip_adjacency_t * adjacency_heap;
359
360   /* Adjacency packet/byte counters indexed by adjacency index. */
361   vlib_combined_counter_main_t adjacency_counters;
362
363   /* Heap of (next hop, weight) blocks.  Sorted by next hop. */
364   ip_multipath_next_hop_t * next_hop_heap;
365
366   /* Indexed by heap_handle from ip_adjacency_t. */
367   ip_multipath_adjacency_t * multipath_adjacencies;
368
369   /* Adjacency by signature hash */
370   uword * adj_index_by_signature;
371
372   /* Temporary vectors for looking up next hops in hash. */
373   ip_multipath_next_hop_t * next_hop_hash_lookup_key;
374   ip_multipath_next_hop_t * next_hop_hash_lookup_key_normalized;
375
376   /* Hash table mapping normalized next hops and weights
377      to multipath adjacency index. */
378   uword * multipath_adjacency_by_next_hops;
379
380   u32 * adjacency_remap_table;
381   u32 n_adjacency_remaps;
382
383   /* If average error per adjacency is less than this threshold adjacency block
384      size is accepted. */
385   f64 multipath_next_hop_error_tolerance;
386
387   /* Adjacency index for routing table misses, local punts, and drops. */
388   u32 miss_adj_index, drop_adj_index, local_adj_index;
389
390   /* Miss adjacency is always first in adjacency table. */
391 #define IP_LOOKUP_MISS_ADJ_INDEX 0
392
393   ip_add_del_adjacency_callback_t * add_del_adjacency_callbacks;
394
395   /* Pool of addresses that are assigned to interfaces. */
396   ip_interface_address_t * if_address_pool;
397
398   /* Hash table mapping address to index in interface address pool. */
399   mhash_t address_to_if_address_index;
400
401   /* Head of doubly linked list of interface addresses for each software interface.
402      ~0 means this interface has no address. */
403   u32 * if_address_pool_index_by_sw_if_index;
404
405   /* First table index to use for this interface, ~0 => none */
406   u32 * classify_table_index_by_sw_if_index;
407
408   /* rx/tx interface/feature configuration. */
409   ip_config_main_t rx_config_mains[VNET_N_CAST], tx_config_main;
410
411   /* Number of bytes in a fib result.  Must be at least
412      sizeof (uword).  First word is always adjacency index. */
413   u32 fib_result_n_bytes, fib_result_n_words;
414
415   format_function_t * format_fib_result;
416
417   /* 1 for ip6; 0 for ip4. */
418   u32 is_ip6;
419
420   /* Either format_ip4_address_and_length or format_ip6_address_and_length. */
421   format_function_t * format_address_and_length;
422
423   /* Table mapping ip protocol to ip[46]-local node next index. */
424   u8 local_next_by_ip_protocol[256];
425
426   /* IP_BUILTIN_PROTOCOL_{TCP,UDP,ICMP,OTHER} by protocol in IP header. */
427   u8 builtin_protocol_by_ip_protocol[256];
428 } ip_lookup_main_t;
429
430 always_inline ip_adjacency_t *
431 ip_get_adjacency (ip_lookup_main_t * lm,
432                   u32 adj_index)
433 {
434   ip_adjacency_t * adj;
435
436   adj = vec_elt_at_index (lm->adjacency_heap, adj_index);
437
438   ASSERT (adj->heap_handle != ~0);
439
440   return adj;
441 }
442
/*
 * Prefetch one whole adjacency from lm's adjacency heap.  'type' is passed
 * through to CLIB_PREFETCH unchanged.
 */
#define ip_prefetch_adjacency(lm,adj_index,type)                  \
do {                                                              \
  ip_adjacency_t * _adj = &(lm)->adjacency_heap[(adj_index)];     \
  CLIB_PREFETCH (_adj, sizeof (*_adj), type);                     \
} while (0)
448
449 static inline void
450 ip_register_add_del_adjacency_callback(ip_lookup_main_t * lm,
451                                        ip_add_del_adjacency_callback_t cb)
452 {
453   vec_add1(lm->add_del_adjacency_callbacks, cb);
454 }
455
456 always_inline void
457 ip_call_add_del_adjacency_callbacks (ip_lookup_main_t * lm, u32 adj_index, u32 is_del)
458 {
459   ip_adjacency_t * adj;
460   uword i;
461   adj = ip_get_adjacency (lm, adj_index);
462   for (i = 0; i < vec_len (lm->add_del_adjacency_callbacks); i++)
463     lm->add_del_adjacency_callbacks[i] (lm, adj_index, adj, is_del);
464 }
465
466 /* Create new block of given number of contiguous adjacencies. */
467 ip_adjacency_t *
468 ip_add_adjacency (ip_lookup_main_t * lm,
469                   ip_adjacency_t * adj,
470                   u32 n_adj,
471                   u32 * adj_index_result);
472
473 void ip_del_adjacency (ip_lookup_main_t * lm, u32 adj_index);
474 void
475 ip_update_adjacency (ip_lookup_main_t * lm,
476                      u32 adj_index,
477                      ip_adjacency_t * copy_adj);
478
479 static inline int
480 ip_adjacency_is_multipath(ip_lookup_main_t * lm, u32 adj_index)
481 {
482   if (!vec_len(lm->multipath_adjacencies))
483     return 0;
484
485   if (vec_len(lm->multipath_adjacencies) < adj_index - 1)
486     return 0;
487
488
489   return (lm->multipath_adjacencies[adj_index].adj_index == adj_index &&
490           lm->multipath_adjacencies[adj_index].n_adj_in_block > 0);
491 }
492
493 void
494 ip_multipath_adjacency_free (ip_lookup_main_t * lm,
495                              ip_multipath_adjacency_t * a);
496
497 u32
498 ip_multipath_adjacency_add_del_next_hop (ip_lookup_main_t * lm,
499                                          u32 is_del,
500                                          u32 old_mp_adj_index,
501                                          u32 next_hop_adj_index,
502                                          u32 next_hop_weight,
503                                          u32 * new_mp_adj_index);
504
505 clib_error_t *
506 ip_interface_address_add_del (ip_lookup_main_t * lm,
507                               u32 sw_if_index,
508                               void * address,
509                               u32 address_length,
510                               u32 is_del,
511                               u32 * result_index);
512
513 always_inline ip_interface_address_t *
514 ip_get_interface_address (ip_lookup_main_t * lm, void * addr_fib)
515 {
516   uword * p = mhash_get (&lm->address_to_if_address_index, addr_fib);
517   return p ? pool_elt_at_index (lm->if_address_pool, p[0]) : 0;
518 }
519
520 always_inline void *
521 ip_interface_address_get_address (ip_lookup_main_t * lm, ip_interface_address_t * a)
522 { return mhash_key_to_mem (&lm->address_to_if_address_index, a->address_key); }
523
524 always_inline ip_interface_address_t *
525 ip_interface_address_for_packet (ip_lookup_main_t * lm, vlib_buffer_t * b, u32 sw_if_index)
526 {
527   ip_adjacency_t * adj;
528   u32 if_address_index;
529
530   adj = ip_get_adjacency (lm, vnet_buffer (b)->ip.adj_index[VLIB_TX]);
531
532   ASSERT (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP
533           || adj->lookup_next_index == IP_LOOKUP_NEXT_LOCAL);
534   if_address_index = adj->if_address_index;
535   if_address_index = (if_address_index == ~0 ?
536                       vec_elt (lm->if_address_pool_index_by_sw_if_index, sw_if_index)
537                       : if_address_index);
538
539   return pool_elt_at_index (lm->if_address_pool, if_address_index);
540 }
541
/*
 * Iterate over the interface addresses of a software interface.
 *
 *   lm          - ip_lookup_main_t pointer
 *   a           - lvalue set to each ip_interface_address_t * in turn
 *   sw_if_index - software interface whose address list is walked
 *   loop        - when true and the interface is flagged unnumbered, walk
 *                 the addresses of its unnumbered_sw_if_index instead
 *   body        - statement(s) executed once per address
 *
 * The next-address index is read before 'body' runs.  An interface index
 * beyond the per-interface head vector yields no iterations.
 *
 * 'sw_if_index' and 'loop' are parenthesized so that compound argument
 * expressions (e.g. 'x || y' for loop) keep their intended meaning.
 */
#define foreach_ip_interface_address(lm,a,sw_if_index,loop,body)        \
do {                                                                    \
    vnet_main_t *_vnm = vnet_get_main();                                \
    u32 _sw_if_index = (sw_if_index);                                   \
    vnet_sw_interface_t *_swif;                                         \
    _swif = vnet_get_sw_interface (_vnm, _sw_if_index);                 \
                                                                        \
    /*                                                                  \
     * Loop => honor unnumbered interface addressing.                   \
     */                                                                 \
    if ((loop) && (_swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED))   \
      _sw_if_index = _swif->unnumbered_sw_if_index;                     \
    u32 _ia =                                                           \
      (vec_len((lm)->if_address_pool_index_by_sw_if_index)              \
       > (_sw_if_index))                                                \
        ? vec_elt ((lm)->if_address_pool_index_by_sw_if_index,          \
                   (_sw_if_index)) : (u32)~0;                           \
    ip_interface_address_t * _a;                                        \
    while (_ia != (u32)~0)                                              \
    {                                                                   \
        _a = pool_elt_at_index ((lm)->if_address_pool, _ia);            \
        _ia = _a->next_this_sw_interface;                               \
        (a) = _a;                                                       \
        body;                                                           \
    }                                                                   \
} while (0)
568
569 void ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index);
570
571 #endif /* included_ip_lookup_h */