2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip_lookup.c: ip4/6 adjacency and lookup table management
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vppinfra/math.h> /* for fabs */
41 #include <vnet/ip/ip.h>
44 ip_multipath_del_adjacency (ip_lookup_main_t * lm, u32 del_adj_index);
47 ip_poison_adjacencies (ip_adjacency_t * adj, uword n_adj)
50 memset (adj, 0xfe, n_adj * sizeof (adj[0]));
53 /* Create new block of given number of contiguous adjacencies. */
55 ip_add_adjacency (ip_lookup_main_t * lm,
56 ip_adjacency_t * copy_adj,
58 u32 * adj_index_return)
63 ai = heap_alloc (lm->adjacency_heap, n_adj, handle);
64 adj = heap_elt_at_index (lm->adjacency_heap, ai);
66 ip_poison_adjacencies (adj, n_adj);
68 /* Validate adjacency counters. */
69 vlib_validate_combined_counter (&lm->adjacency_counters, ai + n_adj - 1);
71 for (i = 0; i < n_adj; i++)
73 /* Make sure certain fields are always initialized. */
74 adj[i].rewrite_header.sw_if_index = ~0;
75 adj[i].explicit_fib_index = ~0;
76 adj[i].mcast_group_index = ~0;
77 adj[i].classify_table_index = ~0;
78 adj[i].saved_lookup_next_index = 0;
83 adj[i].heap_handle = handle;
86 /* Zero possibly stale counters for re-used adjacencies. */
87 vlib_zero_combined_counter (&lm->adjacency_counters, ai + i);
90 *adj_index_return = ai;
94 static void ip_del_adjacency2 (ip_lookup_main_t * lm, u32 adj_index, u32 delete_multipath_adjacency)
99 ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 1);
101 adj = ip_get_adjacency (lm, adj_index);
102 handle = adj->heap_handle;
104 if (delete_multipath_adjacency)
105 ip_multipath_del_adjacency (lm, adj_index);
107 ip_poison_adjacencies (adj, adj->n_adj);
109 heap_dealloc (lm->adjacency_heap, handle);
112 void ip_del_adjacency (ip_lookup_main_t * lm, u32 adj_index)
113 { ip_del_adjacency2 (lm, adj_index, /* delete_multipath_adjacency */ 1); }
116 next_hop_sort_by_weight (ip_multipath_next_hop_t * n1,
117 ip_multipath_next_hop_t * n2)
119 int cmp = (int) n1->weight - (int) n2->weight;
121 ? (int) n1->next_hop_adj_index - (int) n2->next_hop_adj_index
122 : (cmp > 0 ? +1 : -1));
125 /* Given next hop vector is over-written with normalized one with sorted weights and
126 with weights corresponding to the number of adjacencies for each next hop.
127 Returns number of adjacencies in block. */
128 static u32 ip_multipath_normalize_next_hops (ip_lookup_main_t * lm,
129 ip_multipath_next_hop_t * raw_next_hops,
130 ip_multipath_next_hop_t ** normalized_next_hops)
132 ip_multipath_next_hop_t * nhs;
133 uword n_nhs, n_adj, n_adj_left, i;
134 f64 sum_weight, norm, error;
136 n_nhs = vec_len (raw_next_hops);
141 /* Allocate enough space for 2 copies; we'll use second copy to save original weights. */
142 nhs = *normalized_next_hops;
143 vec_validate (nhs, 2*n_nhs - 1);
145 /* Fast path: 1 next hop in block. */
149 nhs[0] = raw_next_hops[0];
157 int cmp = next_hop_sort_by_weight (&raw_next_hops[0], &raw_next_hops[1]) < 0;
160 nhs[0] = raw_next_hops[cmp];
161 nhs[1] = raw_next_hops[cmp ^ 1];
163 /* Fast path: equal cost multipath with 2 next hops. */
164 if (nhs[0].weight == nhs[1].weight)
166 nhs[0].weight = nhs[1].weight = 1;
173 memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
174 qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight);
177 /* Find total weight to normalize weights. */
179 for (i = 0; i < n_nhs; i++)
180 sum_weight += nhs[i].weight;
182 /* In the unlikely case that all weights are given as 0, set them all to 1. */
185 for (i = 0; i < n_nhs; i++)
190 /* Save copies of all next hop weights to avoid being overwritten in loop below. */
191 for (i = 0; i < n_nhs; i++)
192 nhs[n_nhs + i].weight = nhs[i].weight;
194 /* Try larger and larger power of 2 sized adjacency blocks until we
195 find one where traffic flows to within 1% of specified weights. */
196 for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
200 norm = n_adj / sum_weight;
202 for (i = 0; i < n_nhs; i++)
204 f64 nf = nhs[n_nhs + i].weight * norm; /* use saved weights */
205 word n = flt_round_nearest (nf);
207 n = n > n_adj_left ? n_adj_left : n;
209 error += fabs (nf - n);
213 nhs[0].weight += n_adj_left;
215 /* Less than 5% average error per adjacency with this size adjacency block? */
216 if (error <= lm->multipath_next_hop_error_tolerance*n_adj)
218 /* Truncate any next hops with zero weight. */
225 /* Save vector for next call. */
226 *normalized_next_hops = nhs;
231 ip_next_hop_hash_key_from_handle (uword handle)
232 { return 1 + 2*handle; }
235 ip_next_hop_hash_key_is_heap_handle (uword k)
239 ip_next_hop_hash_key_get_heap_handle (uword k)
241 ASSERT (ip_next_hop_hash_key_is_heap_handle (k));
246 ip_multipath_adjacency_get (ip_lookup_main_t * lm,
247 ip_multipath_next_hop_t * raw_next_hops,
248 uword create_if_non_existent)
251 u32 i, j, n_adj, adj_index, adj_heap_handle;
252 ip_adjacency_t * adj, * copy_adj;
253 ip_multipath_next_hop_t * nh, * nhs;
254 ip_multipath_adjacency_t * madj;
256 n_adj = ip_multipath_normalize_next_hops (lm, raw_next_hops, &lm->next_hop_hash_lookup_key_normalized);
257 nhs = lm->next_hop_hash_lookup_key_normalized;
260 ASSERT (n_adj >= vec_len (raw_next_hops));
262 /* Use normalized next hops to see if we've seen a block equivalent to this one before. */
263 p = hash_get_mem (lm->multipath_adjacency_by_next_hops, nhs);
267 if (! create_if_non_existent)
270 adj = ip_add_adjacency (lm, /* copy_adj */ 0, n_adj, &adj_index);
271 adj_heap_handle = adj[0].heap_handle;
273 /* Fill in adjacencies in block based on corresponding next hop adjacencies. */
275 vec_foreach (nh, nhs)
277 copy_adj = ip_get_adjacency (lm, nh->next_hop_adj_index);
278 for (j = 0; j < nh->weight; j++)
280 adj[i] = copy_adj[0];
281 adj[i].heap_handle = adj_heap_handle;
282 adj[i].n_adj = n_adj;
287 /* All adjacencies should have been initialized. */
290 vec_validate (lm->multipath_adjacencies, adj_heap_handle);
291 madj = vec_elt_at_index (lm->multipath_adjacencies, adj_heap_handle);
293 madj->adj_index = adj_index;
294 madj->n_adj_in_block = n_adj;
295 madj->reference_count = 0; /* caller will set to one. */
297 madj->normalized_next_hops.count = vec_len (nhs);
298 madj->normalized_next_hops.heap_offset
299 = heap_alloc (lm->next_hop_heap, vec_len (nhs),
300 madj->normalized_next_hops.heap_handle);
301 memcpy (lm->next_hop_heap + madj->normalized_next_hops.heap_offset,
302 nhs, vec_bytes (nhs));
304 hash_set (lm->multipath_adjacency_by_next_hops,
305 ip_next_hop_hash_key_from_handle (madj->normalized_next_hops.heap_handle),
306 madj - lm->multipath_adjacencies);
308 madj->unnormalized_next_hops.count = vec_len (raw_next_hops);
309 madj->unnormalized_next_hops.heap_offset
310 = heap_alloc (lm->next_hop_heap, vec_len (raw_next_hops),
311 madj->unnormalized_next_hops.heap_handle);
312 memcpy (lm->next_hop_heap + madj->unnormalized_next_hops.heap_offset,
313 raw_next_hops, vec_bytes (raw_next_hops));
315 ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
317 return adj_heap_handle;
320 /* Returns 0 for next hop not found. */
322 ip_multipath_adjacency_add_del_next_hop (ip_lookup_main_t * lm,
324 u32 old_mp_adj_index,
325 u32 next_hop_adj_index,
327 u32 * new_mp_adj_index)
329 ip_multipath_adjacency_t * mp_old, * mp_new;
330 ip_multipath_next_hop_t * nh, * nhs, * hash_nhs;
338 /* If old multipath adjacency is valid, find requested next hop. */
339 if (old_mp_adj_index < vec_len (lm->multipath_adjacencies)
340 && lm->multipath_adjacencies[old_mp_adj_index].normalized_next_hops.count > 0)
342 mp_old = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
344 nhs = vec_elt_at_index (lm->next_hop_heap, mp_old->unnormalized_next_hops.heap_offset);
345 n_nhs = mp_old->unnormalized_next_hops.count;
347 /* Linear search: ok since n_next_hops is small. */
348 for (i_nh = 0; i_nh < n_nhs; i_nh++)
349 if (nhs[i_nh].next_hop_adj_index == next_hop_adj_index)
352 /* Given next hop not found. */
353 if (i_nh >= n_nhs && is_del)
357 hash_nhs = lm->next_hop_hash_lookup_key;
359 _vec_len (hash_nhs) = 0;
365 /* Prepare lookup key for multipath with target next hop deleted. */
367 vec_add (hash_nhs, nhs + 0, i_nh);
368 if (i_nh + 1 < n_nhs)
369 vec_add (hash_nhs, nhs + i_nh + 1, n_nhs - (i_nh + 1));
372 else /* it's an add. */
374 /* If next hop is already there with the same weight, we have nothing to do. */
375 if (i_nh < n_nhs && nhs[i_nh].weight == next_hop_weight)
377 new_mp_adj_index[0] = ~0;
381 /* Copy old next hops to lookup key vector. */
383 vec_add (hash_nhs, nhs, n_nhs);
387 /* Change weight of existing next hop. */
388 nh = vec_elt_at_index (hash_nhs, i_nh);
392 /* Add a new next hop. */
393 vec_add2 (hash_nhs, nh, 1);
394 nh->next_hop_adj_index = next_hop_adj_index;
397 /* Set weight for added or old next hop. */
398 nh->weight = next_hop_weight;
401 if (vec_len (hash_nhs) > 0)
403 u32 tmp = ip_multipath_adjacency_get (lm, hash_nhs,
404 /* create_if_non_existent */ 1);
406 mp_new = vec_elt_at_index (lm->multipath_adjacencies, tmp);
408 /* Fetch again since pool may have moved. */
410 mp_old = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
413 new_mp_adj_index[0] = mp_new ? mp_new - lm->multipath_adjacencies : ~0;
415 if (mp_new != mp_old)
419 ASSERT (mp_old->reference_count > 0);
420 mp_old->reference_count -= 1;
423 mp_new->reference_count += 1;
426 if (mp_old && mp_old->reference_count == 0)
427 ip_multipath_adjacency_free (lm, mp_old);
430 /* Save key vector next call. */
431 lm->next_hop_hash_lookup_key = hash_nhs;
437 ip_multipath_del_adjacency (ip_lookup_main_t * lm, u32 del_adj_index)
439 ip_adjacency_t * adj = ip_get_adjacency (lm, del_adj_index);
440 ip_multipath_adjacency_t * madj, * new_madj;
441 ip_multipath_next_hop_t * nhs, * hash_nhs;
442 u32 i, n_nhs, madj_index, new_madj_index;
444 if (adj->heap_handle >= vec_len (lm->multipath_adjacencies))
447 vec_validate (lm->adjacency_remap_table, vec_len (lm->adjacency_heap) - 1);
449 for (madj_index = 0; madj_index < vec_len (lm->multipath_adjacencies); madj_index++)
451 madj = vec_elt_at_index (lm->multipath_adjacencies, madj_index);
452 if (madj->n_adj_in_block == 0)
455 nhs = heap_elt_at_index (lm->next_hop_heap, madj->unnormalized_next_hops.heap_offset);
456 n_nhs = madj->unnormalized_next_hops.count;
457 for (i = 0; i < n_nhs; i++)
458 if (nhs[i].next_hop_adj_index == del_adj_index)
461 /* del_adj_index not found in unnormalized_next_hops? We're done. */
468 hash_nhs = lm->next_hop_hash_lookup_key;
470 _vec_len (hash_nhs) = 0;
472 vec_add (hash_nhs, nhs + 0, i);
474 vec_add (hash_nhs, nhs + i + 1, n_nhs - (i + 1));
476 new_madj_index = ip_multipath_adjacency_get (lm, hash_nhs, /* create_if_non_existent */ 1);
478 lm->next_hop_hash_lookup_key = hash_nhs;
480 if (new_madj_index == madj_index)
483 new_madj = vec_elt_at_index (lm->multipath_adjacencies, new_madj_index);
486 lm->adjacency_remap_table[madj->adj_index] = new_madj ? 1 + new_madj->adj_index : ~0;
487 lm->n_adjacency_remaps += 1;
488 ip_multipath_adjacency_free (lm, madj);
493 ip_multipath_adjacency_free (ip_lookup_main_t * lm,
494 ip_multipath_adjacency_t * a)
496 hash_unset (lm->multipath_adjacency_by_next_hops,
497 ip_next_hop_hash_key_from_handle (a->normalized_next_hops.heap_handle));
498 heap_dealloc (lm->next_hop_heap, a->normalized_next_hops.heap_handle);
499 heap_dealloc (lm->next_hop_heap, a->unnormalized_next_hops.heap_handle);
501 ip_del_adjacency2 (lm, a->adj_index, a->reference_count == 0);
502 memset (a, 0, sizeof (a[0]));
505 always_inline ip_multipath_next_hop_t *
506 ip_next_hop_hash_key_get_next_hops (ip_lookup_main_t * lm, uword k,
509 ip_multipath_next_hop_t * nhs;
511 if (ip_next_hop_hash_key_is_heap_handle (k))
513 uword handle = ip_next_hop_hash_key_get_heap_handle (k);
514 nhs = heap_elt_with_handle (lm->next_hop_heap, handle);
515 n_nhs = heap_len (lm->next_hop_heap, handle);
519 nhs = uword_to_pointer (k, ip_multipath_next_hop_t *);
520 n_nhs = vec_len (nhs);
522 *n_next_hops = n_nhs;
527 ip_next_hop_hash_key_sum (hash_t * h, uword key0)
529 ip_lookup_main_t * lm = uword_to_pointer (h->user, ip_lookup_main_t *);
530 ip_multipath_next_hop_t * k0;
533 k0 = ip_next_hop_hash_key_get_next_hops (lm, key0, &n0);
534 return hash_memory (k0, n0 * sizeof (k0[0]), /* seed */ n0);
538 ip_next_hop_hash_key_equal (hash_t * h, uword key0, uword key1)
540 ip_lookup_main_t * lm = uword_to_pointer (h->user, ip_lookup_main_t *);
541 ip_multipath_next_hop_t * k0, * k1;
544 k0 = ip_next_hop_hash_key_get_next_hops (lm, key0, &n0);
545 k1 = ip_next_hop_hash_key_get_next_hops (lm, key1, &n1);
547 return n0 == n1 && ! memcmp (k0, k1, n0 * sizeof (k0[0]));
551 ip_interface_address_add_del (ip_lookup_main_t * lm,
556 u32 * result_if_address_index)
558 vnet_main_t * vnm = vnet_get_main();
559 ip_interface_address_t * a, * prev, * next;
560 uword * p = mhash_get (&lm->address_to_if_address_index, addr_fib);
562 vec_validate_init_empty (lm->if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
563 a = p ? pool_elt_at_index (lm->if_address_pool, p[0]) : 0;
565 /* Verify given length. */
566 if ((a && (address_length != a->address_length)) || (address_length == 0))
568 vnm->api_errno = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH;
569 return clib_error_create
570 ( "%U wrong length (expected %d) for interface %U",
571 lm->format_address_and_length, addr_fib,
572 address_length, a? a->address_length : -1,
573 format_vnet_sw_if_index_name, vnm, sw_if_index);
580 vnet_sw_interface_t * si = vnet_get_sw_interface (vnm, sw_if_index);
581 vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
582 return clib_error_create ("%U not found for interface %U",
583 lm->format_address_and_length,
584 addr_fib, address_length,
585 format_vnet_sw_interface_name, vnm, si);
588 if (a->prev_this_sw_interface != ~0)
590 prev = pool_elt_at_index (lm->if_address_pool, a->prev_this_sw_interface);
591 prev->next_this_sw_interface = a->next_this_sw_interface;
593 if (a->next_this_sw_interface != ~0)
595 next = pool_elt_at_index (lm->if_address_pool, a->next_this_sw_interface);
596 next->prev_this_sw_interface = a->prev_this_sw_interface;
598 if(a->prev_this_sw_interface == ~0)
599 lm->if_address_pool_index_by_sw_if_index[sw_if_index] = a->next_this_sw_interface;
602 if ((a->next_this_sw_interface == ~0) && (a->prev_this_sw_interface == ~0))
603 lm->if_address_pool_index_by_sw_if_index[sw_if_index] = ~0;
605 mhash_unset (&lm->address_to_if_address_index, addr_fib,
607 pool_put (lm->if_address_pool, a);
609 if (result_if_address_index)
610 *result_if_address_index = ~0;
615 u32 pi; /* previous index */
617 u32 hi; /* head index */
619 pool_get (lm->if_address_pool, a);
620 memset (a, ~0, sizeof (a[0]));
621 ai = a - lm->if_address_pool;
623 hi = pi = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
625 while (pi != (u32)~0)
627 prev = pool_elt_at_index(lm->if_address_pool, pi);
628 pi = prev->next_this_sw_interface;
630 pi = prev ? prev - lm->if_address_pool : (u32)~0;
632 a->address_key = mhash_set (&lm->address_to_if_address_index,
633 addr_fib, ai, /* old_value */ 0);
634 a->address_length = address_length;
635 a->sw_if_index = sw_if_index;
637 a->prev_this_sw_interface = pi;
638 a->next_this_sw_interface = ~0;
640 prev->next_this_sw_interface = ai;
642 lm->if_address_pool_index_by_sw_if_index[sw_if_index] =
643 (hi != ~0) ? hi : ai;
644 if (result_if_address_index)
645 *result_if_address_index = ai;
649 if (result_if_address_index)
650 *result_if_address_index = a - lm->if_address_pool;
654 return /* no error */ 0;
657 void serialize_vec_ip_adjacency (serialize_main_t * m, va_list * va)
659 ip_adjacency_t * a = va_arg (*va, ip_adjacency_t *);
660 u32 n = va_arg (*va, u32);
662 for (i = 0; i < n; i++)
664 serialize_integer (m, a[i].heap_handle, sizeof (a[i].heap_handle));
665 serialize_integer (m, a[i].n_adj, sizeof (a[i].n_adj));
666 serialize_integer (m, a[i].lookup_next_index, sizeof (a[i].lookup_next_index_as_int));
667 switch (a[i].lookup_next_index)
669 case IP_LOOKUP_NEXT_LOCAL:
670 serialize_integer (m, a[i].if_address_index, sizeof (a[i].if_address_index));
673 case IP_LOOKUP_NEXT_ARP:
674 serialize_integer (m, a[i].if_address_index, sizeof (a[i].if_address_index));
675 serialize_integer (m, a[i].rewrite_header.sw_if_index, sizeof (a[i].rewrite_header.sw_if_index));
678 case IP_LOOKUP_NEXT_REWRITE:
679 serialize (m, serialize_vnet_rewrite, &a[i].rewrite_header, sizeof (a[i].rewrite_data));
683 /* nothing else to serialize. */
689 void unserialize_vec_ip_adjacency (serialize_main_t * m, va_list * va)
691 ip_adjacency_t * a = va_arg (*va, ip_adjacency_t *);
692 u32 n = va_arg (*va, u32);
694 ip_poison_adjacencies (a, n);
695 for (i = 0; i < n; i++)
697 unserialize_integer (m, &a[i].heap_handle, sizeof (a[i].heap_handle));
698 unserialize_integer (m, &a[i].n_adj, sizeof (a[i].n_adj));
699 unserialize_integer (m, &a[i].lookup_next_index_as_int, sizeof (a[i].lookup_next_index_as_int));
700 switch (a[i].lookup_next_index)
702 case IP_LOOKUP_NEXT_LOCAL:
703 unserialize_integer (m, &a[i].if_address_index, sizeof (a[i].if_address_index));
706 case IP_LOOKUP_NEXT_ARP:
707 unserialize_integer (m, &a[i].if_address_index, sizeof (a[i].if_address_index));
708 unserialize_integer (m, &a[i].rewrite_header.sw_if_index, sizeof (a[i].rewrite_header.sw_if_index));
711 case IP_LOOKUP_NEXT_REWRITE:
712 unserialize (m, unserialize_vnet_rewrite, &a[i].rewrite_header, sizeof (a[i].rewrite_data));
716 /* nothing else to unserialize. */
722 static void serialize_vec_ip_multipath_next_hop (serialize_main_t * m, va_list * va)
724 ip_multipath_next_hop_t * nh = va_arg (*va, ip_multipath_next_hop_t *);
725 u32 n = va_arg (*va, u32);
727 for (i = 0; i < n; i++)
729 serialize_integer (m, nh[i].next_hop_adj_index, sizeof (nh[i].next_hop_adj_index));
730 serialize_integer (m, nh[i].weight, sizeof (nh[i].weight));
734 static void unserialize_vec_ip_multipath_next_hop (serialize_main_t * m, va_list * va)
736 ip_multipath_next_hop_t * nh = va_arg (*va, ip_multipath_next_hop_t *);
737 u32 n = va_arg (*va, u32);
739 for (i = 0; i < n; i++)
741 unserialize_integer (m, &nh[i].next_hop_adj_index, sizeof (nh[i].next_hop_adj_index));
742 unserialize_integer (m, &nh[i].weight, sizeof (nh[i].weight));
746 static void serialize_vec_ip_multipath_adjacency (serialize_main_t * m, va_list * va)
748 ip_multipath_adjacency_t * a = va_arg (*va, ip_multipath_adjacency_t *);
749 u32 n = va_arg (*va, u32);
751 for (i = 0; i < n; i++)
753 #define foreach_ip_multipath_adjacency_field \
754 _ (adj_index) _ (n_adj_in_block) _ (reference_count) \
755 _ (normalized_next_hops.count) \
756 _ (normalized_next_hops.heap_offset) \
757 _ (normalized_next_hops.heap_handle) \
758 _ (unnormalized_next_hops.count) \
759 _ (unnormalized_next_hops.heap_offset) \
760 _ (unnormalized_next_hops.heap_handle)
762 #define _(f) serialize_integer (m, a[i].f, sizeof (a[i].f));
763 foreach_ip_multipath_adjacency_field;
768 static void unserialize_vec_ip_multipath_adjacency (serialize_main_t * m, va_list * va)
770 ip_multipath_adjacency_t * a = va_arg (*va, ip_multipath_adjacency_t *);
771 u32 n = va_arg (*va, u32);
773 for (i = 0; i < n; i++)
775 #define _(f) unserialize_integer (m, &a[i].f, sizeof (a[i].f));
776 foreach_ip_multipath_adjacency_field;
781 void serialize_ip_lookup_main (serialize_main_t * m, va_list * va)
783 ip_lookup_main_t * lm = va_arg (*va, ip_lookup_main_t *);
785 /* If this isn't true you need to call e.g. ip4_maybe_remap_adjacencies
787 ASSERT (lm->n_adjacency_remaps == 0);
789 serialize (m, serialize_heap, lm->adjacency_heap, serialize_vec_ip_adjacency);
791 serialize (m, serialize_heap, lm->next_hop_heap, serialize_vec_ip_multipath_next_hop);
792 vec_serialize (m, lm->multipath_adjacencies, serialize_vec_ip_multipath_adjacency);
794 /* Adjacency counters (FIXME disabled for now). */
796 serialize (m, serialize_vlib_combined_counter_main, &lm->adjacency_counters, /* incremental */ 0);
799 void unserialize_ip_lookup_main (serialize_main_t * m, va_list * va)
801 ip_lookup_main_t * lm = va_arg (*va, ip_lookup_main_t *);
803 unserialize (m, unserialize_heap, &lm->adjacency_heap, unserialize_vec_ip_adjacency);
804 unserialize (m, unserialize_heap, &lm->next_hop_heap, unserialize_vec_ip_multipath_next_hop);
805 vec_unserialize (m, &lm->multipath_adjacencies, unserialize_vec_ip_multipath_adjacency);
807 /* Build hash table from unserialized data. */
809 ip_multipath_adjacency_t * a;
811 vec_foreach (a, lm->multipath_adjacencies)
813 if (a->n_adj_in_block > 0 && a->reference_count > 0)
814 hash_set (lm->multipath_adjacency_by_next_hops,
815 ip_next_hop_hash_key_from_handle (a->normalized_next_hops.heap_handle),
816 a - lm->multipath_adjacencies);
820 /* Validate adjacency counters. */
821 vlib_validate_combined_counter (&lm->adjacency_counters,
822 vec_len (lm->adjacency_heap) - 1);
824 /* Adjacency counters (FIXME disabled for now). */
826 unserialize (m, unserialize_vlib_combined_counter_main, &lm->adjacency_counters, /* incremental */ 0);
829 void ip_lookup_init (ip_lookup_main_t * lm, u32 is_ip6)
831 ip_adjacency_t * adj;
833 /* Hand-craft special miss adjacency to use when nothing matches in the
834 routing table. Same for drop adjacency. */
835 adj = ip_add_adjacency (lm, /* template */ 0, /* n-adj */ 1, &lm->miss_adj_index);
836 adj->lookup_next_index = IP_LOOKUP_NEXT_MISS;
837 ASSERT (lm->miss_adj_index == IP_LOOKUP_MISS_ADJ_INDEX);
839 adj = ip_add_adjacency (lm, /* template */ 0, /* n-adj */ 1, &lm->drop_adj_index);
840 adj->lookup_next_index = IP_LOOKUP_NEXT_DROP;
842 adj = ip_add_adjacency (lm, /* template */ 0, /* n-adj */ 1, &lm->local_adj_index);
843 adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
844 adj->if_address_index = ~0;
846 if (! lm->fib_result_n_bytes)
847 lm->fib_result_n_bytes = sizeof (uword);
849 lm->multipath_adjacency_by_next_hops
850 = hash_create2 (/* elts */ 0,
851 /* user */ pointer_to_uword (lm),
852 /* value_bytes */ sizeof (uword),
853 ip_next_hop_hash_key_sum,
854 ip_next_hop_hash_key_equal,
855 /* format pair/arg */
858 /* 1% max error tolerance for multipath. */
859 lm->multipath_next_hop_error_tolerance = .01;
864 lm->format_address_and_length = format_ip6_address_and_length;
865 mhash_init (&lm->address_to_if_address_index, sizeof (uword),
866 sizeof (ip6_address_fib_t));
870 lm->format_address_and_length = format_ip4_address_and_length;
871 mhash_init (&lm->address_to_if_address_index, sizeof (uword),
872 sizeof (ip4_address_fib_t));
878 /* Setup all IP protocols to be punted and builtin-unknown. */
879 for (i = 0; i < 256; i++)
881 lm->local_next_by_ip_protocol[i] = IP_LOCAL_NEXT_PUNT;
882 lm->builtin_protocol_by_ip_protocol[i] = IP_BUILTIN_PROTOCOL_UNKNOWN;
885 /* Eliot's TCP doesn't actually work */
886 lm->local_next_by_ip_protocol[IP_PROTOCOL_TCP] = IP_LOCAL_NEXT_TCP_LOOKUP;
887 lm->builtin_protocol_by_ip_protocol[IP_PROTOCOL_TCP] =
888 IP_BUILTIN_PROTOCOL_TCP;
891 lm->local_next_by_ip_protocol[IP_PROTOCOL_UDP] = IP_LOCAL_NEXT_UDP_LOOKUP;
892 lm->local_next_by_ip_protocol[is_ip6 ? IP_PROTOCOL_ICMP6 : IP_PROTOCOL_ICMP] = IP_LOCAL_NEXT_ICMP;
893 lm->builtin_protocol_by_ip_protocol[IP_PROTOCOL_UDP] = IP_BUILTIN_PROTOCOL_UDP;
894 lm->builtin_protocol_by_ip_protocol[is_ip6 ? IP_PROTOCOL_ICMP6 : IP_PROTOCOL_ICMP] = IP_BUILTIN_PROTOCOL_ICMP;
898 u8 * format_ip_flow_hash_config (u8 * s, va_list * args)
900 u32 flow_hash_config = va_arg (*args, u32);
902 #define _(n,v) if (flow_hash_config & v) s = format (s, "%s ", #n);
903 foreach_flow_hash_bit;
909 u8 * format_ip_lookup_next (u8 * s, va_list * args)
911 ip_lookup_next_t n = va_arg (*args, ip_lookup_next_t);
917 s = format (s, "unknown %d", n);
920 case IP_LOOKUP_NEXT_MISS: t = "miss"; break;
921 case IP_LOOKUP_NEXT_DROP: t = "drop"; break;
922 case IP_LOOKUP_NEXT_PUNT: t = "punt"; break;
923 case IP_LOOKUP_NEXT_LOCAL: t = "local"; break;
924 case IP_LOOKUP_NEXT_ARP: t = "arp"; break;
925 case IP_LOOKUP_NEXT_CLASSIFY: t = "classify"; break;
926 case IP_LOOKUP_NEXT_MAP: t = "map"; break;
927 case IP_LOOKUP_NEXT_MAP_T: t = "map-t"; break;
928 case IP_LOOKUP_NEXT_SIXRD: t = "sixrd"; break;
929 case IP_LOOKUP_NEXT_REWRITE:
934 vec_add (s, t, strlen (t));
939 static u8 * format_ip_interface_address (u8 * s, va_list * args)
941 ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *);
942 u32 if_address_index = va_arg (*args, u32);
943 ip_interface_address_t * ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
944 void * a = ip_interface_address_get_address (lm, ia);
947 return format (s, "%U", format_ip6_address_and_length, a, ia->address_length);
949 return format (s, "%U", format_ip4_address_and_length, a, ia->address_length);
952 u8 * format_ip_adjacency (u8 * s, va_list * args)
954 vnet_main_t * vnm = va_arg (*args, vnet_main_t *);
955 ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *);
956 u32 adj_index = va_arg (*args, u32);
957 ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
959 switch (adj->lookup_next_index)
961 case IP_LOOKUP_NEXT_REWRITE:
964 vnm->vlib_main, &adj->rewrite_header, sizeof (adj->rewrite_data));
968 s = format (s, "%U", format_ip_lookup_next, adj->lookup_next_index);
969 if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP)
970 s = format (s, " %U",
971 format_vnet_sw_interface_name,
973 vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index));
974 switch (adj->lookup_next_index)
976 case IP_LOOKUP_NEXT_ARP:
977 case IP_LOOKUP_NEXT_LOCAL:
978 if (adj->if_address_index != ~0)
979 s = format (s, " %U", format_ip_interface_address, lm, adj->if_address_index);
982 case IP_LOOKUP_NEXT_CLASSIFY:
983 s = format (s, " table %d", adj->classify_table_index);
990 if (adj->explicit_fib_index != ~0 && adj->explicit_fib_index != 0)
991 s = format (s, " lookup fib index %d", adj->explicit_fib_index);
996 u8 * format_ip_adjacency_packet_data (u8 * s, va_list * args)
998 vnet_main_t * vnm = va_arg (*args, vnet_main_t *);
999 ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *);
1000 u32 adj_index = va_arg (*args, u32);
1001 u8 * packet_data = va_arg (*args, u8 *);
1002 u32 n_packet_data_bytes = va_arg (*args, u32);
1003 ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
1005 switch (adj->lookup_next_index)
1007 case IP_LOOKUP_NEXT_REWRITE:
1008 s = format (s, "%U",
1009 format_vnet_rewrite_header,
1010 vnm->vlib_main, &adj->rewrite_header, packet_data, n_packet_data_bytes);
1020 static uword unformat_ip_lookup_next (unformat_input_t * input, va_list * args)
1022 ip_lookup_next_t * result = va_arg (*args, ip_lookup_next_t *);
1025 if (unformat (input, "drop"))
1026 n = IP_LOOKUP_NEXT_DROP;
1028 else if (unformat (input, "punt"))
1029 n = IP_LOOKUP_NEXT_PUNT;
1031 else if (unformat (input, "local"))
1032 n = IP_LOOKUP_NEXT_LOCAL;
1034 else if (unformat (input, "arp"))
1035 n = IP_LOOKUP_NEXT_ARP;
1037 else if (unformat (input, "classify"))
1038 n = IP_LOOKUP_NEXT_CLASSIFY;
1047 static uword unformat_ip_adjacency (unformat_input_t * input, va_list * args)
1049 vlib_main_t * vm = va_arg (*args, vlib_main_t *);
1050 ip_adjacency_t * adj = va_arg (*args, ip_adjacency_t *);
1051 u32 node_index = va_arg (*args, u32);
1052 vnet_main_t * vnm = vnet_get_main();
1053 u32 sw_if_index, is_ip6;
1055 ip_lookup_next_t next;
1057 is_ip6 = node_index == ip6_rewrite_node.index;
1058 adj->rewrite_header.node_index = node_index;
1059 adj->explicit_fib_index = ~0;
1061 if (unformat (input, "arp %U %U",
1062 unformat_vnet_sw_interface, vnm, &sw_if_index,
1063 unformat_ip46_address, &a46, is_ip6))
1065 ip_lookup_main_t * lm = is_ip6 ? &ip6_main.lookup_main : &ip4_main.lookup_main;
1066 ip_adjacency_t * a_adj;
1070 adj_index = ip6_fib_lookup (&ip6_main, sw_if_index, &a46.ip6);
1072 adj_index = ip4_fib_lookup (&ip4_main, sw_if_index, &a46.ip4);
1074 a_adj = ip_get_adjacency (lm, adj_index);
1076 if (a_adj->rewrite_header.sw_if_index != sw_if_index)
1080 ip6_adjacency_set_interface_route (vnm, adj, sw_if_index, a_adj->if_address_index);
1082 ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a_adj->if_address_index);
1085 else if (unformat_user (input, unformat_ip_lookup_next, &next))
1087 adj->lookup_next_index = next;
1088 adj->if_address_index = ~0;
1089 if (next == IP_LOOKUP_NEXT_LOCAL)
1090 (void) unformat (input, "%d", &adj->if_address_index);
1091 else if (next == IP_LOOKUP_NEXT_CLASSIFY)
1092 if (!unformat (input, "%d", &adj->classify_table_index))
1094 clib_warning ("classify adj must specify table index");
1099 else if (unformat_user (input,
1100 unformat_vnet_rewrite,
1101 vm, &adj->rewrite_header, sizeof (adj->rewrite_data)))
1102 adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
/*
 * CLI handler that adds or deletes IP routes; ip_route_command names it as
 * its .function.
 *
 * One line is pulled from main_input and scanned for: "table <id>", "del",
 * "add", "count <f>", an ip4 or ip6 "<addr>/<len>" destination,
 * "via <addr> <intfc> [weight <n>]", "via <addr>", "via <adjacency>" and
 * "lookup in table <id>".  Next-hop data is gathered in vectors that are
 * indexed in parallel (next hop address, sw_if_indices, weights, table_ids).
 *
 * After the sanity checks, ip4 destinations go through ip4_add_del_route /
 * ip4_add_del_route_next_hop and ip6 destinations through the ip6
 * counterparts.  Problems are recorded in `error` (clib_error_return) or
 * logged with clib_warning.
 */
1111 vnet_ip_route_cmd (vlib_main_t * vm, unformat_input_t * main_input, vlib_cli_command_t * cmd)
1113 vnet_main_t * vnm = vnet_get_main();
1114 clib_error_t * error = 0;
1115 u32 table_id, is_del;
1116 u32 weight, * weights = 0;
1117 u32 * table_ids = 0;
1118 u32 sw_if_index, * sw_if_indices = 0;
1119 ip4_address_t ip4_addr, * ip4_dst_addresses = 0, * ip4_via_next_hops = 0;
1120 ip6_address_t ip6_addr, * ip6_dst_addresses = 0, * ip6_via_next_hops = 0;
1121 u32 dst_address_length, * dst_address_lengths = 0;
1122 ip_adjacency_t parse_adj, * add_adj = 0;
1123 unformat_input_t _line_input, * line_input = &_line_input;
1131 /* Get a line of input. */
1132 if (! unformat_user (main_input, unformat_line_input, line_input))
/* Zeroed scratch adjacency; the "via <adjacency>" and "lookup in table"
   branches below fill it in and append a copy to add_adj. */
1135 memset(&parse_adj, 0, sizeof (parse_adj));
1137 while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
1139 if (unformat (line_input, "table %d", &table_id))
1141 else if (unformat (line_input, "del"))
1143 else if (unformat (line_input, "add"))
1145 else if (unformat (line_input, "count %f", &count))
/* Destination prefix: ip4 "<addr>/<len>" is tried first, then ip6.  Both
   families append their length to the shared dst_address_lengths vector. */
1148 else if (unformat (line_input, "%U/%d",
1149 unformat_ip4_address, &ip4_addr,
1150 &dst_address_length))
1152 vec_add1 (ip4_dst_addresses, ip4_addr);
1153 vec_add1 (dst_address_lengths, dst_address_length);
1156 else if (unformat (line_input, "%U/%d",
1157 unformat_ip6_address, &ip6_addr,
1158 &dst_address_length))
1160 vec_add1 (ip6_dst_addresses, ip6_addr);
1161 vec_add1 (dst_address_lengths, dst_address_length);
/* "via <addr> <intfc> weight <n>": interface and weight are explicit; the
   matching table_ids slot is set to ~0. */
1164 else if (unformat (line_input, "via %U %U weight %u",
1165 unformat_ip4_address, &ip4_addr,
1166 unformat_vnet_sw_interface, vnm, &sw_if_index,
1169 vec_add1 (ip4_via_next_hops, ip4_addr);
1170 vec_add1 (sw_if_indices, sw_if_index);
1171 vec_add1 (weights, weight);
1172 vec_add1 (table_ids, (u32)~0);
1175 else if (unformat (line_input, "via %U %U weight %u",
1176 unformat_ip6_address, &ip6_addr,
1177 unformat_vnet_sw_interface, vnm, &sw_if_index,
1180 vec_add1 (ip6_via_next_hops, ip6_addr);
1181 vec_add1 (sw_if_indices, sw_if_index);
1182 vec_add1 (weights, weight);
1183 vec_add1 (table_ids, (u32)~0);
/* "via <addr> <intfc>" without a weight: a weight of 1 is recorded. */
1186 else if (unformat (line_input, "via %U %U",
1187 unformat_ip4_address, &ip4_addr,
1188 unformat_vnet_sw_interface, vnm, &sw_if_index))
1190 vec_add1 (ip4_via_next_hops, ip4_addr);
1191 vec_add1 (sw_if_indices, sw_if_index);
1192 vec_add1 (weights, 1);
1193 vec_add1 (table_ids, (u32)~0);
1196 else if (unformat (line_input, "via %U %U",
1197 unformat_ip6_address, &ip6_addr,
1198 unformat_vnet_sw_interface, vnm, &sw_if_index))
1200 vec_add1 (ip6_via_next_hops, ip6_addr);
1201 vec_add1 (sw_if_indices, sw_if_index);
1202 vec_add1 (weights, 1);
1203 vec_add1 (table_ids, (u32)~0);
/* "via <addr>" with no interface: the sw_if_indices slot is ~0 and the
   value table_id holds at this point of the parse is recorded instead. */
1205 else if (unformat (line_input, "via %U",
1206 unformat_ip4_address, &ip4_addr))
1208 vec_add1 (ip4_via_next_hops, ip4_addr);
1209 vec_add1 (sw_if_indices, (u32)~0);
1210 vec_add1 (weights, 1);
1211 vec_add1 (table_ids, table_id);
1213 else if (unformat (line_input, "via %U",
1214 unformat_ip6_address, &ip6_addr))
1216 vec_add1 (ip6_via_next_hops, ip6_addr);
1217 vec_add1 (sw_if_indices, (u32)~0);
1218 vec_add1 (weights, 1);
1219 vec_add1 (table_ids, (u32)table_id);
/* "via <adjacency>": only attempted once a destination of the matching
   family has already been parsed, because the rewrite node index passed to
   unformat_ip_adjacency differs per family. */
1222 else if (vec_len (ip4_dst_addresses) > 0
1223 && unformat (line_input, "via %U",
1224 unformat_ip_adjacency, vm, &parse_adj, ip4_rewrite_node.index))
1225 vec_add1 (add_adj, parse_adj);
1227 else if (vec_len (ip6_dst_addresses) > 0
1228 && unformat (line_input, "via %U",
1229 unformat_ip_adjacency, vm, &parse_adj, ip6_rewrite_node.index))
1230 vec_add1 (add_adj, parse_adj);
/* "lookup in table <id>": outer_table_id is looked up in the ip4 or ip6
   fib_index_by_table_id hash (picked by which destination family has been
   seen); the value found is stored as explicit_fib_index on a LOCAL
   adjacency that is queued in add_adj. */
1231 else if (unformat (line_input, "lookup in table %d", &outer_table_id))
1235 if (vec_len (ip4_dst_addresses) > 0)
1236 p = hash_get (ip4_main.fib_index_by_table_id, outer_table_id);
1238 p = hash_get (ip6_main.fib_index_by_table_id, outer_table_id);
1242 error = clib_error_return (0, "Nonexistent outer table id %d",
1247 parse_adj.lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1248 parse_adj.explicit_fib_index = p[0];
1249 vec_add1 (add_adj, parse_adj);
1253 error = unformat_parse_error (line_input);
1258 unformat_free (line_input);
/* Sanity checks on the parsed line: a destination is required, address
   families may not be mixed, and an add needs a next hop or an adjacency. */
1260 if (vec_len (ip4_dst_addresses) + vec_len (ip6_dst_addresses) == 0)
1262 error = clib_error_return (0, "expected ip4/ip6 destination address/length.");
1266 if (vec_len (ip4_dst_addresses) > 0 && vec_len (ip6_dst_addresses) > 0)
1268 error = clib_error_return (0, "mixed ip4/ip6 address/length.");
1272 if (vec_len (ip4_dst_addresses) > 0 && vec_len (ip6_via_next_hops) > 0)
1274 error = clib_error_return (0, "ip4 destinations with ip6 next hops.");
1278 if (vec_len (ip6_dst_addresses) > 0 && vec_len (ip4_via_next_hops) > 0)
1280 error = clib_error_return (0, "ip6 destinations with ip4 next hops.");
1284 if (! is_del && vec_len (add_adj) + vec_len (weights) == 0)
1286 error = clib_error_return (0, "no next hops or adjacencies to add.");
/* ip4 next hop given without an interface: the next hop is looked up in the
   FIB named by table_ids[0] and the adjacency found there is appended to
   add_adj.  Only element 0 of sw_if_indices / table_ids is examined here. */
1290 if (vec_len(ip4_via_next_hops))
1292 if (sw_if_indices[0] == (u32)~0)
1297 ip_adjacency_t *nh_adj;
1299 p = hash_get (ip4_main.fib_index_by_table_id, table_ids[0]);
1302 error = clib_error_return (0, "Nonexistent FIB id %d",
1309 ai = ip4_fib_lookup_with_table (&ip4_main,
1312 1 /* disable default route */);
1315 error = clib_error_return (0, "next hop %U not in FIB",
1320 nh_adj = ip_get_adjacency (&ip4_main.lookup_main, ai);
1321 vec_add1 (add_adj, nh_adj[0]);
/* Same treatment for an ip6 next hop given without an interface. */
1324 if (vec_len(ip6_via_next_hops))
1326 if (sw_if_indices[0] == (u32)~0)
1331 ip_adjacency_t *nh_adj;
1333 p = hash_get (ip6_main.fib_index_by_table_id, table_ids[0]);
1336 error = clib_error_return (0, "Nonexistent FIB id %d",
1342 ai = ip6_fib_lookup_with_table (&ip6_main,
1347 error = clib_error_return (0, "next hop %U not in FIB",
1352 nh_adj = ip_get_adjacency (&ip6_main.lookup_main, ai);
1353 vec_add1 (add_adj, nh_adj[0]);
1359 ip4_main_t * im4 = &ip4_main;
1360 ip6_main_t * im6 = &ip6_main;
/* ---- ip4 destinations: one add/del request per parsed prefix ---- */
1362 for (i = 0; i < vec_len (ip4_dst_addresses); i++)
1364 ip4_add_del_route_args_t a;
1366 memset (&a, 0, sizeof (a));
1367 a.flags = IP4_ROUTE_FLAG_TABLE_ID;
1368 a.table_index_or_table_id = table_id;
1369 a.dst_address = ip4_dst_addresses[i];
1370 a.dst_address_length = dst_address_lengths[i];
/* Delete with no next hops listed: the masked destination is looked up in
   the per-prefix-length hash of the FIB to obtain the adjacency index, then
   the route is deleted and adjacencies are remapped. */
1375 if (vec_len (ip4_via_next_hops) == 0)
1377 uword * dst_hash, * dst_result;
1378 u32 dst_address_u32;
1381 fib = find_ip4_fib_by_table_index_or_id (im4, table_id,
1382 0 /* by table id */);
1384 a.flags |= IP4_ROUTE_FLAG_DEL;
1385 dst_address_u32 = a.dst_address.as_u32
1386 & im4->fib_masks[a.dst_address_length];
1389 fib->adj_index_by_dst_address[a.dst_address_length];
1390 dst_result = hash_get (dst_hash, dst_address_u32);
1392 a.adj_index = dst_result[0];
1395 clib_warning ("%U/%d not in FIB",
1396 format_ip4_address, &a.dst_address,
1397 a.dst_address_length);
1401 ip4_add_del_route (im4, &a);
1402 ip4_maybe_remap_adjacencies (im4, table_id,
1403 IP4_ROUTE_FLAG_TABLE_ID);
/* Delete the listed next hops.  The outer loop runs n times, moving dst to
   the following prefix each time; t[0]/t[1] bracket the work so a
   routes/sec figure can be printed from count. */
1407 u32 i, j, n, f, incr;
1408 ip4_address_t dst = a.dst_address;
1411 t[0] = vlib_time_now (vm);
/* NOTE(review): a /0 destination makes this a shift by 32 on an int, which
   is undefined in C; no range check on the parsed length is visible. */
1412 incr = 1<<(32 - a.dst_address_length);
1413 for (i = 0; i < n; i++)
/* Every route except the last of the batch is flagged NOT_LAST_IN_GROUP. */
1415 f = i + 1 < n ? IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP : 0;
1416 a.dst_address = dst;
1417 for (j = 0; j < vec_len (ip4_via_next_hops); j++)
/* NOTE(review): table_ids[j] is overwritten in place with p[0] below, and
   the same slot is tested again on later passes of the outer loop --
   confirm the hash lookup key tolerates that when n > 1. */
1419 if (table_ids[j] != (u32)~0)
1421 uword * p = hash_get (im4->fib_index_by_table_id,
1425 clib_warning ("no such FIB table %d",
1429 table_ids[j] = p[0];
1432 ip4_add_del_route_next_hop (im4,
1433 IP4_ROUTE_FLAG_DEL | f,
1435 a.dst_address_length,
1436 &ip4_via_next_hops[j],
1438 weights[j], (u32)~0,
1439 table_ids[j] /* fib index */);
/* Step to the next prefix: add incr in host byte order, store back in
   network byte order. */
1441 dst.as_u32 = clib_host_to_net_u32 (incr + clib_net_to_host_u32 (dst.as_u32));
1443 t[1] = vlib_time_now (vm);
1445 vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0]));
/* Add path: explicit adjacencies take precedence over listed next hops. */
1450 if (vec_len (add_adj) > 0)
1452 a.flags |= IP4_ROUTE_FLAG_ADD;
1453 a.add_adj = add_adj;
1454 a.n_add_adj = vec_len (add_adj);
1456 ip4_add_del_route (im4, &a);
/* Add via next hops: same batch loop as the delete case above, with
   IP4_ROUTE_FLAG_ADD. */
1458 else if (vec_len (ip4_via_next_hops) > 0)
1460 u32 i, j, n, f, incr;
1461 ip4_address_t dst = a.dst_address;
1464 t[0] = vlib_time_now (vm);
/* NOTE(review): same shift-by-32 hazard for a /0 destination as above. */
1465 incr = 1<<(32 - a.dst_address_length);
1466 for (i = 0; i < n; i++)
1468 f = i + 1 < n ? IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP : 0;
1469 a.dst_address = dst;
1470 for (j = 0; j < vec_len (ip4_via_next_hops); j++)
1472 if (table_ids[j] != (u32)~0)
1474 uword * p = hash_get (im4->fib_index_by_table_id,
1478 clib_warning ("no such FIB table %d",
/* NOTE(review): in-place overwrite, see the matching note in the delete
   loop. */
1482 table_ids[j] = p[0];
1484 ip4_add_del_route_next_hop (im4,
1485 IP4_ROUTE_FLAG_ADD | f,
1487 a.dst_address_length,
1488 &ip4_via_next_hops[j],
1490 weights[j], (u32)~0,
1491 table_ids[j] /* fib index */);
1493 dst.as_u32 = clib_host_to_net_u32 (incr + clib_net_to_host_u32 (dst.as_u32));
1495 t[1] = vlib_time_now (vm);
1497 vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0]));
/* ---- ip6 destinations: one add/del request per parsed prefix ---- */
1502 for (i = 0; i < vec_len (ip6_dst_addresses); i++)
1504 ip6_add_del_route_args_t a;
1507 memset (&a, 0, sizeof (a));
1508 a.flags = IP6_ROUTE_FLAG_TABLE_ID;
1509 a.table_index_or_table_id = table_id;
1510 a.dst_address = ip6_dst_addresses[i];
1511 a.dst_address_length = dst_address_lengths[i];
/* Delete with no next hops listed: search the ip6 lookup bihash for the
   masked destination to recover the adjacency index. */
1516 if (vec_len (ip6_via_next_hops) == 0)
1518 BVT(clib_bihash_kv) kv, value;
1519 ip6_address_t dst_address;
1522 fib = find_ip6_fib_by_table_index_or_id (im6, table_id,
1523 0 /* by table id */);
/* NOTE(review): an IP4_* flag is OR'd into ip6 args here, and
   IP6_ROUTE_FLAG_DEL is OR'd in again further down -- looks like a
   copy/paste slip; confirm the two constants share a value. */
1525 a.flags |= IP4_ROUTE_FLAG_DEL;
1527 dst_address = ip6_dst_addresses[i];
/* NOTE(review): the mask is indexed with the parser scratch variable
   dst_address_length (the last length parsed), whereas the key below uses
   a.dst_address_length -- confirm this is intended when several
   destinations are given. */
1529 ip6_address_mask (&dst_address,
1530 &im6->fib_masks[dst_address_length]);
/* Key layout: words 0-1 hold the masked address; word 2 holds the fib's
   index within im6->fibs in the upper 32 bits and the prefix length in the
   low bits. */
1532 kv.key[0] = dst_address.as_u64[0];
1533 kv.key[1] = dst_address.as_u64[1];
1534 kv.key[2] = ((u64)(fib - im6->fibs)<<32)
1535 | a.dst_address_length;
1537 if (BV(clib_bihash_search)(&im6->ip6_lookup_table,
1539 a.adj_index = value.value;
1542 clib_warning ("%U/%d not in FIB",
1543 format_ip6_address, &a.dst_address,
1544 a.dst_address_length);
1548 a.flags |= IP6_ROUTE_FLAG_DEL;
1549 ip6_add_del_route (im6, &a);
1550 ip6_maybe_remap_adjacencies (im6, table_id,
1551 IP6_ROUTE_FLAG_TABLE_ID);
/* Per-next-hop call.  Unlike the ip4 loops, no table-id to fib-index
   translation of table_ids[i] is visible before it is passed as the
   "fib index" argument -- NOTE(review): confirm. */
1556 for (i = 0; i < vec_len (ip6_via_next_hops); i++)
1558 ip6_add_del_route_next_hop (im6,
1561 a.dst_address_length,
1562 &ip6_via_next_hops[i],
1564 weights[i], (u32)~0,
1565 table_ids[i] /* fib index */);
/* Add path: explicit adjacencies take precedence over listed next hops. */
1571 if (vec_len (add_adj) > 0)
1573 a.flags |= IP6_ROUTE_FLAG_ADD;
1574 a.add_adj = add_adj;
1575 a.n_add_adj = vec_len (add_adj);
1577 ip6_add_del_route (im6, &a);
1579 else if (vec_len (ip6_via_next_hops) > 0)
1582 for (i = 0; i < vec_len (ip6_via_next_hops); i++)
1584 ip6_add_del_route_next_hop (im6,
1587 a.dst_address_length,
1588 &ip6_via_next_hops[i],
1590 weights[i], (u32)~0,
/* Release the parse vectors.  NOTE(review): no vec_free of sw_if_indices or
   table_ids appears in this list -- confirm they are released elsewhere. */
1601 vec_free (dst_address_lengths);
1602 vec_free (ip4_dst_addresses);
1603 vec_free (ip6_dst_addresses);
1604 vec_free (ip4_via_next_hops);
1605 vec_free (ip6_via_next_hops);
/* CLI registration carrying the help text for the generic IP command group. */
1609 VLIB_CLI_COMMAND (vlib_cli_ip_command, static) = {
1611 .short_help = "Internet protocol (IP) commands",
/* CLI registration carrying the help text for the IP "show" command group. */
1614 VLIB_CLI_COMMAND (vlib_cli_show_ip_command, static) = {
1616 .short_help = "Internet protocol (IP) show commands",
/* CLI registration carrying the help text for the IP4 "show" command group. */
1619 VLIB_CLI_COMMAND (vlib_cli_show_ip4_command, static) = {
1621 .short_help = "Internet protocol version 4 (IP4) show commands",
/* CLI registration carrying the help text for the IP6 "show" command group. */
1624 VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = {
1626 .short_help = "Internet protocol version 6 (IP6) show commands",
/* CLI registration for the route add/delete command; input is handled by
   vnet_ip_route_cmd. */
1629 VLIB_CLI_COMMAND (ip_route_command, static) = {
1631 .short_help = "Add/delete IP routes",
1632 .function = vnet_ip_route_cmd,
1636 * The next two routines address a longstanding script hemorrhoid.
1637 * Probing a v4 or v6 neighbor needs to appear to be synchronous,
1638 * or dependent route-adds will simply fail.
/*
 * Probe an ip6 neighbor and wait for it to resolve.
 *
 * When retry_count > 0 the calling process node is registered for neighbor
 * resolution events on address a (event type 1, data 0).  Each of up to
 * retry_count rounds calls ip6_probe_neighbor on sw_if_index, then blocks in
 * vlib_process_wait_for_event_or_clock (vm, 1.0) and collects the events.
 * Event type 1 prints "Resolved <addr>", ~0 is the timeout case, and any
 * other type is logged as unknown.  When the loop is exhausted a
 * "Resolution failed for <addr>" error is returned.
 *
 * Must run in process context (enforced by the ASSERT).
 */
1640 static clib_error_t *
1641 ip6_probe_neighbor_wait (vlib_main_t *vm, ip6_address_t * a, u32 sw_if_index,
1644 vnet_main_t * vnm = vnet_get_main();
1649 uword *event_data = 0;
1651 ASSERT (vlib_in_process_context(vm));
/* Ask to be signalled (event type 1) when the neighbor resolves. */
1653 if (retry_count > 0)
1654 vnet_register_ip6_neighbor_resolution_event
1655 (vnm, a, vlib_get_current_process (vm)->node_runtime.node_index,
1656 1 /* event */, 0 /* data */);
1658 for (i = 0; i < retry_count; i++)
1660 /* The interface may be down, etc. */
1661 e = ip6_probe_neighbor (vm, a, sw_if_index);
/* Wait for either the resolution event or the 1.0 clock interval. */
1666 vlib_process_wait_for_event_or_clock (vm, 1.0);
1667 event_type = vlib_process_get_events (vm, &event_data);
1670 case 1: /* resolved... */
1671 vlib_cli_output (vm, "Resolved %U",
1672 format_ip6_address, a);
1676 case ~0: /* timeout */
1680 clib_warning ("unknown event_type %d", event_type);
/* Keep the event vector's storage but empty it for the next round.
   NOTE(review): no vec_free of event_data is visible -- confirm. */
1685 vec_reset_length (event_data);
1688 return clib_error_return (0, "Resolution failed for %U",
1689 format_ip6_address, a);
/*
 * Probe an ip4 neighbor (ARP) and wait for it to resolve.
 *
 * When retry_count > 0 the calling process node is registered for ARP
 * resolution events on address a (event type 1, data 0).  Each of up to
 * retry_count rounds calls ip4_probe_neighbor on sw_if_index, then blocks in
 * vlib_process_wait_for_event_or_clock (vm, 1.0) and collects the events.
 * Event type 1 prints "Resolved <addr>", ~0 is the timeout case, and any
 * other type is logged as unknown.  When the loop is exhausted a
 * "Resolution failed for <addr>" error is returned.
 *
 * Must run in process context (enforced by the ASSERT).
 */
1693 static clib_error_t *
1694 ip4_probe_neighbor_wait (vlib_main_t *vm, ip4_address_t * a, u32 sw_if_index,
1697 vnet_main_t * vnm = vnet_get_main();
1702 uword *event_data = 0;
1704 ASSERT (vlib_in_process_context(vm));
/* Ask to be signalled (event type 1) when ARP resolves the address. */
1706 if (retry_count > 0)
1707 vnet_register_ip4_arp_resolution_event
1708 (vnm, a, vlib_get_current_process (vm)->node_runtime.node_index,
1709 1 /* event */, 0 /* data */);
1711 for (i = 0; i < retry_count; i++)
1713 /* The interface may be down, etc. */
1714 e = ip4_probe_neighbor (vm, a, sw_if_index);
/* Wait for either the resolution event or the 1.0 clock interval. */
1719 vlib_process_wait_for_event_or_clock (vm, 1.0);
1720 event_type = vlib_process_get_events (vm, &event_data);
1723 case 1: /* resolved... */
1724 vlib_cli_output (vm, "Resolved %U",
1725 format_ip4_address, a);
1729 case ~0: /* timeout */
1733 clib_warning ("unknown event_type %d", event_type);
/* Keep the event vector's storage but empty it for the next round.
   NOTE(review): no vec_free of event_data is visible -- confirm. */
1739 vec_reset_length (event_data);
1742 return clib_error_return (0, "Resolution failed for %U",
1743 format_ip4_address, a);
/*
 * CLI handler for "ip probe-neighbor <intfc> <ip4-addr> | <ip6-addr>
 * [retry nn]".
 *
 * Parses one line for an interface, an optional retry count (default 3) and
 * an ip4 or ip6 address, rejects the line unless an interface and exactly
 * one address were supplied, then hands off to ip4_probe_neighbor_wait or
 * ip6_probe_neighbor_wait and keeps the resulting error.
 */
1747 static clib_error_t *
1748 probe_neighbor_address (vlib_main_t * vm,
1749 unformat_input_t * input,
1750 vlib_cli_command_t * cmd)
1752 vnet_main_t * vnm = vnet_get_main();
1753 unformat_input_t _line_input, * line_input = &_line_input;
1756 clib_error_t * error = 0;
/* ~0 marks "no interface given"; checked after parsing. */
1757 u32 sw_if_index = ~0;
1758 int retry_count = 3;
/* address_set is compared against 0 and > 1 after parsing, so it presumably
   counts the addresses seen on the line. */
1760 int address_set = 0;
1762 /* Get a line of input. */
1763 if (! unformat_user (input, unformat_line_input, line_input))
1766 while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
1768 if (unformat_user (line_input, unformat_vnet_sw_interface, vnm,
1771 else if (unformat (line_input, "retry %d", &retry_count))
1774 else if (unformat (line_input, "%U", unformat_ip4_address, &a4))
1776 else if (unformat (line_input, "%U", unformat_ip6_address, &a6))
/* NOTE(review): this return comes before the unformat_free below, so
   line_input looks un-freed on the unknown-input path -- confirm. */
1782 return clib_error_return (0, "unknown input '%U'",
1783 format_unformat_error, line_input);
1786 unformat_free (line_input);
/* Argument validation: interface required, exactly one address. */
1788 if (sw_if_index == ~0)
1789 return clib_error_return (0, "Interface required, not set.");
1790 if (address_set == 0)
1791 return clib_error_return (0, "ip address required, not set.");
1792 if (address_set > 1)
1793 return clib_error_return (0, "Multiple ip addresses not supported.");
/* Dispatch to the ip4 or ip6 synchronous probe helper. */
1796 error = ip4_probe_neighbor_wait (vm, &a4, sw_if_index, retry_count);
1798 error = ip6_probe_neighbor_wait (vm, &a6, sw_if_index, retry_count);
/* CLI registration: "ip probe-neighbor" is handled by
   probe_neighbor_address. */
1803 VLIB_CLI_COMMAND (ip_probe_neighbor_command, static) = {
1804 .path = "ip probe-neighbor",
1805 .function = probe_neighbor_address,
1806 .short_help = "ip probe-neighbor <intfc> <ip4-addr> | <ip6-addr> [retry nn]",
/* Packed record describing one ip4 route for display purposes (presumably
   the ip4_route_t that ip4_route_cmp and ip4_show_fib operate on). */
1809 typedef CLIB_PACKED (struct {
/* Route destination address. */
1810 ip4_address_t address;
/* Prefix length, stored in a 6-bit field (values 0..63 fit). */
1812 u32 address_length : 6;
/*
 * Ordering callback for ip4_route_t records (passed to
 * vec_sort_with_function in ip4_show_fib).  Routes are ordered by
 * ip4_address_compare on the address; ties are broken by ascending
 * address_length.  Returns <0, 0 or >0 accordingly.
 */
1818 ip4_route_cmp (void * a1, void * a2)
1820 ip4_route_t * r1 = a1;
1821 ip4_route_t * r2 = a2;
1823 int cmp = ip4_address_compare (&r1->address, &r2->address);
1824 return cmp ? cmp : ((int) r1->address_length - (int) r2->address_length);
/*
 * CLI handler for "show ip fib [mtrie] [summary] [table <n>] [<ip4-addr>]
 * [clear] [include-empty]".
 *
 * Walks every ip4 FIB in im4->fibs.  A FIB is skipped when it is empty
 * (unless include-empty was given) or when a table filter is set and does
 * not match.  In summary mode only a per-prefix-length route count is
 * printed.  Otherwise the routes are gathered into a vector (optionally
 * restricted to those covering a given address), sorted with ip4_route_cmp
 * and printed with their packet/byte counters and adjacency details.
 */
1827 static clib_error_t *
1828 ip4_show_fib (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
1830 vnet_main_t * vnm = vnet_get_main();
1831 ip4_main_t * im4 = &ip4_main;
1832 ip4_route_t * routes, * r;
1834 ip_lookup_main_t * lm = &im4->lookup_main;
1836 int verbose, matching, mtrie, include_empty_fibs;
1837 ip4_address_t matching_address;
1844 include_empty_fibs = 0;
/* Option parsing. */
1847 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1849 if (unformat (input, "brief") || unformat (input, "summary")
1850 || unformat (input, "sum"))
1853 else if (unformat (input, "mtrie"))
1856 else if (unformat (input, "include-empty"))
1857 include_empty_fibs = 1;
1859 else if (unformat (input, "%U", unformat_ip4_address, &matching_address))
1862 else if (unformat (input, "clear"))
1865 else if (unformat (input, "table %d", &table_id))
1871 vec_foreach (fib, im4->fibs)
/* First pass over the per-prefix-length hashes, reading element counts;
   the result feeds the fib_not_empty test below. */
1876 for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++)
1878 uword * hash = fib->adj_index_by_dst_address[i];
1879 uword n_elts = hash_elts (hash);
/* Filters: empty FIBs (unless include-empty) and non-matching table ids. */
1887 if (fib_not_empty == 0 && include_empty_fibs == 0)
1890 if (table_id >= 0 && table_id != (int)fib->table_id)
/* With include-empty the table header is printed up front, so that empty
   tables are shown as well. */
1893 if (include_empty_fibs)
1894 vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U",
1895 fib->table_id, fib - im4->fibs,
1896 format_ip_flow_hash_config, fib->flow_hash_config);
/* Summary output: header (if not already printed above) followed by a
   route count per prefix length. */
1901 if (include_empty_fibs == 0)
1902 vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U",
1903 fib->table_id, fib - im4->fibs,
1904 format_ip_flow_hash_config, fib->flow_hash_config);
1905 vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count");
1906 for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++)
1908 uword * hash = fib->adj_index_by_dst_address[i];
1909 uword n_elts = hash_elts (hash);
1911 vlib_cli_output (vm, "%20d%16d", i, n_elts);
/* Reuse the routes/results vectors for this FIB by resetting their
   lengths. */
1917 _vec_len (routes) = 0;
1919 _vec_len (results) = 0;
/* Gather routes, one prefix length at a time. */
1921 for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++)
1923 uword * hash = fib->adj_index_by_dst_address[i];
1927 x.address_length = i;
/* Address filter: mask the requested address to this prefix length and
   look up that single key. */
1931 x.address.as_u32 = matching_address.as_u32 & im4->fib_masks[i];
1932 p = hash_get_pair (hash, x.address.as_u32);
/* Multi-word FIB results are copied into `results` and x.index records
   their offset there; otherwise x.index is the stored value itself. */
1935 if (lm->fib_result_n_words > 1)
1937 x.index = vec_len (results);
1938 vec_add (results, p->value, lm->fib_result_n_words);
1941 x.index = p->value[0];
1942 vec_add1 (routes, x);
1947 hash_foreach_pair (p, hash, ({
1948 x.address.data_u32 = p->key;
1949 if (lm->fib_result_n_words > 1)
1951 x.index = vec_len (results);
1952 vec_add (results, p->value, lm->fib_result_n_words);
1955 x.index = p->value[0];
1957 vec_add1 (routes, x);
1962 vec_sort_with_function (routes, ip4_route_cmp);
1963 if (vec_len(routes)) {
1964 if (include_empty_fibs == 0)
1965 vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U",
1966 fib->table_id, fib - im4->fibs,
1967 format_ip_flow_hash_config, fib->flow_hash_config);
1969 vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie);
1970 vlib_cli_output (vm, "%=20s%=16s%=16s%=16s",
1971 "Destination", "Packets", "Bytes", "Adjacency");
/* One output group per route. */
1973 vec_foreach (r, routes)
1975 vlib_counter_t c, sum;
1976 uword i, j, n_left, n_nhs, adj_index, * result = 0;
1977 ip_adjacency_t * adj;
1978 ip_multipath_next_hop_t * nhs, tmp_nhs[1];
/* Recover the adjacency index: directly from the route, or from word 0 of
   the saved multi-word result. */
1980 adj_index = r->index;
1981 if (lm->fib_result_n_words > 1)
1983 result = vec_elt_at_index (results, adj_index);
1984 adj_index = result[0];
1987 adj = ip_get_adjacency (lm, adj_index);
/* Single adjacency vs. multipath: for multipath the normalized next-hop
   list is fetched from the next-hop heap. */
1988 if (adj->n_adj == 1)
1991 nhs[0].next_hop_adj_index = ~0; /* not used */
1997 ip_multipath_adjacency_t * madj;
1998 madj = vec_elt_at_index (lm->multipath_adjacencies, adj->heap_handle);
1999 nhs = heap_elt_at_index (lm->next_hop_heap, madj->normalized_next_hops.heap_offset);
2000 n_nhs = madj->normalized_next_hops.count;
/* Walk the adj->n_adj adjacencies, accumulating their combined counters
   into sum; n_left is seeded from (and later reloaded with) a next-hop
   weight.  The zeroing call is presumably tied to the "clear" option. */
2003 n_left = nhs[0].weight;
2004 vlib_counter_zero (&sum);
2005 for (i = j = 0; i < adj->n_adj; i++)
2008 vlib_get_combined_counter (&lm->adjacency_counters,
2011 vlib_zero_combined_counter (&lm->adjacency_counters,
2013 vlib_counter_add (&sum, &c);
/* Build the output line: destination (or blank padding), counters, then
   weight/index and the formatted adjacency, indented to line up. */
2020 msg = format (msg, "%-20U",
2021 format_ip4_address_and_length,
2022 r->address.data, r->address_length);
2024 msg = format (msg, "%U", format_white_space, 20);
2026 msg = format (msg, "%16Ld%16Ld ", sum.packets, sum.bytes);
2028 indent = vec_len (msg);
2029 msg = format (msg, "weight %d, index %d\n%U%U",
2030 nhs[j].weight, adj_index + i,
2031 format_white_space, indent,
2032 format_ip_adjacency,
2033 vnm, lm, adj_index + i);
2035 vlib_cli_output (vm, "%v", msg);
/* Optional extra line produced by the lookup main's format_fib_result
   hook when a multi-word result exists. */
2038 if (result && lm->format_fib_result)
2039 vlib_cli_output (vm, "%20s%U", "",
2040 lm->format_fib_result, vm, lm, result,
2041 i + 1 - nhs[j].weight,
2047 n_left = nhs[j].weight;
2048 vlib_counter_zero (&sum);
/* CLI registration: "show ip fib" is handled by ip4_show_fib. */
2061 VLIB_CLI_COMMAND (ip4_show_fib_command, static) = {
2062 .path = "show ip fib",
2063 .short_help = "show ip fib [mtrie] [summary] [table <n>] [<ip4-addr>] [clear] [include-empty]",
2064 .function = ip4_show_fib,
/* Route destination address (field of the ip6 route record -- presumably
   ip6_route_t, whose ->address is read by ip6_route_cmp). */
2068 ip6_address_t address;
/* Argument block handed to add_routes_in_fib through the bihash walk.
   routep points at the caller's route vector so the callback can grow it. */
2077 ip6_route_t ** routep;
2078 } add_routes_in_fib_arg_t;
/*
 * Bihash walk callback: for every key/value pair whose key[2] upper 32 bits
 * equal ap->fib_index, append a route record to *ap->routep.  The address
 * is read from the start of the kv pair, the prefix length from the low
 * 8 bits of key[2], and the index from the pair's value.
 */
2080 static void add_routes_in_fib (BVT(clib_bihash_kv) * kvp, void *arg)
2082 add_routes_in_fib_arg_t * ap = arg;
2084 if (kvp->key[2]>>32 == ap->fib_index)
2086 ip6_address_t *addr;
/* The first bytes of the kv pair are reinterpreted as the ip6 address. */
2088 addr = (ip6_address_t *) kvp;
2089 vec_add2 (*ap->routep, r, 1);
2090 r->address = addr[0];
2091 r->address_length = kvp->key[2] & 0xFF;
2092 r->index = kvp->value;
/* Argument block for count_routes_in_fib_at_prefix_length: one counter per
   prefix length, 129 slots (indices 0..128). */
2098 u64 count_by_prefix_length[129];
2099 } count_routes_in_fib_at_prefix_length_arg_t;
/*
 * Bihash walk callback: for pairs whose key[2] upper 32 bits match
 * ap->fib_index, bump the counter for the prefix length held in the low
 * 8 bits of key[2].
 */
2101 static void count_routes_in_fib_at_prefix_length
2102 (BVT(clib_bihash_kv) * kvp, void *arg)
2104 count_routes_in_fib_at_prefix_length_arg_t * ap = arg;
2107 if ((kvp->key[2]>>32) != ap->fib_index)
/* NOTE(review): an 8-bit value indexes a 129-entry array; this relies on
   stored prefix lengths never exceeding 128 -- confirm. */
2110 mask_width = kvp->key[2] & 0xFF;
2112 ap->count_by_prefix_length[mask_width]++;
/*
 * Ordering callback for ip6_route_t records (passed to
 * vec_sort_with_function in ip6_show_fib).  Routes are ordered by
 * ip6_address_compare on the address; ties are broken by ascending
 * address_length.  Returns <0, 0 or >0 accordingly.
 */
2116 ip6_route_cmp (void * a1, void * a2)
2118 ip6_route_t * r1 = a1;
2119 ip6_route_t * r2 = a2;
2121 int cmp = ip6_address_compare (&r1->address, &r2->address);
2122 return cmp ? cmp : ((int) r1->address_length - (int) r2->address_length);
/*
 * CLI handler for "show ip6 fib [summary] [clear]".
 *
 * Prints lookup-table statistics, then walks every ip6 FIB in im6->fibs.
 * In summary mode a per-prefix-length route count is printed, obtained by
 * walking the shared ip6 lookup bihash with
 * count_routes_in_fib_at_prefix_length.  Otherwise the FIB's routes are
 * gathered with add_routes_in_fib, sorted with ip6_route_cmp and printed
 * with their packet/byte counters and adjacency details.
 */
2125 static clib_error_t *
2126 ip6_show_fib (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
2128 vnet_main_t * vnm = vnet_get_main();
2129 ip6_main_t * im6 = &ip6_main;
2130 ip6_route_t * routes, * r;
2132 ip_lookup_main_t * lm = &im6->lookup_main;
2135 BVT(clib_bihash) * h = &im6->ip6_lookup_table;
2136 __attribute__((unused)) u8 clear = 0;
2137 add_routes_in_fib_arg_t _a, *a=&_a;
2138 count_routes_in_fib_at_prefix_length_arg_t _ca, *ca = &_ca;
/* Option parsing. */
2143 if (unformat (input, "brief") || unformat (input, "summary")
2144 || unformat (input, "sum"))
2147 if (unformat (input, "clear"))
/* Global lookup-table statistics; the size is shifted right by 20 to
   report it in MB. */
2150 vlib_cli_output (vm, "FIB lookup table: %d buckets, %lld MB heap",
2151 im6->lookup_table_nbuckets, im6->lookup_table_size>>20);
2152 vlib_cli_output (vm, "%U", format_mheap, h->mheap, 0 /*verbose*/);
2153 vlib_cli_output (vm, " ");
2155 vec_foreach (fib, im6->fibs)
2157 vlib_cli_output (vm, "VRF %d, fib_index %d, flow hash: %U",
2158 fib->table_id, fib - im6->fibs,
2159 format_ip_flow_hash_config, fib->flow_hash_config);
/* Summary output: count this FIB's routes per prefix length by walking the
   whole bihash, then print the non-zero counts from /128 down to /0. */
2165 vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count");
2167 memset (ca, 0, sizeof(*ca));
2168 ca->fib_index = fib - im6->fibs;
2170 BV(clib_bihash_foreach_key_value_pair)
2171 (h, count_routes_in_fib_at_prefix_length, ca);
/* NOTE(review): the len >= 0 test only terminates if len is a signed type;
   its declaration is not visible here -- confirm. */
2173 for (len = 128; len >= 0; len--)
2175 if (ca->count_by_prefix_length[len])
2176 vlib_cli_output (vm, "%=20d%=16lld",
2177 len, ca->count_by_prefix_length[len]);
/* Reuse the routes/results vectors for this FIB by resetting their
   lengths. */
2183 _vec_len (routes) = 0;
2185 _vec_len (results) = 0;
/* Gather this FIB's routes by walking the bihash. */
2187 a->fib_index = fib - im6->fibs;
2188 a->routep = &routes;
2190 BV(clib_bihash_foreach_key_value_pair)(h, add_routes_in_fib, a);
2192 vec_sort_with_function (routes, ip6_route_cmp);
2194 vlib_cli_output (vm, "%=45s%=16s%=16s%=16s",
2195 "Destination", "Packets", "Bytes", "Adjacency");
/* One output group per route. */
2196 vec_foreach (r, routes)
2198 vlib_counter_t c, sum;
2199 uword i, j, n_left, n_nhs, adj_index, * result = 0;
2200 ip_adjacency_t * adj;
2201 ip_multipath_next_hop_t * nhs, tmp_nhs[1];
/* Recover the adjacency index: directly from the route, or from word 0 of
   a multi-word result. */
2203 adj_index = r->index;
2204 if (lm->fib_result_n_words > 1)
2206 result = vec_elt_at_index (results, adj_index);
2207 adj_index = result[0];
2210 adj = ip_get_adjacency (lm, adj_index);
/* Single adjacency vs. multipath: for multipath the normalized next-hop
   list is fetched from the next-hop heap. */
2211 if (adj->n_adj == 1)
2214 nhs[0].next_hop_adj_index = ~0; /* not used */
2220 ip_multipath_adjacency_t * madj;
2221 madj = vec_elt_at_index (lm->multipath_adjacencies, adj->heap_handle);
2222 nhs = heap_elt_at_index (lm->next_hop_heap, madj->normalized_next_hops.heap_offset);
2223 n_nhs = madj->normalized_next_hops.count;
/* Walk the adj->n_adj adjacencies, accumulating their combined counters
   into sum; n_left is seeded from (and later reloaded with) a next-hop
   weight.  The zeroing call is presumably tied to the "clear" option. */
2226 n_left = nhs[0].weight;
2227 vlib_counter_zero (&sum);
2228 for (i = j = 0; i < adj->n_adj; i++)
2231 vlib_get_combined_counter (&lm->adjacency_counters,
2234 vlib_zero_combined_counter (&lm->adjacency_counters,
2236 vlib_counter_add (&sum, &c);
/* Build the output line: destination (or blank padding), counters, then
   weight/index and the formatted adjacency, indented to line up. */
2243 msg = format (msg, "%-45U",
2244 format_ip6_address_and_length,
2245 r->address.as_u8, r->address_length);
2247 msg = format (msg, "%U", format_white_space, 20);
2249 msg = format (msg, "%16Ld%16Ld ", sum.packets, sum.bytes);
2251 indent = vec_len (msg);
2252 msg = format (msg, "weight %d, index %d\n%U%U",
2253 nhs[j].weight, adj_index + i,
2254 format_white_space, indent,
2255 format_ip_adjacency,
2256 vnm, lm, adj_index + i);
2258 vlib_cli_output (vm, "%v", msg);
2264 n_left = nhs[j].weight;
2265 vlib_counter_zero (&sum);
/* Optional extra line produced by the lookup main's format_fib_result
   hook when a multi-word result exists. */
2270 if (result && lm->format_fib_result)
2271 vlib_cli_output (vm, "%20s%U", "", lm->format_fib_result, vm, lm, result, 0);
2273 vlib_cli_output (vm, " ");
/* CLI registration: "show ip6 fib" is handled by ip6_show_fib. */
2282 VLIB_CLI_COMMAND (ip6_show_fib_command, static) = {
2283 .path = "show ip6 fib",
2284 .short_help = "show ip6 fib [summary] [clear]",
2285 .function = ip6_show_fib,