2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip_lookup.c: ip4/6 adjacency and lookup table management
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vppinfra/math.h> /* for fabs */
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/adj_alloc.h>
45 ip_multipath_del_adjacency (ip_lookup_main_t * lm, u32 del_adj_index);
/*
 * Overwrite a block of n_adj adjacencies starting at adj with the byte
 * pattern 0xfe (presumably so that use of stale entries is easy to spot --
 * confirm).  The heap_handle and n_adj fields of the first element are saved
 * before the memset and restored afterwards, so the block can still be
 * identified after poisoning.
 */
48 ip_poison_adjacencies (ip_adjacency_t * adj, uword n_adj)
52 u32 save_handle = adj->heap_handle;
53 u32 save_n_adj = adj->n_adj;
55 memset (adj, 0xfe, n_adj * sizeof (adj[0]));
57 adj->heap_handle = save_handle;
58 adj->n_adj = save_n_adj;
/*
 * Enter adjacency adj_index into lm->adj_index_by_signature, keyed by the
 * value vnet_ip_adjacency_signature() returns for it.
 *
 * Two cases are visible below: when the signature is already present, the
 * hash entry is replaced so that adj_index becomes the head, and the
 * previous head (old_ai) is linked through adj->next_adj_with_signature;
 * otherwise the adjacency is inserted alone with next_adj_with_signature
 * set to 0, which acts as the end-of-chain marker.
 */
63 ip_share_adjacency(ip_lookup_main_t * lm, u32 adj_index)
65 ip_adjacency_t * adj = ip_get_adjacency(lm, adj_index);
68 uword signature = vnet_ip_adjacency_signature (adj);
70 p = hash_get (lm->adj_index_by_signature, signature);
74 /* Save the adj index, p[0] will be toast after the unset! */
76 hash_unset (lm->adj_index_by_signature, signature);
77 hash_set (lm->adj_index_by_signature, signature, adj_index);
78 adj->next_adj_with_signature = old_ai;
82 adj->next_adj_with_signature = 0;
83 hash_set (lm->adj_index_by_signature, signature, adj_index);
/*
 * Remove adjacency adj_index from the signature chain kept in
 * lm->adj_index_by_signature.
 *
 * If the adjacency is the head of its chain, the hash entry is either
 * dropped (no successor) or re-pointed at the successor.  Otherwise the
 * chain is walked via next_adj_with_signature until the adjacency is found,
 * and its predecessor is linked past it.
 */
88 ip_unshare_adjacency(ip_lookup_main_t * lm, u32 adj_index)
90 ip_adjacency_t * adj = ip_get_adjacency(lm, adj_index);
94 ip_adjacency_t * this_adj, * prev_adj = 0;
96 signature = vnet_ip_adjacency_signature (adj);
97 p = hash_get (lm->adj_index_by_signature, signature);
102 /* At the top of the signature chain (likely)? */
103 if (this_ai == adj_index)
/* Head with no successor: the signature simply disappears from the hash. */
105 if (adj->next_adj_with_signature == 0)
107 hash_unset (lm->adj_index_by_signature, signature);
/* Head with a successor: the successor becomes the new hash entry. */
112 this_adj = ip_get_adjacency (lm, adj->next_adj_with_signature);
113 hash_unset (lm->adj_index_by_signature, signature);
114 hash_set (lm->adj_index_by_signature, signature,
115 this_adj->heap_handle);
118 else /* walk signature chain */
120 this_adj = ip_get_adjacency (lm, this_ai);
121 while (this_adj != adj)
124 this_adj = ip_get_adjacency
125 (lm, this_adj->next_adj_with_signature);
127 * This can happen when creating the first multipath adj of a set
128 * We end up looking at the miss adjacency (handle==0).
130 if (this_adj->heap_handle == 0)
/* Unlink: the predecessor now skips over the removed adjacency. */
133 prev_adj->next_adj_with_signature = this_adj->next_adj_with_signature;
/*
 * Register a custom adjacency type described by reg for IPv4 (is_ip4 != 0)
 * or IPv6.
 *
 * The node named reg->node_name is added as a next node of "ip4-lookup" or
 * "ip6-lookup"; the resulting next index is written through reg->next_index
 * and a copy of *reg is stored in lm->registered_adjacencies at that index.
 *
 * NOTE(review): the results of both vlib_get_node_by_name() calls are
 * dereferenced in the visible lines without a null check -- confirm that an
 * unknown node name cannot reach this point.
 */
137 int ip_register_adjacency(vlib_main_t *vm,
139 ip_adj_register_t *reg)
141 ip_lookup_main_t *lm = (is_ip4)?&ip4_main.lookup_main:&ip6_main.lookup_main;
142 vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) ((is_ip4)?"ip4-lookup":"ip6-lookup"));
143 vlib_node_t *next_node = vlib_get_node_by_name(vm, (u8 *) reg->node_name);
144 *reg->next_index = vlib_node_add_next (vm, node->index, next_node->index);
/* Grow the registration vector so that next_index is a valid slot. */
145 vec_validate(lm->registered_adjacencies, *reg->next_index);
146 lm->registered_adjacencies[*reg->next_index] = *reg;
/*
 * Run ip_register_adjacency() for registrations already attached to the
 * lookup main of the selected address family.
 *
 * The existing lm->registered_adjacencies pointer is kept in reg and the
 * field is reset to 0 before registering, so the vector is rebuilt by
 * ip_register_adjacency().  A non-zero result from ip_register_adjacency()
 * is captured in rv (the handling of rv is not visible here).
 */
150 int ip_init_registered_adjacencies(u8 is_ip4)
152 vlib_main_t *vm = vlib_get_main();
153 ip_lookup_main_t *lm = (is_ip4)?&ip4_main.lookup_main:&ip6_main.lookup_main;
154 ip_adj_register_t *reg = lm->registered_adjacencies;
155 lm->registered_adjacencies = 0; //Init vector
158 if((rv = ip_register_adjacency(vm, is_ip4, reg)))
165 /* Create new block of given number of contiguous adjacencies. */
/*
 * Parameters:
 *   lm                - lookup main owning the adjacency heap.
 *   copy_adj          - optional template; when non-null its contents are
 *                       copied into each new adjacency.
 *   n_adj             - number of adjacencies in the block.
 *   adj_index_return  - receives the index of the (new or shared) adjacency.
 *
 * For a single templated adjacency the function first tries to reuse an
 * existing one: the preallocated drop/local adjacencies, or any adjacency
 * on the matching signature chain for which
 * vnet_ip_adjacency_share_compare() succeeds.  Otherwise a new block is
 * taken from aa_alloc(), initialized, its counters zeroed, and (for the
 * single templated case) entered into the sharing hash.
 */
167 ip_add_adjacency (ip_lookup_main_t * lm,
168 ip_adjacency_t * copy_adj,
170 u32 * adj_index_return)
172 ip_adjacency_t * adj;
175 /* See if we know enough to attempt to share an existing adjacency */
176 if (copy_adj && n_adj == 1)
181 switch (copy_adj->lookup_next_index)
183 case IP_LOOKUP_NEXT_DROP:
/* A zero index means the special adjacency has not been created yet. */
184 if (lm->drop_adj_index)
186 adj = ip_get_adjacency (lm, lm->drop_adj_index);
187 *adj_index_return = lm->drop_adj_index;
192 case IP_LOOKUP_NEXT_LOCAL:
193 if (lm->local_adj_index)
195 adj = ip_get_adjacency (lm, lm->local_adj_index);
196 *adj_index_return = lm->local_adj_index;
/* Look for an equivalent adjacency on the chain for this signature. */
203 signature = vnet_ip_adjacency_signature (copy_adj);
204 p = hash_get (lm->adj_index_by_signature, signature);
207 adj = vec_elt_at_index (lm->adjacency_heap, p[0]);
210 if (vnet_ip_adjacency_share_compare (adj, copy_adj))
213 *adj_index_return = p[0];
/* next_adj_with_signature == 0 marks the end of the chain. */
216 if (adj->next_adj_with_signature == 0)
218 adj = vec_elt_at_index (lm->adjacency_heap,
219 adj->next_adj_with_signature);
/* Nothing to share: allocate a fresh block of n_adj adjacencies. */
224 lm->adjacency_heap = aa_alloc (lm->adjacency_heap, &adj, n_adj);
225 handle = ai = adj->heap_handle;
227 ip_poison_adjacencies (adj, n_adj);
229 /* Validate adjacency counters. */
230 vlib_validate_combined_counter (&lm->adjacency_counters, ai + n_adj - 1);
232 for (i = 0; i < n_adj; i++)
234 /* Make sure certain fields are always initialized. */
235 adj[i].rewrite_header.sw_if_index = ~0;
236 adj[i].explicit_fib_index = ~0;
237 adj[i].mcast_group_index = ~0;
238 adj[i].classify.table_index = ~0;
239 adj[i].saved_lookup_next_index = 0;
240 adj[i].special_adjacency_format_function_index = 0;
243 adj[i] = copy_adj[i];
/* Bookkeeping fields are set after the template copy so it cannot clobber them. */
245 adj[i].heap_handle = handle;
246 adj[i].n_adj = n_adj;
247 adj[i].share_count = 0;
248 adj[i].next_adj_with_signature = 0;
250 /* Zero possibly stale counters for re-used adjacencies. */
251 vlib_zero_combined_counter (&lm->adjacency_counters, ai + i);
254 /* Set up to share the adj later */
255 if (copy_adj && n_adj == 1)
256 ip_share_adjacency(lm, ai);
258 *adj_index_return = ai;
/*
 * Replace the rewrite data and lookup next index of the existing adjacency
 * adj_index with those of copy_adj.
 *
 * Delete callbacks are run and the adjacency is removed from the
 * signature-sharing hash before it is modified; afterwards it is entered
 * into the sharing hash again and add callbacks are run.
 */
263 ip_update_adjacency (ip_lookup_main_t * lm,
265 ip_adjacency_t * copy_adj)
267 ip_adjacency_t * adj = ip_get_adjacency(lm, adj_index);
269 ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 1);
270 ip_unshare_adjacency(lm, adj_index);
272 /* temporarily redirect to the ARP next while updating rewrite data */
273 adj->lookup_next_index = IP_LOOKUP_NEXT_ARP;
274 CLIB_MEMORY_BARRIER();
/* Source operand is the address of copy_adj's rewrite header. */
276 clib_memcpy (&adj->rewrite_header, &copy_adj->rewrite_header,
277 VLIB_BUFFER_PRE_DATA_SIZE);
278 adj->lookup_next_index = copy_adj->lookup_next_index;
279 ip_share_adjacency(lm, adj_index);
280 ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
/*
 * Delete the adjacency block at adj_index.
 *
 * Delete callbacks run first.  The adjacency is then removed from the
 * sharing hash, optionally removed from every multipath adjacency that
 * references it (delete_multipath_adjacency != 0), poisoned, and returned
 * to the adjacency heap with aa_free().  The share_count test below guards
 * shared adjacencies (the statements it controls are not visible here).
 */
283 static void ip_del_adjacency2 (ip_lookup_main_t * lm, u32 adj_index, u32 delete_multipath_adjacency)
285 ip_adjacency_t * adj;
287 ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 1);
289 adj = ip_get_adjacency (lm, adj_index);
291 /* Special-case miss, local, drop adjs */
297 if (adj->share_count > 0)
303 ip_unshare_adjacency(lm, adj_index);
306 if (delete_multipath_adjacency)
307 ip_multipath_del_adjacency (lm, adj_index);
309 ip_poison_adjacencies (adj, adj->n_adj);
311 aa_free (lm->adjacency_heap, adj);
/*
 * Public entry point for deleting an adjacency: forwards to
 * ip_del_adjacency2() with delete_multipath_adjacency set to 1.
 */
314 void ip_del_adjacency (ip_lookup_main_t * lm, u32 adj_index)
315 { ip_del_adjacency2 (lm, adj_index, /* delete_multipath_adjacency */ 1); }
/*
 * Comparison function for next hops, used with qsort() and directly by
 * ip_multipath_normalize_next_hops().
 *
 * Orders primarily by weight (the result is +1 or -1 when weights differ)
 * and, for equal weights, by the difference of next_hop_adj_index.
 * Both fields are cast to int before subtracting.
 */
318 next_hop_sort_by_weight (ip_multipath_next_hop_t * n1,
319 ip_multipath_next_hop_t * n2)
321 int cmp = (int) n1->weight - (int) n2->weight;
323 ? (int) n1->next_hop_adj_index - (int) n2->next_hop_adj_index
324 : (cmp > 0 ? +1 : -1));
327 /* Given next hop vector is over-written with normalized one with sorted weights and
328 with weights corresponding to the number of adjacencies for each next hop.
329 Returns number of adjacencies in block. */
/*
 * Parameters:
 *   lm                   - supplies multipath_next_hop_error_tolerance.
 *   raw_next_hops        - input vector of next hops with caller weights.
 *   normalized_next_hops - in/out scratch vector; reused across calls and
 *                          written back at the end.
 */
330 static u32 ip_multipath_normalize_next_hops (ip_lookup_main_t * lm,
331 ip_multipath_next_hop_t * raw_next_hops,
332 ip_multipath_next_hop_t ** normalized_next_hops)
334 ip_multipath_next_hop_t * nhs;
335 uword n_nhs, n_adj, n_adj_left, i;
336 f64 sum_weight, norm, error;
338 n_nhs = vec_len (raw_next_hops);
343 /* Allocate enough space for 2 copies; we'll use second copy to save original weights. */
344 nhs = *normalized_next_hops;
345 vec_validate (nhs, 2*n_nhs - 1);
347 /* Fast path: 1 next hop in block. */
351 nhs[0] = raw_next_hops[0];
/*
 * NOTE(review): cmp is 1 when raw_next_hops[0] compares lower, so nhs[0]
 * receives the hop that compares higher -- the reverse of the ascending
 * order qsort() produces with the same comparator further down.  Confirm
 * this is intended.
 */
359 int cmp = next_hop_sort_by_weight (&raw_next_hops[0], &raw_next_hops[1]) < 0;
362 nhs[0] = raw_next_hops[cmp];
363 nhs[1] = raw_next_hops[cmp ^ 1];
365 /* Fast path: equal cost multipath with 2 next hops. */
366 if (nhs[0].weight == nhs[1].weight)
368 nhs[0].weight = nhs[1].weight = 1;
375 clib_memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
376 qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight);
379 /* Find total weight to normalize weights. */
381 for (i = 0; i < n_nhs; i++)
382 sum_weight += nhs[i].weight;
384 /* In the unlikely case that all weights are given as 0, set them all to 1. */
387 for (i = 0; i < n_nhs; i++)
392 /* Save copies of all next hop weights to avoid being overwritten in loop below. */
393 for (i = 0; i < n_nhs; i++)
394 nhs[n_nhs + i].weight = nhs[i].weight;
396 /* Try larger and larger power of 2 sized adjacency blocks until we
397 find one where traffic flows to within 1% of specified weights. */
398 for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
/* norm scales a raw weight to a share of the n_adj slots in this block. */
402 norm = n_adj / sum_weight;
404 for (i = 0; i < n_nhs; i++)
406 f64 nf = nhs[n_nhs + i].weight * norm; /* use saved weights */
407 word n = flt_round_nearest (nf);
/* Never hand out more slots than remain in the block. */
409 n = n > n_adj_left ? n_adj_left : n;
411 error += fabs (nf - n);
/* Slots left over after rounding are given to the first next hop. */
415 nhs[0].weight += n_adj_left;
417 /* Average error per adjacency within lm->multipath_next_hop_error_tolerance for this block size? */
418 if (error <= lm->multipath_next_hop_error_tolerance*n_adj)
420 /* Truncate any next hops with zero weight. */
427 /* Save vector for next call. */
428 *normalized_next_hops = nhs;
/*
 * Encode a next-hop heap handle as a hash key: the result 1 + 2*handle is
 * always odd, which lets such keys be told apart from other key values
 * (see ip_next_hop_hash_key_get_next_hops, which treats non-handle keys as
 * pointers).
 */
433 ip_next_hop_hash_key_from_handle (uword handle)
434 { return 1 + 2*handle; }
/*
 * Predicate on a hash key k.  The body is not visible here; presumably it
 * tests for the odd encoding produced by ip_next_hop_hash_key_from_handle()
 * -- TODO confirm.
 */
437 ip_next_hop_hash_key_is_heap_handle (uword k)
/*
 * Recover the heap handle from hash key k.  Asserts that k is a
 * handle-encoded key first; the decoding statement itself is not visible
 * here.
 */
441 ip_next_hop_hash_key_get_heap_handle (uword k)
443 ASSERT (ip_next_hop_hash_key_is_heap_handle (k));
/*
 * Find, or optionally create, the multipath adjacency block that
 * corresponds to raw_next_hops.
 *
 * The next hops are normalized first and the normalized vector is used as
 * the lookup key in lm->multipath_adjacency_by_next_hops.  When nothing
 * matches and create_if_non_existent is set, a block of n_adj adjacencies
 * is allocated and filled so that each next hop contributes `weight`
 * copies of its own adjacency; both the normalized and the raw next-hop
 * vectors are stored in lm->next_hop_heap, and add callbacks are run.
 *
 * On the creation path the value returned is adj_heap_handle, which is also
 * the index used into lm->multipath_adjacencies.
 */
448 ip_multipath_adjacency_get (ip_lookup_main_t * lm,
449 ip_multipath_next_hop_t * raw_next_hops,
450 uword create_if_non_existent)
453 u32 i, j, n_adj, adj_index, adj_heap_handle;
454 ip_adjacency_t * adj, * copy_adj;
455 ip_multipath_next_hop_t * nh, * nhs;
456 ip_multipath_adjacency_t * madj;
458 n_adj = ip_multipath_normalize_next_hops (lm, raw_next_hops, &lm->next_hop_hash_lookup_key_normalized);
459 nhs = lm->next_hop_hash_lookup_key_normalized;
462 ASSERT (n_adj >= vec_len (raw_next_hops));
464 /* Use normalized next hops to see if we've seen a block equivalent to this one before. */
465 p = hash_get_mem (lm->multipath_adjacency_by_next_hops, nhs);
469 if (! create_if_non_existent)
472 adj = ip_add_adjacency (lm, /* copy_adj */ 0, n_adj, &adj_index);
473 adj_heap_handle = adj[0].heap_handle;
475 /* Fill in adjacencies in block based on corresponding next hop adjacencies. */
477 vec_foreach (nh, nhs)
479 copy_adj = ip_get_adjacency (lm, nh->next_hop_adj_index);
480 for (j = 0; j < nh->weight; j++)
482 adj[i] = copy_adj[0];
/* The struct copy above overwrote the block bookkeeping; restore it. */
483 adj[i].heap_handle = adj_heap_handle;
484 adj[i].n_adj = n_adj;
489 /* All adjacencies should have been initialized. */
492 vec_validate (lm->multipath_adjacencies, adj_heap_handle);
493 madj = vec_elt_at_index (lm->multipath_adjacencies, adj_heap_handle);
495 madj->adj_index = adj_index;
496 madj->n_adj_in_block = n_adj;
497 madj->reference_count = 0; /* caller will set to one. */
/* Keep a private copy of the normalized next hops in the next-hop heap. */
499 madj->normalized_next_hops.count = vec_len (nhs);
500 madj->normalized_next_hops.heap_offset
501 = heap_alloc (lm->next_hop_heap, vec_len (nhs),
502 madj->normalized_next_hops.heap_handle);
503 clib_memcpy (lm->next_hop_heap + madj->normalized_next_hops.heap_offset,
504 nhs, vec_bytes (nhs));
/* The hash key is the handle-encoded form, not a pointer to the vector. */
506 hash_set (lm->multipath_adjacency_by_next_hops,
507 ip_next_hop_hash_key_from_handle (madj->normalized_next_hops.heap_handle),
508 madj - lm->multipath_adjacencies);
/* The raw (unnormalized) next hops are stored too, for later add/del edits. */
510 madj->unnormalized_next_hops.count = vec_len (raw_next_hops);
511 madj->unnormalized_next_hops.heap_offset
512 = heap_alloc (lm->next_hop_heap, vec_len (raw_next_hops),
513 madj->unnormalized_next_hops.heap_handle);
514 clib_memcpy (lm->next_hop_heap + madj->unnormalized_next_hops.heap_offset,
515 raw_next_hops, vec_bytes (raw_next_hops));
517 ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
519 return adj_heap_handle;
522 /* Returns 0 for next hop not found. */
/*
 * Add a next hop to, delete a next hop from, or change the weight of a next
 * hop in the multipath adjacency old_mp_adj_index, and report the resulting
 * multipath adjacency through new_mp_adj_index[0] (~0 when there is none or
 * nothing changed).
 *
 * The new set of next hops is assembled in the reusable key vector
 * lm->next_hop_hash_lookup_key and resolved with
 * ip_multipath_adjacency_get().  Reference counts move from the old to the
 * new multipath adjacency, and the old one is freed when its count reaches
 * zero.
 */
524 ip_multipath_adjacency_add_del_next_hop (ip_lookup_main_t * lm,
526 u32 old_mp_adj_index,
527 u32 next_hop_adj_index,
529 u32 * new_mp_adj_index)
531 ip_multipath_adjacency_t * mp_old, * mp_new;
532 ip_multipath_next_hop_t * nh, * nhs, * hash_nhs;
540 /* If old adj is not multipath, we need to "convert" it by calling this
541 * function recursively */
542 if (old_mp_adj_index != ~0 && !ip_adjacency_is_multipath(lm, old_mp_adj_index))
544 ip_multipath_adjacency_add_del_next_hop(lm, /* is_del */ 0,
545 /* old_mp_adj_index */ ~0,
546 /* nh_adj_index */ old_mp_adj_index,
551 /* If old multipath adjacency is valid, find requested next hop. */
552 if (old_mp_adj_index < vec_len (lm->multipath_adjacencies)
553 && lm->multipath_adjacencies[old_mp_adj_index].normalized_next_hops.count > 0)
555 mp_old = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
557 nhs = vec_elt_at_index (lm->next_hop_heap, mp_old->unnormalized_next_hops.heap_offset);
558 n_nhs = mp_old->unnormalized_next_hops.count;
560 /* Linear search: ok since n_next_hops is small. */
561 for (i_nh = 0; i_nh < n_nhs; i_nh++)
562 if (nhs[i_nh].next_hop_adj_index == next_hop_adj_index)
565 /* Given next hop not found. */
566 if (i_nh >= n_nhs && is_del)
/* Reuse the cached key vector; resetting the length keeps its storage. */
570 hash_nhs = lm->next_hop_hash_lookup_key;
572 _vec_len (hash_nhs) = 0;
578 /* Prepare lookup key for multipath with target next hop deleted. */
580 vec_add (hash_nhs, nhs + 0, i_nh);
581 if (i_nh + 1 < n_nhs)
582 vec_add (hash_nhs, nhs + i_nh + 1, n_nhs - (i_nh + 1));
585 else /* it's an add. */
587 /* If next hop is already there with the same weight, we have nothing to do. */
588 if (i_nh < n_nhs && nhs[i_nh].weight == next_hop_weight)
590 new_mp_adj_index[0] = ~0;
594 /* Copy old next hops to lookup key vector. */
596 vec_add (hash_nhs, nhs, n_nhs);
600 /* Change weight of existing next hop. */
601 nh = vec_elt_at_index (hash_nhs, i_nh);
605 /* Add a new next hop. */
606 vec_add2 (hash_nhs, nh, 1);
607 nh->next_hop_adj_index = next_hop_adj_index;
610 /* Set weight for added or old next hop. */
611 nh->weight = next_hop_weight;
/* An empty key means the last next hop was removed: no new adjacency. */
614 if (vec_len (hash_nhs) > 0)
616 u32 tmp = ip_multipath_adjacency_get (lm, hash_nhs,
617 /* create_if_non_existent */ 1);
619 mp_new = vec_elt_at_index (lm->multipath_adjacencies, tmp);
621 /* Fetch again since pool may have moved. */
623 mp_old = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
626 new_mp_adj_index[0] = mp_new ? mp_new - lm->multipath_adjacencies : ~0;
/* Move one reference from the old multipath adjacency to the new one. */
628 if (mp_new != mp_old)
632 ASSERT (mp_old->reference_count > 0);
633 mp_old->reference_count -= 1;
636 mp_new->reference_count += 1;
639 if (mp_old && mp_old->reference_count == 0)
640 ip_multipath_adjacency_free (lm, mp_old);
643 /* Save key vector next call. */
644 lm->next_hop_hash_lookup_key = hash_nhs;
/*
 * Remove adjacency del_adj_index from every multipath adjacency whose
 * unnormalized next-hop list references it.
 *
 * For each affected multipath adjacency a replacement is obtained from
 * ip_multipath_adjacency_get() using the remaining next hops, an entry is
 * recorded in lm->adjacency_remap_table (1 + new adjacency index, or ~0 when
 * there is no replacement), lm->n_adjacency_remaps is bumped, and the old
 * multipath adjacency is freed.
 */
650 ip_multipath_del_adjacency (ip_lookup_main_t * lm, u32 del_adj_index)
652 ip_adjacency_t * adj = ip_get_adjacency (lm, del_adj_index);
653 ip_multipath_adjacency_t * madj, * new_madj;
654 ip_multipath_next_hop_t * nhs, * hash_nhs;
655 u32 i, n_nhs, madj_index, new_madj_index;
657 if (adj->heap_handle >= vec_len (lm->multipath_adjacencies))
/* Make the remap table large enough to be indexed by any adjacency index. */
660 vec_validate (lm->adjacency_remap_table, vec_len (lm->adjacency_heap) - 1);
662 for (madj_index = 0; madj_index < vec_len (lm->multipath_adjacencies); madj_index++)
664 madj = vec_elt_at_index (lm->multipath_adjacencies, madj_index);
/* n_adj_in_block == 0 identifies an unused multipath slot. */
665 if (madj->n_adj_in_block == 0)
668 nhs = heap_elt_at_index (lm->next_hop_heap, madj->unnormalized_next_hops.heap_offset);
669 n_nhs = madj->unnormalized_next_hops.count;
670 for (i = 0; i < n_nhs; i++)
671 if (nhs[i].next_hop_adj_index == del_adj_index)
674 /* del_adj_index not found in unnormalized_next_hops? We're done. */
/* Build the key from every next hop except entry i. */
681 hash_nhs = lm->next_hop_hash_lookup_key;
683 _vec_len (hash_nhs) = 0;
685 vec_add (hash_nhs, nhs + 0, i);
687 vec_add (hash_nhs, nhs + i + 1, n_nhs - (i + 1));
689 new_madj_index = ip_multipath_adjacency_get (lm, hash_nhs, /* create_if_non_existent */ 1);
691 lm->next_hop_hash_lookup_key = hash_nhs;
693 if (new_madj_index == madj_index)
696 new_madj = vec_elt_at_index (lm->multipath_adjacencies, new_madj_index);
699 lm->adjacency_remap_table[madj->adj_index] = new_madj ? 1 + new_madj->adj_index : ~0;
700 lm->n_adjacency_remaps += 1;
701 ip_multipath_adjacency_free (lm, madj);
/*
 * Release multipath adjacency a: remove its entry from
 * lm->multipath_adjacency_by_next_hops, free both next-hop vectors held in
 * lm->next_hop_heap, delete the underlying adjacency block, and zero the
 * descriptor so the slot reads as unused.
 *
 * The third argument to ip_del_adjacency2() is true only when the
 * reference count is zero.
 */
706 ip_multipath_adjacency_free (ip_lookup_main_t * lm,
707 ip_multipath_adjacency_t * a)
709 hash_unset (lm->multipath_adjacency_by_next_hops,
710 ip_next_hop_hash_key_from_handle (a->normalized_next_hops.heap_handle));
711 heap_dealloc (lm->next_hop_heap, a->normalized_next_hops.heap_handle);
712 heap_dealloc (lm->next_hop_heap, a->unnormalized_next_hops.heap_handle);
714 ip_del_adjacency2 (lm, a->adj_index, a->reference_count == 0);
715 memset (a, 0, sizeof (a[0]));
/*
 * Resolve hash key k to a next-hop array and its length.
 *
 * A handle-encoded key refers to an object in lm->next_hop_heap; any other
 * key is interpreted as a pointer to a next-hop vector.  The element count
 * is stored through n_next_hops.
 */
718 always_inline ip_multipath_next_hop_t *
719 ip_next_hop_hash_key_get_next_hops (ip_lookup_main_t * lm, uword k,
722 ip_multipath_next_hop_t * nhs;
724 if (ip_next_hop_hash_key_is_heap_handle (k))
726 uword handle = ip_next_hop_hash_key_get_heap_handle (k);
727 nhs = heap_elt_with_handle (lm->next_hop_heap, handle);
728 n_nhs = heap_len (lm->next_hop_heap, handle);
732 nhs = uword_to_pointer (k, ip_multipath_next_hop_t *);
733 n_nhs = vec_len (nhs);
735 *n_next_hops = n_nhs;
/*
 * Hash function for lm->multipath_adjacency_by_next_hops: hashes the bytes
 * of the next-hop array that key0 resolves to, seeded with the number of
 * next hops.  The lookup main is recovered from h->user.
 */
740 ip_next_hop_hash_key_sum (hash_t * h, uword key0)
742 ip_lookup_main_t * lm = uword_to_pointer (h->user, ip_lookup_main_t *);
743 ip_multipath_next_hop_t * k0;
746 k0 = ip_next_hop_hash_key_get_next_hops (lm, key0, &n0);
747 return hash_memory (k0, n0 * sizeof (k0[0]), /* seed */ n0);
/*
 * Key equality function for lm->multipath_adjacency_by_next_hops: two keys
 * are equal when they resolve to next-hop arrays of the same length with
 * byte-identical contents.
 */
751 ip_next_hop_hash_key_equal (hash_t * h, uword key0, uword key1)
753 ip_lookup_main_t * lm = uword_to_pointer (h->user, ip_lookup_main_t *);
754 ip_multipath_next_hop_t * k0, * k1;
757 k0 = ip_next_hop_hash_key_get_next_hops (lm, key0, &n0);
758 k1 = ip_next_hop_hash_key_get_next_hops (lm, key1, &n1);
760 return n0 == n1 && ! memcmp (k0, k1, n0 * sizeof (k0[0]));
/*
 * Add an interface address to, or delete one from, the per-interface
 * doubly linked address list kept in lm->if_address_pool.
 *
 * The address is looked up in lm->address_to_if_address_index by addr_fib.
 * Errors are reported through both vnm->api_errno and a clib error:
 *   - VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH when address_length is 0 or
 *     differs from the length of an existing entry;
 *   - VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE on the not-found path.
 * When result_if_address_index is non-null it receives the pool index of
 * the affected address (~0 after a delete).  Returns 0 on success.
 */
764 ip_interface_address_add_del (ip_lookup_main_t * lm,
769 u32 * result_if_address_index)
771 vnet_main_t * vnm = vnet_get_main();
772 ip_interface_address_t * a, * prev, * next;
773 uword * p = mhash_get (&lm->address_to_if_address_index, addr_fib);
/* New per-interface list heads are initialized to ~0 (empty list). */
775 vec_validate_init_empty (lm->if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
776 a = p ? pool_elt_at_index (lm->if_address_pool, p[0]) : 0;
778 /* Verify given length. */
779 if ((a && (address_length != a->address_length)) || (address_length == 0))
781 vnm->api_errno = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH;
782 return clib_error_create
783 ( "%U wrong length (expected %d) for interface %U",
784 lm->format_address_and_length, addr_fib,
785 address_length, a? a->address_length : -1,
786 format_vnet_sw_if_index_name, vnm, sw_if_index);
793 vnet_sw_interface_t * si = vnet_get_sw_interface (vnm, sw_if_index);
794 vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
795 return clib_error_create ("%U not found for interface %U",
796 lm->format_address_and_length,
797 addr_fib, address_length,
798 format_vnet_sw_interface_name, vnm, si);
/* Unlink a from its neighbours in the per-interface list. */
801 if (a->prev_this_sw_interface != ~0)
803 prev = pool_elt_at_index (lm->if_address_pool, a->prev_this_sw_interface);
804 prev->next_this_sw_interface = a->next_this_sw_interface;
806 if (a->next_this_sw_interface != ~0)
808 next = pool_elt_at_index (lm->if_address_pool, a->next_this_sw_interface);
809 next->prev_this_sw_interface = a->prev_this_sw_interface;
/* a was the list head: the head moves to its successor (possibly ~0). */
811 if(a->prev_this_sw_interface == ~0)
812 lm->if_address_pool_index_by_sw_if_index[sw_if_index] = a->next_this_sw_interface;
815 if ((a->next_this_sw_interface == ~0) && (a->prev_this_sw_interface == ~0))
816 lm->if_address_pool_index_by_sw_if_index[sw_if_index] = ~0;
818 mhash_unset (&lm->address_to_if_address_index, addr_fib,
820 pool_put (lm->if_address_pool, a);
822 if (result_if_address_index)
823 *result_if_address_index = ~0;
828 u32 pi; /* previous index */
830 u32 hi; /* head index */
832 pool_get (lm->if_address_pool, a);
/* Every field starts as all-ones, so link fields read as ~0 (none). */
833 memset (a, ~0, sizeof (a[0]));
834 ai = a - lm->if_address_pool;
836 hi = pi = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
/* Walk to the tail of the interface list; the new address is appended. */
838 while (pi != (u32)~0)
840 prev = pool_elt_at_index(lm->if_address_pool, pi);
841 pi = prev->next_this_sw_interface;
843 pi = prev ? prev - lm->if_address_pool : (u32)~0;
845 a->address_key = mhash_set (&lm->address_to_if_address_index,
846 addr_fib, ai, /* old_value */ 0);
847 a->address_length = address_length;
848 a->sw_if_index = sw_if_index;
850 a->prev_this_sw_interface = pi;
851 a->next_this_sw_interface = ~0;
853 prev->next_this_sw_interface = ai;
/* Keep the existing head, or make the new address the head of an empty list. */
855 lm->if_address_pool_index_by_sw_if_index[sw_if_index] =
856 (hi != ~0) ? hi : ai;
857 if (result_if_address_index)
858 *result_if_address_index = ai;
862 if (result_if_address_index)
863 *result_if_address_index = a - lm->if_address_pool;
867 return /* no error */ 0;
/*
 * Initialize an ip_lookup_main_t for IPv4 (is_ip6 == 0) or IPv6.
 *
 * Sets up the signature hash, bootstraps the adjacency heap and creates the
 * miss, drop and local adjacencies, creates the multipath hash, sets the
 * multipath error tolerance, initializes the address hash with the key size
 * of the selected address family, fills the per-protocol dispatch tables,
 * and finally registers custom adjacencies.
 */
870 void ip_lookup_init (ip_lookup_main_t * lm, u32 is_ip6)
872 ip_adjacency_t * adj;
873 ip_adjacency_t template_adj;
875 /* ensure that adjacency is cacheline aligned and sized */
876 ASSERT(STRUCT_OFFSET_OF(ip_adjacency_t, cacheline0) == 0);
877 ASSERT(STRUCT_OFFSET_OF(ip_adjacency_t, cacheline1) == CLIB_CACHE_LINE_BYTES);
879 lm->adj_index_by_signature = hash_create (0, sizeof (uword));
880 memset (&template_adj, 0, sizeof (template_adj));
882 /* Preallocate three "special" adjacencies */
883 lm->adjacency_heap = aa_bootstrap (0, 3 /* n=1 free items */);
885 /* Hand-craft special miss adjacency to use when nothing matches in the
886 routing table. Same for drop adjacency. */
887 adj = ip_add_adjacency (lm, /* template */ 0, /* n-adj */ 1,
888 &lm->miss_adj_index);
889 adj->lookup_next_index = IP_LOOKUP_NEXT_MISS;
890 ASSERT (lm->miss_adj_index == IP_LOOKUP_MISS_ADJ_INDEX);
892 /* Make the "drop" adj sharable */
893 template_adj.lookup_next_index = IP_LOOKUP_NEXT_DROP;
894 adj = ip_add_adjacency (lm, &template_adj, /* n-adj */ 1,
895 &lm->drop_adj_index);
897 /* Make the "local" adj sharable */
898 template_adj.lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
899 template_adj.if_address_index = ~0;
900 adj = ip_add_adjacency (lm, &template_adj, /* n-adj */ 1,
901 &lm->local_adj_index);
/* Default FIB result size when the caller has not set one. */
903 if (! lm->fib_result_n_bytes)
904 lm->fib_result_n_bytes = sizeof (uword);
906 lm->multipath_adjacency_by_next_hops
907 = hash_create2 (/* elts */ 0,
908 /* user */ pointer_to_uword (lm),
909 /* value_bytes */ sizeof (uword),
910 ip_next_hop_hash_key_sum,
911 ip_next_hop_hash_key_equal,
912 /* format pair/arg */
915 /* 1% max error tolerance for multipath. */
916 lm->multipath_next_hop_error_tolerance = .01;
921 lm->format_address_and_length = format_ip6_address_and_length;
922 mhash_init (&lm->address_to_if_address_index, sizeof (uword),
923 sizeof (ip6_address_fib_t));
927 lm->format_address_and_length = format_ip4_address_and_length;
928 mhash_init (&lm->address_to_if_address_index, sizeof (uword),
929 sizeof (ip4_address_fib_t));
935 /* Setup all IP protocols to be punted and builtin-unknown. */
936 for (i = 0; i < 256; i++)
938 lm->local_next_by_ip_protocol[i] = IP_LOCAL_NEXT_PUNT;
939 lm->builtin_protocol_by_ip_protocol[i] = IP_BUILTIN_PROTOCOL_UNKNOWN;
/* UDP and the ICMP variant of this address family get dedicated handling. */
942 lm->local_next_by_ip_protocol[IP_PROTOCOL_UDP] = IP_LOCAL_NEXT_UDP_LOOKUP;
943 lm->local_next_by_ip_protocol[is_ip6 ? IP_PROTOCOL_ICMP6 : IP_PROTOCOL_ICMP] = IP_LOCAL_NEXT_ICMP;
944 lm->builtin_protocol_by_ip_protocol[IP_PROTOCOL_UDP] = IP_BUILTIN_PROTOCOL_UDP;
945 lm->builtin_protocol_by_ip_protocol[is_ip6 ? IP_PROTOCOL_ICMP6 : IP_PROTOCOL_ICMP] = IP_BUILTIN_PROTOCOL_ICMP;
/* ip_init_registered_adjacencies() takes is_ip4, hence the negation. */
948 ip_init_registered_adjacencies(!is_ip6);
/*
 * Format function: takes a u32 flow hash configuration from args and
 * appends the name of every set bit, each followed by a space.  The bit
 * names and masks come from the foreach_flow_hash_bit list via the local
 * _() macro.
 */
951 u8 * format_ip_flow_hash_config (u8 * s, va_list * args)
953 u32 flow_hash_config = va_arg (*args, u32);
955 #define _(n,v) if (flow_hash_config & v) s = format (s, "%s ", #n);
956 foreach_flow_hash_bit;
/*
 * Format function: takes (ip_lookup_main_t *, lookup next index) from args
 * and appends a short name for the lookup next.
 *
 * A registered adjacency with a node name is printed as "<node_name>:";
 * the built-in next indices map to the fixed strings in the switch below.
 *
 * NOTE(review): vec_validate() may grow lm->registered_adjacencies as a
 * side effect of formatting -- confirm this is intended.
 */
962 u8 * format_ip_lookup_next (u8 * s, va_list * args)
964 ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *);
965 ip_lookup_next_t n = va_arg (*args, u32);
966 ip_adj_register_t *reg;
973 vec_validate(lm->registered_adjacencies, n);
974 reg = vec_elt_at_index(lm->registered_adjacencies, n);
975 if (reg->node_name) {
976 s = format (s, "%s:", reg->node_name);
980 case IP_LOOKUP_NEXT_MISS: t = "miss"; break;
981 case IP_LOOKUP_NEXT_DROP: t = "drop"; break;
982 case IP_LOOKUP_NEXT_PUNT: t = "punt"; break;
983 case IP_LOOKUP_NEXT_LOCAL: t = "local"; break;
984 case IP_LOOKUP_NEXT_ARP: t = "arp"; break;
985 case IP_LOOKUP_NEXT_CLASSIFY: t = "classify"; break;
986 case IP_LOOKUP_NEXT_MAP: t = "map"; break;
987 case IP_LOOKUP_NEXT_MAP_T: t = "map-t"; break;
988 case IP_LOOKUP_NEXT_INDIRECT: t="indirect"; break;
989 case IP_LOOKUP_NEXT_REWRITE:
/* Append the chosen name without a terminating NUL. */
994 vec_add (s, t, strlen (t));
/*
 * Format function: takes (ip_lookup_main_t *, interface address pool index)
 * from args and prints the address with its prefix length, using the IPv6
 * or the IPv4 formatter (the condition selecting between them is not
 * visible here).
 */
999 static u8 * format_ip_interface_address (u8 * s, va_list * args)
1001 ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *);
1002 u32 if_address_index = va_arg (*args, u32);
1003 ip_interface_address_t * ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
1004 void * a = ip_interface_address_get_address (lm, ia);
1007 return format (s, "%U", format_ip6_address_and_length, a, ia->address_length);
1009 return format (s, "%U", format_ip4_address_and_length, a, ia->address_length);
/*
 * Append format function fp to lm->special_adjacency_format_functions.
 * rv is set to the vector length before the append, i.e. the index at which
 * fp is stored.  On first use a null entry is pushed so that index 0 never
 * refers to a real function.
 */
1012 u32 vnet_register_special_adjacency_format_function
1013 (ip_lookup_main_t * lm, format_function_t * fp)
1017 * Initialize the format function registration vector
1018 * Index 0 must be invalid, to avoid finding and fixing trivial bugs
1019 * all over the place
1021 if (vec_len (lm->special_adjacency_format_functions) == 0)
1023 vec_add1 (lm->special_adjacency_format_functions,
1024 (format_function_t *) 0);
1027 rv = vec_len (lm->special_adjacency_format_functions);
1028 vec_add1 (lm->special_adjacency_format_functions, fp);
// format_ip_adjacency: takes (vnet_main_t *, ip_lookup_main_t *, u32 adj_index)
// from args.  A registered adjacency type is printed through its own reg->fn;
// otherwise the output depends on adj->lookup_next_index, followed by the
// explicit fib index, share count and signature-chain link when they are set.
1032 /** @brief Pretty print helper function for formatting specific adjacencies.
1033 @param s - input string to format
1034 @param args - other args passed to format function such as:
1039 u8 * format_ip_adjacency (u8 * s, va_list * args)
1041 vnet_main_t * vnm = va_arg (*args, vnet_main_t *);
1042 ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *);
1043 u32 adj_index = va_arg (*args, u32);
1044 ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
1045 ip_adj_register_t *reg;
// Registered adjacency types are looked up by their lookup next index.
1047 if (adj->lookup_next_index < vec_len (lm->registered_adjacencies))
1049 reg = vec_elt_at_index(lm->registered_adjacencies,
1050 adj->lookup_next_index);
1053 s = format(s, " %U", reg->fn, lm, adj);
1058 switch (adj->lookup_next_index)
1060 case IP_LOOKUP_NEXT_REWRITE:
1061 s = format (s, "%U",
1062 format_vnet_rewrite,
1063 vnm->vlib_main, &adj->rewrite_header,
1064 sizeof (adj->rewrite_data));
1067 case IP_LOOKUP_NEXT_ARP:
1068 if (adj->if_address_index != ~0)
1069 s = format (s, " %U", format_ip_interface_address, lm,
1070 adj->if_address_index);
// Print the next hop only when the 128-bit address is non-zero.
1071 if (adj->arp.next_hop.ip6.as_u64[0] || adj->arp.next_hop.ip6.as_u64[1])
1072 s = format (s, " via %U", format_ip46_address,
1073 &adj->arp.next_hop, IP46_TYPE_ANY);
1075 case IP_LOOKUP_NEXT_LOCAL:
1076 if (adj->if_address_index != ~0)
1077 s = format (s, " %U", format_ip_interface_address, lm,
1078 adj->if_address_index);
1081 case IP_LOOKUP_NEXT_CLASSIFY:
1082 s = format (s, " table %d", adj->classify.table_index);
1084 case IP_LOOKUP_NEXT_INDIRECT:
1085 s = format (s, " via %U", format_ip46_address,
1086 &adj->indirect.next_hop, IP46_TYPE_ANY);
1090 s = format (s, " unknown %d", adj->lookup_next_index);
1095 if (adj->explicit_fib_index != ~0 && adj->explicit_fib_index != 0)
1096 s = format (s, " lookup fib index %d", adj->explicit_fib_index);
// share_count counts additional users, so the number printed is one higher.
1097 if (adj->share_count > 0)
1098 s = format (s, " shared %d", adj->share_count + 1);
1099 if (adj->next_adj_with_signature)
1100 s = format (s, " next_adj_with_signature %d", adj->next_adj_with_signature);
// Format function: takes (vnet_main_t *, ip_lookup_main_t *, u32 adj_index,
// u8 * packet_data, u32 n_packet_data_bytes) from args.  For a rewrite
// adjacency the packet data is printed with format_vnet_rewrite_header using
// the rewrite header of the adjacency; other cases are not visible here.
1105 u8 * format_ip_adjacency_packet_data (u8 * s, va_list * args)
1107 vnet_main_t * vnm = va_arg (*args, vnet_main_t *);
1108 ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *);
1109 u32 adj_index = va_arg (*args, u32);
1110 u8 * packet_data = va_arg (*args, u8 *);
1111 u32 n_packet_data_bytes = va_arg (*args, u32);
1112 ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
1114 switch (adj->lookup_next_index)
1116 case IP_LOOKUP_NEXT_REWRITE:
1117 s = format (s, "%U",
1118 format_vnet_rewrite_header,
1119 vnm->vlib_main, &adj->rewrite_header, packet_data, n_packet_data_bytes);
// Unformat function: parses one of the keywords "drop", "punt", "local",
// "arp" or "classify" and maps it to the matching IP_LOOKUP_NEXT_ value.
// The destination is the ip_lookup_next_t pointer taken from args (the store
// through it and the failure path are not visible here).
1129 static uword unformat_ip_lookup_next (unformat_input_t * input, va_list * args)
1131 ip_lookup_next_t * result = va_arg (*args, ip_lookup_next_t *);
1134 if (unformat (input, "drop"))
1135 n = IP_LOOKUP_NEXT_DROP;
1137 else if (unformat (input, "punt"))
1138 n = IP_LOOKUP_NEXT_PUNT;
1140 else if (unformat (input, "local"))
1141 n = IP_LOOKUP_NEXT_LOCAL;
1143 else if (unformat (input, "arp"))
1144 n = IP_LOOKUP_NEXT_ARP;
1146 else if (unformat (input, "classify"))
1147 n = IP_LOOKUP_NEXT_CLASSIFY;
// Unformat function: takes (vlib_main_t *, ip_adjacency_t * adj, u32 node_index)
// from args and fills in adj from the input.  Three input forms are visible:
//   "arp <interface> <address>"  - interface route via an ARP adjacency;
//   a lookup-next keyword        - see unformat_ip_lookup_next;
//   rewrite data                 - parsed by unformat_vnet_rewrite.
// The address family is derived by comparing node_index with
// ip6_rewrite_node.index.
1156 static uword unformat_ip_adjacency (unformat_input_t * input, va_list * args)
1158 vlib_main_t * vm = va_arg (*args, vlib_main_t *);
1159 ip_adjacency_t * adj = va_arg (*args, ip_adjacency_t *);
1160 u32 node_index = va_arg (*args, u32);
1161 vnet_main_t * vnm = vnet_get_main();
1162 u32 sw_if_index, is_ip6;
1164 ip_lookup_next_t next;
1166 is_ip6 = node_index == ip6_rewrite_node.index;
1167 adj->rewrite_header.node_index = node_index;
1168 adj->explicit_fib_index = ~0;
1170 if (unformat (input, "arp %U %U",
1171 unformat_vnet_sw_interface, vnm, &sw_if_index,
1172 unformat_ip46_address, &a46, is_ip6?IP46_TYPE_IP6:IP46_TYPE_IP4))
1174 ip_lookup_main_t * lm = is_ip6 ? &ip6_main.lookup_main : &ip4_main.lookup_main;
1175 ip_adjacency_t * a_adj;
// Look up the adjacency currently covering the given address.
1179 adj_index = ip6_fib_lookup (&ip6_main, sw_if_index, &a46.ip6);
1181 adj_index = ip4_fib_lookup (&ip4_main, sw_if_index, &a46.ip4);
1183 a_adj = ip_get_adjacency (lm, adj_index);
// The covering adjacency must belong to the interface that was named.
1185 if (a_adj->rewrite_header.sw_if_index != sw_if_index)
1189 ip6_adjacency_set_interface_route (vnm, adj, sw_if_index, a_adj->if_address_index);
1191 ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a_adj->if_address_index);
1194 else if (unformat_user (input, unformat_ip_lookup_next, &next))
1196 adj->lookup_next_index = next;
1197 adj->if_address_index = ~0;
// For "local" an interface address index may follow; the result is ignored, so it is optional.
1198 if (next == IP_LOOKUP_NEXT_LOCAL)
1199 (void) unformat (input, "%d", &adj->if_address_index);
1200 else if (next == IP_LOOKUP_NEXT_CLASSIFY)
1202 if (!unformat (input, "%d", &adj->classify.table_index))
1204 clib_warning ("classify adj must specify table index");
1208 else if (next == IP_LOOKUP_NEXT_DROP)
1210 adj->rewrite_header.node_index = 0;
1214 else if (unformat_user (input,
1215 unformat_vnet_rewrite,
1216 vm, &adj->rewrite_header, sizeof (adj->rewrite_data)))
1217 adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
1226 vnet_ip_route_cmd (vlib_main_t * vm, unformat_input_t * main_input, vlib_cli_command_t * cmd)
1228 vnet_main_t * vnm = vnet_get_main();
1229 clib_error_t * error = 0;
1230 u32 table_id, is_del;
1231 u32 weight, * weights = 0;
1232 u32 * table_ids = 0;
1233 u32 sw_if_index, * sw_if_indices = 0;
1234 ip4_address_t ip4_addr, * ip4_dst_addresses = 0, * ip4_via_next_hops = 0;
1235 ip6_address_t ip6_addr, * ip6_dst_addresses = 0, * ip6_via_next_hops = 0;
1236 u32 dst_address_length, * dst_address_lengths = 0;
1237 ip_adjacency_t parse_adj, * add_adj = 0;
1238 unformat_input_t _line_input, * line_input = &_line_input;
1246 /* Get a line of input. */
1247 if (! unformat_user (main_input, unformat_line_input, line_input))
1250 memset(&parse_adj, 0, sizeof (parse_adj));
1252 while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
1254 if (unformat (line_input, "table %d", &table_id))
1256 else if (unformat (line_input, "del"))
1258 else if (unformat (line_input, "add"))
1260 else if (unformat (line_input, "count %f", &count))
1263 else if (unformat (line_input, "%U/%d",
1264 unformat_ip4_address, &ip4_addr,
1265 &dst_address_length))
1267 vec_add1 (ip4_dst_addresses, ip4_addr);
1268 vec_add1 (dst_address_lengths, dst_address_length);
1271 else if (unformat (line_input, "%U/%d",
1272 unformat_ip6_address, &ip6_addr,
1273 &dst_address_length))
1275 vec_add1 (ip6_dst_addresses, ip6_addr);
1276 vec_add1 (dst_address_lengths, dst_address_length);
1279 else if (unformat (line_input, "via %U %U weight %u",
1280 unformat_ip4_address, &ip4_addr,
1281 unformat_vnet_sw_interface, vnm, &sw_if_index,
1284 vec_add1 (ip4_via_next_hops, ip4_addr);
1285 vec_add1 (sw_if_indices, sw_if_index);
1286 vec_add1 (weights, weight);
1287 vec_add1 (table_ids, (u32)~0);
1290 else if (unformat (line_input, "via %U %U weight %u",
1291 unformat_ip6_address, &ip6_addr,
1292 unformat_vnet_sw_interface, vnm, &sw_if_index,
1295 vec_add1 (ip6_via_next_hops, ip6_addr);
1296 vec_add1 (sw_if_indices, sw_if_index);
1297 vec_add1 (weights, weight);
1298 vec_add1 (table_ids, (u32)~0);
1301 else if (unformat (line_input, "via %U %U",
1302 unformat_ip4_address, &ip4_addr,
1303 unformat_vnet_sw_interface, vnm, &sw_if_index))
1305 vec_add1 (ip4_via_next_hops, ip4_addr);
1306 vec_add1 (sw_if_indices, sw_if_index);
1307 vec_add1 (weights, 1);
1308 vec_add1 (table_ids, (u32)~0);
1311 else if (unformat (line_input, "via %U %U",
1312 unformat_ip6_address, &ip6_addr,
1313 unformat_vnet_sw_interface, vnm, &sw_if_index))
1315 vec_add1 (ip6_via_next_hops, ip6_addr);
1316 vec_add1 (sw_if_indices, sw_if_index);
1317 vec_add1 (weights, 1);
1318 vec_add1 (table_ids, (u32)~0);
1320 else if (unformat (line_input, "via %U",
1321 unformat_ip4_address, &ip4_addr))
1323 vec_add1 (ip4_via_next_hops, ip4_addr);
1324 vec_add1 (sw_if_indices, (u32)~0);
1325 vec_add1 (weights, 1);
1326 vec_add1 (table_ids, table_id);
1328 else if (unformat (line_input, "via %U",
1329 unformat_ip6_address, &ip6_addr))
1331 vec_add1 (ip6_via_next_hops, ip6_addr);
1332 vec_add1 (sw_if_indices, (u32)~0);
1333 vec_add1 (weights, 1);
1334 vec_add1 (table_ids, (u32)table_id);
1337 else if (vec_len (ip4_dst_addresses) > 0
1338 && unformat (line_input, "via %U",
1339 unformat_ip_adjacency, vm, &parse_adj, ip4_rewrite_node.index))
1340 vec_add1 (add_adj, parse_adj);
1342 else if (vec_len (ip6_dst_addresses) > 0
1343 && unformat (line_input, "via %U",
1344 unformat_ip_adjacency, vm, &parse_adj, ip6_rewrite_node.index))
1345 vec_add1 (add_adj, parse_adj);
1346 else if (unformat (line_input, "lookup in table %d", &outer_table_id))
1350 if (vec_len (ip4_dst_addresses) > 0)
1351 p = hash_get (ip4_main.fib_index_by_table_id, outer_table_id);
1353 p = hash_get (ip6_main.fib_index_by_table_id, outer_table_id);
1357 error = clib_error_return (0, "Nonexistent outer table id %d",
1362 parse_adj.lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1363 parse_adj.explicit_fib_index = p[0];
1364 vec_add1 (add_adj, parse_adj);
1368 error = unformat_parse_error (line_input);
1373 unformat_free (line_input);
1375 if (vec_len (ip4_dst_addresses) + vec_len (ip6_dst_addresses) == 0)
1377 error = clib_error_return (0, "expected ip4/ip6 destination address/length.");
1381 if (vec_len (ip4_dst_addresses) > 0 && vec_len (ip6_dst_addresses) > 0)
1383 error = clib_error_return (0, "mixed ip4/ip6 address/length.");
1387 if (vec_len (ip4_dst_addresses) > 0 && vec_len (ip6_via_next_hops) > 0)
1389 error = clib_error_return (0, "ip4 destinations with ip6 next hops.");
1393 if (vec_len (ip6_dst_addresses) > 0 && vec_len (ip4_via_next_hops) > 0)
1395 error = clib_error_return (0, "ip6 destinations with ip4 next hops.");
1399 if (! is_del && vec_len (add_adj) + vec_len (weights) == 0)
1401 error = clib_error_return (0, "no next hops or adjacencies to add.");
1407 ip4_main_t * im4 = &ip4_main;
1408 ip6_main_t * im6 = &ip6_main;
1410 for (i = 0; i < vec_len (ip4_dst_addresses); i++)
1412 ip4_add_del_route_args_t a;
1414 memset (&a, 0, sizeof (a));
1415 a.flags = IP4_ROUTE_FLAG_TABLE_ID;
1416 a.table_index_or_table_id = table_id;
1417 a.dst_address = ip4_dst_addresses[i];
1418 a.dst_address_length = dst_address_lengths[i];
1423 if (vec_len (ip4_via_next_hops) == 0)
1425 uword * dst_hash, * dst_result;
1426 u32 dst_address_u32;
1429 fib = find_ip4_fib_by_table_index_or_id (im4, table_id,
1430 0 /* by table id */);
1432 a.flags |= IP4_ROUTE_FLAG_DEL;
1433 dst_address_u32 = a.dst_address.as_u32
1434 & im4->fib_masks[a.dst_address_length];
1437 fib->adj_index_by_dst_address[a.dst_address_length];
1438 dst_result = hash_get (dst_hash, dst_address_u32);
1440 a.adj_index = dst_result[0];
1443 clib_warning ("%U/%d not in FIB",
1444 format_ip4_address, &a.dst_address,
1445 a.dst_address_length);
1449 ip4_add_del_route (im4, &a);
1450 ip4_maybe_remap_adjacencies (im4, table_id,
1451 IP4_ROUTE_FLAG_TABLE_ID);
1455 u32 i, j, n, f, incr;
1456 ip4_address_t dst = a.dst_address;
1459 t[0] = vlib_time_now (vm);
1460 incr = 1<<(32 - a.dst_address_length);
1461 for (i = 0; i < n; i++)
1463 f = i + 1 < n ? IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP : 0;
1464 a.dst_address = dst;
1465 for (j = 0; j < vec_len (ip4_via_next_hops); j++)
1467 if (table_ids[j] != (u32)~0)
1469 uword * p = hash_get (im4->fib_index_by_table_id,
1473 clib_warning ("no such FIB table %d",
1477 table_ids[j] = p[0];
1480 ip4_add_del_route_next_hop (im4,
1481 IP4_ROUTE_FLAG_DEL | f,
1483 a.dst_address_length,
1484 &ip4_via_next_hops[j],
1486 weights[j], (u32)~0,
1487 table_ids[j] /* fib index */);
1489 dst.as_u32 = clib_host_to_net_u32 (incr + clib_net_to_host_u32 (dst.as_u32));
1491 t[1] = vlib_time_now (vm);
1493 vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0]));
1498 if (vec_len (add_adj) > 0)
1500 a.flags |= IP4_ROUTE_FLAG_ADD;
1501 a.add_adj = add_adj;
1502 a.n_add_adj = vec_len (add_adj);
1504 ip4_add_del_route (im4, &a);
1506 else if (vec_len (ip4_via_next_hops) > 0)
1508 u32 i, j, n, f, incr;
1509 ip4_address_t dst = a.dst_address;
1512 t[0] = vlib_time_now (vm);
1513 incr = 1<<(32 - a.dst_address_length);
1514 for (i = 0; i < n; i++)
1516 f = i + 1 < n ? IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP : 0;
1517 a.dst_address = dst;
1518 for (j = 0; j < vec_len (ip4_via_next_hops); j++)
1520 if (table_ids[j] != (u32)~0)
1522 uword * p = hash_get (im4->fib_index_by_table_id,
1526 clib_warning ("no such FIB table %d",
1530 table_ids[j] = p[0];
1532 ip4_add_del_route_next_hop (im4,
1533 IP4_ROUTE_FLAG_ADD | f,
1535 a.dst_address_length,
1536 &ip4_via_next_hops[j],
1538 weights[j], (u32)~0,
1539 table_ids[j] /* fib index */);
1541 dst.as_u32 = clib_host_to_net_u32 (incr + clib_net_to_host_u32 (dst.as_u32));
1543 t[1] = vlib_time_now (vm);
1545 vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0]));
1550 for (i = 0; i < vec_len (ip6_dst_addresses); i++)
1552 ip6_add_del_route_args_t a;
1555 memset (&a, 0, sizeof (a));
1556 a.flags = IP6_ROUTE_FLAG_TABLE_ID;
1557 a.table_index_or_table_id = table_id;
1558 a.dst_address = ip6_dst_addresses[i];
1559 a.dst_address_length = dst_address_lengths[i];
1564 if (vec_len (ip6_via_next_hops) == 0)
1566 BVT(clib_bihash_kv) kv, value;
1567 ip6_address_t dst_address;
1570 fib = find_ip6_fib_by_table_index_or_id (im6, table_id,
1571 0 /* by table id */);
1573 a.flags |= IP4_ROUTE_FLAG_DEL;
1575 dst_address = ip6_dst_addresses[i];
1577 ip6_address_mask (&dst_address,
1578 &im6->fib_masks[dst_address_length]);
1580 kv.key[0] = dst_address.as_u64[0];
1581 kv.key[1] = dst_address.as_u64[1];
1582 kv.key[2] = ((u64)(fib - im6->fibs)<<32)
1583 | a.dst_address_length;
1585 if (BV(clib_bihash_search)(&im6->ip6_lookup_table,
1587 a.adj_index = value.value;
1590 clib_warning ("%U/%d not in FIB",
1591 format_ip6_address, &a.dst_address,
1592 a.dst_address_length);
1596 a.flags |= IP6_ROUTE_FLAG_DEL;
1597 ip6_add_del_route (im6, &a);
1598 ip6_maybe_remap_adjacencies (im6, table_id,
1599 IP6_ROUTE_FLAG_TABLE_ID);
1604 for (i = 0; i < vec_len (ip6_via_next_hops); i++)
1606 ip6_add_del_route_next_hop (im6,
1609 a.dst_address_length,
1610 &ip6_via_next_hops[i],
1612 weights[i], (u32)~0,
1613 table_ids[i] /* fib index */);
1619 if (vec_len (add_adj) > 0)
1621 a.flags |= IP6_ROUTE_FLAG_ADD;
1622 a.add_adj = add_adj;
1623 a.n_add_adj = vec_len (add_adj);
1625 ip6_add_del_route (im6, &a);
1627 else if (vec_len (ip6_via_next_hops) > 0)
1630 for (i = 0; i < vec_len (ip6_via_next_hops); i++)
1632 ip6_add_del_route_next_hop (im6,
1635 a.dst_address_length,
1636 &ip6_via_next_hops[i],
1638 weights[i], (u32)~0,
1649 vec_free (dst_address_lengths);
1650 vec_free (ip4_dst_addresses);
1651 vec_free (ip6_dst_addresses);
1652 vec_free (ip4_via_next_hops);
1653 vec_free (ip6_via_next_hops);
/*
 * CLI registration for the top-level IP command. Only help text is visible
 * in this listing (no .path or .function); presumably a parent node for
 * sub-commands -- confirm.
 */
1657 VLIB_CLI_COMMAND (vlib_cli_ip_command, static) = {
1659 .short_help = "Internet protocol (IP) commands",
/*
 * CLI registration for the IP "show" command group. Only help text is
 * visible in this listing; presumably a parent node -- confirm.
 */
1662 VLIB_CLI_COMMAND (vlib_cli_show_ip_command, static) = {
1664 .short_help = "Internet protocol (IP) show commands",
/*
 * CLI registration for the IP4 "show" command group. Only help text is
 * visible in this listing; presumably a parent node -- confirm.
 */
1667 VLIB_CLI_COMMAND (vlib_cli_show_ip4_command, static) = {
1669 .short_help = "Internet protocol version 4 (IP4) show commands",
/*
 * CLI registration for the IP6 "show" command group. Only help text is
 * visible in this listing; presumably a parent node -- confirm.
 */
1672 VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = {
1674 .short_help = "Internet protocol version 6 (IP6) show commands",
/*
 * CLI registration binding the route add/delete command to
 * vnet_ip_route_cmd. The .path line is not visible in this listing.
 */
1677 VLIB_CLI_COMMAND (ip_route_command, static) = {
1679 .short_help = "Add/delete IP routes",
1680 .function = vnet_ip_route_cmd,
1685 * The next two routines address a longstanding script hemorrhoid.
1686 * Probing a v4 or v6 neighbor needs to appear to be synchronous,
1687 * or dependent route-adds will simply fail.
/*
 * ip6_probe_neighbor_wait: probe an ip6 neighbor and block the calling
 * process until it resolves or the retries run out.
 *
 * Parameters (the last one is on a line not visible in this listing):
 *   vm           vlib main; must be running in process context
 *   a            ip6 address to probe
 *   sw_if_index  interface to probe on
 *   retry_count  number of probe attempts
 *
 * Returns a "Resolution failed for ..." error when no attempt succeeds.
 * The success return is not visible in this listing.
 */
1689 static clib_error_t *
1690 ip6_probe_neighbor_wait (vlib_main_t *vm, ip6_address_t * a, u32 sw_if_index,
1693 vnet_main_t * vnm = vnet_get_main();
1698 uword *event_data = 0;
/* Waiting for events below is only legal inside a vlib process. */
1700 ASSERT (vlib_in_process_context(vm));
/* Ask for event type 1 to be delivered to the current process when the neighbor resolves. */
1702 if (retry_count > 0)
1703 vnet_register_ip6_neighbor_resolution_event
1704 (vnm, a, vlib_get_current_process (vm)->node_runtime.node_index,
1705 1 /* event */, 0 /* data */);
1707 for (i = 0; i < retry_count; i++)
1709 /* The interface may be down, etc. */
1710 e = ip6_probe_neighbor (vm, a, sw_if_index);
/* Wait for the resolution event or a 1.0 timeout (presumably seconds), then fetch what arrived. */
1715 vlib_process_wait_for_event_or_clock (vm, 1.0);
1716 event_type = vlib_process_get_events (vm, &event_data);
1719 case 1: /* resolved... */
1720 vlib_cli_output (vm, "Resolved %U",
1721 format_ip6_address, a);
1725 case ~0: /* timeout */
1729 clib_warning ("unknown event_type %d", event_type);
/* Keep the event vector allocated but empty for the next attempt. */
1731 vec_reset_length (event_data);
1737 return clib_error_return (0, "Resolution failed for %U",
1738 format_ip6_address, a);
/*
 * ip4_probe_neighbor_wait: probe an ip4 neighbor (ARP) and block the calling
 * process until it resolves or the retries run out.
 *
 * Parameters (the last one is on a line not visible in this listing):
 *   vm           vlib main; must be running in process context
 *   a            ip4 address to probe
 *   sw_if_index  interface to probe on
 *   retry_count  number of probe attempts
 *
 * Returns a "Resolution failed for ..." error when no attempt succeeds.
 * The success return is not visible in this listing.
 */
1742 static clib_error_t *
1743 ip4_probe_neighbor_wait (vlib_main_t *vm, ip4_address_t * a, u32 sw_if_index,
1746 vnet_main_t * vnm = vnet_get_main();
1751 uword *event_data = 0;
/* Waiting for events below is only legal inside a vlib process. */
1753 ASSERT (vlib_in_process_context(vm));
/* Ask for event type 1 to be delivered to the current process when ARP resolves. */
1755 if (retry_count > 0)
1756 vnet_register_ip4_arp_resolution_event
1757 (vnm, a, vlib_get_current_process (vm)->node_runtime.node_index,
1758 1 /* event */, 0 /* data */);
1760 for (i = 0; i < retry_count; i++)
1762 /* The interface may be down, etc. */
1763 e = ip4_probe_neighbor (vm, a, sw_if_index);
/* Wait for the resolution event or a 1.0 timeout (presumably seconds), then fetch what arrived. */
1768 vlib_process_wait_for_event_or_clock (vm, 1.0);
1769 event_type = vlib_process_get_events (vm, &event_data);
1772 case 1: /* resolved... */
1773 vlib_cli_output (vm, "Resolved %U",
1774 format_ip4_address, a);
1778 case ~0: /* timeout */
1782 clib_warning ("unknown event_type %d", event_type);
/* Keep the event vector allocated but empty for the next attempt. */
1784 vec_reset_length (event_data);
1789 vec_reset_length (event_data);
1792 return clib_error_return (0, "Resolution failed for %U",
1793 format_ip4_address, a);
/*
 * probe_neighbor_address: CLI handler for "ip probe-neighbor".
 *
 * Parses one line containing an interface, exactly one ip4 or ip6 address,
 * and an optional "retry <n>" (default 3), then calls the matching
 * ip4/ip6 *_probe_neighbor_wait routine.
 *
 * Returns 0-initialised `error` as set by the wait routine, or a parse /
 * validation error.
 */
1797 static clib_error_t *
1798 probe_neighbor_address (vlib_main_t * vm,
1799 unformat_input_t * input,
1800 vlib_cli_command_t * cmd)
1802 vnet_main_t * vnm = vnet_get_main();
1803 unformat_input_t _line_input, * line_input = &_line_input;
1806 clib_error_t * error = 0;
1807 u32 sw_if_index = ~0;
1808 int retry_count = 3;
1810 int address_set = 0;
1812 /* Get a line of input. */
1813 if (! unformat_user (input, unformat_line_input, line_input))
1816 while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
1818 if (unformat_user (line_input, unformat_vnet_sw_interface, vnm,
1821 else if (unformat (line_input, "retry %d", &retry_count))
1824 else if (unformat (line_input, "%U", unformat_ip4_address, &a4))
1826 else if (unformat (line_input, "%U", unformat_ip6_address, &a6))
/*
 * NOTE(review): this return precedes the unformat_free (line_input) below,
 * so the line buffer looks leaked on unknown input -- confirm.
 */
1832 return clib_error_return (0, "unknown input '%U'",
1833 format_unformat_error, line_input);
1836 unformat_free (line_input);
/* An interface and exactly one address are mandatory. */
1838 if (sw_if_index == ~0)
1839 return clib_error_return (0, "Interface required, not set.");
1840 if (address_set == 0)
1841 return clib_error_return (0, "ip address required, not set.");
1842 if (address_set > 1)
1843 return clib_error_return (0, "Multiple ip addresses not supported.");
/* Dispatch on address family (selecting condition not visible in this listing). */
1846 error = ip4_probe_neighbor_wait (vm, &a4, sw_if_index, retry_count);
1848 error = ip6_probe_neighbor_wait (vm, &a6, sw_if_index, retry_count);
/* CLI registration: "ip probe-neighbor" is handled by probe_neighbor_address. */
1853 VLIB_CLI_COMMAND (ip_probe_neighbor_command, static) = {
1854 .path = "ip probe-neighbor",
1855 .function = probe_neighbor_address,
1856 .short_help = "ip probe-neighbor <intfc> <ip4-addr> | <ip6-addr> [retry nn]",
/*
 * Packed record for one ip4 route collected for display (ip4_show_fib
 * builds and sorts a vector of these). Remaining fields and the closing
 * typedef name are on lines not visible in this listing.
 */
1860 typedef CLIB_PACKED (struct {
1861 ip4_address_t address;
/* Prefix length, stored in a 6-bit field. */
1863 u32 address_length : 6;
/*
 * ip4_route_cmp: sort comparator for ip4_route_t (used with
 * vec_sort_with_function). Orders by address first and, for equal
 * addresses, by ascending prefix length. Returns <0, 0 or >0.
 */
1869 ip4_route_cmp (void * a1, void * a2)
1871 ip4_route_t * r1 = a1;
1872 ip4_route_t * r2 = a2;
1874 int cmp = ip4_address_compare (&r1->address, &r2->address);
/* The bit-field lengths are widened to int so the subtraction can go negative. */
1875 return cmp ? cmp : ((int) r1->address_length - (int) r2->address_length);
/*
 * ip4_show_fib: CLI handler for "show ip fib".
 *
 * Options parsed from `input`:
 *   brief | summary | sum   per-prefix-length counts only
 *   mtrie                   also print the FIB mtrie
 *   include-empty           include FIBs that hold no routes
 *   <ip4-addr>              only show routes matching this address
 *   clear                   zero the adjacency counters while showing them
 *                           (presumably; the flag assignment is not visible)
 *   table <n>               restrict output to one table id
 *
 * For every FIB that passes the filters the routes are gathered into a
 * vector, sorted with ip4_route_cmp and printed one next hop per row with
 * accumulated packet/byte counters.
 */
1878 static clib_error_t *
1879 ip4_show_fib (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
1881 vnet_main_t * vnm = vnet_get_main();
1882 ip4_main_t * im4 = &ip4_main;
1883 ip4_route_t * routes, * r;
1885 ip_lookup_main_t * lm = &im4->lookup_main;
1887 int verbose, matching, mtrie, include_empty_fibs;
1888 ip4_address_t matching_address;
1895 include_empty_fibs = 0;
1898 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1900 if (unformat (input, "brief") || unformat (input, "summary")
1901 || unformat (input, "sum"))
1904 else if (unformat (input, "mtrie"))
1907 else if (unformat (input, "include-empty"))
1908 include_empty_fibs = 1;
1910 else if (unformat (input, "%U", unformat_ip4_address, &matching_address))
1913 else if (unformat (input, "clear"))
1916 else if (unformat (input, "table %d", &table_id))
1922 vec_foreach (fib, im4->fibs)
/* First pass over the per-prefix-length hashes, used to decide whether this FIB is empty. */
1927 for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++)
1929 uword * hash = fib->adj_index_by_dst_address[i];
1930 uword n_elts = hash_elts (hash);
/* Filters: empty FIBs (unless include-empty) and non-matching table ids (table_id < 0 = no filter). */
1938 if (fib_not_empty == 0 && include_empty_fibs == 0)
1941 if (table_id >= 0 && table_id != (int)fib->table_id)
1944 if (include_empty_fibs)
1945 vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U",
1946 fib->table_id, fib - im4->fibs,
1947 format_ip_flow_hash_config, fib->flow_hash_config);
/* Summary mode: one row per prefix length. */
1952 if (include_empty_fibs == 0)
1953 vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U",
1954 fib->table_id, fib - im4->fibs,
1955 format_ip_flow_hash_config, fib->flow_hash_config);
1956 vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count");
1957 for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++)
1959 uword * hash = fib->adj_index_by_dst_address[i];
1960 uword n_elts = hash_elts (hash);
1962 vlib_cli_output (vm, "%20d%16d", i, n_elts);
/* Reuse the scratch vectors across FIBs: reset length, keep allocation. */
1968 _vec_len (routes) = 0;
1970 _vec_len (results) = 0;
/* Gather routes for every prefix length. */
1972 for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++)
1974 uword * hash = fib->adj_index_by_dst_address[i];
1978 x.address_length = i;
/* Matching mode: look up only the masked matching address at this length. */
1982 x.address.as_u32 = matching_address.as_u32 & im4->fib_masks[i];
1983 p = hash_get_pair (hash, x.address.as_u32);
/* Multi-word FIB results are copied to `results`; x.index then indexes that vector. */
1986 if (lm->fib_result_n_words > 1)
1988 x.index = vec_len (results);
1989 vec_add (results, p->value, lm->fib_result_n_words);
1992 x.index = p->value[0];
1993 vec_add1 (routes, x);
/* Non-matching mode: take every entry of the hash. */
1998 hash_foreach_pair (p, hash, ({
1999 x.address.data_u32 = p->key;
2000 if (lm->fib_result_n_words > 1)
2002 x.index = vec_len (results);
2003 vec_add (results, p->value, lm->fib_result_n_words);
2006 x.index = p->value[0];
2008 vec_add1 (routes, x);
2013 vec_sort_with_function (routes, ip4_route_cmp);
2014 if (vec_len(routes)) {
2015 if (include_empty_fibs == 0)
2016 vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U",
2017 fib->table_id, fib - im4->fibs,
2018 format_ip_flow_hash_config, fib->flow_hash_config);
2020 vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie);
2021 vlib_cli_output (vm, "%=20s%=16s%=16s%=16s",
2022 "Destination", "Packets", "Bytes", "Adjacency");
2024 vec_foreach (r, routes)
2026 vlib_counter_t c, sum;
2027 uword i, j, n_left, n_nhs, adj_index, * result = 0;
2028 ip_adjacency_t * adj;
2029 ip_multipath_next_hop_t * nhs, tmp_nhs[1];
/* Resolve the adjacency index, going through `results` for multi-word FIB results. */
2031 adj_index = r->index;
2032 if (lm->fib_result_n_words > 1)
2034 result = vec_elt_at_index (results, adj_index);
2035 adj_index = result[0];
2038 adj = ip_get_adjacency (lm, adj_index);
/* A single adjacency gets a synthetic one-entry next-hop list (tmp_nhs is presumably its storage). */
2039 if (adj->n_adj == 1)
2042 nhs[0].next_hop_adj_index = ~0; /* not used */
/* Multipath: take the normalized next-hop list from the next-hop heap. */
2048 ip_multipath_adjacency_t * madj;
2049 madj = vec_elt_at_index (lm->multipath_adjacencies, adj->heap_handle);
2050 nhs = heap_elt_at_index (lm->next_hop_heap, madj->normalized_next_hops.heap_offset);
2051 n_nhs = madj->normalized_next_hops.count;
/* Walk the adjacency block, summing counters over each next hop's `weight` consecutive entries. */
2054 n_left = nhs[0].weight;
2055 vlib_counter_zero (&sum);
2056 for (i = j = 0; i < adj->n_adj; i++)
2059 vlib_get_combined_counter (&lm->adjacency_counters,
2062 vlib_zero_combined_counter (&lm->adjacency_counters,
2064 vlib_counter_add (&sum, &c);
/* The destination is printed on one row; the other branch pads the column instead. */
2071 msg = format (msg, "%-20U",
2072 format_ip4_address_and_length,
2073 r->address.data, r->address_length);
2075 msg = format (msg, "%U", format_white_space, 20);
2077 msg = format (msg, "%16Ld%16Ld ", sum.packets, sum.bytes);
2079 indent = vec_len (msg);
2080 msg = format (msg, "weight %d, index %d",
2081 nhs[j].weight, adj_index + i);
/* NOTE(review): tests adj_index here, whereas ip6_show_fib tests adj_index + i -- confirm which is intended. */
2083 if (ip_adjacency_is_multipath(lm, adj_index))
2084 msg = format (msg, ", multipath");
2086 msg = format (msg, "\n%U%U",
2087 format_white_space, indent,
2088 format_ip_adjacency,
2089 vnm, lm, adj_index + i);
2091 vlib_cli_output (vm, "%v", msg);
/* Optional extra line from the lookup main's format_fib_result hook. */
2094 if (result && lm->format_fib_result)
2095 vlib_cli_output (vm, "%20s%U", "",
2096 lm->format_fib_result, vm, lm, result,
2097 i + 1 - nhs[j].weight,
/* Start accumulating for the next next hop. */
2103 n_left = nhs[j].weight;
2104 vlib_counter_zero (&sum);
/* CLI registration: "show ip fib" is handled by ip4_show_fib. */
2117 VLIB_CLI_COMMAND (ip4_show_fib_command, static) = {
2118 .path = "show ip fib",
2119 .short_help = "show ip fib [mtrie] [summary] [table <n>] [<ip4-addr>] [clear] [include-empty]",
2120 .function = ip4_show_fib,
/*
 * Address field of the ip6 route display record (presumably ip6_route_t;
 * the rest of the definition is on lines not visible in this listing).
 */
2124 ip6_address_t address;
/*
 * Argument block for add_routes_in_fib. routep points at the route vector
 * to append to; the fib_index member it also carries is on a line not
 * visible in this listing.
 */
2133 ip6_route_t ** routep;
2134 } add_routes_in_fib_arg_t;
/*
 * add_routes_in_fib: bihash walk callback (passed to
 * clib_bihash_foreach_key_value_pair by ip6_show_fib). Appends one
 * ip6_route_t to *ap->routep for each entry belonging to ap->fib_index.
 *
 *   kvp  key/value pair being visited
 *   arg  add_routes_in_fib_arg_t *
 */
2136 static void add_routes_in_fib (BVT(clib_bihash_kv) * kvp, void *arg)
2138 add_routes_in_fib_arg_t * ap = arg;
/* The upper 32 bits of key[2] hold the FIB index. */
2140 if (kvp->key[2]>>32 == ap->fib_index)
2142 ip6_address_t *addr;
/* The pair starts with key[0..1], which is read here as the 128-bit address. */
2144 addr = (ip6_address_t *) kvp;
2145 vec_add2 (*ap->routep, r, 1);
2146 r->address = addr[0];
/* The low byte of key[2] is the prefix length; the value is the adjacency index. */
2147 r->address_length = kvp->key[2] & 0xFF;
2148 r->index = kvp->value;
/*
 * Argument block for count_routes_in_fib_at_prefix_length: one counter per
 * prefix length 0..128. The fib_index member it also carries is on a line
 * not visible in this listing.
 */
2154 u64 count_by_prefix_length[129];
2155 } count_routes_in_fib_at_prefix_length_arg_t;
/*
 * count_routes_in_fib_at_prefix_length: bihash walk callback that counts
 * the entries of one FIB by prefix length.
 *
 *   kvp  key/value pair being visited
 *   arg  count_routes_in_fib_at_prefix_length_arg_t *
 */
2157 static void count_routes_in_fib_at_prefix_length
2158 (BVT(clib_bihash_kv) * kvp, void *arg)
2160 count_routes_in_fib_at_prefix_length_arg_t * ap = arg;
/* Entries of other FIBs are ignored (the upper 32 bits of key[2] hold the FIB index). */
2163 if ((kvp->key[2]>>32) != ap->fib_index)
2166 mask_width = kvp->key[2] & 0xFF;
/*
 * NOTE(review): mask_width can be up to 255 after the 0xFF mask while the
 * array has 129 slots; no bounds check is visible -- confirm keys never
 * carry a length above 128.
 */
2168 ap->count_by_prefix_length[mask_width]++;
/*
 * ip6_route_cmp: sort comparator for ip6_route_t (used with
 * vec_sort_with_function). Orders by address first and, for equal
 * addresses, by ascending prefix length. Returns <0, 0 or >0.
 */
2172 ip6_route_cmp (void * a1, void * a2)
2174 ip6_route_t * r1 = a1;
2175 ip6_route_t * r2 = a2;
2177 int cmp = ip6_address_compare (&r1->address, &r2->address);
/* The lengths are converted to int so the subtraction can go negative. */
2178 return cmp ? cmp : ((int) r1->address_length - (int) r2->address_length);
/*
 * ip6_show_fib: CLI handler for "show ip6 fib".
 *
 * Options parsed from `input`:
 *   brief | summary | sum   per-prefix-length counts only
 *   clear                   zero the adjacency counters while showing them
 *                           (presumably; the flag assignment is not visible)
 *
 * Prints the bihash geometry and heap, then for every ip6 FIB either the
 * per-prefix-length counts or the sorted route list, one next hop per row
 * with accumulated packet/byte counters.
 */
2181 static clib_error_t *
2182 ip6_show_fib (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
2184 vnet_main_t * vnm = vnet_get_main();
2185 ip6_main_t * im6 = &ip6_main;
2186 ip6_route_t * routes, * r;
2188 ip_lookup_main_t * lm = &im6->lookup_main;
2191 BVT(clib_bihash) * h = &im6->ip6_lookup_table;
2192 __attribute__((unused)) u8 clear = 0;
2193 add_routes_in_fib_arg_t _a, *a=&_a;
2194 count_routes_in_fib_at_prefix_length_arg_t _ca, *ca = &_ca;
2199 if (unformat (input, "brief") || unformat (input, "summary")
2200 || unformat (input, "sum"))
2203 if (unformat (input, "clear"))
/* Global table statistics, printed once before the per-FIB output. */
2206 vlib_cli_output (vm, "FIB lookup table: %d buckets, %lld MB heap",
2207 im6->lookup_table_nbuckets, im6->lookup_table_size>>20);
2208 vlib_cli_output (vm, "%U", format_mheap, h->mheap, 0 /*verbose*/);
2209 vlib_cli_output (vm, " ");
2211 vec_foreach (fib, im6->fibs)
2213 vlib_cli_output (vm, "VRF %d, fib_index %d, flow hash: %U",
2214 fib->table_id, fib - im6->fibs,
2215 format_ip_flow_hash_config, fib->flow_hash_config);
/* Summary mode: count this FIB's entries by prefix length with a walk over the shared bihash. */
2221 vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count");
2223 memset (ca, 0, sizeof(*ca));
2224 ca->fib_index = fib - im6->fibs;
2226 BV(clib_bihash_foreach_key_value_pair)
2227 (h, count_routes_in_fib_at_prefix_length, ca);
/* Longest prefixes first; lengths with no routes are skipped. */
2229 for (len = 128; len >= 0; len--)
2231 if (ca->count_by_prefix_length[len])
2232 vlib_cli_output (vm, "%=20d%=16lld",
2233 len, ca->count_by_prefix_length[len]);
/* Reuse the scratch vectors across FIBs: reset length, keep allocation. */
2239 _vec_len (routes) = 0;
2241 _vec_len (results) = 0;
2243 a->fib_index = fib - im6->fibs;
2244 a->routep = &routes;
/* Collect this FIB's routes from the shared bihash, then sort them for display. */
2246 BV(clib_bihash_foreach_key_value_pair)(h, add_routes_in_fib, a);
2248 vec_sort_with_function (routes, ip6_route_cmp);
2250 vlib_cli_output (vm, "%=45s%=16s%=16s%=16s",
2251 "Destination", "Packets", "Bytes", "Adjacency");
2252 vec_foreach (r, routes)
2254 vlib_counter_t c, sum;
2255 uword i, j, n_left, n_nhs, adj_index, * result = 0;
2256 ip_adjacency_t * adj;
2257 ip_multipath_next_hop_t * nhs, tmp_nhs[1];
/* Resolve the adjacency index, going through `results` for multi-word FIB results. */
2259 adj_index = r->index;
2260 if (lm->fib_result_n_words > 1)
2262 result = vec_elt_at_index (results, adj_index);
2263 adj_index = result[0];
2266 adj = ip_get_adjacency (lm, adj_index);
/* A single adjacency gets a synthetic one-entry next-hop list (tmp_nhs is presumably its storage). */
2267 if (adj->n_adj == 1)
2270 nhs[0].next_hop_adj_index = ~0; /* not used */
/* Multipath: take the normalized next-hop list from the next-hop heap. */
2276 ip_multipath_adjacency_t * madj;
2277 madj = vec_elt_at_index (lm->multipath_adjacencies, adj->heap_handle);
2278 nhs = heap_elt_at_index (lm->next_hop_heap, madj->normalized_next_hops.heap_offset);
2279 n_nhs = madj->normalized_next_hops.count;
/* Walk the adjacency block, summing counters over each next hop's `weight` consecutive entries. */
2282 n_left = nhs[0].weight;
2283 vlib_counter_zero (&sum);
2284 for (i = j = 0; i < adj->n_adj; i++)
2287 vlib_get_combined_counter (&lm->adjacency_counters,
2290 vlib_zero_combined_counter (&lm->adjacency_counters,
2292 vlib_counter_add (&sum, &c);
/* The destination is printed on one row; the other branch pads the column instead. */
2299 msg = format (msg, "%-45U",
2300 format_ip6_address_and_length,
2301 r->address.as_u8, r->address_length);
/* Pad by the same 45 columns as the destination field so the counters stay aligned. */
2303 msg = format (msg, "%U", format_white_space, 45);
2305 msg = format (msg, "%16Ld%16Ld ", sum.packets, sum.bytes);
2307 indent = vec_len (msg);
2308 msg = format (msg, "weight %d, index %d",
2309 nhs[j].weight, adj_index + i);
2311 if (ip_adjacency_is_multipath(lm, adj_index + i))
2312 msg = format (msg, ", multipath");
2314 msg = format (msg, "\n%U%U",
2315 format_white_space, indent,
2316 format_ip_adjacency,
2317 vnm, lm, adj_index + i);
2319 vlib_cli_output (vm, "%v", msg);
/* Start accumulating for the next next hop. */
2325 n_left = nhs[j].weight;
2326 vlib_counter_zero (&sum);
/* Optional extra line from the lookup main's format_fib_result hook. */
2331 if (result && lm->format_fib_result)
2332 vlib_cli_output (vm, "%20s%U", "", lm->format_fib_result, vm, lm, result, 0);
2334 vlib_cli_output (vm, " ");
/* CLI registration: "show ip6 fib" is handled by ip6_show_fib. */
2343 VLIB_CLI_COMMAND (ip6_show_fib_command, static) = {
2344 .path = "show ip6 fib",
2345 .short_help = "show ip6 fib [summary] [clear]",
2346 .function = ip6_show_fib,