2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_fib.h: ip4 mtrie fib
18 * Copyright (c) 2012 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/ip/ip.h>
41 #include <vnet/fib/fib_entry.h>
44 ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_ply_t * p, u8 dst_byte)
47 * It's 'non-empty' if the length of the leaf stored is greater than the
48 * length of a leaf in the covering ply. i.e. the leaf is more specific
49 * than it's would be cover in the covering ply
51 if (p->dst_address_bits_of_leaves[dst_byte] > p->dst_address_bits_base)
56 always_inline ip4_fib_mtrie_leaf_t
57 ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index)
59 ip4_fib_mtrie_leaf_t l;
60 l = 1 + 2 * adj_index;
61 ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index);
66 ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n)
72 ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n)
74 ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n));
78 always_inline ip4_fib_mtrie_leaf_t
79 ip4_fib_mtrie_leaf_set_next_ply_index (u32 i)
81 ip4_fib_mtrie_leaf_t l;
83 ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i);
88 ply_init (ip4_fib_mtrie_ply_t * p,
89 ip4_fib_mtrie_leaf_t init, u32 prefix_len, u32 ply_base_len)
92 * A leaf is 'empty' if it represents a leaf from the covering PLY
93 * i.e. if the prefix length of the leaf is less than or equal to
94 * the prefix length of the PLY
96 p->n_non_empty_leafs = (prefix_len > ply_base_len ?
97 ARRAY_LEN (p->leaves) : 0);
98 memset (p->dst_address_bits_of_leaves, prefix_len,
99 sizeof (p->dst_address_bits_of_leaves));
100 p->dst_address_bits_base = ply_base_len;
102 /* Initialize leaves. */
103 #ifdef CLIB_HAVE_VEC128
108 init_x4 = u32x4_splat (init);
116 init_x4 = y.as_u32x4;
120 for (l = p->leaves_as_u32x4;
121 l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); l += 4)
133 for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4)
144 static ip4_fib_mtrie_leaf_t
145 ply_create (ip4_fib_mtrie_t * m,
146 ip4_fib_mtrie_leaf_t init_leaf,
147 u32 leaf_prefix_len, u32 ply_base_len)
149 ip4_fib_mtrie_ply_t *p;
151 /* Get cache aligned ply. */
152 pool_get_aligned (m->ply_pool, p, sizeof (p[0]));
154 ply_init (p, init_leaf, leaf_prefix_len, ply_base_len);
155 return ip4_fib_mtrie_leaf_set_next_ply_index (p - m->ply_pool);
158 always_inline ip4_fib_mtrie_ply_t *
159 get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l)
161 uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l);
162 /* It better not be the root ply. */
164 return pool_elt_at_index (m->ply_pool, n);
168 ply_free (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
172 is_root = p - m->ply_pool == 0;
174 for (i = 0; i < ARRAY_LEN (p->leaves); i++)
176 ip4_fib_mtrie_leaf_t l = p->leaves[i];
177 if (ip4_fib_mtrie_leaf_is_next_ply (l))
178 ply_free (m, get_next_ply_for_leaf (m, l));
182 ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0, 0);
184 pool_put (m->ply_pool, p);
188 ip4_fib_free (ip4_fib_mtrie_t * m)
190 ip4_fib_mtrie_ply_t *root_ply = pool_elt_at_index (m->ply_pool, 0);
191 ply_free (m, root_ply);
196 ip4_address_t dst_address;
197 u32 dst_address_length;
199 u32 cover_address_length;
201 } ip4_fib_mtrie_set_unset_leaf_args_t;
204 set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
205 ip4_fib_mtrie_ply_t * ply,
206 ip4_fib_mtrie_leaf_t new_leaf,
207 uword new_leaf_dst_address_bits)
209 ip4_fib_mtrie_leaf_t old_leaf;
212 ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf));
214 for (i = 0; i < ARRAY_LEN (ply->leaves); i++)
216 old_leaf = ply->leaves[i];
218 /* Recurse into sub plies. */
219 if (!ip4_fib_mtrie_leaf_is_terminal (old_leaf))
221 ip4_fib_mtrie_ply_t *sub_ply = get_next_ply_for_leaf (m, old_leaf);
222 set_ply_with_more_specific_leaf (m, sub_ply, new_leaf,
223 new_leaf_dst_address_bits);
226 /* Replace less specific terminal leaves with new leaf. */
227 else if (new_leaf_dst_address_bits >=
228 ply->dst_address_bits_of_leaves[i])
230 __sync_val_compare_and_swap (&ply->leaves[i], old_leaf, new_leaf);
231 ASSERT (ply->leaves[i] == new_leaf);
232 ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits;
233 ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_non_empty (ply, i);
239 set_leaf (ip4_fib_mtrie_t * m,
240 ip4_fib_mtrie_set_unset_leaf_args_t * a,
241 u32 old_ply_index, u32 dst_address_byte_index)
243 ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
244 i32 n_dst_bits_next_plies;
247 ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32);
248 ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
250 n_dst_bits_next_plies =
251 a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
253 dst_byte = a->dst_address.as_u8[dst_address_byte_index];
255 /* Number of bits next plies <= 0 => insert leaves this ply. */
256 if (n_dst_bits_next_plies <= 0)
258 uword i, n_dst_bits_this_ply, old_leaf_is_terminal;
260 n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies);
261 ASSERT ((a->dst_address.as_u8[dst_address_byte_index] &
262 pow2_mask (n_dst_bits_this_ply)) == 0);
264 for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
266 ip4_fib_mtrie_ply_t *old_ply, *new_ply;
268 old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
270 old_leaf = old_ply->leaves[i];
271 old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
273 /* Is leaf to be inserted more specific? */
274 if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i])
276 new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
278 if (old_leaf_is_terminal)
280 old_ply->n_non_empty_leafs -=
281 ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
282 old_ply->dst_address_bits_of_leaves[i] =
283 a->dst_address_length;
284 __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf,
286 ASSERT (old_ply->leaves[i] == new_leaf);
288 old_ply->n_non_empty_leafs +=
289 ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
290 ASSERT (old_ply->n_non_empty_leafs <=
291 ARRAY_LEN (old_ply->leaves));
295 /* Existing leaf points to another ply. We need to place new_leaf into all
296 more specific slots. */
297 new_ply = get_next_ply_for_leaf (m, old_leaf);
298 set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
299 a->dst_address_length);
303 else if (!old_leaf_is_terminal)
305 new_ply = get_next_ply_for_leaf (m, old_leaf);
306 set_leaf (m, a, new_ply - m->ply_pool,
307 dst_address_byte_index + 1);
313 ip4_fib_mtrie_ply_t *old_ply, *new_ply;
316 ply_base_len = 8 * (dst_address_byte_index + 1);
317 old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
318 old_leaf = old_ply->leaves[dst_byte];
319 if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
321 old_ply->n_non_empty_leafs -=
322 ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
324 new_leaf = ply_create (m, old_leaf,
325 clib_max (old_ply->dst_address_bits_of_leaves
326 [dst_byte], ply_base_len),
328 new_ply = get_next_ply_for_leaf (m, new_leaf);
330 /* Refetch since ply_create may move pool. */
331 old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
333 __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
335 ASSERT (old_ply->leaves[dst_byte] == new_leaf);
336 old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
338 /* Account for the ply we just created. */
339 old_ply->n_non_empty_leafs += 1;
340 ASSERT (old_ply->n_non_empty_leafs >= 0);
343 new_ply = get_next_ply_for_leaf (m, old_leaf);
345 set_leaf (m, a, new_ply - m->ply_pool, dst_address_byte_index + 1);
350 unset_leaf (ip4_fib_mtrie_t * m,
351 ip4_fib_mtrie_set_unset_leaf_args_t * a,
352 ip4_fib_mtrie_ply_t * old_ply, u32 dst_address_byte_index)
354 ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
355 i32 n_dst_bits_next_plies;
356 i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
359 ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32);
360 ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
362 n_dst_bits_next_plies =
363 a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
365 dst_byte = a->dst_address.as_u8[dst_address_byte_index];
366 if (n_dst_bits_next_plies < 0)
367 dst_byte &= ~pow2_mask (-n_dst_bits_next_plies);
369 n_dst_bits_this_ply =
370 n_dst_bits_next_plies <= 0 ? -n_dst_bits_next_plies : 0;
371 n_dst_bits_this_ply = clib_min (8, n_dst_bits_this_ply);
373 del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
375 for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
377 old_leaf = old_ply->leaves[i];
378 old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
380 if (old_leaf == del_leaf
381 || (!old_leaf_is_terminal
382 && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf),
383 dst_address_byte_index + 1)))
385 old_ply->n_non_empty_leafs -=
386 ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
389 ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index);
390 old_ply->dst_address_bits_of_leaves[i] =
391 clib_max (old_ply->dst_address_bits_base,
392 a->cover_address_length);
394 old_ply->n_non_empty_leafs +=
395 ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
397 ASSERT (old_ply->n_non_empty_leafs >= 0);
398 if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
400 pool_put (m->ply_pool, old_ply);
401 /* Old ply was deleted. */
405 else if (dst_address_byte_index)
408 for (ii = 0; ii < ARRAY_LEN (old_ply->leaves); ii++)
410 count += ip4_fib_mtrie_leaf_is_non_empty (old_ply, ii);
418 /* Old ply was not deleted. */
423 ip4_mtrie_init (ip4_fib_mtrie_t * m)
425 ip4_fib_mtrie_leaf_t root;
426 memset (m, 0, sizeof (m[0]));
427 root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY, 0, 0);
428 ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (root) == 0);
432 ip4_fib_mtrie_add_del_route (ip4_fib_t * fib,
433 ip4_address_t dst_address,
434 u32 dst_address_length,
435 u32 adj_index, u32 is_del)
437 ip4_fib_mtrie_t *m = &fib->mtrie;
438 ip4_fib_mtrie_ply_t *root_ply;
439 ip4_fib_mtrie_set_unset_leaf_args_t a;
440 ip4_main_t *im = &ip4_main;
442 ASSERT (m->ply_pool != 0);
444 root_ply = pool_elt_at_index (m->ply_pool, 0);
446 /* Honor dst_address_length. Fib masks are in network byte order */
447 dst_address.as_u32 &= im->fib_masks[dst_address_length];
448 a.dst_address = dst_address;
449 a.dst_address_length = dst_address_length;
450 a.adj_index = adj_index;
454 set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0);
458 ip4_main_t *im = &ip4_main;
460 if (dst_address_length)
464 /* If the ply was not deleted, then we need to fill the
465 * bucket just reset will the leaf from the less specfic
467 * Find next less specific route and insert into mtrie. */
468 for (i = dst_address_length - 1; i >= 0; i--)
474 if (!fib->fib_entry_by_dst_address[i])
477 key.as_u32 = dst_address.as_u32 & im->fib_masks[i];
478 p = hash_get (fib->fib_entry_by_dst_address[i], key.as_u32);
481 lbi = fib_entry_contribute_ip_forwarding (p[0])->dpoi_index;
482 if (INDEX_INVALID == lbi)
485 a.cover_adj_index = lbi;
486 a.cover_address_length = i;
494 a.cover_adj_index = 0;
495 a.cover_address_length = 0;
498 /* the top level ply is never removed, so we can ignore the return code */
499 unset_leaf (m, &a, root_ply, 0);
503 /* Returns number of bytes of memory used by mtrie. */
505 mtrie_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
511 if (pool_is_free_index (m->ply_pool, 0))
513 p = pool_elt_at_index (m->ply_pool, 0);
516 bytes = sizeof (p[0]);
517 for (i = 0; i < ARRAY_LEN (p->leaves); i++)
519 ip4_fib_mtrie_leaf_t l = p->leaves[i];
520 if (ip4_fib_mtrie_leaf_is_next_ply (l))
521 bytes += mtrie_memory_usage (m, get_next_ply_for_leaf (m, l));
528 format_ip4_fib_mtrie_leaf (u8 * s, va_list * va)
530 ip4_fib_mtrie_leaf_t l = va_arg (*va, ip4_fib_mtrie_leaf_t);
532 if (ip4_fib_mtrie_leaf_is_terminal (l))
533 s = format (s, "lb-index %d", ip4_fib_mtrie_leaf_get_adj_index (l));
535 s = format (s, "next ply %d", ip4_fib_mtrie_leaf_get_next_ply_index (l));
540 format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
542 ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
543 u32 base_address = va_arg (*va, u32);
544 u32 ply_index = va_arg (*va, u32);
545 u32 dst_address_byte_index = va_arg (*va, u32);
546 ip4_fib_mtrie_ply_t *p;
549 p = pool_elt_at_index (m->ply_pool, ply_index);
550 indent = format_get_indent (s);
552 format (s, "ply index %d, %d non-empty leaves", ply_index,
553 p->n_non_empty_leafs);
554 for (i = 0; i < ARRAY_LEN (p->leaves); i++)
556 ip4_fib_mtrie_leaf_t l = p->leaves[i];
558 if (ip4_fib_mtrie_leaf_is_non_empty (p, i))
563 a = base_address + (i << (24 - 8 * dst_address_byte_index));
564 ia.as_u32 = clib_host_to_net_u32 (a);
565 if (ip4_fib_mtrie_leaf_is_terminal (l))
566 ia_length = p->dst_address_bits_of_leaves[i];
568 ia_length = 8 * (1 + dst_address_byte_index);
569 s = format (s, "\n%U%20U %U",
570 format_white_space, indent + 2,
571 format_ip4_address_and_length, &ia, ia_length,
572 format_ip4_fib_mtrie_leaf, l);
574 if (ip4_fib_mtrie_leaf_is_next_ply (l))
575 s = format (s, "\n%U%U",
576 format_white_space, indent + 2,
577 format_ip4_fib_mtrie_ply, m, a,
578 ip4_fib_mtrie_leaf_get_next_ply_index (l),
579 dst_address_byte_index + 1);
587 format_ip4_fib_mtrie (u8 * s, va_list * va)
589 ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
591 s = format (s, "%d plies, memory usage %U",
592 pool_elts (m->ply_pool),
593 format_memory_size, mtrie_memory_usage (m, 0));
595 if (pool_elts (m->ply_pool) > 0)
597 ip4_address_t base_address;
598 base_address.as_u32 = 0;
600 format (s, "\n %U", format_ip4_fib_mtrie_ply, m, base_address, 0, 0);
607 * fd.io coding-style-patch-verification: ON
610 * eval: (c-set-style "gnu")