2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_fib.h: ip4 mtrie fib
18 * Copyright (c) 2012 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/ip/ip.h>
41 #include <vnet/fib/fib_entry.h>
/* Initialize ply `p` so that every leaf slot starts out holding `init`.
 *
 * The non-empty leaf counter is set to 0 when `init` is the empty leaf and
 * to the full length of the leaf array otherwise.
 *
 * NOTE(review): parts of this definition (return type, the remaining
 * parameter, braces and the loop bodies) are not visible in this excerpt.
 */
44 ply_init (ip4_fib_mtrie_ply_t * p, ip4_fib_mtrie_leaf_t init,
47 p->n_non_empty_leafs =
48 ip4_fib_mtrie_leaf_is_empty (init) ? 0 : ARRAY_LEN (p->leaves);
/* memset stores prefix_len into each byte of the array; presumably the
   elements are one byte wide -- TODO confirm against the ply definition. */
49 memset (p->dst_address_bits_of_leaves, prefix_len,
50 sizeof (p->dst_address_bits_of_leaves));
52 /* Initialize leaves. */
53 #ifdef CLIB_HAVE_VEC128
/* Vector path: broadcast `init` into a 128-bit vector of u32 lanes. */
58 init_x4 = u32x4_splat (init);
/* Walk the vector view of the leaf array four elements per iteration
   (loop body not visible here). */
70 for (l = p->leaves_as_u32x4;
71 l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); l += 4)
/* Second loop: same four-at-a-time walk over the plain leaf array;
   presumably the non-vector fallback -- the preprocessor lines selecting
   it are not visible. */
83 for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4)
/* Allocate a new ply from m->ply_pool, initialize all of its leaves to
 * `init_leaf` with the given prefix length, and return a leaf value that
 * encodes the new ply's pool index.
 *
 * The pool allocation may relocate the pool (see the refetch in set_leaf),
 * so ply pointers obtained before this call must be refetched.
 */
94 static ip4_fib_mtrie_leaf_t
95 ply_create (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t init_leaf,
98 ip4_fib_mtrie_ply_t *p;
100 /* Get cache aligned ply. */
/* The alignment requested is the size of one ply element. */
101 pool_get_aligned (m->ply_pool, p, sizeof (p[0]));
103 ply_init (p, init_leaf, prefix_len);
/* p - m->ply_pool is the index of the new ply within the pool. */
104 return ip4_fib_mtrie_leaf_set_next_ply_index (p - m->ply_pool);
/* Return the ply that the non-terminal leaf `l` points to, by decoding the
 * ply index stored in the leaf and looking it up in m->ply_pool.
 */
107 always_inline ip4_fib_mtrie_ply_t *
108 get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l)
110 uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l);
111 /* It better not be the root ply. */
/* NOTE(review): the check that enforces the comment above is not visible
   in this excerpt. */
113 return pool_elt_at_index (m->ply_pool, n);
/* Recursively release ply `p` and every ply reachable through its
 * next-ply leaves.
 *
 * NOTE(review): the branch lines are not visible here; presumably the root
 * ply (pool index 0) is re-initialized to empty while any other ply is
 * returned to the pool -- confirm against the full source.
 */
117 ply_free (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
/* The root ply is the one stored at pool index 0. */
121 is_root = p - m->ply_pool == 0;
/* Free children first (depth-first). */
123 for (i = 0; i < ARRAY_LEN (p->leaves); i++)
125 ip4_fib_mtrie_leaf_t l = p->leaves[i];
126 if (ip4_fib_mtrie_leaf_is_next_ply (l))
127 ply_free (m, get_next_ply_for_leaf (m, l));
131 ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0);
133 pool_put (m->ply_pool, p);
/* Release the whole mtrie by freeing the ply at pool index 0 (the root);
 * ply_free recurses into all child plies.
 */
137 ip4_fib_free (ip4_fib_mtrie_t * m)
139 ip4_fib_mtrie_ply_t *root_ply = pool_elt_at_index (m->ply_pool, 0);
140 ply_free (m, root_ply);
/* Look up `dst` in the mtrie and return the adjacency index stored in the
 * first terminal leaf found.
 *
 * The walk starts at the ply at pool index 0 and consumes one address byte
 * per level (as_u8[0] through as_u8[3]), following next-ply leaves until a
 * terminal leaf is reached.
 */
144 ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst)
146 ip4_fib_mtrie_ply_t *p = pool_elt_at_index (m->ply_pool, 0);
147 ip4_fib_mtrie_leaf_t l;
/* Level 1: index the root ply by the first address byte. */
149 l = p->leaves[dst.as_u8[0]];
150 if (ip4_fib_mtrie_leaf_is_terminal (l))
151 return ip4_fib_mtrie_leaf_get_adj_index (l);
/* Level 2. */
153 p = get_next_ply_for_leaf (m, l);
154 l = p->leaves[dst.as_u8[1]];
155 if (ip4_fib_mtrie_leaf_is_terminal (l))
156 return ip4_fib_mtrie_leaf_get_adj_index (l);
/* Level 3. */
158 p = get_next_ply_for_leaf (m, l);
159 l = p->leaves[dst.as_u8[2]];
160 if (ip4_fib_mtrie_leaf_is_terminal (l))
161 return ip4_fib_mtrie_leaf_get_adj_index (l);
/* Level 4: the last byte; the leaf here is asserted to be terminal. */
163 p = get_next_ply_for_leaf (m, l);
164 l = p->leaves[dst.as_u8[3]];
166 ASSERT (ip4_fib_mtrie_leaf_is_terminal (l));
167 return ip4_fib_mtrie_leaf_get_adj_index (l);
/* Argument bundle shared by set_leaf and unset_leaf.
 *
 * NOTE(review): the opening of the typedef and the adj_index member (read
 * as a->adj_index by set_leaf/unset_leaf) are not visible in this excerpt.
 */
/* Destination prefix address. */
172 ip4_address_t dst_address;
/* Prefix length in bits; set_leaf/unset_leaf assert it is in 1..32. */
173 u32 dst_address_length;
175 } ip4_fib_mtrie_set_unset_leaf_args_t;
/* Push `new_leaf` down into `ply` and all of its sub-plies.
 *
 * Every terminal slot whose recorded prefix length is less than or equal to
 * `new_leaf_dst_address_bits` is overwritten with `new_leaf`; non-terminal
 * slots are handled by recursing into the ply they point to.  `new_leaf`
 * must be a non-empty terminal leaf (asserted).
 */
178 set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
179 ip4_fib_mtrie_ply_t * ply,
180 ip4_fib_mtrie_leaf_t new_leaf,
181 uword new_leaf_dst_address_bits)
183 ip4_fib_mtrie_leaf_t old_leaf;
186 ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf));
187 ASSERT (!ip4_fib_mtrie_leaf_is_empty (new_leaf));
189 for (i = 0; i < ARRAY_LEN (ply->leaves); i++)
191 old_leaf = ply->leaves[i];
193 /* Recurse into sub plies. */
194 if (!ip4_fib_mtrie_leaf_is_terminal (old_leaf))
196 ip4_fib_mtrie_ply_t *sub_ply = get_next_ply_for_leaf (m, old_leaf);
197 set_ply_with_more_specific_leaf (m, sub_ply, new_leaf,
198 new_leaf_dst_address_bits);
201 /* Replace less specific terminal leaves with new leaf. */
202 else if (new_leaf_dst_address_bits >=
203 ply->dst_address_bits_of_leaves[i])
/* The swap's return value is discarded; success is only checked by the
   ASSERT that follows. */
205 __sync_val_compare_and_swap (&ply->leaves[i], old_leaf, new_leaf);
206 ASSERT (ply->leaves[i] == new_leaf);
207 ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits;
/* Count the slot as newly occupied only if it was empty before. */
208 ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_empty (old_leaf);
/* Insert the route described by `a` into the mtrie, starting at the ply
 * with pool index `old_ply_index`, which is indexed by address byte
 * `dst_address_byte_index`.
 *
 * The ply is passed by index rather than by pointer because ply_create may
 * move the pool; pointers are refetched from the index where needed.
 *
 * NOTE(review): several lines of this definition (some call arguments,
 * `else` lines, braces, the dst_byte declaration) are not visible in this
 * excerpt.
 */
214 set_leaf (ip4_fib_mtrie_t * m,
215 ip4_fib_mtrie_set_unset_leaf_args_t * a,
216 u32 old_ply_index, u32 dst_address_byte_index)
218 ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
219 i32 n_dst_bits_next_plies;
222 ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32);
223 ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
/* Prefix bits left over after this ply's byte has been consumed; a value
   <= 0 means the prefix ends within this ply. */
225 n_dst_bits_next_plies =
226 a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
228 dst_byte = a->dst_address.as_u8[dst_address_byte_index];
230 /* Number of bits next plies <= 0 => insert leaves this ply. */
231 if (n_dst_bits_next_plies <= 0)
233 uword i, n_dst_bits_this_ply, old_leaf_is_terminal;
/* Number of low-order bits of this byte not covered by the prefix; the
   route spans 2^n consecutive slots starting at dst_byte. */
235 n_dst_bits_this_ply = -n_dst_bits_next_plies;
/* The uncovered low bits of the address byte must already be zero. */
236 ASSERT ((a->dst_address.as_u8[dst_address_byte_index] &
237 pow2_mask (n_dst_bits_this_ply)) == 0);
239 for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
241 ip4_fib_mtrie_ply_t *old_ply, *new_ply;
243 old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
245 old_leaf = old_ply->leaves[i];
246 old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
248 /* Is leaf to be inserted more specific? */
249 if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i])
251 new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
/* Terminal slot: overwrite it in place and record the new prefix length. */
253 if (old_leaf_is_terminal)
255 old_ply->dst_address_bits_of_leaves[i] =
256 a->dst_address_length;
257 __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf,
259 ASSERT (old_ply->leaves[i] == new_leaf);
/* Only a previously empty slot increases the occupancy count. */
260 old_ply->n_non_empty_leafs +=
261 ip4_fib_mtrie_leaf_is_empty (old_leaf);
262 ASSERT (old_ply->n_non_empty_leafs <=
263 ARRAY_LEN (old_ply->leaves));
267 /* Existing leaf points to another ply. We need to place new_leaf into all
268 more specific slots. */
269 new_ply = get_next_ply_for_leaf (m, old_leaf);
270 set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
271 a->dst_address_length);
/* The slot holds a more specific entry that is a sub-ply: continue the
   insertion one level down. */
275 else if (!old_leaf_is_terminal)
277 new_ply = get_next_ply_for_leaf (m, old_leaf);
278 set_leaf (m, a, new_ply - m->ply_pool,
279 dst_address_byte_index + 1);
/* Remaining code: presumably the branch taken when the prefix extends
   beyond this ply (n_dst_bits_next_plies > 0); the `else` line itself is
   not visible. */
285 ip4_fib_mtrie_ply_t *old_ply, *new_ply;
287 old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
288 old_leaf = old_ply->leaves[dst_byte];
/* A terminal leaf here must be replaced by a new sub-ply whose slots all
   start out as the old leaf with its old prefix length. */
289 if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
292 ply_create (m, old_leaf,
293 old_ply->dst_address_bits_of_leaves[dst_byte]);
294 new_ply = get_next_ply_for_leaf (m, new_leaf);
296 /* Refetch since ply_create may move pool. */
297 old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
299 __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
301 ASSERT (old_ply->leaves[dst_byte] == new_leaf);
/* The slot now holds a ply pointer, so its prefix length is reset to 0. */
302 old_ply->dst_address_bits_of_leaves[dst_byte] = 0;
/* Remove the old leaf from the occupancy count if it was non-empty ... */
304 old_ply->n_non_empty_leafs -=
305 ip4_fib_mtrie_leaf_is_non_empty (old_leaf);
306 ASSERT (old_ply->n_non_empty_leafs >= 0);
308 /* Account for the ply we just created. */
309 old_ply->n_non_empty_leafs += 1;
/* Slot already points at a sub-ply: reuse it. */
312 new_ply = get_next_ply_for_leaf (m, old_leaf);
/* Continue the insertion in the sub-ply with the next address byte. */
314 set_leaf (m, a, new_ply - m->ply_pool, dst_address_byte_index + 1);
/* Remove the route described by `a` from `old_ply` and, recursively, from
 * the sub-plies reachable through the affected slots.
 *
 * The result is used as a truth value by the recursive call below: judging
 * by the trailing comments it is non-zero when `old_ply` itself was
 * returned to the pool and zero otherwise (the return statements are not
 * visible in this excerpt).
 */
319 unset_leaf (ip4_fib_mtrie_t * m,
320 ip4_fib_mtrie_set_unset_leaf_args_t * a,
321 ip4_fib_mtrie_ply_t * old_ply, u32 dst_address_byte_index)
323 ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
324 i32 n_dst_bits_next_plies;
325 i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
328 ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32);
329 ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
/* Prefix bits left over after this ply's byte; negative when the prefix
   ends inside this byte. */
331 n_dst_bits_next_plies =
332 a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
334 dst_byte = a->dst_address.as_u8[dst_address_byte_index];
/* Clear the low bits of the byte that lie beyond the prefix length. */
335 if (n_dst_bits_next_plies < 0)
336 dst_byte &= ~pow2_mask (-n_dst_bits_next_plies);
/* Number of uncovered bits in this byte, clamped to at most 8; the loop
   below visits 2^n slots starting at dst_byte. */
338 n_dst_bits_this_ply =
339 n_dst_bits_next_plies <= 0 ? -n_dst_bits_next_plies : 0;
340 n_dst_bits_this_ply = clib_min (8, n_dst_bits_this_ply);
/* The terminal leaf value that identifies the route being removed. */
342 del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
344 for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
346 old_leaf = old_ply->leaves[i];
347 old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
/* Clear the slot if it holds exactly the leaf being deleted, or if it
   points to a sub-ply for which the recursive call reports non-zero. */
349 if (old_leaf == del_leaf
350 || (!old_leaf_is_terminal
351 && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf),
352 dst_address_byte_index + 1)))
354 old_ply->leaves[i] = IP4_FIB_MTRIE_LEAF_EMPTY;
355 old_ply->dst_address_bits_of_leaves[i] = 0;
357 /* No matter what we just deleted a non-empty leaf. */
358 ASSERT (!ip4_fib_mtrie_leaf_is_empty (old_leaf));
359 old_ply->n_non_empty_leafs -= 1;
361 ASSERT (old_ply->n_non_empty_leafs >= 0);
/* An emptied ply is returned to the pool, except at byte index 0 (the
   top-level call). */
362 if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
364 pool_put (m->ply_pool, old_ply);
365 /* Old ply was deleted. */
371 /* Old ply was not deleted. */
/* Reset mtrie `m` to an empty state: zero the structure, mark the default
 * leaf empty and create the first ply, which is asserted to land at pool
 * index 0 (the index other functions in this file use for the root).
 */
376 ip4_mtrie_init (ip4_fib_mtrie_t * m)
378 ip4_fib_mtrie_leaf_t root;
379 memset (m, 0, sizeof (m[0]));
380 m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY;
/* NOTE(review): the final argument of this call is on a line not visible
   in this excerpt. */
381 root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY, /* dst_address_bits_of_leaves */
383 ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (root) == 0);
/* Add or delete the route dst_address/dst_address_length -> adj_index in
 * the mtrie embedded in `fib`.
 *
 * A zero-length prefix is kept in m->default_leaf rather than in the plies.
 * After deleting a longer prefix, shorter prefix lengths are scanned in the
 * per-length hash tables and a covering entry is re-inserted with set_leaf.
 *
 * NOTE(review): the lines that branch on `is_del`, several braces and
 * local declarations are not visible in this excerpt; the add/delete split
 * described in the comments below is inferred from the visible statements.
 */
387 ip4_fib_mtrie_add_del_route (ip4_fib_t * fib,
388 ip4_address_t dst_address,
389 u32 dst_address_length,
390 u32 adj_index, u32 is_del)
392 ip4_fib_mtrie_t *m = &fib->mtrie;
393 ip4_fib_mtrie_ply_t *root_ply;
394 ip4_fib_mtrie_set_unset_leaf_args_t a;
395 ip4_main_t *im = &ip4_main;
/* The mtrie must have been initialized (ply pool allocated). */
397 ASSERT (m->ply_pool != 0);
399 root_ply = pool_elt_at_index (m->ply_pool, 0);
401 /* Honor dst_address_length. Fib masks are in network byte order */
/* NOTE(review): dst_address_length indexes fib_masks without a visible
   range check -- confirm callers guarantee a value of at most 32. */
402 dst_address.as_u32 &= im->fib_masks[dst_address_length];
403 a.dst_address = dst_address;
404 a.dst_address_length = dst_address_length;
405 a.adj_index = adj_index;
/* Presumably the add path: the default route goes to default_leaf, any
   other prefix is inserted starting at the root ply. */
409 if (dst_address_length == 0)
410 m->default_leaf = ip4_fib_mtrie_leaf_set_adj_index (adj_index);
412 set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0);
/* Presumably the delete path: clearing the default route just empties
   default_leaf. */
416 if (dst_address_length == 0)
417 m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY;
/* NOTE(review): `im` is already declared at the top of the function; this
   second declaration looks like a redundant shadow in an inner scope. */
421 ip4_main_t *im = &ip4_main;
424 unset_leaf (m, &a, root_ply, 0);
426 /* Find next less specific route and insert into mtrie. */
427 for (i = dst_address_length - 1; i >= 1; i--)
/* Lengths with no hash table are handled on a line not visible here. */
433 if (!fib->fib_entry_by_dst_address[i])
/* Look up the covering prefix of length i for the deleted address. */
436 key.as_u32 = dst_address.as_u32 & im->fib_masks[i];
437 p = hash_get (fib->fib_entry_by_dst_address[i], key.as_u32);
/* NOTE(review): p is dereferenced here; any NULL check on the hash_get
   result is on lines not visible in this excerpt. */
440 lbi = fib_entry_contribute_ip_forwarding (p[0])->dpoi_index;
441 if (INDEX_INVALID == lbi)
/* Re-insert the covering route using prefix length i. */
446 a.dst_address_length = i;
448 set_leaf (m, &a, /* ply_index */ 0,
449 /* dst_address_byte_index */ 0);
457 /* Returns number of bytes of memory used by mtrie. */
/* Counts sizeof one ply for `p` plus, recursively, the usage of every ply
 * reachable through its next-ply leaves.  format_ip4_fib_mtrie calls this
 * with p == 0; the visible code then checks pool index 0 and fetches the
 * ply stored there.
 *
 * NOTE(review): the conditional around the root lookup and the handling of
 * a free index 0 are on lines not visible in this excerpt.
 */
459 mtrie_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
465 if (pool_is_free_index (m->ply_pool, 0))
467 p = pool_elt_at_index (m->ply_pool, 0);
/* This ply itself. */
470 bytes = sizeof (p[0]);
/* Plus every child ply. */
471 for (i = 0; i < ARRAY_LEN (p->leaves); i++)
473 ip4_fib_mtrie_leaf_t l = p->leaves[i];
474 if (ip4_fib_mtrie_leaf_is_next_ply (l))
475 bytes += mtrie_memory_usage (m, get_next_ply_for_leaf (m, l));
/* Format callback for a single leaf, taken from the va_list as an
 * ip4_fib_mtrie_leaf_t.  Appends "miss" for an empty leaf, "adj <index>"
 * for a terminal leaf and "next ply <index>" otherwise.
 */
482 format_ip4_fib_mtrie_leaf (u8 * s, va_list * va)
484 ip4_fib_mtrie_leaf_t l = va_arg (*va, ip4_fib_mtrie_leaf_t);
/* The empty test comes first, before the terminal test. */
486 if (ip4_fib_mtrie_leaf_is_empty (l))
487 s = format (s, "miss");
488 else if (ip4_fib_mtrie_leaf_is_terminal (l))
489 s = format (s, "adj %d", ip4_fib_mtrie_leaf_get_adj_index (l));
491 s = format (s, "next ply %d", ip4_fib_mtrie_leaf_get_next_ply_index (l));
/* Format callback that prints one ply and, recursively, its sub-plies.
 *
 * Arguments read from the va_list, in order: the mtrie, the base address
 * of the ply as a u32, the ply's pool index and the index of the address
 * byte this ply is keyed on.
 */
496 format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
498 ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
499 u32 base_address = va_arg (*va, u32);
500 u32 ply_index = va_arg (*va, u32);
501 u32 dst_address_byte_index = va_arg (*va, u32);
502 ip4_fib_mtrie_ply_t *p;
505 p = pool_elt_at_index (m->ply_pool, ply_index);
/* Remember the current indent so nested lines can be indented under it. */
506 indent = format_get_indent (s);
/* NOTE(review): the assignment target for this format call is on a line
   not visible here; presumably the result is stored back into s. */
508 format (s, "ply index %d, %d non-empty leaves", ply_index,
509 p->n_non_empty_leafs);
510 for (i = 0; i < ARRAY_LEN (p->leaves); i++)
512 ip4_fib_mtrie_leaf_t l = p->leaves[i];
/* Empty slots are not printed. */
514 if (!ip4_fib_mtrie_leaf_is_empty (l))
/* Place slot index i into the byte this ply is keyed on (byte index 0 is
   the most significant byte of the value), then convert to network order. */
519 a = base_address + (i << (24 - 8 * dst_address_byte_index));
520 ia.as_u32 = clib_host_to_net_u32 (a);
/* Terminal leaves print their recorded prefix length; next-ply leaves
   print the number of bits consumed up to and including this ply. */
521 if (ip4_fib_mtrie_leaf_is_terminal (l))
522 ia_length = p->dst_address_bits_of_leaves[i];
524 ia_length = 8 * (1 + dst_address_byte_index);
525 s = format (s, "\n%U%20U %U",
526 format_white_space, indent + 2,
527 format_ip4_address_and_length, &ia, ia_length,
528 format_ip4_fib_mtrie_leaf, l);
/* Recurse into the sub-ply, using a as its base address. */
530 if (ip4_fib_mtrie_leaf_is_next_ply (l))
531 s = format (s, "\n%U%U",
532 format_white_space, indent + 2,
533 format_ip4_fib_mtrie_ply, m, a,
534 ip4_fib_mtrie_leaf_get_next_ply_index (l),
535 dst_address_byte_index + 1);
/* Format callback for a whole mtrie: prints the number of plies and the
 * memory usage, then, if the pool is not empty, the ply at index 0 and
 * everything below it.
 */
543 format_ip4_fib_mtrie (u8 * s, va_list * va)
545 ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
/* Passing 0 as the ply makes mtrie_memory_usage start from pool index 0. */
547 s = format (s, "%d plies, memory usage %U",
548 pool_elts (m->ply_pool),
549 format_memory_size, mtrie_memory_usage (m, 0));
551 if (pool_elts (m->ply_pool) > 0)
553 ip4_address_t base_address;
554 base_address.as_u32 = 0;
/* NOTE(review): base_address is passed through varargs as an
   ip4_address_t, but format_ip4_fib_mtrie_ply reads that argument with
   va_arg (*va, u32); passing base_address.as_u32 would match the reader.
   The assignment target of this call is on a line not visible here. */
556 format (s, "\n %U", format_ip4_fib_mtrie_ply, m, base_address, 0, 0);
563 * fd.io coding-style-patch-verification: ON
566 * eval: (c-set-style "gnu")