VPP-1448: Fix error when recurse on down the trie.
[vpp.git] / src / vnet / ip / ip4_mtrie.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_fib.h: ip4 mtrie fib
17  *
18  * Copyright (c) 2012 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/ip/ip.h>
41 #include <vnet/ip/ip4_mtrie.h>
42 #include <vnet/fib/ip4_fib.h>
43
44
45 /**
46  * Global pool of IPv4 8bit PLYs
47  */
48 ip4_fib_mtrie_8_ply_t *ip4_ply_pool;
49
50 always_inline u32
51 ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_8_ply_t * p, u8 dst_byte)
52 {
53   /*
54    * It's 'non-empty' if the length of the leaf stored is greater than the
55    * length of a leaf in the covering ply. i.e. the leaf is more specific
56    * than it's would be cover in the covering ply
57    */
58   if (p->dst_address_bits_of_leaves[dst_byte] > p->dst_address_bits_base)
59     return (1);
60   return (0);
61 }
62
63 always_inline ip4_fib_mtrie_leaf_t
64 ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index)
65 {
66   ip4_fib_mtrie_leaf_t l;
67   l = 1 + 2 * adj_index;
68   ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index);
69   return l;
70 }
71
72 always_inline u32
73 ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n)
74 {
75   return (n & 1) == 0;
76 }
77
78 always_inline u32
79 ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n)
80 {
81   ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n));
82   return n >> 1;
83 }
84
85 always_inline ip4_fib_mtrie_leaf_t
86 ip4_fib_mtrie_leaf_set_next_ply_index (u32 i)
87 {
88   ip4_fib_mtrie_leaf_t l;
89   l = 0 + 2 * i;
90   ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i);
91   return l;
92 }
93
94 #ifndef __ALTIVEC__
95 #define PLY_X4_SPLAT_INIT(init_x4, init) \
96   init_x4 = u32x4_splat (init);
97 #else
98 #define PLY_X4_SPLAT_INIT(init_x4, init)                                \
99 {                                                                       \
100   u32x4_union_t y;                                                      \
101   y.as_u32[0] = init;                                                   \
102   y.as_u32[1] = init;                                                   \
103   y.as_u32[2] = init;                                                   \
104   y.as_u32[3] = init;                                                   \
105   init_x4 = y.as_u32x4;                                                 \
106 }
107 #endif
108
109 #ifdef CLIB_HAVE_VEC128
110 #define PLY_INIT_LEAVES(p)                                              \
111 {                                                                       \
112     u32x4 *l, init_x4;                                                  \
113                                                                         \
114     PLY_X4_SPLAT_INIT(init_x4, init);                                   \
115     for (l = p->leaves_as_u32x4;                                        \
116          l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4);       \
117          l += 4)                                                        \
118       {                                                                 \
119         l[0] = init_x4;                                                 \
120         l[1] = init_x4;                                                 \
121         l[2] = init_x4;                                                 \
122         l[3] = init_x4;                                                 \
123       }                                                                 \
124 }
125 #else
126 #define PLY_INIT_LEAVES(p)                                              \
127 {                                                                       \
128   u32 *l;                                                               \
129                                                                         \
130   for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4)    \
131     {                                                                   \
132       l[0] = init;                                                      \
133       l[1] = init;                                                      \
134       l[2] = init;                                                      \
135       l[3] = init;                                                      \
136       }                                                                 \
137 }
138 #endif
139
140 #define PLY_INIT(p, init, prefix_len, ply_base_len)                     \
141 {                                                                       \
142   /*                                                                    \
143    * A leaf is 'empty' if it represents a leaf from the covering PLY    \
144    * i.e. if the prefix length of the leaf is less than or equal to     \
145    * the prefix length of the PLY                                       \
146    */                                                                   \
147   p->n_non_empty_leafs = (prefix_len > ply_base_len ?                   \
148                           ARRAY_LEN (p->leaves) : 0);                   \
149   memset (p->dst_address_bits_of_leaves, prefix_len,                    \
150           sizeof (p->dst_address_bits_of_leaves));                      \
151   p->dst_address_bits_base = ply_base_len;                              \
152                                                                         \
153   /* Initialize leaves. */                                              \
154   PLY_INIT_LEAVES(p);                                                   \
155 }
156
157 static void
158 ply_8_init (ip4_fib_mtrie_8_ply_t * p,
159             ip4_fib_mtrie_leaf_t init, uword prefix_len, u32 ply_base_len)
160 {
161   PLY_INIT (p, init, prefix_len, ply_base_len);
162 }
163
164 static void
165 ply_16_init (ip4_fib_mtrie_16_ply_t * p,
166              ip4_fib_mtrie_leaf_t init, uword prefix_len)
167 {
168   memset (p->dst_address_bits_of_leaves, prefix_len,
169           sizeof (p->dst_address_bits_of_leaves));
170   PLY_INIT_LEAVES (p);
171 }
172
173 static ip4_fib_mtrie_leaf_t
174 ply_create (ip4_fib_mtrie_t * m,
175             ip4_fib_mtrie_leaf_t init_leaf,
176             u32 leaf_prefix_len, u32 ply_base_len)
177 {
178   ip4_fib_mtrie_8_ply_t *p;
179   void *old_heap;
180   /* Get cache aligned ply. */
181
182   old_heap = clib_mem_set_heap (ip4_main.mtrie_mheap);
183   pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES);
184   clib_mem_set_heap (old_heap);
185
186   ply_8_init (p, init_leaf, leaf_prefix_len, ply_base_len);
187   return ip4_fib_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool);
188 }
189
190 always_inline ip4_fib_mtrie_8_ply_t *
191 get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l)
192 {
193   uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l);
194
195   return pool_elt_at_index (ip4_ply_pool, n);
196 }
197
198 void
199 ip4_mtrie_free (ip4_fib_mtrie_t * m)
200 {
201   /* the root ply is embedded so the is nothing to do,
202    * the assumption being that the IP4 FIB table has emptied the trie
203    * before deletion.
204    */
205 #if CLIB_DEBUG > 0
206   int i;
207   for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
208     {
209       ASSERT (!ip4_fib_mtrie_leaf_is_next_ply (m->root_ply.leaves[i]));
210     }
211 #endif
212 }
213
214 void
215 ip4_mtrie_init (ip4_fib_mtrie_t * m)
216 {
217   ply_16_init (&m->root_ply, IP4_FIB_MTRIE_LEAF_EMPTY, 0);
218 }
219
220 typedef struct
221 {
222   ip4_address_t dst_address;
223   u32 dst_address_length;
224   u32 adj_index;
225   u32 cover_address_length;
226   u32 cover_adj_index;
227 } ip4_fib_mtrie_set_unset_leaf_args_t;
228
229 static void
230 set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
231                                  ip4_fib_mtrie_8_ply_t * ply,
232                                  ip4_fib_mtrie_leaf_t new_leaf,
233                                  uword new_leaf_dst_address_bits)
234 {
235   ip4_fib_mtrie_leaf_t old_leaf;
236   uword i;
237
238   ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf));
239
240   for (i = 0; i < ARRAY_LEN (ply->leaves); i++)
241     {
242       old_leaf = ply->leaves[i];
243
244       /* Recurse into sub plies. */
245       if (!ip4_fib_mtrie_leaf_is_terminal (old_leaf))
246         {
247           ip4_fib_mtrie_8_ply_t *sub_ply =
248             get_next_ply_for_leaf (m, old_leaf);
249           set_ply_with_more_specific_leaf (m, sub_ply, new_leaf,
250                                            new_leaf_dst_address_bits);
251         }
252
253       /* Replace less specific terminal leaves with new leaf. */
254       else if (new_leaf_dst_address_bits >=
255                ply->dst_address_bits_of_leaves[i])
256         {
257           __sync_val_compare_and_swap (&ply->leaves[i], old_leaf, new_leaf);
258           ASSERT (ply->leaves[i] == new_leaf);
259           ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits;
260           ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_non_empty (ply, i);
261         }
262     }
263 }
264
265 static void
266 set_leaf (ip4_fib_mtrie_t * m,
267           const ip4_fib_mtrie_set_unset_leaf_args_t * a,
268           u32 old_ply_index, u32 dst_address_byte_index)
269 {
270   ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
271   i32 n_dst_bits_next_plies;
272   u8 dst_byte;
273   ip4_fib_mtrie_8_ply_t *old_ply;
274
275   old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
276
277   ASSERT (a->dst_address_length <= 32);
278   ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
279
280   /* how many bits of the destination address are in the next PLY */
281   n_dst_bits_next_plies =
282     a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
283
284   dst_byte = a->dst_address.as_u8[dst_address_byte_index];
285
286   /* Number of bits next plies <= 0 => insert leaves this ply. */
287   if (n_dst_bits_next_plies <= 0)
288     {
289       /* The mask length of the address to insert maps to this ply */
290       uword old_leaf_is_terminal;
291       u32 i, n_dst_bits_this_ply;
292
293       /* The number of bits, and hence slots/buckets, we will fill */
294       n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies);
295       ASSERT ((a->dst_address.as_u8[dst_address_byte_index] &
296                pow2_mask (n_dst_bits_this_ply)) == 0);
297
298       /* Starting at the value of the byte at this section of the v4 address
299        * fill the buckets/slots of the ply */
300       for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
301         {
302           ip4_fib_mtrie_8_ply_t *new_ply;
303
304           old_leaf = old_ply->leaves[i];
305           old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
306
307           if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i])
308             {
309               /* The new leaf is more or equally specific than the one currently
310                * occupying the slot */
311               new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
312
313               if (old_leaf_is_terminal)
314                 {
315                   /* The current leaf is terminal, we can replace it with
316                    * the new one */
317                   old_ply->n_non_empty_leafs -=
318                     ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
319
320                   old_ply->dst_address_bits_of_leaves[i] =
321                     a->dst_address_length;
322                   __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf,
323                                                new_leaf);
324                   ASSERT (old_ply->leaves[i] == new_leaf);
325
326                   old_ply->n_non_empty_leafs +=
327                     ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
328                   ASSERT (old_ply->n_non_empty_leafs <=
329                           ARRAY_LEN (old_ply->leaves));
330                 }
331               else
332                 {
333                   /* Existing leaf points to another ply.  We need to place
334                    * new_leaf into all more specific slots. */
335                   new_ply = get_next_ply_for_leaf (m, old_leaf);
336                   set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
337                                                    a->dst_address_length);
338                 }
339             }
340           else if (!old_leaf_is_terminal)
341             {
342               /* The current leaf is less specific and not termial (i.e. a ply),
343                * recurse on down the trie */
344               new_ply = get_next_ply_for_leaf (m, old_leaf);
345               set_leaf (m, a, new_ply - ip4_ply_pool,
346                         dst_address_byte_index + 1);
347             }
348           /*
349            * else
350            *  the route we are adding is less specific than the leaf currently
351            *  occupying this slot. leave it there
352            */
353         }
354     }
355   else
356     {
357       /* The address to insert requires us to move down at a lower level of
358        * the trie - recurse on down */
359       ip4_fib_mtrie_8_ply_t *new_ply;
360       u8 ply_base_len;
361
362       ply_base_len = 8 * (dst_address_byte_index + 1);
363
364       old_leaf = old_ply->leaves[dst_byte];
365
366       if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
367         {
368           /* There is a leaf occupying the slot. Replace it with a new ply */
369           old_ply->n_non_empty_leafs -=
370             ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
371
372           new_leaf =
373             ply_create (m, old_leaf,
374                         old_ply->dst_address_bits_of_leaves[dst_byte],
375                         ply_base_len);
376           new_ply = get_next_ply_for_leaf (m, new_leaf);
377
378           /* Refetch since ply_create may move pool. */
379           old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
380
381           __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
382                                        new_leaf);
383           ASSERT (old_ply->leaves[dst_byte] == new_leaf);
384           old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
385
386           old_ply->n_non_empty_leafs +=
387             ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
388           ASSERT (old_ply->n_non_empty_leafs >= 0);
389         }
390       else
391         new_ply = get_next_ply_for_leaf (m, old_leaf);
392
393       set_leaf (m, a, new_ply - ip4_ply_pool, dst_address_byte_index + 1);
394     }
395 }
396
397 static void
398 set_root_leaf (ip4_fib_mtrie_t * m,
399                const ip4_fib_mtrie_set_unset_leaf_args_t * a)
400 {
401   ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
402   ip4_fib_mtrie_16_ply_t *old_ply;
403   i32 n_dst_bits_next_plies;
404   u16 dst_byte;
405
406   old_ply = &m->root_ply;
407
408   ASSERT (a->dst_address_length <= 32);
409
410   /* how many bits of the destination address are in the next PLY */
411   n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
412
413   dst_byte = a->dst_address.as_u16[0];
414
415   /* Number of bits next plies <= 0 => insert leaves this ply. */
416   if (n_dst_bits_next_plies <= 0)
417     {
418       /* The mask length of the address to insert maps to this ply */
419       uword old_leaf_is_terminal;
420       u32 i, n_dst_bits_this_ply;
421
422       /* The number of bits, and hence slots/buckets, we will fill */
423       n_dst_bits_this_ply = 16 - a->dst_address_length;
424       ASSERT ((clib_host_to_net_u16 (a->dst_address.as_u16[0]) &
425                pow2_mask (n_dst_bits_this_ply)) == 0);
426
427       /* Starting at the value of the byte at this section of the v4 address
428        * fill the buckets/slots of the ply */
429       for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
430         {
431           ip4_fib_mtrie_8_ply_t *new_ply;
432           u16 slot;
433
434           slot = clib_net_to_host_u16 (dst_byte);
435           slot += i;
436           slot = clib_host_to_net_u16 (slot);
437
438           old_leaf = old_ply->leaves[slot];
439           old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
440
441           if (a->dst_address_length >=
442               old_ply->dst_address_bits_of_leaves[slot])
443             {
444               /* The new leaf is more or equally specific than the one currently
445                * occupying the slot */
446               new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
447
448               if (old_leaf_is_terminal)
449                 {
450                   /* The current leaf is terminal, we can replace it with
451                    * the new one */
452                   old_ply->dst_address_bits_of_leaves[slot] =
453                     a->dst_address_length;
454                   __sync_val_compare_and_swap (&old_ply->leaves[slot],
455                                                old_leaf, new_leaf);
456                   ASSERT (old_ply->leaves[slot] == new_leaf);
457                 }
458               else
459                 {
460                   /* Existing leaf points to another ply.  We need to place
461                    * new_leaf into all more specific slots. */
462                   new_ply = get_next_ply_for_leaf (m, old_leaf);
463                   set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
464                                                    a->dst_address_length);
465                 }
466             }
467           else if (!old_leaf_is_terminal)
468             {
469               /* The current leaf is less specific and not termial (i.e. a ply),
470                * recurse on down the trie */
471               new_ply = get_next_ply_for_leaf (m, old_leaf);
472               set_leaf (m, a, new_ply - ip4_ply_pool, 2);
473             }
474           /*
475            * else
476            *  the route we are adding is less specific than the leaf currently
477            *  occupying this slot. leave it there
478            */
479         }
480     }
481   else
482     {
483       /* The address to insert requires us to move down at a lower level of
484        * the trie - recurse on down */
485       ip4_fib_mtrie_8_ply_t *new_ply;
486       u8 ply_base_len;
487
488       ply_base_len = 16;
489
490       old_leaf = old_ply->leaves[dst_byte];
491
492       if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
493         {
494           /* There is a leaf occupying the slot. Replace it with a new ply */
495           new_leaf =
496             ply_create (m, old_leaf,
497                         old_ply->dst_address_bits_of_leaves[dst_byte],
498                         ply_base_len);
499           new_ply = get_next_ply_for_leaf (m, new_leaf);
500
501           __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
502                                        new_leaf);
503           ASSERT (old_ply->leaves[dst_byte] == new_leaf);
504           old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
505         }
506       else
507         new_ply = get_next_ply_for_leaf (m, old_leaf);
508
509       set_leaf (m, a, new_ply - ip4_ply_pool, 2);
510     }
511 }
512
513 static uword
514 unset_leaf (ip4_fib_mtrie_t * m,
515             const ip4_fib_mtrie_set_unset_leaf_args_t * a,
516             ip4_fib_mtrie_8_ply_t * old_ply, u32 dst_address_byte_index)
517 {
518   ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
519   i32 n_dst_bits_next_plies;
520   i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
521   u8 dst_byte;
522
523   ASSERT (a->dst_address_length <= 32);
524   ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
525
526   n_dst_bits_next_plies =
527     a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
528
529   dst_byte = a->dst_address.as_u8[dst_address_byte_index];
530   if (n_dst_bits_next_plies < 0)
531     dst_byte &= ~pow2_mask (-n_dst_bits_next_plies);
532
533   n_dst_bits_this_ply =
534     n_dst_bits_next_plies <= 0 ? -n_dst_bits_next_plies : 0;
535   n_dst_bits_this_ply = clib_min (8, n_dst_bits_this_ply);
536
537   del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
538
539   for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
540     {
541       old_leaf = old_ply->leaves[i];
542       old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
543
544       if (old_leaf == del_leaf
545           || (!old_leaf_is_terminal
546               && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf),
547                              dst_address_byte_index + 1)))
548         {
549           old_ply->n_non_empty_leafs -=
550             ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
551
552           old_ply->leaves[i] =
553             ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index);
554           old_ply->dst_address_bits_of_leaves[i] =
555             clib_max (old_ply->dst_address_bits_base,
556                       a->cover_address_length);
557
558           old_ply->n_non_empty_leafs +=
559             ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
560
561           ASSERT (old_ply->n_non_empty_leafs >= 0);
562           if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
563             {
564               pool_put (ip4_ply_pool, old_ply);
565               /* Old ply was deleted. */
566               return 1;
567             }
568 #if CLIB_DEBUG > 0
569           else if (dst_address_byte_index)
570             {
571               int ii, count = 0;
572               for (ii = 0; ii < ARRAY_LEN (old_ply->leaves); ii++)
573                 {
574                   count += ip4_fib_mtrie_leaf_is_non_empty (old_ply, ii);
575                 }
576               ASSERT (count);
577             }
578 #endif
579         }
580     }
581
582   /* Old ply was not deleted. */
583   return 0;
584 }
585
586 static void
587 unset_root_leaf (ip4_fib_mtrie_t * m,
588                  const ip4_fib_mtrie_set_unset_leaf_args_t * a)
589 {
590   ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
591   i32 n_dst_bits_next_plies;
592   i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
593   u16 dst_byte;
594   ip4_fib_mtrie_16_ply_t *old_ply;
595
596   ASSERT (a->dst_address_length <= 32);
597
598   old_ply = &m->root_ply;
599   n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
600
601   dst_byte = a->dst_address.as_u16[0];
602
603   n_dst_bits_this_ply = (n_dst_bits_next_plies <= 0 ?
604                          (16 - a->dst_address_length) : 0);
605
606   del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
607
608   /* Starting at the value of the byte at this section of the v4 address
609    * fill the buckets/slots of the ply */
610   for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
611     {
612       u16 slot;
613
614       slot = clib_net_to_host_u16 (dst_byte);
615       slot += i;
616       slot = clib_host_to_net_u16 (slot);
617
618       old_leaf = old_ply->leaves[slot];
619       old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
620
621       if (old_leaf == del_leaf
622           || (!old_leaf_is_terminal
623               && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), 2)))
624         {
625           old_ply->leaves[slot] =
626             ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index);
627           old_ply->dst_address_bits_of_leaves[slot] = a->cover_address_length;
628         }
629     }
630 }
631
632 void
633 ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m,
634                          const ip4_address_t * dst_address,
635                          u32 dst_address_length, u32 adj_index)
636 {
637   ip4_fib_mtrie_set_unset_leaf_args_t a;
638   ip4_main_t *im = &ip4_main;
639
640   /* Honor dst_address_length. Fib masks are in network byte order */
641   a.dst_address.as_u32 = (dst_address->as_u32 &
642                           im->fib_masks[dst_address_length]);
643   a.dst_address_length = dst_address_length;
644   a.adj_index = adj_index;
645
646   set_root_leaf (m, &a);
647 }
648
649 void
650 ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m,
651                          const ip4_address_t * dst_address,
652                          u32 dst_address_length,
653                          u32 adj_index,
654                          u32 cover_address_length, u32 cover_adj_index)
655 {
656   ip4_fib_mtrie_set_unset_leaf_args_t a;
657   ip4_main_t *im = &ip4_main;
658
659   /* Honor dst_address_length. Fib masks are in network byte order */
660   a.dst_address.as_u32 = (dst_address->as_u32 &
661                           im->fib_masks[dst_address_length]);
662   a.dst_address_length = dst_address_length;
663   a.adj_index = adj_index;
664   a.cover_adj_index = cover_adj_index;
665   a.cover_address_length = cover_address_length;
666
667   /* the top level ply is never removed */
668   unset_root_leaf (m, &a);
669 }
670
671 /* Returns number of bytes of memory used by mtrie. */
672 static uword
673 mtrie_ply_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_8_ply_t * p)
674 {
675   uword bytes, i;
676
677   bytes = sizeof (p[0]);
678   for (i = 0; i < ARRAY_LEN (p->leaves); i++)
679     {
680       ip4_fib_mtrie_leaf_t l = p->leaves[i];
681       if (ip4_fib_mtrie_leaf_is_next_ply (l))
682         bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
683     }
684
685   return bytes;
686 }
687
688 /* Returns number of bytes of memory used by mtrie. */
689 uword
690 ip4_fib_mtrie_memory_usage (ip4_fib_mtrie_t * m)
691 {
692   uword bytes, i;
693
694   bytes = sizeof (*m);
695   for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
696     {
697       ip4_fib_mtrie_leaf_t l = m->root_ply.leaves[i];
698       if (ip4_fib_mtrie_leaf_is_next_ply (l))
699         bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
700     }
701
702   return bytes;
703 }
704
705 static u8 *
706 format_ip4_fib_mtrie_leaf (u8 * s, va_list * va)
707 {
708   ip4_fib_mtrie_leaf_t l = va_arg (*va, ip4_fib_mtrie_leaf_t);
709
710   if (ip4_fib_mtrie_leaf_is_terminal (l))
711     s = format (s, "lb-index %d", ip4_fib_mtrie_leaf_get_adj_index (l));
712   else
713     s = format (s, "next ply %d", ip4_fib_mtrie_leaf_get_next_ply_index (l));
714   return s;
715 }
716
717 #define FORMAT_PLY(s, _p, _a, _i, _base_address, _ply_max_len, _indent) \
718 ({                                                                      \
719   u32 a, ia_length;                                                     \
720   ip4_address_t ia;                                                     \
721   ip4_fib_mtrie_leaf_t _l = p->leaves[(_i)];                            \
722                                                                         \
723   a = (_base_address) + ((_a) << (32 - (_ply_max_len)));                \
724   ia.as_u32 = clib_host_to_net_u32 (a);                                 \
725   ia_length = (_p)->dst_address_bits_of_leaves[(_i)];                   \
726   s = format (s, "\n%U%U %U",                                           \
727               format_white_space, (_indent) + 4,                        \
728               format_ip4_address_and_length, &ia, ia_length,            \
729               format_ip4_fib_mtrie_leaf, _l);                           \
730                                                                         \
731   if (ip4_fib_mtrie_leaf_is_next_ply (_l))                              \
732     s = format (s, "\n%U",                                              \
733                 format_ip4_fib_mtrie_ply, m, a, (_indent) + 8,          \
734                 ip4_fib_mtrie_leaf_get_next_ply_index (_l));            \
735   s;                                                                    \
736 })
737
738 static u8 *
739 format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
740 {
741   ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
742   u32 base_address = va_arg (*va, u32);
743   u32 indent = va_arg (*va, u32);
744   u32 ply_index = va_arg (*va, u32);
745   ip4_fib_mtrie_8_ply_t *p;
746   int i;
747
748   p = pool_elt_at_index (ip4_ply_pool, ply_index);
749   s = format (s, "%Uply index %d, %d non-empty leaves",
750               format_white_space, indent, ply_index, p->n_non_empty_leafs);
751
752   for (i = 0; i < ARRAY_LEN (p->leaves); i++)
753     {
754       if (ip4_fib_mtrie_leaf_is_non_empty (p, i))
755         {
756           s = FORMAT_PLY (s, p, i, i, base_address,
757                           p->dst_address_bits_base + 8, indent);
758         }
759     }
760
761   return s;
762 }
763
764 u8 *
765 format_ip4_fib_mtrie (u8 * s, va_list * va)
766 {
767   ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
768   int verbose = va_arg (*va, int);
769   ip4_fib_mtrie_16_ply_t *p;
770   u32 base_address = 0;
771   int i;
772
773   s = format (s, "%d plies, memory usage %U\n",
774               pool_elts (ip4_ply_pool),
775               format_memory_size, ip4_fib_mtrie_memory_usage (m));
776   s = format (s, "root-ply");
777   p = &m->root_ply;
778
779   if (verbose)
780     {
781       s = format (s, "root-ply");
782       p = &m->root_ply;
783
784       for (i = 0; i < ARRAY_LEN (p->leaves); i++)
785         {
786           u16 slot;
787
788           slot = clib_host_to_net_u16 (i);
789
790           if (p->dst_address_bits_of_leaves[slot] > 0)
791             {
792               s = FORMAT_PLY (s, p, i, slot, base_address, 16, 0);
793             }
794         }
795     }
796
797   return s;
798 }
799
800 /** Default heap size for the IPv4 mtries */
801 #define IP4_FIB_DEFAULT_MTRIE_HEAP_SIZE (32<<20)
802
803 static clib_error_t *
804 ip4_mtrie_module_init (vlib_main_t * vm)
805 {
806   CLIB_UNUSED (ip4_fib_mtrie_8_ply_t * p);
807   ip4_main_t *im = &ip4_main;
808   clib_error_t *error = NULL;
809   uword *old_heap;
810
811   if (0 == im->mtrie_heap_size)
812     im->mtrie_heap_size = IP4_FIB_DEFAULT_MTRIE_HEAP_SIZE;
813 #if USE_DLMALLOC == 0
814   im->mtrie_mheap = mheap_alloc (0, im->mtrie_heap_size);
815 #else
816   im->mtrie_mheap = create_mspace (im->mtrie_heap_size, 1 /* locked */ );
817 #endif
818
819   /* Burn one ply so index 0 is taken */
820   old_heap = clib_mem_set_heap (ip4_main.mtrie_mheap);
821   pool_get (ip4_ply_pool, p);
822   clib_mem_set_heap (old_heap);
823
824   return (error);
825 }
826
827 VLIB_INIT_FUNCTION (ip4_mtrie_module_init);
828
829 /*
830  * fd.io coding-style-patch-verification: ON
831  *
832  * Local Variables:
833  * eval: (c-set-style "gnu")
834  * End:
835  */