MTRIE Optimisations 2
[vpp.git] / src / vnet / ip / ip4_mtrie.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_fib.h: ip4 mtrie fib
17  *
18  * Copyright (c) 2012 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/ip/ip.h>
41 #include <vnet/ip/ip4_mtrie.h>
42 #include <vnet/fib/ip4_fib.h>
43
44
/**
 * Global pool of IPv4 8bit PLYs.
 *
 * Every 8-bit ply created by ply_create() is allocated from this one pool,
 * and non-terminal leaves store an index into it. The root (16-bit) ply is
 * not part of the pool; it is embedded in each ip4_fib_mtrie_t.
 */
ip4_fib_mtrie_8_ply_t *ip4_ply_pool;
49
50 always_inline u32
51 ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_8_ply_t * p, u8 dst_byte)
52 {
53   /*
54    * It's 'non-empty' if the length of the leaf stored is greater than the
55    * length of a leaf in the covering ply. i.e. the leaf is more specific
56    * than it's would be cover in the covering ply
57    */
58   if (p->dst_address_bits_of_leaves[dst_byte] > p->dst_address_bits_base)
59     return (1);
60   return (0);
61 }
62
63 always_inline ip4_fib_mtrie_leaf_t
64 ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index)
65 {
66   ip4_fib_mtrie_leaf_t l;
67   l = 1 + 2 * adj_index;
68   ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index);
69   return l;
70 }
71
72 always_inline u32
73 ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n)
74 {
75   return (n & 1) == 0;
76 }
77
78 always_inline u32
79 ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n)
80 {
81   ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n));
82   return n >> 1;
83 }
84
85 always_inline ip4_fib_mtrie_leaf_t
86 ip4_fib_mtrie_leaf_set_next_ply_index (u32 i)
87 {
88   ip4_fib_mtrie_leaf_t l;
89   l = 0 + 2 * i;
90   ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i);
91   return l;
92 }
93
/*
 * PLY_X4_SPLAT_INIT (init_x4, init)
 *
 * Set all four u32 lanes of the vector init_x4 to the scalar init.
 * When __ALTIVEC__ is defined the lanes are filled one by one through a
 * u32x4_union_t; otherwise u32x4_splat is used.
 *
 * Both variants are wrapped in do { } while (0) so the macro behaves as a
 * single statement; the caller supplies the terminating ';'.
 */
#ifndef __ALTIVEC__
#define PLY_X4_SPLAT_INIT(init_x4, init)                                \
do {                                                                    \
  (init_x4) = u32x4_splat (init);                                       \
} while (0)
#else
#define PLY_X4_SPLAT_INIT(init_x4, init)                                \
do {                                                                    \
  u32x4_union_t _ply_splat;                                             \
  _ply_splat.as_u32[0] = (init);                                        \
  _ply_splat.as_u32[1] = (init);                                        \
  _ply_splat.as_u32[2] = (init);                                        \
  _ply_splat.as_u32[3] = (init);                                        \
  (init_x4) = _ply_splat.as_u32x4;                                      \
} while (0)
#endif
108
/*
 * PLY_INIT_LEAVES (p)
 *
 * Store the same leaf value in every slot of ply p. The macro is type
 * generic: p may point at any ply type that has a 'leaves' array (and a
 * 'leaves_as_u32x4' view of it when CLIB_HAVE_VEC128 is defined).
 *
 * NOTE: the value written is NOT a macro parameter. It is read from a
 * variable named 'init' that must be in scope where the macro is expanded.
 *
 * Both loops write four entries per iteration, so the array length has to
 * be a multiple of four.
 */
#ifdef CLIB_HAVE_VEC128
#define PLY_INIT_LEAVES(p)                                              \
do {                                                                    \
    u32x4 *l, init_x4;                                                  \
                                                                        \
    PLY_X4_SPLAT_INIT(init_x4, init);                                   \
    for (l = (p)->leaves_as_u32x4;                                      \
         l < (p)->leaves_as_u32x4 + ARRAY_LEN ((p)->leaves_as_u32x4);   \
         l += 4)                                                        \
      {                                                                 \
        l[0] = init_x4;                                                 \
        l[1] = init_x4;                                                 \
        l[2] = init_x4;                                                 \
        l[3] = init_x4;                                                 \
      }                                                                 \
} while (0)
#else
#define PLY_INIT_LEAVES(p)                                              \
do {                                                                    \
  u32 *l;                                                               \
                                                                        \
  for (l = (p)->leaves;                                                 \
       l < (p)->leaves + ARRAY_LEN ((p)->leaves);                       \
       l += 4)                                                          \
    {                                                                   \
      l[0] = init;                                                      \
      l[1] = init;                                                      \
      l[2] = init;                                                      \
      l[3] = init;                                                      \
    }                                                                   \
} while (0)
#endif
139
/*
 * PLY_INIT (p, init, prefix_len, ply_base_len)
 *
 * Initialise an 8-bit ply so that every slot holds the same leaf:
 *  - every slot's recorded prefix length becomes prefix_len,
 *  - the ply's base prefix length becomes ply_base_len,
 *  - n_non_empty_leafs is the full slot count when prefix_len is longer
 *    than ply_base_len, and 0 otherwise. A leaf is 'empty' if it only
 *    represents a leaf from the covering ply, i.e. its prefix length is
 *    less than or equal to the prefix length of the ply.
 *
 * NOTE: the leaf value is written by PLY_INIT_LEAVES, which reads a
 * variable literally named 'init' at the expansion site. The 'init'
 * argument therefore only takes effect when the caller passes a variable
 * of that name.
 */
#define PLY_INIT(p, init, prefix_len, ply_base_len)                     \
do {                                                                    \
  (p)->n_non_empty_leafs = ((prefix_len) > (ply_base_len) ?             \
                            ARRAY_LEN ((p)->leaves) : 0);               \
  memset ((p)->dst_address_bits_of_leaves, (prefix_len),                \
          sizeof ((p)->dst_address_bits_of_leaves));                    \
  (p)->dst_address_bits_base = (ply_base_len);                          \
                                                                        \
  /* Initialize leaves. */                                              \
  PLY_INIT_LEAVES(p);                                                   \
} while (0)
156
/**
 * Initialise an 8-bit ply so that every slot holds the leaf init.
 *
 * @param p            ply to initialise
 * @param init         leaf value stored in every slot
 * @param prefix_len   prefix length recorded for every slot
 * @param ply_base_len base prefix length of the ply itself
 */
static void
ply_8_init (ip4_fib_mtrie_8_ply_t * p,
            ip4_fib_mtrie_leaf_t init, uword prefix_len, u32 ply_base_len)
{
  /* PLY_INIT_LEAVES picks up the parameter named 'init' by name. */
  PLY_INIT (p, init, prefix_len, ply_base_len);
}
163
/**
 * Initialise a 16-bit (root) ply so that every slot holds the leaf init.
 *
 * Unlike ply_8_init() this sets no base length or non-empty counter; only
 * the per-slot prefix lengths and the leaves themselves are written.
 *
 * @param p          ply to initialise
 * @param init       leaf value stored in every slot
 * @param prefix_len prefix length recorded for every slot
 */
static void
ply_16_init (ip4_fib_mtrie_16_ply_t * p,
             ip4_fib_mtrie_leaf_t init, uword prefix_len)
{
  memset (p->dst_address_bits_of_leaves, prefix_len,
          sizeof (p->dst_address_bits_of_leaves));
  /* PLY_INIT_LEAVES picks up the parameter named 'init' by name. */
  PLY_INIT_LEAVES (p);
}
172
173 static ip4_fib_mtrie_leaf_t
174 ply_create (ip4_fib_mtrie_t * m,
175             ip4_fib_mtrie_leaf_t init_leaf,
176             u32 leaf_prefix_len, u32 ply_base_len)
177 {
178   ip4_fib_mtrie_8_ply_t *p;
179
180   /* Get cache aligned ply. */
181   pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES);
182
183   ply_8_init (p, init_leaf, leaf_prefix_len, ply_base_len);
184   return ip4_fib_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool);
185 }
186
187 always_inline ip4_fib_mtrie_8_ply_t *
188 get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l)
189 {
190   uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l);
191
192   return pool_elt_at_index (ip4_ply_pool, n);
193 }
194
195 void
196 ip4_mtrie_free (ip4_fib_mtrie_t * m)
197 {
198   /* the root ply is embedded so the is nothing to do,
199    * the assumption being that the IP4 FIB table has emptied the trie
200    * before deletion.
201    */
202 #if CLIB_DEBUG > 0
203   int i;
204   for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
205     {
206       ASSERT (!ip4_fib_mtrie_leaf_is_next_ply (m->root_ply.leaves[i]));
207     }
208 #endif
209 }
210
/**
 * Initialise an mtrie: every slot of the embedded root ply is set to
 * IP4_FIB_MTRIE_LEAF_EMPTY with a recorded prefix length of 0.
 *
 * @param m mtrie to initialise
 */
void
ip4_mtrie_init (ip4_fib_mtrie_t * m)
{
  ply_16_init (&m->root_ply, IP4_FIB_MTRIE_LEAF_EMPTY, 0);
}
216
/**
 * Arguments shared by the set/unset leaf walkers.
 */
typedef struct
{
  /* Route prefix, already masked to dst_address_length bits. */
  ip4_address_t dst_address;
  /* Prefix length of the route being added or removed. */
  u32 dst_address_length;
  /* Index encoded into the terminal leaf of the route. */
  u32 adj_index;
  /* Prefix length of the covering route; read only when removing. */
  u32 cover_address_length;
  /* Index of the covering route, restored into freed slots on removal. */
  u32 cover_adj_index;
} ip4_fib_mtrie_set_unset_leaf_args_t;
225
226 static void
227 set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
228                                  ip4_fib_mtrie_8_ply_t * ply,
229                                  ip4_fib_mtrie_leaf_t new_leaf,
230                                  uword new_leaf_dst_address_bits)
231 {
232   ip4_fib_mtrie_leaf_t old_leaf;
233   uword i;
234
235   ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf));
236
237   for (i = 0; i < ARRAY_LEN (ply->leaves); i++)
238     {
239       old_leaf = ply->leaves[i];
240
241       /* Recurse into sub plies. */
242       if (!ip4_fib_mtrie_leaf_is_terminal (old_leaf))
243         {
244           ip4_fib_mtrie_8_ply_t *sub_ply =
245             get_next_ply_for_leaf (m, old_leaf);
246           set_ply_with_more_specific_leaf (m, sub_ply, new_leaf,
247                                            new_leaf_dst_address_bits);
248         }
249
250       /* Replace less specific terminal leaves with new leaf. */
251       else if (new_leaf_dst_address_bits >=
252                ply->dst_address_bits_of_leaves[i])
253         {
254           __sync_val_compare_and_swap (&ply->leaves[i], old_leaf, new_leaf);
255           ASSERT (ply->leaves[i] == new_leaf);
256           ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits;
257           ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_non_empty (ply, i);
258         }
259     }
260 }
261
262 static void
263 set_leaf (ip4_fib_mtrie_t * m,
264           const ip4_fib_mtrie_set_unset_leaf_args_t * a,
265           u32 old_ply_index, u32 dst_address_byte_index)
266 {
267   ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
268   i32 n_dst_bits_next_plies;
269   u8 dst_byte;
270   ip4_fib_mtrie_8_ply_t *old_ply;
271
272   old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
273
274   ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32);
275   ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
276
277   /* how many bits of the destination address are in the next PLY */
278   n_dst_bits_next_plies =
279     a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
280
281   dst_byte = a->dst_address.as_u8[dst_address_byte_index];
282
283   /* Number of bits next plies <= 0 => insert leaves this ply. */
284   if (n_dst_bits_next_plies <= 0)
285     {
286       /* The mask length of the address to insert maps to this ply */
287       uword i, n_dst_bits_this_ply, old_leaf_is_terminal;
288
289       /* The number of bits, and hence slots/buckets, we will fill */
290       n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies);
291       ASSERT ((a->dst_address.as_u8[dst_address_byte_index] &
292                pow2_mask (n_dst_bits_this_ply)) == 0);
293
294       /* Starting at the value of the byte at this section of the v4 address
295        * fill the buckets/slots of the ply */
296       for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
297         {
298           ip4_fib_mtrie_8_ply_t *new_ply;
299
300           old_leaf = old_ply->leaves[i];
301           old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
302
303           if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i])
304             {
305               /* The new leaf is more or equally specific than the one currently
306                * occupying the slot */
307               new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
308
309               if (old_leaf_is_terminal)
310                 {
311                   /* The current leaf is terminal, we can replace it with
312                    * the new one */
313                   old_ply->n_non_empty_leafs -=
314                     ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
315
316                   old_ply->dst_address_bits_of_leaves[i] =
317                     a->dst_address_length;
318                   __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf,
319                                                new_leaf);
320                   ASSERT (old_ply->leaves[i] == new_leaf);
321
322                   old_ply->n_non_empty_leafs +=
323                     ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
324                   ASSERT (old_ply->n_non_empty_leafs <=
325                           ARRAY_LEN (old_ply->leaves));
326                 }
327               else
328                 {
329                   /* Existing leaf points to another ply.  We need to place
330                    * new_leaf into all more specific slots. */
331                   new_ply = get_next_ply_for_leaf (m, old_leaf);
332                   set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
333                                                    a->dst_address_length);
334                 }
335             }
336           else if (!old_leaf_is_terminal)
337             {
338               /* The current leaf is less specific and not termial (i.e. a ply),
339                * recurse on down the trie */
340               new_ply = get_next_ply_for_leaf (m, old_leaf);
341               set_leaf (m, a, new_ply - ip4_ply_pool,
342                         dst_address_byte_index + 1);
343             }
344           /*
345            * else
346            *  the route we are adding is less specific than the leaf currently
347            *  occupying this slot. leave it there
348            */
349         }
350     }
351   else
352     {
353       /* The address to insert requires us to move down at a lower level of
354        * the trie - recurse on down */
355       ip4_fib_mtrie_8_ply_t *new_ply;
356       u8 ply_base_len;
357
358       ply_base_len = 8 * (dst_address_byte_index + 1);
359
360       old_leaf = old_ply->leaves[dst_byte];
361
362       if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
363         {
364           /* There is a leaf occupying the slot. Replace it with a new ply */
365           old_ply->n_non_empty_leafs -=
366             ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
367
368           new_leaf = ply_create (m, old_leaf,
369                                  clib_max (old_ply->dst_address_bits_of_leaves
370                                            [dst_byte], ply_base_len),
371                                  ply_base_len);
372           new_ply = get_next_ply_for_leaf (m, new_leaf);
373
374           /* Refetch since ply_create may move pool. */
375           old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
376
377           __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
378                                        new_leaf);
379           ASSERT (old_ply->leaves[dst_byte] == new_leaf);
380           old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
381
382           old_ply->n_non_empty_leafs +=
383             ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
384           ASSERT (old_ply->n_non_empty_leafs >= 0);
385         }
386       else
387         new_ply = get_next_ply_for_leaf (m, old_leaf);
388
389       set_leaf (m, a, new_ply - ip4_ply_pool, dst_address_byte_index + 1);
390     }
391 }
392
393 static void
394 set_root_leaf (ip4_fib_mtrie_t * m,
395                const ip4_fib_mtrie_set_unset_leaf_args_t * a)
396 {
397   ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
398   ip4_fib_mtrie_16_ply_t *old_ply;
399   i32 n_dst_bits_next_plies;
400   u16 dst_byte;
401
402   old_ply = &m->root_ply;
403
404   ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32);
405
406   /* how many bits of the destination address are in the next PLY */
407   n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
408
409   dst_byte = a->dst_address.as_u16[0];
410
411   /* Number of bits next plies <= 0 => insert leaves this ply. */
412   if (n_dst_bits_next_plies <= 0)
413     {
414       /* The mask length of the address to insert maps to this ply */
415       uword i, n_dst_bits_this_ply, old_leaf_is_terminal;
416
417       /* The number of bits, and hence slots/buckets, we will fill */
418       n_dst_bits_this_ply = 16 - a->dst_address_length;
419       ASSERT ((clib_host_to_net_u16 (a->dst_address.as_u16[0]) &
420                pow2_mask (n_dst_bits_this_ply)) == 0);
421
422       /* Starting at the value of the byte at this section of the v4 address
423        * fill the buckets/slots of the ply */
424       for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
425         {
426           ip4_fib_mtrie_8_ply_t *new_ply;
427           u16 slot;
428
429           slot = clib_net_to_host_u16 (dst_byte);
430           slot += i;
431           slot = clib_host_to_net_u16 (slot);
432
433           old_leaf = old_ply->leaves[slot];
434           old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
435
436           if (a->dst_address_length >=
437               old_ply->dst_address_bits_of_leaves[slot])
438             {
439               /* The new leaf is more or equally specific than the one currently
440                * occupying the slot */
441               new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
442
443               if (old_leaf_is_terminal)
444                 {
445                   /* The current leaf is terminal, we can replace it with
446                    * the new one */
447                   old_ply->dst_address_bits_of_leaves[slot] =
448                     a->dst_address_length;
449                   __sync_val_compare_and_swap (&old_ply->leaves[slot],
450                                                old_leaf, new_leaf);
451                   ASSERT (old_ply->leaves[slot] == new_leaf);
452                 }
453               else
454                 {
455                   /* Existing leaf points to another ply.  We need to place
456                    * new_leaf into all more specific slots. */
457                   new_ply = get_next_ply_for_leaf (m, old_leaf);
458                   set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
459                                                    a->dst_address_length);
460                 }
461             }
462           else if (!old_leaf_is_terminal)
463             {
464               /* The current leaf is less specific and not termial (i.e. a ply),
465                * recurse on down the trie */
466               new_ply = get_next_ply_for_leaf (m, old_leaf);
467               set_leaf (m, a, new_ply - ip4_ply_pool, 2);
468             }
469           /*
470            * else
471            *  the route we are adding is less specific than the leaf currently
472            *  occupying this slot. leave it there
473            */
474         }
475     }
476   else
477     {
478       /* The address to insert requires us to move down at a lower level of
479        * the trie - recurse on down */
480       ip4_fib_mtrie_8_ply_t *new_ply;
481       u8 ply_base_len;
482
483       ply_base_len = 16;
484
485       old_leaf = old_ply->leaves[dst_byte];
486
487       if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
488         {
489           /* There is a leaf occupying the slot. Replace it with a new ply */
490           new_leaf = ply_create (m, old_leaf,
491                                  clib_max (old_ply->dst_address_bits_of_leaves
492                                            [dst_byte], ply_base_len),
493                                  ply_base_len);
494           new_ply = get_next_ply_for_leaf (m, new_leaf);
495
496           __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
497                                        new_leaf);
498           ASSERT (old_ply->leaves[dst_byte] == new_leaf);
499           old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
500         }
501       else
502         new_ply = get_next_ply_for_leaf (m, old_leaf);
503
504       set_leaf (m, a, new_ply - ip4_ply_pool, 2);
505     }
506 }
507
508 static uword
509 unset_leaf (ip4_fib_mtrie_t * m,
510             const ip4_fib_mtrie_set_unset_leaf_args_t * a,
511             ip4_fib_mtrie_8_ply_t * old_ply, u32 dst_address_byte_index)
512 {
513   ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
514   i32 n_dst_bits_next_plies;
515   i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
516   u8 dst_byte;
517
518   ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32);
519   ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
520
521   n_dst_bits_next_plies =
522     a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
523
524   dst_byte = a->dst_address.as_u8[dst_address_byte_index];
525   if (n_dst_bits_next_plies < 0)
526     dst_byte &= ~pow2_mask (-n_dst_bits_next_plies);
527
528   n_dst_bits_this_ply =
529     n_dst_bits_next_plies <= 0 ? -n_dst_bits_next_plies : 0;
530   n_dst_bits_this_ply = clib_min (8, n_dst_bits_this_ply);
531
532   del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
533
534   for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
535     {
536       old_leaf = old_ply->leaves[i];
537       old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
538
539       if (old_leaf == del_leaf
540           || (!old_leaf_is_terminal
541               && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf),
542                              dst_address_byte_index + 1)))
543         {
544           old_ply->n_non_empty_leafs -=
545             ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
546
547           old_ply->leaves[i] =
548             ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index);
549           old_ply->dst_address_bits_of_leaves[i] =
550             clib_max (old_ply->dst_address_bits_base,
551                       a->cover_address_length);
552
553           old_ply->n_non_empty_leafs +=
554             ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
555
556           ASSERT (old_ply->n_non_empty_leafs >= 0);
557           if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
558             {
559               pool_put (ip4_ply_pool, old_ply);
560               /* Old ply was deleted. */
561               return 1;
562             }
563 #if CLIB_DEBUG > 0
564           else if (dst_address_byte_index)
565             {
566               int ii, count = 0;
567               for (ii = 0; ii < ARRAY_LEN (old_ply->leaves); ii++)
568                 {
569                   count += ip4_fib_mtrie_leaf_is_non_empty (old_ply, ii);
570                 }
571               ASSERT (count);
572             }
573 #endif
574         }
575     }
576
577   /* Old ply was not deleted. */
578   return 0;
579 }
580
581 static void
582 unset_root_leaf (ip4_fib_mtrie_t * m,
583                  const ip4_fib_mtrie_set_unset_leaf_args_t * a)
584 {
585   ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
586   i32 n_dst_bits_next_plies;
587   i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
588   u16 dst_byte;
589   ip4_fib_mtrie_16_ply_t *old_ply;
590
591   ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32);
592
593   old_ply = &m->root_ply;
594   n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
595
596   dst_byte = a->dst_address.as_u16[0];
597
598   n_dst_bits_this_ply = (n_dst_bits_next_plies <= 0 ?
599                          (16 - a->dst_address_length) : 0);
600
601   del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
602
603   /* Starting at the value of the byte at this section of the v4 address
604    * fill the buckets/slots of the ply */
605   for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
606     {
607       u16 slot;
608
609       slot = clib_net_to_host_u16 (dst_byte);
610       slot += i;
611       slot = clib_host_to_net_u16 (slot);
612
613       old_leaf = old_ply->leaves[slot];
614       old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
615
616       if (old_leaf == del_leaf
617           || (!old_leaf_is_terminal
618               && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), 2)))
619         {
620           old_ply->leaves[slot] =
621             ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index);
622           old_ply->dst_address_bits_of_leaves[slot] = a->cover_address_length;
623         }
624     }
625 }
626
627 void
628 ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m,
629                          const ip4_address_t * dst_address,
630                          u32 dst_address_length, u32 adj_index)
631 {
632   ip4_fib_mtrie_set_unset_leaf_args_t a;
633   ip4_main_t *im = &ip4_main;
634
635   /* Honor dst_address_length. Fib masks are in network byte order */
636   a.dst_address.as_u32 = (dst_address->as_u32 &
637                           im->fib_masks[dst_address_length]);
638   a.dst_address_length = dst_address_length;
639   a.adj_index = adj_index;
640
641   set_root_leaf (m, &a);
642 }
643
644 void
645 ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m,
646                          const ip4_address_t * dst_address,
647                          u32 dst_address_length,
648                          u32 adj_index,
649                          u32 cover_address_length, u32 cover_adj_index)
650 {
651   ip4_fib_mtrie_set_unset_leaf_args_t a;
652   ip4_main_t *im = &ip4_main;
653
654   /* Honor dst_address_length. Fib masks are in network byte order */
655   a.dst_address.as_u32 = (dst_address->as_u32 &
656                           im->fib_masks[dst_address_length]);
657   a.dst_address_length = dst_address_length;
658   a.adj_index = adj_index;
659   a.cover_adj_index = cover_adj_index;
660   a.cover_address_length = cover_address_length;
661
662   /* the top level ply is never removed */
663   unset_root_leaf (m, &a);
664 }
665
666 /* Returns number of bytes of memory used by mtrie. */
667 static uword
668 mtrie_ply_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_8_ply_t * p)
669 {
670   uword bytes, i;
671
672   bytes = sizeof (p[0]);
673   for (i = 0; i < ARRAY_LEN (p->leaves); i++)
674     {
675       ip4_fib_mtrie_leaf_t l = p->leaves[i];
676       if (ip4_fib_mtrie_leaf_is_next_ply (l))
677         bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
678     }
679
680   return bytes;
681 }
682
683 /* Returns number of bytes of memory used by mtrie. */
684 static uword
685 mtrie_memory_usage (ip4_fib_mtrie_t * m)
686 {
687   uword bytes, i;
688
689   bytes = sizeof (*m);
690   for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
691     {
692       ip4_fib_mtrie_leaf_t l = m->root_ply.leaves[i];
693       if (ip4_fib_mtrie_leaf_is_next_ply (l))
694         bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
695     }
696
697   return bytes;
698 }
699
700 static u8 *
701 format_ip4_fib_mtrie_leaf (u8 * s, va_list * va)
702 {
703   ip4_fib_mtrie_leaf_t l = va_arg (*va, ip4_fib_mtrie_leaf_t);
704
705   if (ip4_fib_mtrie_leaf_is_terminal (l))
706     s = format (s, "lb-index %d", ip4_fib_mtrie_leaf_get_adj_index (l));
707   else
708     s = format (s, "next ply %d", ip4_fib_mtrie_leaf_get_next_ply_index (l));
709   return s;
710 }
711
/*
 * FORMAT_PLY (s, _p, _i, _base_address, _ply_max_len, _indent)
 *
 * Append to s one line describing slot _i of ply _p: the prefix the slot
 * stands for and its leaf. If the leaf refers to a sub-ply, that ply is
 * formatted on the following lines.
 *
 *   _base_address  host-order address of the ply's own prefix
 *   _ply_max_len   number of address bits resolved once this ply has been
 *                  indexed; the slot index is shifted to that position
 *   _indent        indentation of the enclosing ply
 *
 * The slot index is widened to u32 before shifting so that large indices
 * do not overflow a signed int.
 *
 * NOTE: a variable named 'm' (the mtrie) must be in scope at the expansion
 * site; it is forwarded when a sub-ply is formatted. The macro evaluates
 * to the updated s.
 */
#define FORMAT_PLY(s, _p, _i, _base_address, _ply_max_len, _indent)     \
({                                                                      \
  u32 a, ia_length;                                                     \
  ip4_address_t ia;                                                     \
  ip4_fib_mtrie_leaf_t _l = (_p)->leaves[(_i)];                         \
                                                                        \
  a = (_base_address) + ((u32) (_i) << (32 - (_ply_max_len)));          \
  ia.as_u32 = clib_host_to_net_u32 (a);                                 \
  ia_length = (_p)->dst_address_bits_of_leaves[(_i)];                   \
  s = format (s, "\n%U%20U %U",                                         \
              format_white_space, (_indent) + 2,                        \
              format_ip4_address_and_length, &ia, ia_length,            \
              format_ip4_fib_mtrie_leaf, _l);                           \
                                                                        \
  if (ip4_fib_mtrie_leaf_is_next_ply (_l))                              \
    s = format (s, "\n%U%U",                                            \
                format_white_space, (_indent) + 2,                      \
                format_ip4_fib_mtrie_ply, m, a,                         \
                ip4_fib_mtrie_leaf_get_next_ply_index (_l));            \
  s;                                                                    \
})
733
734 static u8 *
735 format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
736 {
737   ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
738   u32 base_address = va_arg (*va, u32);
739   u32 ply_index = va_arg (*va, u32);
740   ip4_fib_mtrie_8_ply_t *p;
741   uword indent;
742   int i;
743
744   p = pool_elt_at_index (ip4_ply_pool, ply_index);
745   indent = format_get_indent (s);
746   s = format (s, "ply index %d, %d non-empty leaves", ply_index,
747               p->n_non_empty_leafs);
748
749   for (i = 0; i < ARRAY_LEN (p->leaves); i++)
750     {
751       if (ip4_fib_mtrie_leaf_is_non_empty (p, i))
752         {
753           FORMAT_PLY (s, p, i, base_address,
754                       p->dst_address_bits_base + 8, indent);
755         }
756     }
757
758   return s;
759 }
760
761 u8 *
762 format_ip4_fib_mtrie (u8 * s, va_list * va)
763 {
764   ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
765   ip4_fib_mtrie_16_ply_t *p;
766   u32 base_address = 0;
767   int i;
768
769   s = format (s, "%d plies, memory usage %U\n",
770               pool_elts (ip4_ply_pool),
771               format_memory_size, mtrie_memory_usage (m));
772   s = format (s, "root-ply");
773   p = &m->root_ply;
774
775   for (i = 0; i < ARRAY_LEN (p->leaves); i++)
776     {
777       u16 slot;
778
779       slot = clib_host_to_net_u16 (i);
780
781       if (p->dst_address_bits_of_leaves[slot] > 0)
782         {
783           FORMAT_PLY (s, p, slot, base_address, 16, 2);
784         }
785     }
786
787   return s;
788 }
789
790 static clib_error_t *
791 ip4_mtrie_module_init (vlib_main_t * vm)
792 {
793   /* Burn one ply so index 0 is taken */
794   CLIB_UNUSED (ip4_fib_mtrie_8_ply_t * p);
795
796   pool_get (ip4_ply_pool, p);
797
798   return (NULL);
799 }
800
801 VLIB_INIT_FUNCTION (ip4_mtrie_module_init);
802
803 /*
804  * fd.io coding-style-patch-verification: ON
805  *
806  * Local Variables:
807  * eval: (c-set-style "gnu")
808  * End:
809  */