/*
 * Source: vpp.git / src / vnet / ip / ip4_mtrie.c
 * (commit 0f4c47fe11a6809c5b58f3fc2701ebf90f430bf1)
 */
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_fib.h: ip4 mtrie fib
17  *
18  * Copyright (c) 2012 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/ip/ip.h>
41 #include <vnet/ip/ip4_mtrie.h>
42 #include <vnet/fib/ip4_fib.h>
43
44
45 /**
46  * Global pool of IPv4 8bit PLYs
47  */
48 ip4_mtrie_8_ply_t *ip4_ply_pool;
49
50 always_inline u32
51 ip4_mtrie_leaf_is_non_empty (ip4_mtrie_8_ply_t *p, u8 dst_byte)
52 {
53   /*
54    * It's 'non-empty' if the length of the leaf stored is greater than the
55    * length of a leaf in the covering ply. i.e. the leaf is more specific
56    * than it's would be cover in the covering ply
57    */
58   if (p->dst_address_bits_of_leaves[dst_byte] > p->dst_address_bits_base)
59     return (1);
60   return (0);
61 }
62
63 always_inline ip4_mtrie_leaf_t
64 ip4_mtrie_leaf_set_adj_index (u32 adj_index)
65 {
66   ip4_mtrie_leaf_t l;
67   l = 1 + 2 * adj_index;
68   ASSERT (ip4_mtrie_leaf_get_adj_index (l) == adj_index);
69   return l;
70 }
71
72 always_inline u32
73 ip4_mtrie_leaf_is_next_ply (ip4_mtrie_leaf_t n)
74 {
75   return (n & 1) == 0;
76 }
77
78 always_inline u32
79 ip4_mtrie_leaf_get_next_ply_index (ip4_mtrie_leaf_t n)
80 {
81   ASSERT (ip4_mtrie_leaf_is_next_ply (n));
82   return n >> 1;
83 }
84
85 always_inline ip4_mtrie_leaf_t
86 ip4_mtrie_leaf_set_next_ply_index (u32 i)
87 {
88   ip4_mtrie_leaf_t l;
89   l = 0 + 2 * i;
90   ASSERT (ip4_mtrie_leaf_get_next_ply_index (l) == i);
91   return l;
92 }
93
/* Broadcast a 32-bit leaf value into all four lanes of a u32x4 vector.
 * AltiVec builds lack the generic u32x4_splat, so fill the lanes through
 * a union there instead. */
#ifndef __ALTIVEC__
#define PLY_X4_SPLAT_INIT(init_x4, init) \
  init_x4 = u32x4_splat (init);
#else
#define PLY_X4_SPLAT_INIT(init_x4, init)                                \
{                                                                       \
  u32x4_union_t y;                                                      \
  y.as_u32[0] = init;                                                   \
  y.as_u32[1] = init;                                                   \
  y.as_u32[2] = init;                                                   \
  y.as_u32[3] = init;                                                   \
  init_x4 = y.as_u32x4;                                                 \
}
#endif
108
/* Fill every leaf slot of ply 'p' with the leaf value 'init'.
 * NOTE: 'init' is not a macro parameter — it is captured from the
 * invoking scope (see ply_8_init / ply_16_init).  The vector variant
 * writes 16 leaves per loop iteration (4 x u32x4); the scalar fallback
 * writes 4 per iteration. */
#ifdef CLIB_HAVE_VEC128
#define PLY_INIT_LEAVES(p)                                              \
{                                                                       \
    u32x4 *l, init_x4;                                                  \
                                                                        \
    PLY_X4_SPLAT_INIT(init_x4, init);                                   \
    for (l = p->leaves_as_u32x4;                                        \
         l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4);       \
         l += 4)                                                        \
      {                                                                 \
        l[0] = init_x4;                                                 \
        l[1] = init_x4;                                                 \
        l[2] = init_x4;                                                 \
        l[3] = init_x4;                                                 \
      }                                                                 \
}
#else
#define PLY_INIT_LEAVES(p)                                              \
{                                                                       \
  u32 *l;                                                               \
                                                                        \
  for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4)    \
    {                                                                   \
      l[0] = init;                                                      \
      l[1] = init;                                                      \
      l[2] = init;                                                      \
      l[3] = init;                                                      \
      }                                                                 \
}
#endif
139
/* Fully initialise a ply: set the non-empty-leaf count, stamp every
 * slot's prefix length to 'prefix_len', record the ply's base (covering)
 * prefix length, and fill all leaves with 'init'. */
#define PLY_INIT(p, init, prefix_len, ply_base_len)                     \
{                                                                       \
  /*                                                                    \
   * A leaf is 'empty' if it represents a leaf from the covering PLY    \
   * i.e. if the prefix length of the leaf is less than or equal to     \
   * the prefix length of the PLY                                       \
   */                                                                   \
  p->n_non_empty_leafs = (prefix_len > ply_base_len ?                   \
                          ARRAY_LEN (p->leaves) : 0);                   \
  clib_memset (p->dst_address_bits_of_leaves, prefix_len,                    \
          sizeof (p->dst_address_bits_of_leaves));                      \
  p->dst_address_bits_base = ply_base_len;                              \
                                                                        \
  /* Initialize leaves. */                                              \
  PLY_INIT_LEAVES(p);                                                   \
}
156
/**
 * Initialise an 8-bit (256-slot) ply: every leaf becomes 'init' with
 * prefix length 'prefix_len'; 'ply_base_len' is the covering prefix
 * length of the ply itself.
 */
static void
ply_8_init (ip4_mtrie_8_ply_t *p, ip4_mtrie_leaf_t init, uword prefix_len,
            u32 ply_base_len)
{
  PLY_INIT (p, init, prefix_len, ply_base_len);
}
163
/**
 * Initialise the 16-bit (64K-slot) root ply: stamp every slot's prefix
 * length and fill all leaves with 'init'.  The 16-ply keeps no
 * n_non_empty_leafs count or base length, so only the leaves and
 * per-slot lengths are set.
 */
static void
ply_16_init (ip4_mtrie_16_ply_t *p, ip4_mtrie_leaf_t init, uword prefix_len)
{
  clib_memset (p->dst_address_bits_of_leaves, prefix_len,
               sizeof (p->dst_address_bits_of_leaves));
  /* PLY_INIT_LEAVES reads 'init' from this scope. */
  PLY_INIT_LEAVES (p);
}
171
/**
 * Allocate and initialise a new 8-bit ply from the global pool and
 * return it encoded as a 'next ply' leaf.
 *
 * WARNING: pool_get_aligned may reallocate ip4_ply_pool, invalidating
 * any ply pointers the caller holds — callers must refetch by index
 * afterwards (see set_leaf).
 */
static ip4_mtrie_leaf_t
ply_create (ip4_mtrie_leaf_t init_leaf, u32 leaf_prefix_len, u32 ply_base_len)
{
  ip4_mtrie_8_ply_t *p;
  /* Get cache aligned ply. */

  pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES);

  ply_8_init (p, init_leaf, leaf_prefix_len, ply_base_len);
  return ip4_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool);
}
183
184 always_inline ip4_mtrie_8_ply_t *
185 get_next_ply_for_leaf (ip4_mtrie_leaf_t l)
186 {
187   uword n = ip4_mtrie_leaf_get_next_ply_index (l);
188
189   return pool_elt_at_index (ip4_ply_pool, n);
190 }
191
192 void
193 ip4_mtrie_16_free (ip4_mtrie_16_t *m)
194 {
195   /* the root ply is embedded so there is nothing to do,
196    * the assumption being that the IP4 FIB table has emptied the trie
197    * before deletion.
198    */
199 #if CLIB_DEBUG > 0
200   int i;
201   for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
202     {
203       ASSERT (!ip4_mtrie_leaf_is_next_ply (m->root_ply.leaves[i]));
204     }
205 #endif
206 }
207
/**
 * Initialise a 16-8-8 mtrie: every root-ply slot starts as the EMPTY
 * leaf with prefix length 0.
 */
void
ip4_mtrie_16_init (ip4_mtrie_16_t *m)
{
  ply_16_init (&m->root_ply, IP4_MTRIE_LEAF_EMPTY, 0);
}
213
214 void
215 ip4_mtrie_8_free (ip4_mtrie_8_t *m)
216 {
217   /* the root ply is embedded so there is nothing to do,
218    * the assumption being that the IP4 FIB table has emptied the trie
219    * before deletion.
220    */
221   ip4_mtrie_8_ply_t *root = pool_elt_at_index (ip4_ply_pool, m->root_ply);
222
223 #if CLIB_DEBUG > 0
224   int i;
225   for (i = 0; i < ARRAY_LEN (root->leaves); i++)
226     {
227       ASSERT (!ip4_mtrie_leaf_is_next_ply (root->leaves[i]));
228     }
229 #endif
230
231   pool_put (ip4_ply_pool, root);
232 }
233
234 void
235 ip4_mtrie_8_init (ip4_mtrie_8_t *m)
236 {
237   ip4_mtrie_8_ply_t *root;
238
239   pool_get (ip4_ply_pool, root);
240   m->root_ply = root - ip4_ply_pool;
241
242   ply_8_init (root, IP4_MTRIE_LEAF_EMPTY, 0, 0);
243 }
244
typedef struct
{
  /* Prefix to set/unset, already masked to dst_address_length bits,
   * in network byte order. */
  ip4_address_t dst_address;
  /* Prefix length of dst_address, 0..32. */
  u32 dst_address_length;
  /* Index the prefix resolves to (stored in terminal leaves). */
  u32 adj_index;
  /* On unset: prefix length and index of the covering prefix, used to
   * backfill the slots vacated by the deleted prefix. */
  u32 cover_address_length;
  u32 cover_adj_index;
} ip4_mtrie_set_unset_leaf_args_t;
253
/**
 * Recursively overwrite every slot of 'ply' (and of any sub-ply it
 * points to) that currently holds a leaf no more specific than
 * 'new_leaf_dst_address_bits' with the terminal leaf 'new_leaf'.
 * Used when a new route's covered range already contains deeper plies.
 */
static void
set_ply_with_more_specific_leaf (ip4_mtrie_8_ply_t *ply,
                                 ip4_mtrie_leaf_t new_leaf,
                                 uword new_leaf_dst_address_bits)
{
  ip4_mtrie_leaf_t old_leaf;
  uword i;

  ASSERT (ip4_mtrie_leaf_is_terminal (new_leaf));

  for (i = 0; i < ARRAY_LEN (ply->leaves); i++)
    {
      old_leaf = ply->leaves[i];

      /* Recurse into sub plies. */
      if (!ip4_mtrie_leaf_is_terminal (old_leaf))
        {
          ip4_mtrie_8_ply_t *sub_ply = get_next_ply_for_leaf (old_leaf);
          set_ply_with_more_specific_leaf (sub_ply, new_leaf,
                                           new_leaf_dst_address_bits);
        }

      /* Replace less specific terminal leaves with new leaf. */
      else if (new_leaf_dst_address_bits >=
               ply->dst_address_bits_of_leaves[i])
        {
          /* Release store: concurrent lookups must never observe a
           * half-updated leaf. */
          clib_atomic_store_rel_n (&ply->leaves[i], new_leaf);
          ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits;
          ply->n_non_empty_leafs += ip4_mtrie_leaf_is_non_empty (ply, i);
        }
    }
}
286
/**
 * Insert the route described by 'a' into the 8-bit ply identified by
 * 'old_ply_index', consuming the address byte at
 * 'dst_address_byte_index'.  If the prefix terminates within this ply
 * the covered slots are filled directly; otherwise the walk recurses
 * into (or creates) the next-level ply.
 *
 * NOTE: takes the ply by pool index, not pointer, because ply_create
 * may reallocate the pool mid-update.
 */
static void
set_leaf (const ip4_mtrie_set_unset_leaf_args_t *a, u32 old_ply_index,
          u32 dst_address_byte_index)
{
  ip4_mtrie_leaf_t old_leaf, new_leaf;
  i32 n_dst_bits_next_plies;
  u8 dst_byte;
  ip4_mtrie_8_ply_t *old_ply;

  old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);

  ASSERT (a->dst_address_length <= 32);
  ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));

  /* how many bits of the destination address are in the next PLY */
  n_dst_bits_next_plies =
    a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);

  dst_byte = a->dst_address.as_u8[dst_address_byte_index];

  /* Number of bits next plies <= 0 => insert leaves this ply. */
  if (n_dst_bits_next_plies <= 0)
    {
      /* The mask length of the address to insert maps to this ply */
      uword old_leaf_is_terminal;
      u32 i, n_dst_bits_this_ply;

      /* The number of bits, and hence slots/buckets, we will fill */
      n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies);
      /* The address must already be masked: the bits below the prefix
       * length are zero. */
      ASSERT ((a->dst_address.as_u8[dst_address_byte_index] &
               pow2_mask (n_dst_bits_this_ply)) == 0);

      /* Starting at the value of the byte at this section of the v4 address
       * fill the buckets/slots of the ply */
      for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
        {
          ip4_mtrie_8_ply_t *new_ply;

          old_leaf = old_ply->leaves[i];
          old_leaf_is_terminal = ip4_mtrie_leaf_is_terminal (old_leaf);

          if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i])
            {
              /* The new leaf is more or equally specific than the one currently
               * occupying the slot */
              new_leaf = ip4_mtrie_leaf_set_adj_index (a->adj_index);

              if (old_leaf_is_terminal)
                {
                  /* The current leaf is terminal, we can replace it with
                   * the new one */
                  old_ply->n_non_empty_leafs -=
                    ip4_mtrie_leaf_is_non_empty (old_ply, i);

                  old_ply->dst_address_bits_of_leaves[i] =
                    a->dst_address_length;
                  /* Release store: lookups read leaves without locks. */
                  clib_atomic_store_rel_n (&old_ply->leaves[i], new_leaf);

                  old_ply->n_non_empty_leafs +=
                    ip4_mtrie_leaf_is_non_empty (old_ply, i);
                  ASSERT (old_ply->n_non_empty_leafs <=
                          ARRAY_LEN (old_ply->leaves));
                }
              else
                {
                  /* Existing leaf points to another ply.  We need to place
                   * new_leaf into all more specific slots. */
                  new_ply = get_next_ply_for_leaf (old_leaf);
                  set_ply_with_more_specific_leaf (new_ply, new_leaf,
                                                   a->dst_address_length);
                }
            }
          else if (!old_leaf_is_terminal)
            {
              /* The current leaf is less specific and not terminal (i.e. a ply),
               * recurse on down the trie */
              new_ply = get_next_ply_for_leaf (old_leaf);
              set_leaf (a, new_ply - ip4_ply_pool, dst_address_byte_index + 1);
            }
          /*
           * else
           *  the route we are adding is less specific than the leaf currently
           *  occupying this slot. leave it there
           */
        }
    }
  else
    {
      /* The address to insert requires us to move down at a lower level of
       * the trie - recurse on down */
      ip4_mtrie_8_ply_t *new_ply;
      u8 ply_base_len;

      ply_base_len = 8 * (dst_address_byte_index + 1);

      old_leaf = old_ply->leaves[dst_byte];

      if (ip4_mtrie_leaf_is_terminal (old_leaf))
        {
          /* There is a leaf occupying the slot. Replace it with a new ply */
          old_ply->n_non_empty_leafs -=
            ip4_mtrie_leaf_is_non_empty (old_ply, dst_byte);

          /* The new ply inherits the displaced leaf in every slot, so
           * lookups that miss the deeper prefix still resolve to it. */
          new_leaf = ply_create (old_leaf,
                                 old_ply->dst_address_bits_of_leaves[dst_byte],
                                 ply_base_len);
          new_ply = get_next_ply_for_leaf (new_leaf);

          /* Refetch since ply_create may move pool. */
          old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);

          clib_atomic_store_rel_n (&old_ply->leaves[dst_byte], new_leaf);
          old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;

          old_ply->n_non_empty_leafs +=
            ip4_mtrie_leaf_is_non_empty (old_ply, dst_byte);
          ASSERT (old_ply->n_non_empty_leafs >= 0);
        }
      else
        new_ply = get_next_ply_for_leaf (old_leaf);

      set_leaf (a, new_ply - ip4_ply_pool, dst_address_byte_index + 1);
    }
}
411
/**
 * Insert the route described by 'a' starting at the 16-bit root ply of
 * a 16-8-8 mtrie.  Prefixes of length <= 16 are written directly into
 * the covered root slots; longer prefixes recurse into (or create) an
 * 8-bit ply via set_leaf.  Root slots are indexed by the first two
 * address bytes kept in network byte order, hence the
 * net->host / host->net conversions when stepping through slots.
 */
static void
set_root_leaf (ip4_mtrie_16_t *m, const ip4_mtrie_set_unset_leaf_args_t *a)
{
  ip4_mtrie_leaf_t old_leaf, new_leaf;
  ip4_mtrie_16_ply_t *old_ply;
  i32 n_dst_bits_next_plies;
  u16 dst_byte;

  old_ply = &m->root_ply;

  ASSERT (a->dst_address_length <= 32);

  /* how many bits of the destination address are in the next PLY */
  n_dst_bits_next_plies = a->dst_address_length - BITS (u16);

  dst_byte = a->dst_address.as_u16[0];

  /* Number of bits next plies <= 0 => insert leaves this ply. */
  if (n_dst_bits_next_plies <= 0)
    {
      /* The mask length of the address to insert maps to this ply */
      uword old_leaf_is_terminal;
      u32 i, n_dst_bits_this_ply;

      /* The number of bits, and hence slots/buckets, we will fill */
      n_dst_bits_this_ply = 16 - a->dst_address_length;
      /* The address must already be masked to the prefix length. */
      ASSERT ((clib_host_to_net_u16 (a->dst_address.as_u16[0]) &
               pow2_mask (n_dst_bits_this_ply)) == 0);

      /* Starting at the value of the byte at this section of the v4 address
       * fill the buckets/slots of the ply */
      for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
        {
          ip4_mtrie_8_ply_t *new_ply;
          u16 slot;

          /* Step in host byte order, index in network byte order. */
          slot = clib_net_to_host_u16 (dst_byte);
          slot += i;
          slot = clib_host_to_net_u16 (slot);

          old_leaf = old_ply->leaves[slot];
          old_leaf_is_terminal = ip4_mtrie_leaf_is_terminal (old_leaf);

          if (a->dst_address_length >=
              old_ply->dst_address_bits_of_leaves[slot])
            {
              /* The new leaf is more or equally specific than the one currently
               * occupying the slot */
              new_leaf = ip4_mtrie_leaf_set_adj_index (a->adj_index);

              if (old_leaf_is_terminal)
                {
                  /* The current leaf is terminal, we can replace it with
                   * the new one */
                  old_ply->dst_address_bits_of_leaves[slot] =
                    a->dst_address_length;
                  /* Release store: lookups read leaves without locks. */
                  clib_atomic_store_rel_n (&old_ply->leaves[slot], new_leaf);
                }
              else
                {
                  /* Existing leaf points to another ply.  We need to place
                   * new_leaf into all more specific slots. */
                  new_ply = get_next_ply_for_leaf (old_leaf);
                  set_ply_with_more_specific_leaf (new_ply, new_leaf,
                                                   a->dst_address_length);
                }
            }
          else if (!old_leaf_is_terminal)
            {
              /* The current leaf is less specific and not terminal (i.e. a ply),
               * recurse on down the trie */
              new_ply = get_next_ply_for_leaf (old_leaf);
              set_leaf (a, new_ply - ip4_ply_pool, 2);
            }
          /*
           * else
           *  the route we are adding is less specific than the leaf currently
           *  occupying this slot. leave it there
           */
        }
    }
  else
    {
      /* The address to insert requires us to move down at a lower level of
       * the trie - recurse on down */
      ip4_mtrie_8_ply_t *new_ply;
      u8 ply_base_len;

      ply_base_len = 16;

      old_leaf = old_ply->leaves[dst_byte];

      if (ip4_mtrie_leaf_is_terminal (old_leaf))
        {
          /* There is a leaf occupying the slot. Replace it with a new ply */
          new_leaf = ply_create (old_leaf,
                                 old_ply->dst_address_bits_of_leaves[dst_byte],
                                 ply_base_len);
          new_ply = get_next_ply_for_leaf (new_leaf);

          clib_atomic_store_rel_n (&old_ply->leaves[dst_byte], new_leaf);
          old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
        }
      else
        new_ply = get_next_ply_for_leaf (old_leaf);

      /* Byte index 2: the root ply consumed the first two bytes. */
      set_leaf (a, new_ply - ip4_ply_pool, 2);
    }
}
521
/**
 * Remove the route described by 'a' from 'old_ply', consuming the
 * address byte at 'dst_address_byte_index'.  Slots that held the
 * deleted leaf are backfilled with the covering prefix's leaf
 * (a->cover_adj_index / a->cover_address_length).
 *
 * @return 1 if this ply became empty and was freed (the caller must
 *         then replace its reference to it), 0 otherwise.
 */
static uword
unset_leaf (const ip4_mtrie_set_unset_leaf_args_t *a,
            ip4_mtrie_8_ply_t *old_ply, u32 dst_address_byte_index)
{
  ip4_mtrie_leaf_t old_leaf, del_leaf;
  i32 n_dst_bits_next_plies;
  i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
  u8 dst_byte;

  ASSERT (a->dst_address_length <= 32);
  ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));

  n_dst_bits_next_plies =
    a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);

  dst_byte = a->dst_address.as_u8[dst_address_byte_index];
  /* Clear the bits below the prefix length so dst_byte is the first
   * covered slot. */
  if (n_dst_bits_next_plies < 0)
    dst_byte &= ~pow2_mask (-n_dst_bits_next_plies);

  n_dst_bits_this_ply =
    n_dst_bits_next_plies <= 0 ? -n_dst_bits_next_plies : 0;
  n_dst_bits_this_ply = clib_min (8, n_dst_bits_this_ply);

  /* The terminal leaf we are deleting. */
  del_leaf = ip4_mtrie_leaf_set_adj_index (a->adj_index);

  for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
    {
      old_leaf = old_ply->leaves[i];
      old_leaf_is_terminal = ip4_mtrie_leaf_is_terminal (old_leaf);

      /* Either this slot holds the leaf being deleted, or it holds a
       * sub-ply which (recursively) became empty and was freed. */
      if (old_leaf == del_leaf ||
          (!old_leaf_is_terminal &&
           unset_leaf (a, get_next_ply_for_leaf (old_leaf),
                       dst_address_byte_index + 1)))
        {
          old_ply->n_non_empty_leafs -=
            ip4_mtrie_leaf_is_non_empty (old_ply, i);

          /* Backfill with the covering prefix's leaf. */
          clib_atomic_store_rel_n (
            &old_ply->leaves[i],
            ip4_mtrie_leaf_set_adj_index (a->cover_adj_index));
          old_ply->dst_address_bits_of_leaves[i] = a->cover_address_length;

          old_ply->n_non_empty_leafs +=
            ip4_mtrie_leaf_is_non_empty (old_ply, i);

          ASSERT (old_ply->n_non_empty_leafs >= 0);
          /* Non-root plies with no more-specific leaves are reclaimed. */
          if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
            {
              pool_put (ip4_ply_pool, old_ply);
              /* Old ply was deleted. */
              return 1;
            }
#if CLIB_DEBUG > 0
          else if (dst_address_byte_index)
            {
              /* Cross-check the cached count against the slots. */
              int ii, count = 0;
              for (ii = 0; ii < ARRAY_LEN (old_ply->leaves); ii++)
                {
                  count += ip4_mtrie_leaf_is_non_empty (old_ply, ii);
                }
              ASSERT (count);
            }
#endif
        }
    }

  /* Old ply was not deleted. */
  return 0;
}
592
/**
 * Remove the route described by 'a' starting at the 16-bit root ply.
 * Covered root slots whose leaf matches the deleted route (or whose
 * sub-ply emptied and was freed by unset_leaf) are backfilled with the
 * covering prefix's leaf.  The root ply itself is never freed.
 */
static void
unset_root_leaf (ip4_mtrie_16_t *m, const ip4_mtrie_set_unset_leaf_args_t *a)
{
  ip4_mtrie_leaf_t old_leaf, del_leaf;
  i32 n_dst_bits_next_plies;
  i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
  u16 dst_byte;
  ip4_mtrie_16_ply_t *old_ply;

  ASSERT (a->dst_address_length <= 32);

  old_ply = &m->root_ply;
  n_dst_bits_next_plies = a->dst_address_length - BITS (u16);

  dst_byte = a->dst_address.as_u16[0];

  /* Number of covered root slots: 2^(16 - prefix_len) when the prefix
   * ends in this ply, otherwise exactly one slot (2^0). */
  n_dst_bits_this_ply = (n_dst_bits_next_plies <= 0 ?
                         (16 - a->dst_address_length) : 0);

  del_leaf = ip4_mtrie_leaf_set_adj_index (a->adj_index);

  /* Starting at the value of the byte at this section of the v4 address
   * fill the buckets/slots of the ply */
  for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
    {
      u16 slot;

      /* Step in host byte order, index in network byte order. */
      slot = clib_net_to_host_u16 (dst_byte);
      slot += i;
      slot = clib_host_to_net_u16 (slot);

      old_leaf = old_ply->leaves[slot];
      old_leaf_is_terminal = ip4_mtrie_leaf_is_terminal (old_leaf);

      if (old_leaf == del_leaf ||
          (!old_leaf_is_terminal &&
           unset_leaf (a, get_next_ply_for_leaf (old_leaf), 2)))
        {
          clib_atomic_store_rel_n (
            &old_ply->leaves[slot],
            ip4_mtrie_leaf_set_adj_index (a->cover_adj_index));
          old_ply->dst_address_bits_of_leaves[slot] = a->cover_address_length;
        }
    }
}
638
639 void
640 ip4_mtrie_16_route_add (ip4_mtrie_16_t *m, const ip4_address_t *dst_address,
641                         u32 dst_address_length, u32 adj_index)
642 {
643   ip4_mtrie_set_unset_leaf_args_t a;
644   ip4_main_t *im = &ip4_main;
645
646   /* Honor dst_address_length. Fib masks are in network byte order */
647   a.dst_address.as_u32 = (dst_address->as_u32 &
648                           im->fib_masks[dst_address_length]);
649   a.dst_address_length = dst_address_length;
650   a.adj_index = adj_index;
651
652   set_root_leaf (m, &a);
653 }
654
655 void
656 ip4_mtrie_8_route_add (ip4_mtrie_8_t *m, const ip4_address_t *dst_address,
657                        u32 dst_address_length, u32 adj_index)
658 {
659   ip4_mtrie_set_unset_leaf_args_t a;
660   ip4_main_t *im = &ip4_main;
661
662   /* Honor dst_address_length. Fib masks are in network byte order */
663   a.dst_address.as_u32 =
664     (dst_address->as_u32 & im->fib_masks[dst_address_length]);
665   a.dst_address_length = dst_address_length;
666   a.adj_index = adj_index;
667
668   ip4_mtrie_8_ply_t *root = pool_elt_at_index (ip4_ply_pool, m->root_ply);
669
670   set_leaf (&a, root - ip4_ply_pool, 0);
671 }
672
673 void
674 ip4_mtrie_16_route_del (ip4_mtrie_16_t *m, const ip4_address_t *dst_address,
675                         u32 dst_address_length, u32 adj_index,
676                         u32 cover_address_length, u32 cover_adj_index)
677 {
678   ip4_mtrie_set_unset_leaf_args_t a;
679   ip4_main_t *im = &ip4_main;
680
681   /* Honor dst_address_length. Fib masks are in network byte order */
682   a.dst_address.as_u32 = (dst_address->as_u32 &
683                           im->fib_masks[dst_address_length]);
684   a.dst_address_length = dst_address_length;
685   a.adj_index = adj_index;
686   a.cover_adj_index = cover_adj_index;
687   a.cover_address_length = cover_address_length;
688
689   /* the top level ply is never removed */
690   unset_root_leaf (m, &a);
691 }
692
/**
 * Delete a route from an 8-8-8-8 mtrie.  The vacated slots are
 * backfilled with the covering prefix (cover_address_length /
 * cover_adj_index) supplied by the caller.
 */
void
ip4_mtrie_8_route_del (ip4_mtrie_8_t *m, const ip4_address_t *dst_address,
                       u32 dst_address_length, u32 adj_index,
                       u32 cover_address_length, u32 cover_adj_index)
{
  ip4_main_t *im = &ip4_main;

  /* Honor dst_address_length. Fib masks are in network byte order */
  ip4_mtrie_set_unset_leaf_args_t a = {
    .dst_address.as_u32 =
      (dst_address->as_u32 & im->fib_masks[dst_address_length]),
    .dst_address_length = dst_address_length,
    .adj_index = adj_index,
    .cover_adj_index = cover_adj_index,
    .cover_address_length = cover_address_length,
  };

  /* the top level ply is never removed */
  ip4_mtrie_8_ply_t *root = pool_elt_at_index (ip4_ply_pool, m->root_ply);

  unset_leaf (&a, root, 0);
}
715
716 /* Returns number of bytes of memory used by mtrie. */
717 static uword
718 mtrie_ply_memory_usage (ip4_mtrie_8_ply_t *p)
719 {
720   uword bytes, i;
721
722   bytes = sizeof (p[0]);
723   for (i = 0; i < ARRAY_LEN (p->leaves); i++)
724     {
725       ip4_mtrie_leaf_t l = p->leaves[i];
726       if (ip4_mtrie_leaf_is_next_ply (l))
727         bytes += mtrie_ply_memory_usage (get_next_ply_for_leaf (l));
728     }
729
730   return bytes;
731 }
732
733 /* Returns number of bytes of memory used by mtrie. */
734 uword
735 ip4_mtrie_16_memory_usage (ip4_mtrie_16_t *m)
736 {
737   uword bytes, i;
738
739   bytes = sizeof (*m);
740   for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
741     {
742       ip4_mtrie_leaf_t l = m->root_ply.leaves[i];
743       if (ip4_mtrie_leaf_is_next_ply (l))
744         bytes += mtrie_ply_memory_usage (get_next_ply_for_leaf (l));
745     }
746
747   return bytes;
748 }
749 uword
750 ip4_mtrie_8_memory_usage (ip4_mtrie_8_t *m)
751 {
752   ip4_mtrie_8_ply_t *root = pool_elt_at_index (ip4_ply_pool, m->root_ply);
753   uword bytes, i;
754
755   bytes = sizeof (*m);
756   for (i = 0; i < ARRAY_LEN (root->leaves); i++)
757     {
758       ip4_mtrie_leaf_t l = root->leaves[i];
759       if (ip4_mtrie_leaf_is_next_ply (l))
760         bytes += mtrie_ply_memory_usage (get_next_ply_for_leaf (l));
761     }
762
763   return bytes;
764 }
765
766 static u8 *
767 format_ip4_mtrie_leaf (u8 *s, va_list *va)
768 {
769   ip4_mtrie_leaf_t l = va_arg (*va, ip4_mtrie_leaf_t);
770
771   if (ip4_mtrie_leaf_is_terminal (l))
772     s = format (s, "lb-index %d", ip4_mtrie_leaf_get_adj_index (l));
773   else
774     s = format (s, "next ply %d", ip4_mtrie_leaf_get_next_ply_index (l));
775   return s;
776 }
777
/* Format one ply slot: reconstruct the covered address from the slot
 * position (_a, host order) shifted into place under _ply_max_len bits,
 * print it with the slot's prefix length, then recurse into the child
 * ply if the leaf (_p)->leaves[(_i)] is non-terminal.  Statement
 * expression: evaluates to the updated format string s. */
#define FORMAT_PLY(s, _p, _a, _i, _base_address, _ply_max_len, _indent)       \
  ({                                                                          \
    u32 a, ia_length;                                                         \
    ip4_address_t ia;                                                         \
    ip4_mtrie_leaf_t _l = (_p)->leaves[(_i)];                                 \
                                                                              \
    a = (_base_address) + ((_a) << (32 - (_ply_max_len)));                    \
    ia.as_u32 = clib_host_to_net_u32 (a);                                     \
    ia_length = (_p)->dst_address_bits_of_leaves[(_i)];                       \
    s = format (s, "\n%U%U %U", format_white_space, (_indent) + 4,            \
                format_ip4_address_and_length, &ia, ia_length,                \
                format_ip4_mtrie_leaf, _l);                                   \
                                                                              \
    if (ip4_mtrie_leaf_is_next_ply (_l))                                      \
      s = format (s, "\n%U", format_ip4_mtrie_ply, m, a, (_indent) + 8,       \
                  ip4_mtrie_leaf_get_next_ply_index (_l));                    \
    s;                                                                        \
  })
796
/**
 * Format one pool-resident 8-bit ply and, via FORMAT_PLY, recurse into
 * any child plies.  va_args: (ip4_mtrie_16_t *m, u32 base_address,
 * u32 indent, u32 ply_index).  NOTE: FORMAT_PLY references the local
 * 'm' by name, which is why the trie pointer must be in scope here.
 */
static u8 *
format_ip4_mtrie_ply (u8 *s, va_list *va)
{
  ip4_mtrie_16_t *m = va_arg (*va, ip4_mtrie_16_t *);
  u32 base_address = va_arg (*va, u32);
  u32 indent = va_arg (*va, u32);
  u32 ply_index = va_arg (*va, u32);
  ip4_mtrie_8_ply_t *p;
  int i;

  p = pool_elt_at_index (ip4_ply_pool, ply_index);
  s = format (s, "%Uply index %d, %d non-empty leaves",
              format_white_space, indent, ply_index, p->n_non_empty_leafs);

  for (i = 0; i < ARRAY_LEN (p->leaves); i++)
    {
      /* Only print slots more specific than the ply's covering prefix. */
      if (ip4_mtrie_leaf_is_non_empty (p, i))
        {
          s = FORMAT_PLY (s, p, i, i, base_address,
                          p->dst_address_bits_base + 8, indent);
        }
    }

  return s;
}
822
823 u8 *
824 format_ip4_mtrie_16 (u8 *s, va_list *va)
825 {
826   ip4_mtrie_16_t *m = va_arg (*va, ip4_mtrie_16_t *);
827   int verbose = va_arg (*va, int);
828   ip4_mtrie_16_ply_t *p;
829   u32 base_address = 0;
830   int i;
831
832   s =
833     format (s, "16-8-8: %d plies, memory usage %U\n", pool_elts (ip4_ply_pool),
834             format_memory_size, ip4_mtrie_16_memory_usage (m));
835   p = &m->root_ply;
836
837   if (verbose)
838     {
839       s = format (s, "root-ply");
840       p = &m->root_ply;
841
842       for (i = 0; i < ARRAY_LEN (p->leaves); i++)
843         {
844           u16 slot;
845
846           slot = clib_host_to_net_u16 (i);
847
848           if (p->dst_address_bits_of_leaves[slot] > 0)
849             {
850               s = FORMAT_PLY (s, p, i, slot, base_address, 16, 0);
851             }
852         }
853     }
854
855   return s;
856 }
857
858 u8 *
859 format_ip4_mtrie_8 (u8 *s, va_list *va)
860 {
861   ip4_mtrie_8_t *m = va_arg (*va, ip4_mtrie_8_t *);
862   int verbose = va_arg (*va, int);
863   ip4_mtrie_8_ply_t *root;
864   u32 base_address = 0;
865   u16 slot;
866
867   root = pool_elt_at_index (ip4_ply_pool, m->root_ply);
868
869   s = format (s, "8-8-8-8; %d plies, memory usage %U\n",
870               pool_elts (ip4_ply_pool), format_memory_size,
871               ip4_mtrie_8_memory_usage (m));
872
873   if (verbose)
874     {
875       s = format (s, "root-ply");
876
877       for (slot = 0; slot < ARRAY_LEN (root->leaves); slot++)
878         {
879           if (root->dst_address_bits_of_leaves[slot] > 0)
880             {
881               s = FORMAT_PLY (s, root, slot, slot, base_address, 8, 0);
882             }
883         }
884     }
885
886   return s;
887 }
888
/** Default heap size for the IPv4 mtries */
/* NOTE(review): neither constant below is referenced anywhere in this
 * file — candidates for removal; confirm no external users first. */
#define IP4_FIB_DEFAULT_MTRIE_HEAP_SIZE (32<<20)
#ifndef MAP_HUGE_SHIFT
#define MAP_HUGE_SHIFT 26
#endif
894
895 static clib_error_t *
896 ip4_mtrie_module_init (vlib_main_t * vm)
897 {
898   CLIB_UNUSED (ip4_mtrie_8_ply_t * p);
899   clib_error_t *error = NULL;
900
901   /* Burn one ply so index 0 is taken */
902   pool_get (ip4_ply_pool, p);
903
904   return (error);
905 }
906
907 VLIB_INIT_FUNCTION (ip4_mtrie_module_init);
908
909 /*
910  * fd.io coding-style-patch-verification: ON
911  *
912  * Local Variables:
913  * eval: (c-set-style "gnu")
914  * End:
915  */