70ce1bf7c39b0b17aca4f38929c226b1c07499d5
[vpp.git] / vnet / vnet / dpo / load_balance_map.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @brief
17  */
18 #include <vnet/fib/fib_path.h>
19 #include <vnet/fib/fib_node_list.h>
20 #include <vnet/dpo/load_balance_map.h>
21 #include <vnet/dpo/load_balance.h>
22
/**
 * A hash-table of load-balance maps by path index.
 * this provides the fast lookup of the LB map when a path goes down
 * (key: fib_node_index_t path index; value: fib_node_list_t — see
 *  module init, which creates it with sizeof(fib_node_list_t) values)
 */
static uword *lb_maps_by_path_index;

/**
 * A hash-table of load-balance maps by set of paths.
 * This provides the LB map sharing.
 * LB maps do not necessarily use all the paths in the list, since
 * the entry that is requesting the map, may not have an out-going
 * label for each of the paths.
 * (keys are either tagged pool indices or raw map pointers — see the
 *  load_balance_map_db_hash_key_* helpers)
 */
static uword *load_balance_map_db;
37
/**
 * Per-path state flags for a path referenced by a load-balance map.
 * NOTE(review): neither flag is read or written anywhere in this file —
 * presumably consumed elsewhere (or vestigial); verify before removing.
 */
typedef enum load_balance_map_path_flags_t_
{
    /** The path is up */
    LOAD_BALANCE_MAP_PATH_UP     = (1 << 0),
    /** The path is usable, e.g. it has the required out-going label */
    LOAD_BALANCE_MAP_PATH_USABLE = (1 << 1),
} __attribute__ ((packed)) load_balance_map_path_flags_t;
43
/**
 * A path tracked by a load-balance map: the path's identity, its
 * normalised weight, and the map's back-link into the per-path list.
 */
typedef struct load_balance_map_path_t_ {
    /**
     * Index of the path
     */
    fib_node_index_t lbmp_index;

    /**
     * Sibling Index in the list of all maps with this path index
     * (set when the map is inserted into lb_maps_by_path_index's list)
     */
    fib_node_index_t lbmp_sibling;

    /**
     * the normalised weight of the path
     */
    u32 lbmp_weight;

    /**
     * The state of the path
     */
    load_balance_map_path_flags_t lbmp_flags;
} load_balance_map_path_t;
65
/**
 * The global pool of LB maps
 */
load_balance_map_t *load_balance_map_pool;

/*
 * Debug macro: logs only when the module is built with FIB_DEBUG;
 * compiles to nothing otherwise.
 */
#ifdef FIB_DEBUG
#define LOAD_BALANCE_MAP_DBG(_pl, _fmt, _args...)       \
    {                                                   \
        clib_warning("lbm: FIXME" _fmt,                 \
                     ##_args);                          \
    }
#else
#define LOAD_BALANCE_MAP_DBG(_pl, _fmt, _args...)
#endif
83
84 static index_t
85 load_balance_map_get_index (load_balance_map_t *lbm)
86 {
87     return (lbm - load_balance_map_pool);
88 }
89
90 u8*
91 format_load_balance_map (u8 *s, va_list ap)
92 {
93     index_t lbmi = va_arg(ap, index_t);
94     u32 indent = va_arg(ap, u32);
95     load_balance_map_t *lbm;
96     u32 n_buckets, ii;
97
98     lbm = load_balance_map_get(lbmi);
99     n_buckets = vec_len(lbm->lbm_buckets);
100
101     s = format(s, "load-balance-map: index:%d buckets:%d", lbmi, n_buckets);
102     s = format(s, "\n%U index:", format_white_space, indent+2);
103     for (ii = 0; ii < n_buckets; ii++)
104     {
105         s = format(s, "%5d", ii);
106     }
107     s = format(s, "\n%U   map:", format_white_space, indent+2);
108     for (ii = 0; ii < n_buckets; ii++)
109     {
110         s = format(s, "%5d", lbm->lbm_buckets[ii]);
111     }
112
113     return (s);
114 }
115
116
/**
 * @brief Hash a map by its set of paths.
 * The hash is seeded with the number of paths and mixes in each path's
 * index, in order. Note the weights are NOT hashed, so the DB's equality
 * function must be at least as strict as this hash.
 */
static uword
load_balance_map_hash (load_balance_map_t *lbm)
{
    u32 old_lbm_hash, new_lbm_hash, hash;
    load_balance_map_path_t *lb_path;

    new_lbm_hash = old_lbm_hash = vec_len(lbm->lbm_paths);

    vec_foreach (lb_path, lbm->lbm_paths)
    {
        hash = lb_path->lbmp_index;
        /* hash_mix32 is a macro that updates old/new in place */
        hash_mix32(hash, old_lbm_hash, new_lbm_hash);
    }

    return (new_lbm_hash);
}
133
134 always_inline uword
135 load_balance_map_db_hash_key_from_index (uword index)
136 {
137     return 1 + 2*index;
138 }
139
/* A key with the LSB set is a tagged pool index; otherwise it is a
 * raw load_balance_map_t pointer. */
always_inline uword
load_balance_map_db_hash_key_is_index (uword key)
{
    return key & 1;
}
145
146 always_inline uword
147 load_balance_map_db_hash_key_2_index (uword key)
148 {
149     ASSERT (load_balance_map_db_hash_key_is_index (key));
150     return key / 2;
151 }
152
153 static load_balance_map_t*
154 load_balance_map_db_get_from_hash_key (uword key)
155 {
156     load_balance_map_t *lbm;
157
158     if (load_balance_map_db_hash_key_is_index (key))
159     {
160         index_t lbm_index;
161
162         lbm_index = load_balance_map_db_hash_key_2_index(key);
163         lbm = load_balance_map_get(lbm_index);
164     }
165     else
166     {
167         lbm = uword_to_pointer (key, load_balance_map_t *);
168     }
169
170     return (lbm);
171 }
172
173 static uword
174 load_balance_map_db_hash_key_sum (hash_t * h,
175                                   uword key)
176 {
177     load_balance_map_t *lbm;
178
179     lbm = load_balance_map_db_get_from_hash_key(key);
180
181     return (load_balance_map_hash(lbm));
182 }
183
184 static uword
185 load_balance_map_db_hash_key_equal (hash_t * h,
186                                     uword key1,
187                                     uword key2)
188 {
189     load_balance_map_t *lbm1, *lbm2;
190
191     lbm1 = load_balance_map_db_get_from_hash_key(key1);
192     lbm2 = load_balance_map_db_get_from_hash_key(key2);
193
194     return (load_balance_map_hash(lbm1) ==
195             load_balance_map_hash(lbm2));
196 }
197
198 static index_t
199 load_balance_map_db_find (load_balance_map_t *lbm)
200 {
201     uword *p;
202
203     p = hash_get(load_balance_map_db, lbm);
204
205     if (NULL != p)
206     {
207         return p[0];
208     }
209
210     return (FIB_NODE_INDEX_INVALID);
211 }
212
/**
 * @brief Insert a map into the sharing DB and into the per-path lists.
 * The map must not already be present. Each path's lbmp_sibling is set
 * to its position in that path's list so it can be removed later.
 */
static void
load_balance_map_db_insert (load_balance_map_t *lbm)
{
    load_balance_map_path_t *lbmp;
    fib_node_list_t list;
    uword *p;

    ASSERT(FIB_NODE_INDEX_INVALID == load_balance_map_db_find(lbm));

    /*
     * insert into the DB based on the set of paths.
     * (the key is the tagged pool index; the key-sum/equal callbacks
     * resolve it back to the map's contents)
     */
    hash_set (load_balance_map_db,
              load_balance_map_db_hash_key_from_index(
                  load_balance_map_get_index(lbm)),
              load_balance_map_get_index(lbm));

    /*
     * insert into each per-path list.
     */
    vec_foreach(lbmp, lbm->lbm_paths)
    {
        p = hash_get(lb_maps_by_path_index, lbmp->lbmp_index);

        if (NULL == p)
        {
            /* first map to use this path: create its list */
            list = fib_node_list_create();
            hash_set(lb_maps_by_path_index, lbmp->lbmp_index, list);
        }
        else
        {
            list = p[0];
        }

        /* NOTE(review): FIB_NODE_TYPE_FIRST appears to be a placeholder
         * type for the list entry; only the map index is consumed by the
         * walk callback — verify against fib_node_list's contract. */
        lbmp->lbmp_sibling =
            fib_node_list_push_front(list,
                                     0, FIB_NODE_TYPE_FIRST,
                                     load_balance_map_get_index(lbm));
    }

    LOAD_BALANCE_MAP_DBG(lbm, "DB-inserted");
}
255
/**
 * @brief Remove a map from the sharing DB and from each per-path list.
 * Mirror of load_balance_map_db_insert; the map must be present.
 * Note the per-path lists themselves are not destroyed when they
 * become empty.
 */
static void
load_balance_map_db_remove (load_balance_map_t *lbm)
{
    load_balance_map_path_t *lbmp;
    uword *p;

    ASSERT(FIB_NODE_INDEX_INVALID != load_balance_map_db_find(lbm));

    hash_unset(load_balance_map_db,
               load_balance_map_db_hash_key_from_index(
                   load_balance_map_get_index(lbm)));

    /*
     * remove from each per-path list.
     */
    vec_foreach(lbmp, lbm->lbm_paths)
    {
        p = hash_get(lb_maps_by_path_index, lbmp->lbmp_index);

        /* insert created this list; it must still exist */
        ASSERT(NULL != p);

        fib_node_list_remove(p[0], lbmp->lbmp_sibling);
    }

    LOAD_BALANCE_MAP_DBG(lbm, "DB-removed");
}
282
/**
 * @brief from the paths that are usable, fill the Map.
 * Re-stripes lbm_buckets so that buckets belonging to unresolved paths
 * are redirected, in round-robin order, to the buckets of the paths that
 * are still resolved — preserving the intended weight ratio among them.
 */
static void
load_balance_map_fill (load_balance_map_t *lbm)
{
    load_balance_map_path_t *lbmp;
    u32 n_buckets, bucket, ii, jj;
    u16 *tmp_buckets;

    tmp_buckets = NULL;
    n_buckets = vec_len(lbm->lbm_buckets);

    /*
     * run through the set of paths once, and build a vector of the
     * indices that are usable. we do this in a scratch space, since we
     * need to refer to it multiple times as we build the real buckets.
     */
    vec_validate(tmp_buckets, n_buckets-1);

    bucket = jj = 0;
    vec_foreach (lbmp, lbm->lbm_paths)
    {
        if (fib_path_is_resolved(lbmp->lbmp_index))
        {
            /* each path owns lbmp_weight consecutive buckets */
            for (ii = 0; ii < lbmp->lbmp_weight; ii++)
            {
                tmp_buckets[jj++] = bucket++;
            }
        }
        else
        {
            /* skip this path's bucket range; it is unusable */
            bucket += lbmp->lbmp_weight;
        }
    }
    /* shrink the scratch vector to the number of usable buckets */
    _vec_len(tmp_buckets) = jj;

    /*
     * If the number of temporaries written is as many as we need, implying
     * all paths were up, then we can simply copy the scratch area over the
     * actual buckets' memory
     */
    if (jj == n_buckets)
    {
        memcpy(lbm->lbm_buckets,
               tmp_buckets,
               sizeof(lbm->lbm_buckets[0]) * n_buckets);
    }
    else
    {
        /*
         * one or more paths are down.
         */
        if (0 == vec_len(tmp_buckets))
        {
            /*
             * if the scratch area is empty, then no paths are usable.
             * they will all drop. so use them all, lest we account drops
             * against only one.
             */
            for (bucket = 0; bucket < n_buckets; bucket++)
            {
                lbm->lbm_buckets[bucket] = bucket;
            }
        }
        else
        {
            bucket = jj = 0;
            vec_foreach (lbmp, lbm->lbm_paths)
            {
                if (fib_path_is_resolved(lbmp->lbmp_index))
                {
                    /* usable path: its buckets map to themselves */
                    for (ii = 0; ii < lbmp->lbmp_weight; ii++)
                    {
                        lbm->lbm_buckets[bucket] = bucket;
                        bucket++;
                    }
                }
                else
                {
                    /*
                     * path is unusable
                     * cycle through the scratch space selecting a index.
                     * this means we load balance, in the intended ratio,
                     * over the paths that are still usable.
                     */
                    for (ii = 0; ii < lbmp->lbmp_weight; ii++)
                    {
                        lbm->lbm_buckets[bucket] = tmp_buckets[jj];
                        jj = (jj + 1) % vec_len(tmp_buckets);
                        bucket++;
                    }
                }
            }
       }
    }

    vec_free(tmp_buckets);
}
382
383 static load_balance_map_t*
384 load_balance_map_alloc (const load_balance_path_t *paths)
385 {
386     load_balance_map_t *lbm;
387     u32 ii;
388
389     pool_get_aligned(load_balance_map_pool, lbm, CLIB_CACHE_LINE_BYTES);
390     memset(lbm, 0, sizeof(*lbm));
391
392     vec_validate(lbm->lbm_paths, vec_len(paths)-1);
393
394     vec_foreach_index(ii, paths)
395     {
396         lbm->lbm_paths[ii].lbmp_index  = paths[ii].path_index;
397         lbm->lbm_paths[ii].lbmp_weight = paths[ii].path_weight;
398     }
399
400     return (lbm);
401 }
402
/**
 * @brief Complete a freshly allocated map: size its bucket vector,
 * insert it into the sharing DB / per-path lists, then fill the buckets.
 * @return the same map, for caller convenience
 */
static load_balance_map_t *
load_balance_map_init (load_balance_map_t *lbm,
                       u32 n_buckets,
                       u32 sum_of_weights)
{
    lbm->lbm_sum_of_norm_weights = sum_of_weights;
    vec_validate(lbm->lbm_buckets, n_buckets-1);

    /* insert before fill so path-state walks can already find this map */
    load_balance_map_db_insert(lbm);

    load_balance_map_fill(lbm);

    return (lbm);
}
417
418 index_t
419 load_balance_map_add_or_lock (u32 n_buckets,
420                               u32 sum_of_weights,
421                               const load_balance_path_t *paths)
422 {
423     load_balance_map_t *tmp, *lbm;
424     index_t lbmi;
425
426     tmp = load_balance_map_alloc(paths);
427
428     lbmi = load_balance_map_db_find(tmp);
429
430     if (INDEX_INVALID == lbmi)
431     {
432         lbm = load_balance_map_init(tmp, n_buckets, sum_of_weights);
433     }
434     else
435     {
436         lbm = load_balance_map_get(lbmi);
437     }
438
439     lbm->lbm_locks++;
440
441     return (load_balance_map_get_index(lbm));
442 }
443
444 void
445 load_balance_map_lock (index_t lbmi)
446 {
447     load_balance_map_t *lbm;
448
449     lbm = load_balance_map_get(lbmi);
450
451     lbm->lbm_locks++;
452 }
453
454 void
455 load_balance_map_unlock (index_t lbmi)
456 {
457     load_balance_map_t *lbm;
458
459     if (INDEX_INVALID == lbmi)
460     {
461         return;
462     }
463
464     lbm = load_balance_map_get(lbmi);
465
466     lbm->lbm_locks--;
467
468     if (0 == lbm->lbm_locks)
469     {
470         load_balance_map_db_remove(lbm);
471         vec_free(lbm->lbm_paths);
472         vec_free(lbm->lbm_buckets);
473         pool_put(load_balance_map_pool, lbm);
474     }
475 }
476
477 static int
478 load_balance_map_path_state_change_walk (fib_node_ptr_t *fptr,
479                                          void *ctx)
480 {
481     load_balance_map_t *lbm;
482
483     lbm = load_balance_map_get(fptr->fnp_index);
484
485     load_balance_map_fill(lbm);
486
487     return (!0);
488 }
489
490 /**
491  * @brief the state of a path has changed (it has no doubt gone down).
492  * This is the trigger to perform a PIC edge cutover and update the maps
493  * to exclude this path.
494  */
495 void
496 load_balance_map_path_state_change (fib_node_index_t path_index)
497 {
498     uword *p;
499
500     /*
501      * re-stripe the buckets for each affect MAP
502      */
503     p = hash_get(lb_maps_by_path_index, path_index);
504
505     if (NULL == p)
506         return;
507
508     fib_node_list_walk(p[0], load_balance_map_path_state_change_walk, NULL);
509 }
510
/**
 * @brief Module initialisation: create the two load-balance-map DBs.
 * (The previous comment here described load_balance_map_add_or_lock —
 * a copy/paste error.)
 */
void
load_balance_map_module_init (void)
{
    /* sharing DB: content-hashed via the key_sum/key_equal callbacks */
    load_balance_map_db =
        hash_create2 (/* elts */ 0,
                      /* user */ 0,
                      /* value_bytes */ sizeof (index_t),
                      load_balance_map_db_hash_key_sum,
                      load_balance_map_db_hash_key_equal,
                      /* format pair/arg */
                      0, 0);

    /* path index -> list of maps using that path */
    lb_maps_by_path_index = hash_create(0, sizeof(fib_node_list_t));
}
528
/**
 * @brief Report this module's pool usage to the FIB memory display.
 */
void
load_balance_map_show_mem (void)
{
    fib_show_memory_usage("Load-Balance Map",
                          pool_elts(load_balance_map_pool),
                          pool_len(load_balance_map_pool),
                          sizeof(load_balance_map_t));
}
537
/**
 * @brief CLI handler for "show load-balance-map [<index>]".
 * With an index, shows that one map; otherwise dumps every map in
 * the pool.
 */
static clib_error_t *
load_balance_map_show (vlib_main_t * vm,
                       unformat_input_t * input,
                       vlib_cli_command_t * cmd)
{
    index_t lbmi = INDEX_INVALID;

    while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
        /* optional numeric map index */
        if (unformat (input, "%d", &lbmi))
            ;
        else
            break;
    }

    if (INDEX_INVALID != lbmi)
    {
        vlib_cli_output (vm, "%U", format_load_balance_map, lbmi, 0);
    }
    else
    {
        load_balance_map_t *lbm;

        pool_foreach(lbm, load_balance_map_pool,
        ({
            vlib_cli_output (vm, "%U", format_load_balance_map,
                             load_balance_map_get_index(lbm), 0);
        }));
    }

    return 0;
}
570
/* CLI registration for the show command above */
VLIB_CLI_COMMAND (load_balance_map_show_command, static) = {
    .path = "show load-balance-map",
    .short_help = "show load-balance-map [<index>]",
    .function = load_balance_map_show,
};