1 /* SPDX-License-Identifier: Apache-2.0
2 * Copyright(c) 2022 Cisco Systems, Inc.
5 #include <cnat/cnat_maglev.h>
8 cnat_maglev_perm_compare (void *_a, void *_b)
10 return *(u64 *) _b - *(u64 *) _a;
14 * Maglev algorithm implementation. This takes permutation as input,
15 * with the values of offset & skip for the backends.
16 * It fills buckets matching the permuntations, provided buckets is
17 * already of length at least M
20 cnat_maglev_shuffle (cnat_maglev_perm_t *permutation, u32 *buckets)
22 u32 N, M, i, done = 0;
25 N = vec_len (permutation);
29 M = vec_len (buckets);
32 vec_set (buckets, -1);
34 vec_validate (next, N - 1);
39 for (i = 0; i < N; i++)
41 u32 c = (permutation[i].offset + next[i] * permutation[i].skip) % M;
42 while (buckets[c] != (u32) -1)
45 c = (permutation[i].offset + next[i] * permutation[i].skip) % M;
48 buckets[c] = permutation[i].index;
62 cnat_translation_init_maglev (cnat_translation_t *ct)
64 cnat_maglev_perm_t *permutations = NULL;
65 cnat_main_t *cm = &cnat_main;
67 u32 backend_index = 0;
69 if (vec_len (ct->ct_active_paths) == 0)
72 vec_foreach (trk, ct->ct_active_paths)
74 cnat_maglev_perm_t permutation;
77 if (AF_IP4 == ip_addr_version (&trk->ct_ep[VLIB_TX].ce_ip))
80 a = ip_addr_v4 (&trk->ct_ep[VLIB_TX].ce_ip).data_u32;
81 b = (u64) trk->ct_ep[VLIB_TX].ce_port;
83 hash_v3_mix32 (a, b, c);
84 hash_v3_finalize32 (a, b, c);
91 a = ip_addr_v6 (&trk->ct_ep[VLIB_TX].ce_ip).as_u64[0];
92 b = ip_addr_v6 (&trk->ct_ep[VLIB_TX].ce_ip).as_u64[1];
93 c = (u64) trk->ct_ep[VLIB_TX].ce_port;
99 permutation.offset = h1 % cm->maglev_len;
100 permutation.skip = h2 % (cm->maglev_len - 1) + 1;
101 permutation.index = backend_index++;
103 if (trk->ct_flags & CNAT_TRK_FLAG_TEST_DISABLED)
106 vec_add1 (permutations, permutation);
109 vec_sort_with_function (permutations, cnat_maglev_perm_compare);
111 vec_validate (ct->lb_maglev, cm->maglev_len - 1);
113 cnat_maglev_shuffle (permutations, ct->lb_maglev);
115 vec_free (permutations);
119 cnat_u32_vec_contains (u32 *v, u32 e)
123 vec_foreach_index (i, v)
131 cnat_maglev_print_changes (vlib_main_t *vm, u32 *changed_bk_indices,
132 u32 *old_maglev_lb, u32 *new_maglev_lb)
134 u32 good_flow_buckets = 0, reset_flow_buckets = 0, stable_to_reset = 0;
135 u32 reset_to_stable = 0, switched_stable = 0;
136 if (vec_len (new_maglev_lb) == 0)
138 for (u32 i = 0; i < vec_len (new_maglev_lb); i++)
141 cnat_u32_vec_contains (changed_bk_indices, new_maglev_lb[i]);
143 cnat_u32_vec_contains (changed_bk_indices, old_maglev_lb[i]);
144 if (new_maglev_lb[i] == old_maglev_lb[i])
147 reset_flow_buckets++;
155 else if (is_old_changed)
162 "good B->B:%d | lost A->A':%d A->B:%d ~%0.2f%% | bad "
163 "B->A':%d B->C:%d ~%0.2f%%",
164 good_flow_buckets, reset_flow_buckets, reset_to_stable,
165 (f64) (reset_flow_buckets + reset_to_stable) /
166 vec_len (new_maglev_lb) * 100.0,
167 stable_to_reset, switched_stable,
168 (f64) (stable_to_reset + switched_stable) /
169 vec_len (new_maglev_lb) * 100.0);
173 format_cnat_maglev_buckets (u8 *s, va_list *args)
175 u32 *buckets = va_arg (*args, u32 *);
176 u32 backend_idx = va_arg (*args, u32);
177 u32 count = va_arg (*args, u32);
179 for (u32 ii = 0; ii < vec_len (buckets); ii++)
180 if (buckets[ii] == backend_idx)
182 s = format (s, "%d,", ii);
189 static clib_error_t *
190 cnat_translation_test_init_maglev (vlib_main_t *vm, unformat_input_t *input,
191 vlib_cli_command_t *cmd)
193 cnat_translation_t *trs = 0, *ct;
194 u64 num_backends = 0, n_tests = 0;
195 cnat_main_t *cm = &cnat_main;
198 u32 n_changes = 0, n_remove = 0, verbose = 0;
200 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
202 if (unformat (input, "tests %d", &n_tests))
204 else if (unformat (input, "backends %d", &num_backends))
206 else if (unformat (input, "len %d", &cm->maglev_len))
208 else if (unformat (input, "change %d", &n_changes))
210 else if (unformat (input, "rm %d", &n_remove))
212 else if (unformat (input, "verbose %d", &verbose))
215 return (clib_error_return (0, "unknown input '%U'",
216 format_unformat_error, input));
219 if (num_backends == 0 || n_tests == 0)
220 return (clib_error_return (0, "No backends / tests to run"));
223 vlib_cli_output (vm, "generating random backends...");
224 rnd = random_default_seed ();
226 vec_validate (trs, n_tests - 1);
227 vec_foreach (ct, trs)
229 vec_validate (ct->ct_active_paths, num_backends - 1);
230 vec_foreach (trk, ct->ct_active_paths)
233 ip_addr_version (&trk->ct_ep[VLIB_TX].ce_ip) = AF_IP4;
234 ip_addr_v4 (&trk->ct_ep[VLIB_TX].ce_ip).data_u32 = random_u32 (&rnd);
235 trk->ct_ep[VLIB_TX].ce_port = random_u32 (&rnd);
239 vlib_cli_output (vm, "testing...");
240 f64 start_time = vlib_time_now (vm);
241 vec_foreach (ct, trs)
242 cnat_translation_init_maglev (ct);
243 f64 d = vlib_time_now (vm) - start_time;
245 vlib_cli_output (vm, "Test took : %U", format_duration, d);
246 vlib_cli_output (vm, "Per pool : %U", format_duration, d / n_tests);
248 /* sanity checking of the output */
249 u32 *backend_freqs = 0;
250 vec_validate (backend_freqs, num_backends - 1);
251 vec_foreach (ct, trs)
253 if (vec_len (ct->lb_maglev) != cm->maglev_len)
254 vlib_cli_output (vm, "Unexpected bucket length %d",
255 vec_len (ct->lb_maglev));
257 vec_zero (backend_freqs);
258 for (u32 i = 0; i < vec_len (ct->lb_maglev); i++)
260 if (ct->lb_maglev[i] >= num_backends)
261 clib_warning ("out of bound backend");
262 backend_freqs[ct->lb_maglev[i]]++;
264 u32 fmin = ~0, fmax = 0;
265 for (u32 i = 0; i < num_backends; i++)
267 if (backend_freqs[i] > fmax)
268 fmax = backend_freqs[i];
269 if (backend_freqs[i] < fmin)
270 fmin = backend_freqs[i];
272 f64 fdiff = (fmax - fmin);
273 if (fdiff / vec_len (ct->lb_maglev) - 1 > 0.02)
274 vlib_cli_output (vm, "More than 2%% frequency diff (min %d max %d)",
277 vec_free (backend_freqs);
281 vec_foreach (ct, trs)
283 vlib_cli_output (vm, "Translation %d", i++);
284 for (u32 i = 0; i < verbose; i++)
286 u32 j = random_u32 (&rnd) % vec_len (ct->ct_active_paths);
287 trk = &ct->ct_active_paths[j];
289 vm, "[%03d] %U:%d buckets:%U", j, format_ip_address,
290 &trk->ct_ep[VLIB_TX].ce_ip, trk->ct_ep[VLIB_TX].ce_port,
291 format_cnat_maglev_buckets, ct->lb_maglev, j, verbose);
298 vm, "Removing %d entries (refered to as A), others (B,C) stay same",
300 vec_foreach (ct, trs)
302 u32 *old_maglev_lb = 0;
303 u32 *changed_bk_indices = 0;
304 if (vec_len (ct->lb_maglev) != cm->maglev_len)
305 vlib_cli_output (vm, "Unexpected bucket length %d",
306 vec_len (ct->lb_maglev));
308 vec_validate (changed_bk_indices, n_remove - 1);
309 for (u32 i = 0; i < n_remove; i++)
311 /* remove n_remove backends from the LB set */
312 changed_bk_indices[i] =
313 random_u32 (&rnd) % vec_len (ct->ct_active_paths);
314 trk = &ct->ct_active_paths[changed_bk_indices[i]];
315 trk->ct_flags |= CNAT_TRK_FLAG_TEST_DISABLED;
318 old_maglev_lb = vec_dup (ct->lb_maglev);
319 cnat_translation_init_maglev (ct);
321 cnat_maglev_print_changes (vm, changed_bk_indices, old_maglev_lb,
324 vec_free (changed_bk_indices);
325 vec_free (old_maglev_lb);
329 /* Reshuffle and check changes */
334 "Changing %d entries (refered to as A->A'), others (B,C) stay same",
336 vec_foreach (ct, trs)
338 if (vec_len (ct->lb_maglev) != cm->maglev_len)
339 vlib_cli_output (vm, "Unexpected bucket length %d",
340 vec_len (ct->lb_maglev));
342 u32 *old_maglev_lb = 0;
343 u32 *changed_bk_indices = 0;
345 vec_validate (changed_bk_indices, n_changes - 1);
346 for (u32 i = 0; i < n_changes; i++)
348 /* Change n_changes backends in the LB set */
349 changed_bk_indices[i] =
350 random_u32 (&rnd) % vec_len (ct->ct_active_paths);
351 trk = &ct->ct_active_paths[changed_bk_indices[i]];
352 ip_addr_v4 (&trk->ct_ep[VLIB_TX].ce_ip).data_u32 =
354 trk->ct_ep[VLIB_TX].ce_port = random_u32 (&rnd) & 0xffff;
356 old_maglev_lb = vec_dup (ct->lb_maglev);
358 cnat_translation_init_maglev (ct);
359 cnat_maglev_print_changes (vm, changed_bk_indices, old_maglev_lb,
362 vec_free (changed_bk_indices);
363 vec_free (old_maglev_lb);
367 vec_foreach (ct, trs)
368 vec_free (ct->ct_active_paths);
374 VLIB_CLI_COMMAND (cnat_translation_test_init_maglev_cmd, static) = {
375 .path = "test cnat maglev",
376 .short_help = "test cnat maglev tests [n_tests] backends [num_backends] len "
378 .function = cnat_translation_test_init_maglev,