cnat_translation_t *cnat_translation_pool;
clib_bihash_8_8_t cnat_translation_db;
addr_resolution_t *tr_resolutions;
-
-typedef void (*cnat_if_addr_add_cb_t) (addr_resolution_t * ar,
- ip_address_t * address, u8 is_del);
cnat_if_addr_add_cb_t *cnat_if_addr_add_cbs;
static fib_node_type_t cnat_translation_fib_node_type;
/* Delete tr resolution entries matching translation index */
addr_resolution_t *ar;
index_t *indexes = 0, *ari;
- /* *INDENT-OFF* */
- pool_foreach (ar, tr_resolutions, ({
- if ((cti == INDEX_INVALID || ar->cti == cti) &&
- (ar->type == type || CNAT_RESOLV_ADDR_ANY == type))
- vec_add1(indexes, ar - tr_resolutions);
- }));
- /* *INDENT-ON* */
+ pool_foreach (ar, tr_resolutions)
+ {
+ if ((cti == INDEX_INVALID || ar->cti == cti) &&
+ (ar->type == type || CNAT_RESOLV_ADDR_ANY == type))
+ vec_add1 (indexes, ar - tr_resolutions);
+ }
vec_foreach (ari, indexes) pool_put_index (tr_resolutions, *ari);
vec_free (indexes);
cnat_tracker_release (cnat_ep_trk_t * trk)
{
/* We only track fully resolved endpoints */
- if (!trk->is_active)
+ if (!(trk->ct_flags & CNAT_TRK_ACTIVE))
return;
fib_entry_untrack (trk->ct_fei, trk->ct_sibling);
}
{
fib_prefix_t pfx;
/* We only track fully resolved endpoints */
- trk->is_active = trk->ct_ep[VLIB_TX].ce_flags & CNAT_EP_FLAG_RESOLVED
- && trk->ct_ep[VLIB_RX].ce_flags & CNAT_EP_FLAG_RESOLVED;
- if (!trk->is_active)
- return;
+ if (trk->ct_ep[VLIB_TX].ce_flags & CNAT_EP_FLAG_RESOLVED &&
+ trk->ct_ep[VLIB_RX].ce_flags & CNAT_EP_FLAG_RESOLVED)
+ trk->ct_flags |= CNAT_TRK_ACTIVE;
+ else
+ {
+ trk->ct_flags &= ~CNAT_TRK_ACTIVE;
+ return;
+ }
ip_address_to_fib_prefix (&trk->ct_ep[VLIB_TX].ce_ip, &pfx);
trk->ct_fei = fib_entry_track (CNAT_FIB_TABLE,
(pfx.fp_proto), &trk->ct_dpo);
}
+u8 *
+format_cnat_lb_type (u8 *s, va_list *args)
+{
+ cnat_lb_type_t lb_type = va_arg (*args, int);
+ if (CNAT_LB_DEFAULT == lb_type)
+ s = format (s, "default");
+ else if (CNAT_LB_MAGLEV == lb_type)
+ s = format (s, "maglev");
+ else
+ s = format (s, "unknown");
+ return (s);
+}
+
+uword
+unformat_cnat_lb_type (unformat_input_t *input, va_list *args)
+{
+ cnat_lb_type_t *a = va_arg (*args, cnat_lb_type_t *);
+ if (unformat (input, "default"))
+ *a = CNAT_LB_DEFAULT;
+ else if (unformat (input, "maglev"))
+ *a = CNAT_LB_MAGLEV;
+ else
+ return 0;
+ return 1;
+}
+
/**
* Add a translation to the bihash
*
clib_bihash_add_del_8_8 (&cnat_translation_db, &bkey, 0);
}
+typedef struct
+{
+ cnat_ep_trk_t *trk;
+ u32 index;
+ u32 offset;
+ u32 skip;
+} cnat_maglev_entry_t;
+
+static int
+cnat_maglev_entry_compare (void *_a, void *_b)
+{
+ cnat_ep_trk_t *a = ((cnat_maglev_entry_t *) _a)->trk;
+ cnat_ep_trk_t *b = ((cnat_maglev_entry_t *) _b)->trk;
+ int rv = 0;
+ if ((rv =
+ ip_address_cmp (&a->ct_ep[VLIB_TX].ce_ip, &b->ct_ep[VLIB_TX].ce_ip)))
+ return rv;
+ if ((rv = a->ct_ep[VLIB_TX].ce_port - a->ct_ep[VLIB_TX].ce_port))
+ return rv;
+ if ((rv =
+ ip_address_cmp (&a->ct_ep[VLIB_RX].ce_ip, &b->ct_ep[VLIB_RX].ce_ip)))
+ return rv;
+ if ((rv = a->ct_ep[VLIB_RX].ce_port - a->ct_ep[VLIB_RX].ce_port))
+ return rv;
+ return 0;
+}
+
+static void
+cnat_translation_init_maglev (cnat_translation_t *ct)
+{
+ cnat_maglev_entry_t *backends = NULL, *bk;
+ cnat_main_t *cm = &cnat_main;
+ u32 done = 0;
+ cnat_ep_trk_t *trk;
+ int ep_idx = 0;
+
+ vec_foreach (trk, ct->ct_active_paths)
+ {
+ cnat_maglev_entry_t bk;
+ u32 h1, h2;
+
+ if (AF_IP4 == ip_addr_version (&trk->ct_ep[VLIB_TX].ce_ip))
+ {
+ u32 a, b, c;
+ a = ip_addr_v4 (&trk->ct_ep[VLIB_TX].ce_ip).data_u32;
+ b = (u64) trk->ct_ep[VLIB_TX].ce_port << 16 |
+ (u64) trk->ct_ep[VLIB_RX].ce_port;
+ c = ip_addr_v4 (&trk->ct_ep[VLIB_RX].ce_ip).data_u32;
+ hash_v3_mix32 (a, b, c);
+ hash_v3_finalize32 (a, b, c);
+ h1 = c;
+ h2 = b;
+ }
+ else
+ {
+ u64 a, b, c;
+ a = ip_addr_v6 (&trk->ct_ep[VLIB_TX].ce_ip).as_u64[0] ^
+ ip_addr_v6 (&trk->ct_ep[VLIB_TX].ce_ip).as_u64[1];
+ b = (u64) trk->ct_ep[VLIB_TX].ce_port << 16 |
+ (u64) trk->ct_ep[VLIB_RX].ce_port;
+ c = ip_addr_v6 (&trk->ct_ep[VLIB_RX].ce_ip).as_u64[0] ^
+ ip_addr_v6 (&trk->ct_ep[VLIB_RX].ce_ip).as_u64[1];
+ hash_mix64 (a, b, c);
+ h1 = c;
+ h2 = b;
+ }
+
+ bk.offset = h1 % cm->maglev_len;
+ bk.skip = h2 % (cm->maglev_len - 1) + 1;
+ bk.index = ep_idx++;
+ bk.trk = trk;
+ vec_add1 (backends, bk);
+ }
+
+ if (0 == ep_idx)
+ return;
+
+ vec_sort_with_function (backends, cnat_maglev_entry_compare);
+
+ /* Don't free if previous vector exists, just zero */
+ vec_validate (ct->lb_maglev, cm->maglev_len);
+ vec_set (ct->lb_maglev, -1);
+
+ while (1)
+ {
+ vec_foreach (bk, backends)
+ {
+ u32 next = 0;
+ u32 c = (bk->offset + next * bk->skip) % cm->maglev_len;
+ while (ct->lb_maglev[c] != (u32) -1)
+ {
+ next++;
+ c = (bk->offset + next * bk->skip) % cm->maglev_len;
+ }
+ ct->lb_maglev[c] = bk->index;
+ done++;
+ if (done == cm->maglev_len)
+ goto finished;
+ }
+ }
+
+finished:
+ vec_free (backends);
+}
+
static void
cnat_translation_stack (cnat_translation_t * ct)
{
fproto = ip_address_family_to_fib_proto (ct->ct_vip.ce_ip.version);
dproto = fib_proto_to_dpo (fproto);
- vec_foreach (trk, ct->ct_paths) if (trk->is_active)
- ep_idx++;
+ vec_reset_length (ct->ct_active_paths);
+
+ vec_foreach (trk, ct->ct_paths)
+ if (trk->ct_flags & CNAT_TRK_ACTIVE)
+ vec_add1 (ct->ct_active_paths, *trk);
- lbi = load_balance_create (ep_idx, fib_proto_to_dpo (fproto),
- IP_FLOW_HASH_DEFAULT);
+ lbi = load_balance_create (vec_len (ct->ct_active_paths),
+ fib_proto_to_dpo (fproto), IP_FLOW_HASH_DEFAULT);
ep_idx = 0;
- vec_foreach (trk, ct->ct_paths) if (trk->is_active)
+ vec_foreach (trk, ct->ct_active_paths)
load_balance_set_bucket (lbi, ep_idx++, &trk->ct_dpo);
+ if (ep_idx > 0 && CNAT_LB_MAGLEV == ct->lb_type)
+ cnat_translation_init_maglev (ct);
+
dpo_set (&ct->ct_lb, DPO_LOAD_BALANCE, dproto, lbi);
dpo_stack (cnat_client_dpo, dproto, &ct->ct_lb, &ct->ct_lb);
ct->flags |= CNAT_TRANSLATION_STACKED;
dpo_reset (&ct->ct_lb);
- vec_foreach (trk, ct->ct_paths) cnat_tracker_release (trk);
+ vec_foreach (trk, ct->ct_active_paths)
+ cnat_tracker_release (trk);
cnat_remove_translation_from_db (ct->ct_cci, &ct->ct_vip, ct->ct_proto);
cnat_client_translation_deleted (ct->ct_cci);
}
u32
-cnat_translation_update (cnat_endpoint_t * vip,
- ip_protocol_t proto,
- cnat_endpoint_tuple_t * paths, u8 flags)
+cnat_translation_update (cnat_endpoint_t *vip, ip_protocol_t proto,
+ cnat_endpoint_tuple_t *paths, u8 flags,
+ cnat_lb_type_t lb_type)
{
cnat_endpoint_tuple_t *path;
const cnat_client_t *cc;
ct->ct_proto = proto;
ct->ct_cci = cci;
ct->index = ct - cnat_translation_pool;
+ ct->lb_type = lb_type;
cnat_add_translation_to_db (cci, vip, proto, ct->index);
cnat_client_translation_added (cci);
sizeof (trk->ct_ep[VLIB_TX]));
clib_memcpy (&trk->ct_ep[VLIB_RX], &path->src_ep,
sizeof (trk->ct_ep[VLIB_RX]));
+ trk->ct_flags = path->ep_flags;
cnat_tracker_track (ct->index, trk);
}
{
u32 api;
- /* *INDENT-OFF* */
- pool_foreach_index(api, cnat_translation_pool,
- ({
+ pool_foreach_index (api, cnat_translation_pool)
+ {
if (!cb(api, ctx))
break;
- }));
- /* *INDENT-ON* */
+ }
}
static u8 *
format_cnat_translation (u8 * s, va_list * args)
{
cnat_translation_t *ct = va_arg (*args, cnat_translation_t *);
+ cnat_main_t *cm = &cnat_main;
cnat_ep_trk_t *ck;
s = format (s, "[%d] ", ct->index);
- s = format (s, "%U %U", format_cnat_endpoint, &ct->ct_vip,
+ s = format (s, "%U %U ", format_cnat_endpoint, &ct->ct_vip,
format_ip_protocol, ct->ct_proto);
+ s = format (s, "lb:%U ", format_cnat_lb_type, ct->lb_type);
vec_foreach (ck, ct->ct_paths)
s = format (s, "\n%U", format_cnat_ep_trk, ck, 2);
format_white_space, 2, format_dpo_id, &ct->ct_lb, 2);
}
+ u32 bid = 0;
+ if (CNAT_LB_MAGLEV == ct->lb_type)
+ {
+ s = format (s, "\nmaglev backends map");
+ uword *bitmap = NULL;
+ clib_bitmap_alloc (bitmap, cm->maglev_len);
+ vec_foreach (ck, ct->ct_paths)
+ {
+ clib_bitmap_zero (bitmap);
+ for (u32 i = 0; i < vec_len (ct->lb_maglev); i++)
+ if (ct->lb_maglev[i] == bid)
+ clib_bitmap_set (bitmap, i, 1);
+ s = format (s, "\n backend#%d: %U", bid, format_bitmap_hex, bitmap);
+
+ bid++;
+ }
+ clib_bitmap_free (bitmap);
+ }
+
return (s);
}
if (INDEX_INVALID == cti)
{
- /* *INDENT-OFF* */
- pool_foreach_index(cti, cnat_translation_pool,
- ({
+ pool_foreach_index (cti, cnat_translation_pool)
+ {
ct = pool_elt_at_index (cnat_translation_pool, cti);
vlib_cli_output(vm, "%U", format_cnat_translation, ct);
- }));
- /* *INDENT-ON* */
+ }
}
else
{
/* purge all the translations */
index_t tri, *trp, *trs = NULL;
- /* *INDENT-OFF* */
- pool_foreach_index(tri, cnat_translation_pool,
- ({
+ pool_foreach_index (tri, cnat_translation_pool)
+ {
vec_add1(trs, tri);
- }));
- /* *INDENT-ON* */
+ }
vec_foreach (trp, trs) cnat_translation_delete (*trp);
return (0);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cnat_translation_show_cmd_node, static) = {
.path = "show cnat translation",
.function = cnat_translation_show,
.short_help = "show cnat translation <VIP>",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static fib_node_t *
cnat_translation_get_node (fib_node_index_t index)
cnat_endpoint_tuple_t tmp, *paths = NULL, *path;
unformat_input_t _line_input, *line_input = &_line_input;
clib_error_t *e = 0;
+ cnat_lb_type_t lb_type;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
vec_add2 (paths, path, 1);
clib_memcpy (path, &tmp, sizeof (cnat_endpoint_tuple_t));
}
+ else if (unformat (line_input, "%U", unformat_cnat_lb_type, &lb_type))
+ ;
else
{
e = clib_error_return (0, "unknown input '%U'",
}
if (INDEX_INVALID == del_index)
- cnat_translation_update (&vip, proto, paths, flags);
+ cnat_translation_update (&vip, proto, paths, flags, lb_type);
else
cnat_translation_delete (del_index);
return (e);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cnat_translation_cli_add_del_command, static) =
{
.path = "cnat translation",
.short_help = "cnat translation [add|del] proto [TCP|UDP] [vip|real] [ip|sw_if_index [v6]] [port] [to [ip|sw_if_index [v6]] [port]->[ip|sw_if_index [v6]] [port]]",
.function = cnat_translation_cli_add_del,
};
-/* *INDENT-ON* */
static void
cnat_if_addr_add_del_translation_cb (addr_resolution_t * ar,
ct->flags |= CNAT_TRANSLATION_STACKED;
}
-static void
-cnat_if_addr_add_del_snat_cb (addr_resolution_t * ar, ip_address_t * address,
- u8 is_del)
-{
- cnat_endpoint_t *ep;
- ep = AF_IP4 == ar->af ? &cnat_main.snat_ip4 : &cnat_main.snat_ip6;
-
- if (!is_del && ep->ce_flags & CNAT_EP_FLAG_RESOLVED)
- return;
-
- if (is_del)
- {
- ep->ce_flags &= ~CNAT_EP_FLAG_RESOLVED;
- /* Are there remaining addresses ? */
- if (0 == cnat_resolve_addr (ar->sw_if_index, ar->af, address))
- is_del = 0;
- }
-
- if (!is_del)
- {
- ip_address_copy (&ep->ce_ip, address);
- ep->ce_flags |= CNAT_EP_FLAG_RESOLVED;
- }
-
-}
-
static void
cnat_if_addr_add_del_callback (u32 sw_if_index, ip_address_t * address,
u8 is_del)
{
addr_resolution_t *ar;
- /* *INDENT-OFF* */
- pool_foreach (ar, tr_resolutions, ({
- if (ar->sw_if_index != sw_if_index)
- continue;
- if (ar->af != ip_addr_version (address))
- continue;
- cnat_if_addr_add_cbs[ar->type] (ar, address, is_del);
- }));
- /* *INDENT-ON* */
+ pool_foreach (ar, tr_resolutions)
+ {
+ if (ar->sw_if_index != sw_if_index)
+ continue;
+ if (ar->af != ip_addr_version (address))
+ continue;
+ cnat_if_addr_add_cbs[ar->type](ar, address, is_del);
+ }
}
static void
cnat_if_addr_add_del_callback (sw_if_index, &addr, is_del);
}
+void
+cnat_translation_register_addr_add_cb (cnat_addr_resol_type_t typ,
+ cnat_if_addr_add_cb_t fn)
+{
+ vec_validate (cnat_if_addr_add_cbs, CNAT_ADDR_N_RESOLUTIONS);
+ cnat_if_addr_add_cbs[typ] = fn;
+}
+
static clib_error_t *
cnat_translation_init (vlib_main_t * vm)
{
cb6.function = cnat_ip6_if_addr_add_del_callback;
vec_add1 (i6m->add_del_interface_address_callbacks, cb6);
- vec_validate (cnat_if_addr_add_cbs, CNAT_ADDR_N_RESOLUTIONS);
- cnat_if_addr_add_cbs[CNAT_RESOLV_ADDR_BACKEND] =
- cnat_if_addr_add_del_backend_cb;
- cnat_if_addr_add_cbs[CNAT_RESOLV_ADDR_SNAT] = cnat_if_addr_add_del_snat_cb;
- cnat_if_addr_add_cbs[CNAT_RESOLV_ADDR_TRANSLATION] =
- cnat_if_addr_add_del_translation_cb;
+ cnat_translation_register_addr_add_cb (CNAT_RESOLV_ADDR_BACKEND,
+ cnat_if_addr_add_del_backend_cb);
+ cnat_translation_register_addr_add_cb (CNAT_RESOLV_ADDR_TRANSLATION,
+ cnat_if_addr_add_del_translation_cb);
+
return (NULL);
}