#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
#include <vnet/api_errno.h>
-#include <vnet/udp/udp.h>
+#include <vnet/udp/udp_local.h>
+#include <vppinfra/lock.h>
//GC runs at most once every LB_GARBAGE_RUN seconds
#define LB_GARBAGE_RUN 60
//After LB_CONCURRENCY_TIMEOUT seconds it is assumed that an inter-core race condition can no longer occur.
#define LB_CONCURRENCY_TIMEOUT 10
+// FIB source for adding routes
+static fib_source_t lb_fib_src;
+
lb_main_t lb_main;
-#define lb_get_writer_lock() do {} while(__sync_lock_test_and_set (lb_main.writer_lock, 1))
-#define lb_put_writer_lock() lb_main.writer_lock[0] = 0
+#define lb_get_writer_lock() clib_spinlock_lock (&lb_main.writer_lock)
+#define lb_put_writer_lock() clib_spinlock_unlock (&lb_main.writer_lock)
static void lb_as_stack (lb_as_t *as);
[DPO_PROTO_IP6] = lb_dpo_nat6_ip6_port,
};
+const static char *const lb_dpo_gre4_ip4_sticky[] = { "lb4-gre4-sticky",
+ NULL };
+const static char *const lb_dpo_gre4_ip6_sticky[] = { "lb6-gre4-sticky",
+ NULL };
+const static char *const *const lb_dpo_gre4_sticky_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = lb_dpo_gre4_ip4_sticky,
+ [DPO_PROTO_IP6] = lb_dpo_gre4_ip6_sticky,
+};
+
+const static char *const lb_dpo_gre6_ip4_sticky[] = { "lb4-gre6-sticky",
+ NULL };
+const static char *const lb_dpo_gre6_ip6_sticky[] = { "lb6-gre6-sticky",
+ NULL };
+const static char *const *const lb_dpo_gre6_sticky_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = lb_dpo_gre6_ip4_sticky,
+ [DPO_PROTO_IP6] = lb_dpo_gre6_ip6_sticky,
+};
+
+const static char *const lb_dpo_gre4_ip4_port_sticky[] = {
+ "lb4-gre4-port-sticky", NULL
+};
+const static char *const lb_dpo_gre4_ip6_port_sticky[] = {
+ "lb6-gre4-port-sticky", NULL
+};
+const static char *const
+ *const lb_dpo_gre4_port_sticky_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = lb_dpo_gre4_ip4_port_sticky,
+ [DPO_PROTO_IP6] = lb_dpo_gre4_ip6_port_sticky,
+ };
+
+const static char *const lb_dpo_gre6_ip4_port_sticky[] = {
+ "lb4-gre6-port-sticky", NULL
+};
+const static char *const lb_dpo_gre6_ip6_port_sticky[] = {
+ "lb6-gre6-port-sticky", NULL
+};
+const static char *const
+ *const lb_dpo_gre6_port_sticky_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = lb_dpo_gre6_ip4_port_sticky,
+ [DPO_PROTO_IP6] = lb_dpo_gre6_ip6_port_sticky,
+ };
+
+const static char *const lb_dpo_l3dsr_ip4_sticky[] = { "lb4-l3dsr-sticky",
+ NULL };
+const static char *const *const lb_dpo_l3dsr_sticky_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = lb_dpo_l3dsr_ip4_sticky,
+};
+
+const static char *const lb_dpo_l3dsr_ip4_port_sticky[] = {
+ "lb4-l3dsr-port-sticky", NULL
+};
+const static char *const
+ *const lb_dpo_l3dsr_port_sticky_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = lb_dpo_l3dsr_ip4_port_sticky,
+ };
+
+const static char *const lb_dpo_nat4_ip4_port_sticky[] = {
+ "lb4-nat4-port-sticky", NULL
+};
+const static char *const
+ *const lb_dpo_nat4_port_sticky_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = lb_dpo_nat4_ip4_port_sticky,
+ };
+
+const static char *const lb_dpo_nat6_ip6_port_sticky[] = {
+ "lb6-nat6-port-sticky", NULL
+};
+const static char *const
+ *const lb_dpo_nat6_port_sticky_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP6] = lb_dpo_nat6_ip6_port_sticky,
+ };
+
u32 lb_hash_time_now(vlib_main_t * vm)
{
return (u32) (vlib_time_now(vm) + 10000);
lb_vip_t *vip = va_arg (*args, lb_vip_t *);
u32 indent = format_get_indent (s);
- s = format(s, "%U %U [%lu] %U%s\n"
+ /* clang-format off */
+ s = format(s, "%U %U [%lu] %U%s%s\n"
"%U new_size:%u\n",
format_white_space, indent,
format_lb_vip_type, vip->type,
vip - lbm->vips,
format_ip46_prefix, &vip->prefix, (u32) vip->plen, IP46_TYPE_ANY,
+ lb_vip_is_src_ip_sticky (vip) ? " src_ip_sticky" : "",
(vip->flags & LB_VIP_FLAGS_USED)?"":" removed",
format_white_space, indent,
vip->new_flow_table_mask + 1);
+ /* clang-format on */
if (vip->port != 0)
{
lb_as_t *as;
u32 *as_index;
- pool_foreach(as_index, vip->as_indexes, {
+ pool_foreach (as_index, vip->as_indexes) {
as = &lbm->ass[*as_index];
s = format(s, "%U %U %u buckets %Lu flows dpo:%u %s\n",
format_white_space, indent,
vlib_refcount_get(&lbm->as_refcount, as - lbm->ass),
as->dpo.dpoi_index,
(as->flags & LB_AS_FLAGS_USED)?"used":" removed");
- });
+ }
vec_free(count);
return s;
lb_snat6_key_t m_key6;
clib_bihash_kv_24_8_t kv6, value6;
lb_snat_mapping_t *m = 0;
- ASSERT (lbm->writer_lock[0]);
+ CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock);
u32 now = (u32) vlib_time_now(vlib_get_main());
if (!clib_u32_loop_gt(now, vip->last_garbage_collection + LB_GARBAGE_RUN))
vip->last_garbage_collection = now;
lb_as_t *as;
u32 *as_index;
- pool_foreach(as_index, vip->as_indexes, {
+ pool_foreach (as_index, vip->as_indexes) {
as = &lbm->ass[*as_index];
if (!(as->flags & LB_AS_FLAGS_USED) && //Not used
clib_u32_loop_gt(now, as->last_used + LB_CONCURRENCY_TIMEOUT) &&
pool_put(vip->as_indexes, as_index);
pool_put(lbm->ass, as);
}
- });
+ }
}
void lb_garbage_collection()
lb_get_writer_lock();
lb_vip_t *vip;
u32 *to_be_removed_vips = 0, *i;
- pool_foreach(vip, lbm->vips, {
+ pool_foreach (vip, lbm->vips) {
lb_vip_garbage_collection(vip);
if (!(vip->flags & LB_VIP_FLAGS_USED) &&
(pool_elts(vip->as_indexes) == 0)) {
vec_add1(to_be_removed_vips, vip - lbm->vips);
}
- });
+ }
vec_foreach(i, to_be_removed_vips) {
vip = &lbm->vips[*i];
lb_as_t *as;
lb_pseudorand_t *pr, *sort_arr = 0;
- ASSERT (lbm->writer_lock[0]); //We must have the lock
+ CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock); // We must have the lock
//Check if some AS is configured or not
i = 0;
- pool_foreach(as_index, vip->as_indexes, {
+ pool_foreach (as_index, vip->as_indexes) {
as = &lbm->ass[*as_index];
if (as->flags & LB_AS_FLAGS_USED) { //Not used anymore
i = 1;
goto out; //Not sure 'break' works in this macro-loop
}
- });
+ }
out:
if (i == 0) {
vec_alloc(sort_arr, pool_elts(vip->as_indexes));
i = 0;
- pool_foreach(as_index, vip->as_indexes, {
+ pool_foreach (as_index, vip->as_indexes) {
as = &lbm->ass[*as_index];
if (!(as->flags & LB_AS_FLAGS_USED)) //Not used anymore
continue;
sort_arr[i].as_index = as - lbm->ass;
i++;
- });
- _vec_len(sort_arr) = i;
+ }
+ vec_set_len (sort_arr, i);
vec_sort_with_function(sort_arr, lb_pseudorand_compare);
//Let's create a new flow table
vec_validate(new_flow_table, vip->new_flow_table_mask);
for (i=0; i<vec_len(new_flow_table); i++)
- new_flow_table[i].as_index = ~0;
+ new_flow_table[i].as_index = 0;
u32 done = 0;
while (1) {
while (1) {
u32 last = pr->last;
pr->last = (pr->last + pr->skip) & vip->new_flow_table_mask;
- if (new_flow_table[last].as_index == ~0) {
+ if (new_flow_table[last].as_index == 0) {
new_flow_table[last].as_index = pr->as_index;
break;
}
{
lb_main_t *lbm = &lb_main;
lb_vip_t *vip;
- ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned
+ /* This must be called with the lock owned */
+ CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock);
ip46_prefix_normalize(prefix, plen);
- pool_foreach(vip, lbm->vips, {
+ pool_foreach (vip, lbm->vips) {
if ((vip->flags & LB_AS_FLAGS_USED) &&
vip->plen == plen &&
vip->prefix.as_u64[0] == prefix->as_u64[0] &&
return 0;
}
}
- });
+ }
return VNET_API_ERROR_NO_SUCH_ENTRY;
}
static int lb_as_find_index_vip(lb_vip_t *vip, ip46_address_t *address, u32 *as_index)
{
lb_main_t *lbm = &lb_main;
- ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned
+ /* This must be called with the lock owned */
+ CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock);
lb_as_t *as;
u32 *asi;
- pool_foreach(asi, vip->as_indexes, {
+ pool_foreach (asi, vip->as_indexes) {
as = &lbm->ass[*asi];
if (as->vip_index == (vip - lbm->vips) &&
as->address.as_u64[0] == address->as_u64[0] &&
*as_index = as - lbm->ass;
return 0;
}
- });
+ }
return -1;
}
{
/* Add SNAT static mapping */
pool_get (lbm->snat_mappings, m);
- memset (m, 0, sizeof (*m));
+ clib_memset (m, 0, sizeof (*m));
if (lb_vip_is_nat4_port(vip)) {
lb_snat4_key_t m_key4;
clib_bihash_kv_8_8_t kv4;
lb_hash_bucket_t *b;
lb_hash_foreach_entry(h, b, i) {
- if ((b->vip[i] == vip_index)
- || (b->value[i] == as_index))
+ if ((vip_index == ~0)
+ || ((b->vip[i] == vip_index) && (as_index == ~0))
+ || ((b->vip[i] == vip_index) && (b->value[i] == as_index)))
{
vlib_refcount_add(&lbm->as_refcount, thread_index, b->value[i], -1);
vlib_refcount_add(&lbm->as_refcount, thread_index, 0, 1);
b->vip[i] = ~0;
- b->value[i] = ~0;
+ b->value[i] = 0;
}
}
+ if (vip_index == ~0)
+ {
+ lb_hash_free(h);
+ lbm->per_cpu[thread_index].sticky_ht = 0;
+ }
+ }
}
- }
return 0;
}
int lb_vip_del_ass_withlock(u32 vip_index, ip46_address_t *addresses, u32 n,
- u32 *as_index)
+ u8 flush)
{
lb_main_t *lbm = &lb_main;
u32 now = (u32) vlib_time_now(vlib_get_main());
u32 *ip = 0;
+ u32 as_index = 0;
lb_vip_t *vip;
if (!(vip = lb_vip_get_by_index(vip_index))) {
u32 *indexes = NULL;
while (n--) {
- if (lb_as_find_index_vip(vip, &addresses[n], as_index)) {
+ if (lb_as_find_index_vip(vip, &addresses[n], &as_index)) {
vec_free(indexes);
return VNET_API_ERROR_NO_SUCH_ENTRY;
}
}
}
- vec_add1(indexes, *as_index);
+ vec_add1(indexes, as_index);
next:
continue;
}
vec_foreach(ip, indexes) {
lbm->ass[*ip].flags &= ~LB_AS_FLAGS_USED;
lbm->ass[*ip].last_used = now;
+
+ if(flush)
+ {
+        /* flush flow table for deleted ASs */
+ lb_flush_vip_as(vip_index, *ip);
+ }
}
//Recompute flows
int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n, u8 flush)
{
- u32 as_index = 0;
lb_get_writer_lock();
- int ret = lb_vip_del_ass_withlock(vip_index, addresses, n, &as_index);
+ int ret = lb_vip_del_ass_withlock(vip_index, addresses, n, flush);
lb_put_writer_lock();
- if(flush)
- {
- /* flush flow table per as of per-port-vip */
- ret = lb_flush_vip_as(vip_index, as_index);
- }
-
return ret;
}
lb_vip_prefix_index_alloc (lb_main_t *lbm)
{
/*
- * Check for dynamically allocaetd instance number.
+ * Check for dynamically allocated instance number.
*/
u32 bit;
if (!lb_vip_port_find_diff_port(&(vip->prefix), vip->plen,
vip->protocol, vip->port, &vip_idx))
{
+ lb_vip_t *exists_vip = lb_vip_get_by_index(vip_idx);
+ *vip_prefix_index = exists_vip ? exists_vip->vip_prefix_index : ~0;
return;
}
dpo_type = lbm->dpo_nat4_port_type;
else if (lb_vip_is_nat6_port(vip))
dpo_type = lbm->dpo_nat6_port_type;
+ else if (lb_vip_is_gre4_sticky (vip))
+ dpo_type = lbm->dpo_gre4_sticky_type;
+ else if (lb_vip_is_gre6_sticky (vip))
+ dpo_type = lbm->dpo_gre6_sticky_type;
+ else if (lb_vip_is_gre4_port_sticky (vip))
+ dpo_type = lbm->dpo_gre4_port_sticky_type;
+ else if (lb_vip_is_gre6_port_sticky (vip))
+ dpo_type = lbm->dpo_gre6_port_sticky_type;
+ else if (lb_vip_is_l3dsr_sticky (vip))
+ dpo_type = lbm->dpo_l3dsr_sticky_type;
+ else if (lb_vip_is_l3dsr_port_sticky (vip))
+ dpo_type = lbm->dpo_l3dsr_port_sticky_type;
+ else if (lb_vip_is_nat4_port_sticky (vip))
+ dpo_type = lbm->dpo_nat4_port_sticky_type;
+ else if (lb_vip_is_nat6_port_sticky (vip))
+ dpo_type = lbm->dpo_nat6_port_sticky_type;
dpo_set(&dpo, dpo_type, proto, *vip_prefix_index);
fib_table_entry_special_dpo_add(0,
&pfx,
- FIB_SOURCE_PLUGIN_HI,
+ lb_fib_src,
FIB_ENTRY_FLAG_EXCLUSIVE,
&dpo);
dpo_reset(&dpo);
key.vip_prefix_index = vip->vip_prefix_index;
key.protocol = vip->protocol;
key.port = clib_host_to_net_u16(vip->port);
+ key.rsv = 0;
kv.key = key.as_u64;
- if(clib_bihash_search_8_8(&lbm->vip_index_per_port, &kv, &value) == 0)
- m = pool_elt_at_index (lbm->vips, value.value);
+ if(clib_bihash_search_8_8(&lbm->vip_index_per_port, &kv, &value) != 0)
+ {
+ clib_warning("looking up vip_index_per_port failed.");
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+ m = pool_elt_at_index (lbm->vips, value.value);
ASSERT (m);
kv.value = m - lbm->vips;
pfx.fp_len = vip->plen;
pfx.fp_proto = FIB_PROTOCOL_IP6;
}
- fib_table_entry_special_remove(0, &pfx, FIB_SOURCE_PLUGIN_HI);
+ fib_table_entry_special_remove(0, &pfx, lb_fib_src);
}
int lb_vip_add(lb_vip_add_args_t args, u32 *vip_index)
}
vip->flags = LB_VIP_FLAGS_USED;
+ if (args.src_ip_sticky)
+ {
+ vip->flags |= LB_VIP_FLAGS_SRC_IP_STICKY;
+ }
vip->as_indexes = 0;
//Validate counters
{
lb_main_t *lbm = &lb_main;
lb_vip_t *vip;
+ int rv = 0;
/* Does not remove default vip, i.e. vip_index = 0 */
if (vip_index == 0)
- return 0;
+ return VNET_API_ERROR_INVALID_VALUE;
lb_get_writer_lock();
if (!(vip = lb_vip_get_by_index(vip_index))) {
ip46_address_t *ass = 0;
lb_as_t *as;
u32 *as_index;
- pool_foreach(as_index, vip->as_indexes, {
+
+ pool_foreach (as_index, vip->as_indexes) {
as = &lbm->ass[*as_index];
vec_add1(ass, as->address);
- });
+ }
if (vec_len(ass))
- lb_vip_del_ass_withlock(vip_index, ass, vec_len(ass), as_index);
+ lb_vip_del_ass_withlock(vip_index, ass, vec_len(ass), 0);
vec_free(ass);
}
//Delete per-port vip filtering entry
if (vip->port != 0)
{
- lb_vip_del_port_filter(lbm, vip);
+ rv = lb_vip_del_port_filter(lbm, vip);
}
//Set the VIP as unused
vip->flags &= ~LB_VIP_FLAGS_USED;
lb_put_writer_lock();
- return 0;
+ return rv;
}
/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
- .description = "Load Balancer",
+ .description = "Load Balancer (LB)",
};
/* *INDENT-ON* */
dpo_type = lbm->dpo_nat4_port_type;
else if (lb_vip_is_nat6_port(vip))
dpo_type = lbm->dpo_nat6_port_type;
+ else if (lb_vip_is_gre4_sticky (vip))
+ dpo_type = lbm->dpo_gre4_sticky_type;
+ else if (lb_vip_is_gre6_sticky (vip))
+ dpo_type = lbm->dpo_gre6_sticky_type;
+ else if (lb_vip_is_gre4_port_sticky (vip))
+ dpo_type = lbm->dpo_gre4_port_sticky_type;
+ else if (lb_vip_is_gre6_port_sticky (vip))
+ dpo_type = lbm->dpo_gre6_port_sticky_type;
+ else if (lb_vip_is_l3dsr_sticky (vip))
+ dpo_type = lbm->dpo_l3dsr_sticky_type;
+ else if (lb_vip_is_l3dsr_port_sticky (vip))
+ dpo_type = lbm->dpo_l3dsr_port_sticky_type;
+ else if (lb_vip_is_nat4_port_sticky (vip))
+ dpo_type = lbm->dpo_nat4_port_sticky_type;
+ else if (lb_vip_is_nat6_port_sticky (vip))
+ dpo_type = lbm->dpo_nat6_port_sticky_type;
dpo_stack(dpo_type,
lb_vip_is_ip4(vip->type)?DPO_PROTO_IP4:DPO_PROTO_IP6,
//Allocate and init default VIP.
lbm->vips = 0;
pool_get(lbm->vips, default_vip);
+ default_vip->new_flow_table_mask = 0;
default_vip->prefix.ip6.as_u64[0] = 0xffffffffffffffffL;
default_vip->prefix.ip6.as_u64[1] = 0xffffffffffffffffL;
default_vip->protocol = ~0;
lbm->per_cpu = 0;
vec_validate(lbm->per_cpu, tm->n_vlib_mains - 1);
- lbm->writer_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
- lbm->writer_lock[0] = 0;
+ clib_spinlock_init (&lbm->writer_lock);
lbm->per_cpu_sticky_buckets = LB_DEFAULT_PER_CPU_STICKY_BUCKETS;
lbm->flow_timeout = LB_DEFAULT_FLOW_TIMEOUT;
lbm->ip4_src_address.as_u32 = 0xffffffff;
lb_dpo_nat4_port_nodes);
lbm->dpo_nat6_port_type = dpo_register_new_type(&lb_vft,
lb_dpo_nat6_port_nodes);
- lbm->fib_node_type = fib_node_register_new_type(&lb_fib_node_vft);
+ lbm->dpo_gre4_sticky_type =
+ dpo_register_new_type (&lb_vft, lb_dpo_gre4_sticky_nodes);
+ lbm->dpo_gre6_sticky_type =
+ dpo_register_new_type (&lb_vft, lb_dpo_gre6_sticky_nodes);
+ lbm->dpo_gre4_port_sticky_type =
+ dpo_register_new_type (&lb_vft, lb_dpo_gre4_port_sticky_nodes);
+ lbm->dpo_gre6_port_sticky_type =
+ dpo_register_new_type (&lb_vft, lb_dpo_gre6_port_sticky_nodes);
+ lbm->dpo_l3dsr_sticky_type =
+ dpo_register_new_type (&lb_vft, lb_dpo_l3dsr_sticky_nodes);
+ lbm->dpo_l3dsr_port_sticky_type =
+ dpo_register_new_type (&lb_vft, lb_dpo_l3dsr_port_sticky_nodes);
+ lbm->dpo_nat4_port_sticky_type =
+ dpo_register_new_type (&lb_vft, lb_dpo_nat4_port_sticky_nodes);
+ lbm->dpo_nat6_port_sticky_type =
+ dpo_register_new_type (&lb_vft, lb_dpo_nat6_port_sticky_nodes);
+ lbm->fib_node_type = fib_node_register_new_type ("lb", &lb_fib_node_vft);
//Init AS reference counters
vlib_refcount_init(&lbm->as_refcount);
default_as->address.ip6.as_u64[0] = 0xffffffffffffffffL;
default_as->address.ip6.as_u64[1] = 0xffffffffffffffffL;
+ /* Generate a valid flow table for default VIP */
+ default_vip->as_indexes = NULL;
+ lb_get_writer_lock();
+ lb_vip_update_new_flow_table(default_vip);
+ lb_put_writer_lock();
+
lbm->vip_index_by_nodeport
= hash_create_mem (0, sizeof(u16), sizeof (uword));
#define _(a,b,c) lbm->vip_counters[c].name = b;
lb_foreach_vip_counter
#undef _
+
+ lb_fib_src = fib_source_allocate("lb",
+ FIB_SOURCE_PRIORITY_HI,
+ FIB_SOURCE_BH_SIMPLE);
+
return NULL;
}