2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
17 #include <vnet/plugin/plugin.h>
18 #include <vpp/app/version.h>
19 #include <vnet/api_errno.h>
20 #include <vnet/udp/udp_local.h>
21 #include <vppinfra/lock.h>
/* Garbage collection is rate-limited: at most one pass per VIP every
 * LB_GARBAGE_RUN seconds (enforced in lb_vip_garbage_collection()). */
23 //GC runs at most once every so many seconds
24 #define LB_GARBAGE_RUN 60
/* An unused AS is only reclaimed once it has been idle for this many
 * seconds AND its refcount has dropped to zero. */
26 //After so many seconds. It is assumed that inter-core race condition will not occur.
27 #define LB_CONCURRENCY_TIMEOUT 10
29 // FIB source for adding routes
30 static fib_source_t lb_fib_src;
/* Global writer lock serializing every control-plane mutation of lb_main.
 * All add/del/GC paths below take this lock; helpers assert it is held. */
34 #define lb_get_writer_lock() clib_spinlock_lock (&lb_main.writer_lock)
35 #define lb_put_writer_lock() clib_spinlock_unlock (&lb_main.writer_lock)
37 static void lb_as_stack (lb_as_t *as);
/* Per-encap-type DPO graph-node name tables, indexed by DPO_PROTO_*.
 * Each NULL-terminated list names the vlib node(s) that packets of that
 * protocol are dispatched to for the given LB encapsulation (GRE4/GRE6,
 * L3DSR, NAT4/NAT6, with per-port and src-ip-sticky variants).  These
 * tables are presumably passed to dpo_register_new_type() elsewhere in
 * the file — not visible in this chunk. */
40 const static char * const lb_dpo_gre4_ip4[] = { "lb4-gre4" , NULL };
41 const static char * const lb_dpo_gre4_ip6[] = { "lb6-gre4" , NULL };
42 const static char* const * const lb_dpo_gre4_nodes[DPO_PROTO_NUM] =
44 [DPO_PROTO_IP4] = lb_dpo_gre4_ip4,
45 [DPO_PROTO_IP6] = lb_dpo_gre4_ip6,
48 const static char * const lb_dpo_gre6_ip4[] = { "lb4-gre6" , NULL };
49 const static char * const lb_dpo_gre6_ip6[] = { "lb6-gre6" , NULL };
50 const static char* const * const lb_dpo_gre6_nodes[DPO_PROTO_NUM] =
52 [DPO_PROTO_IP4] = lb_dpo_gre6_ip4,
53 [DPO_PROTO_IP6] = lb_dpo_gre6_ip6,
56 const static char * const lb_dpo_gre4_ip4_port[] = { "lb4-gre4-port" , NULL };
57 const static char * const lb_dpo_gre4_ip6_port[] = { "lb6-gre4-port" , NULL };
58 const static char* const * const lb_dpo_gre4_port_nodes[DPO_PROTO_NUM] =
60 [DPO_PROTO_IP4] = lb_dpo_gre4_ip4_port,
61 [DPO_PROTO_IP6] = lb_dpo_gre4_ip6_port,
64 const static char * const lb_dpo_gre6_ip4_port[] = { "lb4-gre6-port" , NULL };
65 const static char * const lb_dpo_gre6_ip6_port[] = { "lb6-gre6-port" , NULL };
66 const static char* const * const lb_dpo_gre6_port_nodes[DPO_PROTO_NUM] =
68 [DPO_PROTO_IP4] = lb_dpo_gre6_ip4_port,
69 [DPO_PROTO_IP6] = lb_dpo_gre6_ip6_port,
/* L3DSR is IPv4-only: no DPO_PROTO_IP6 entry. */
72 const static char * const lb_dpo_l3dsr_ip4[] = {"lb4-l3dsr" , NULL};
73 const static char* const * const lb_dpo_l3dsr_nodes[DPO_PROTO_NUM] =
75 [DPO_PROTO_IP4] = lb_dpo_l3dsr_ip4,
78 const static char * const lb_dpo_l3dsr_ip4_port[] = {"lb4-l3dsr-port" , NULL};
79 const static char* const * const lb_dpo_l3dsr_port_nodes[DPO_PROTO_NUM] =
81 [DPO_PROTO_IP4] = lb_dpo_l3dsr_ip4_port,
/* NAT is address-family specific: NAT4 handles IPv4 only, NAT6 IPv6 only. */
84 const static char * const lb_dpo_nat4_ip4_port[] = { "lb4-nat4-port" , NULL };
85 const static char* const * const lb_dpo_nat4_port_nodes[DPO_PROTO_NUM] =
87 [DPO_PROTO_IP4] = lb_dpo_nat4_ip4_port,
90 const static char * const lb_dpo_nat6_ip6_port[] = { "lb6-nat6-port" , NULL };
91 const static char* const * const lb_dpo_nat6_port_nodes[DPO_PROTO_NUM] =
93 [DPO_PROTO_IP6] = lb_dpo_nat6_ip6_port,
/* Sticky variants: same encap families, routed to the *-sticky nodes. */
96 const static char *const lb_dpo_gre4_ip4_sticky[] = { "lb4-gre4-sticky",
98 const static char *const lb_dpo_gre4_ip6_sticky[] = { "lb6-gre4-sticky",
100 const static char *const *const lb_dpo_gre4_sticky_nodes[DPO_PROTO_NUM] = {
101 [DPO_PROTO_IP4] = lb_dpo_gre4_ip4_sticky,
102 [DPO_PROTO_IP6] = lb_dpo_gre4_ip6_sticky,
105 const static char *const lb_dpo_gre6_ip4_sticky[] = { "lb4-gre6-sticky",
107 const static char *const lb_dpo_gre6_ip6_sticky[] = { "lb6-gre6-sticky",
109 const static char *const *const lb_dpo_gre6_sticky_nodes[DPO_PROTO_NUM] = {
110 [DPO_PROTO_IP4] = lb_dpo_gre6_ip4_sticky,
111 [DPO_PROTO_IP6] = lb_dpo_gre6_ip6_sticky,
114 const static char *const lb_dpo_gre4_ip4_port_sticky[] = {
115 "lb4-gre4-port-sticky", NULL
117 const static char *const lb_dpo_gre4_ip6_port_sticky[] = {
118 "lb6-gre4-port-sticky", NULL
120 const static char *const
121 *const lb_dpo_gre4_port_sticky_nodes[DPO_PROTO_NUM] = {
122 [DPO_PROTO_IP4] = lb_dpo_gre4_ip4_port_sticky,
123 [DPO_PROTO_IP6] = lb_dpo_gre4_ip6_port_sticky,
126 const static char *const lb_dpo_gre6_ip4_port_sticky[] = {
127 "lb4-gre6-port-sticky", NULL
129 const static char *const lb_dpo_gre6_ip6_port_sticky[] = {
130 "lb6-gre6-port-sticky", NULL
132 const static char *const
133 *const lb_dpo_gre6_port_sticky_nodes[DPO_PROTO_NUM] = {
134 [DPO_PROTO_IP4] = lb_dpo_gre6_ip4_port_sticky,
135 [DPO_PROTO_IP6] = lb_dpo_gre6_ip6_port_sticky,
138 const static char *const lb_dpo_l3dsr_ip4_sticky[] = { "lb4-l3dsr-sticky",
140 const static char *const *const lb_dpo_l3dsr_sticky_nodes[DPO_PROTO_NUM] = {
141 [DPO_PROTO_IP4] = lb_dpo_l3dsr_ip4_sticky,
144 const static char *const lb_dpo_l3dsr_ip4_port_sticky[] = {
145 "lb4-l3dsr-port-sticky", NULL
147 const static char *const
148 *const lb_dpo_l3dsr_port_sticky_nodes[DPO_PROTO_NUM] = {
149 [DPO_PROTO_IP4] = lb_dpo_l3dsr_ip4_port_sticky,
152 const static char *const lb_dpo_nat4_ip4_port_sticky[] = {
153 "lb4-nat4-port-sticky", NULL
155 const static char *const
156 *const lb_dpo_nat4_port_sticky_nodes[DPO_PROTO_NUM] = {
157 [DPO_PROTO_IP4] = lb_dpo_nat4_ip4_port_sticky,
160 const static char *const lb_dpo_nat6_ip6_port_sticky[] = {
161 "lb6-nat6-port-sticky", NULL
163 const static char *const
164 *const lb_dpo_nat6_port_sticky_nodes[DPO_PROTO_NUM] = {
165 [DPO_PROTO_IP6] = lb_dpo_nat6_ip6_port_sticky,
/* Current time (whole seconds) used to timestamp sticky-hash entries.
 * Offset by 10000 so the returned value is never 0 — presumably 0 is a
 * sentinel in the hash table; confirm against lb_hash.h. */
168 u32 lb_hash_time_now(vlib_main_t * vm)
170 return (u32) (vlib_time_now(vm) + 10000);
/* format() callback: one-line summary of global LB state (src addresses,
 * vip/as pool sizes) followed by per-worker sticky-table usage. */
173 u8 *format_lb_main (u8 * s, va_list * args)
175 vlib_thread_main_t *tm = vlib_get_thread_main();
176 lb_main_t *lbm = &lb_main;
177 s = format(s, "lb_main");
178 s = format(s, " ip4-src-address: %U \n", format_ip4_address, &lbm->ip4_src_address);
179 s = format(s, " ip6-src-address: %U \n", format_ip6_address, &lbm->ip6_src_address);
180 s = format(s, " #vips: %u\n", pool_elts(lbm->vips));
/* "- 1" excludes the reserved default AS at pool index 0. */
181 s = format(s, " #ass: %u\n", pool_elts(lbm->ass) - 1);
184 for(thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++ ) {
185 lb_hash_t *h = lbm->per_cpu[thread_index].sticky_ht;
187 s = format(s, "core %d\n", thread_index);
188 s = format(s, " timeout: %ds\n", h->timeout);
189 s = format(s, " usage: %d / %d\n", lb_hash_elts(h, lb_hash_time_now(vlib_get_main())), lb_hash_size(h));
/* Human-readable names for lb_vip_type_t, used by both the format and
 * unformat helpers below; indexed by the enum value. */
196 static char *lb_vip_type_strings[] = {
197 [LB_VIP_TYPE_IP6_GRE6] = "ip6-gre6",
198 [LB_VIP_TYPE_IP6_GRE4] = "ip6-gre4",
199 [LB_VIP_TYPE_IP4_GRE6] = "ip4-gre6",
200 [LB_VIP_TYPE_IP4_GRE4] = "ip4-gre4",
201 [LB_VIP_TYPE_IP4_L3DSR] = "ip4-l3dsr",
202 [LB_VIP_TYPE_IP4_NAT4] = "ip4-nat4",
203 [LB_VIP_TYPE_IP6_NAT6] = "ip6-nat6",
/* format() callback for a VIP type; prints "_WRONG_TYPE_" for values
 * outside the table.  NOTE(review): the table string is passed as the
 * format string itself — safe only while the table contains no '%';
 * format(s, "%s", ...) would be more defensive. */
206 u8 *format_lb_vip_type (u8 * s, va_list * args)
208 lb_vip_type_t vipt = va_arg (*args, lb_vip_type_t);
210 for (i=0; i<LB_VIP_N_TYPES; i++)
212 return format(s, lb_vip_type_strings[i]);
213 return format(s, "_WRONG_TYPE_");
/* unformat() callback: parse a VIP type keyword (e.g. "ip4-gre4") into
 * *vipt; tries each known type string in turn. */
216 uword unformat_lb_vip_type (unformat_input_t * input, va_list * args)
218 lb_vip_type_t *vipt = va_arg (*args, lb_vip_type_t *);
220 for (i=0; i<LB_VIP_N_TYPES; i++)
221 if (unformat(input, lb_vip_type_strings[i])) {
/* format() callback: one-line VIP summary — type, prefix, flow-table
 * size, AS count — plus encap-specific details (dscp for L3DSR,
 * service type / ports for NAT). */
228 u8 *format_lb_vip (u8 * s, va_list * args)
230 lb_vip_t *vip = va_arg (*args, lb_vip_t *);
231 s = format(s, "%U %U new_size:%u #as:%u%s",
232 format_lb_vip_type, vip->type,
233 format_ip46_prefix, &vip->prefix, vip->plen, IP46_TYPE_ANY,
234 vip->new_flow_table_mask + 1,
235 pool_elts(vip->as_indexes),
236 (vip->flags & LB_VIP_FLAGS_USED)?"":" removed")
240 s = format(s, " protocol:%u port:%u ", vip->protocol, vip->port);
243 if (vip->type == LB_VIP_TYPE_IP4_L3DSR)
245 s = format(s, " dscp:%u", vip->encap_args.dscp);
247 else if ((vip->type == LB_VIP_TYPE_IP4_NAT4)
248 || (vip->type == LB_VIP_TYPE_IP6_NAT6))
250 s = format (s, " type:%s port:%u target_port:%u",
251 (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)?"clusterip":
/* ports are stored network-order; convert for display */
253 ntohs(vip->port), ntohs(vip->encap_args.target_port));
/* format() callback: AS address plus whether it is still in use. */
259 u8 *format_lb_as (u8 * s, va_list * args)
261 lb_as_t *as = va_arg (*args, lb_as_t *);
262 return format(s, "%U %s", format_ip46_address,
263 &as->address, IP46_TYPE_ANY,
264 (as->flags & LB_AS_FLAGS_USED)?"used":"removed");
/* format() callback: multi-line VIP dump — header, encap details,
 * per-VIP counters, and per-AS bucket/flow statistics. */
267 u8 *format_lb_vip_detailed (u8 * s, va_list * args)
269 lb_main_t *lbm = &lb_main;
270 lb_vip_t *vip = va_arg (*args, lb_vip_t *);
271 u32 indent = format_get_indent (s);
273 /* clang-format off */
274 s = format(s, "%U %U [%lu] %U%s%s\n"
276 format_white_space, indent,
277 format_lb_vip_type, vip->type,
279 format_ip46_prefix, &vip->prefix, (u32) vip->plen, IP46_TYPE_ANY,
280 lb_vip_is_src_ip_sticky (vip) ? " src_ip_sticky" : "",
281 (vip->flags & LB_VIP_FLAGS_USED)?"":" removed",
282 format_white_space, indent,
283 vip->new_flow_table_mask + 1);
284 /* clang-format on */
288 s = format(s, "%U protocol:%u port:%u\n",
289 format_white_space, indent,
290 vip->protocol, vip->port);
293 if (vip->type == LB_VIP_TYPE_IP4_L3DSR)
295 s = format(s, "%U dscp:%u\n",
296 format_white_space, indent,
297 vip->encap_args.dscp);
299 else if ((vip->type == LB_VIP_TYPE_IP4_NAT4)
300 || (vip->type == LB_VIP_TYPE_IP6_NAT6))
302 s = format (s, "%U type:%s port:%u target_port:%u",
303 format_white_space, indent,
304 (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)?"clusterip":
306 ntohs(vip->port), ntohs(vip->encap_args.target_port));
310 s = format(s, "%U counters:\n",
311 format_white_space, indent);
313 for (i=0; i<LB_N_VIP_COUNTERS; i++)
314 s = format(s, "%U %s: %Lu\n",
315 format_white_space, indent,
316 lbm->vip_counters[i].name,
317 vlib_get_simple_counter(&lbm->vip_counters[i], vip - lbm->vips));
320 s = format(s, "%U #as:%u\n",
321 format_white_space, indent,
322 pool_elts(vip->as_indexes));
324 //Let's count the buckets for each AS
/* One counter slot per AS pool entry; sized to the whole pool. */
326 vec_validate(count, pool_len(lbm->ass)); //Possibly big alloc for not much...
327 lb_new_flow_entry_t *nfe;
328 vec_foreach(nfe, vip->new_flow_table)
329 count[nfe->as_index]++;
333 pool_foreach (as_index, vip->as_indexes) {
334 as = &lbm->ass[*as_index];
335 s = format(s, "%U %U %u buckets %Lu flows dpo:%u %s\n",
336 format_white_space, indent,
337 format_ip46_address, &as->address, IP46_TYPE_ANY,
338 count[as - lbm->ass],
339 vlib_refcount_get(&lbm->as_refcount, as - lbm->ass),
341 (as->flags & LB_AS_FLAGS_USED)?"used":" removed");
/* vec_sort_with_function() comparator: orders lb_pseudorand_t entries by
 * the raw bytes of their AS address, giving a stable, deterministic
 * ordering before the MagLev permutation is generated. */
354 static int lb_pseudorand_compare(void *a, void *b)
357 lb_main_t *lbm = &lb_main;
358 asa = &lbm->ass[((lb_pseudorand_t *)a)->as_index];
359 asb = &lbm->ass[((lb_pseudorand_t *)b)->as_index];
360 return memcmp(&asa->address, &asb->address, sizeof(asb->address));
/* Reclaim ASs of one VIP that are unused, idle past the concurrency
 * timeout, and no longer referenced by any flow.  Also removes the
 * matching SNAT mapping (NAT VIPs) and the AS's FIB tracking state.
 * Caller must hold the writer lock.  Rate-limited to one pass per
 * LB_GARBAGE_RUN seconds per VIP. */
363 static void lb_vip_garbage_collection(lb_vip_t *vip)
365 lb_main_t *lbm = &lb_main;
366 lb_snat4_key_t m_key4;
367 clib_bihash_kv_8_8_t kv4, value4;
368 lb_snat6_key_t m_key6;
369 clib_bihash_kv_24_8_t kv6, value6;
370 lb_snat_mapping_t *m = 0;
371 CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock);
373 u32 now = (u32) vlib_time_now(vlib_get_main());
/* clib_u32_loop_gt: wrap-safe u32 comparison, so the 32-bit clock may roll. */
374 if (!clib_u32_loop_gt(now, vip->last_garbage_collection + LB_GARBAGE_RUN))
377 vip->last_garbage_collection = now;
380 pool_foreach (as_index, vip->as_indexes) {
381 as = &lbm->ass[*as_index];
/* Reclaim only when all three hold: flagged unused, idle long enough,
 * and zero data-plane references. */
382 if (!(as->flags & LB_AS_FLAGS_USED) && //Not used
383 clib_u32_loop_gt(now, as->last_used + LB_CONCURRENCY_TIMEOUT) &&
384 (vlib_refcount_get(&lbm->as_refcount, as - lbm->ass) == 0))
387 if (lb_vip_is_nat4_port(vip)) {
/* Drop the SNAT mapping keyed by (AS addr, target port). */
388 m_key4.addr = as->address.ip4;
389 m_key4.port = vip->encap_args.target_port;
391 m_key4.fib_index = 0;
393 kv4.key = m_key4.as_u64;
394 if(!clib_bihash_search_8_8(&lbm->mapping_by_as4, &kv4, &value4))
395 m = pool_elt_at_index (lbm->snat_mappings, value4.value);
398 kv4.value = m - lbm->snat_mappings;
399 clib_bihash_add_del_8_8(&lbm->mapping_by_as4, &kv4, 0);
400 pool_put (lbm->snat_mappings, m);
401 } else if (lb_vip_is_nat6_port(vip)) {
402 m_key6.addr.as_u64[0] = as->address.ip6.as_u64[0];
403 m_key6.addr.as_u64[1] = as->address.ip6.as_u64[1];
404 m_key6.port = vip->encap_args.target_port;
406 m_key6.fib_index = 0;
408 kv6.key[0] = m_key6.as_u64[0];
409 kv6.key[1] = m_key6.as_u64[1];
410 kv6.key[2] = m_key6.as_u64[2];
412 if (!clib_bihash_search_24_8 (&lbm->mapping_by_as6, &kv6, &value6))
413 m = pool_elt_at_index (lbm->snat_mappings, value6.value);
416 kv6.value = m - lbm->snat_mappings;
417 clib_bihash_add_del_24_8(&lbm->mapping_by_as6, &kv6, 0);
418 pool_put (lbm->snat_mappings, m);
/* Stop tracking the AS's next hop in the FIB and free the entry. */
420 fib_entry_child_remove(as->next_hop_fib_entry_index,
421 as->next_hop_child_index);
422 fib_table_entry_delete_index(as->next_hop_fib_entry_index,
424 as->next_hop_fib_entry_index = FIB_NODE_INDEX_INVALID;
426 pool_put(vip->as_indexes, as_index);
427 pool_put(lbm->ass, as);
/* Global GC entry point: takes the writer lock, runs per-VIP GC, then
 * frees VIPs that are unused and have no remaining ASs. */
432 void lb_garbage_collection()
434 lb_main_t *lbm = &lb_main;
435 lb_get_writer_lock();
437 u32 *to_be_removed_vips = 0, *i;
438 pool_foreach (vip, lbm->vips) {
439 lb_vip_garbage_collection(vip);
/* Removal is deferred to a second pass: pool_foreach must not see the
 * pool mutate underneath it. */
441 if (!(vip->flags & LB_VIP_FLAGS_USED) &&
442 (pool_elts(vip->as_indexes) == 0)) {
443 vec_add1(to_be_removed_vips, vip - lbm->vips);
447 vec_foreach(i, to_be_removed_vips) {
448 vip = &lbm->vips[*i];
/* NOTE(review): vip->as_indexes is freed *after* the element is returned
 * to the pool — this relies on pool_put not scrubbing the memory; verify
 * against vppinfra pool semantics. */
449 pool_put(lbm->vips, vip);
450 pool_free(vip->as_indexes);
453 vec_free(to_be_removed_vips);
454 lb_put_writer_lock();
/* Rebuild a VIP's MagLev-style new-flow table from its active ASs.
 * If no AS is active every bucket points at AS 0 (the drop/default AS).
 * Otherwise, each AS gets a deterministic pseudo-random (skip, start)
 * permutation derived from a hash of its address, and buckets are filled
 * round-robin over the permutations until the table is full.
 * Caller must hold the writer lock. */
457 static void lb_vip_update_new_flow_table(lb_vip_t *vip)
459 lb_main_t *lbm = &lb_main;
460 lb_new_flow_entry_t *old_table;
462 lb_new_flow_entry_t *new_flow_table = 0;
464 lb_pseudorand_t *pr, *sort_arr = 0;
466 CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock); // We must have the lock
468 //Check if some AS is configured or not
470 pool_foreach (as_index, vip->as_indexes) {
471 as = &lbm->ass[*as_index];
472 if (as->flags & LB_AS_FLAGS_USED) { //Not used anymore
474 goto out; //Not sure 'break' works in this macro-loop
480 //Only the default. i.e. no AS
/* No active AS: every bucket falls back to AS index 0. */
481 vec_validate(new_flow_table, vip->new_flow_table_mask);
482 for (i=0; i<vec_len(new_flow_table); i++)
483 new_flow_table[i].as_index = 0;
488 //First, let's sort the ASs
489 vec_alloc(sort_arr, pool_elts(vip->as_indexes));
492 pool_foreach (as_index, vip->as_indexes) {
493 as = &lbm->ass[*as_index];
494 if (!(as->flags & LB_AS_FLAGS_USED)) //Not used anymore
497 sort_arr[i].as_index = as - lbm->ass;
500 vec_set_len (sort_arr, i);
/* Sort by AS address so the permutation is independent of pool order. */
502 vec_sort_with_function(sort_arr, lb_pseudorand_compare);
504 //Now let's pseudo-randomly generate permutations
505 vec_foreach(pr, sort_arr) {
506 lb_as_t *as = &lbm->ass[pr->as_index];
508 u64 seed = clib_xxhash(as->address.as_u64[0] ^
509 as->address.as_u64[1]);
510 /* We have 2^n buckets.
511 * skip must be prime with 2^n.
512 * So skip must be odd.
513 * MagLev actually state that M should be prime,
514 * but this has a big computation cost (% operation).
515 * Using 2^n is more better (& operation).
/* "| 1" forces skip odd, hence coprime with the power-of-two table size. */
517 pr->skip = ((seed & 0xffffffff) | 1) & vip->new_flow_table_mask;
518 pr->last = (seed >> 32) & vip->new_flow_table_mask;
521 //Let's create a new flow table
522 vec_validate(new_flow_table, vip->new_flow_table_mask);
523 for (i=0; i<vec_len(new_flow_table); i++)
524 new_flow_table[i].as_index = 0;
528 vec_foreach(pr, sort_arr) {
531 pr->last = (pr->last + pr->skip) & vip->new_flow_table_mask;
532 if (new_flow_table[last].as_index == 0) {
533 new_flow_table[last].as_index = pr->as_index;
538 if (done == vec_len(new_flow_table))
/* Swap the table in; old_table presumably freed after readers drain. */
546 old_table = vip->new_flow_table;
547 vip->new_flow_table = new_flow_table;
/* Set global LB configuration: encap source addresses, per-worker sticky
 * bucket count (must be a power of two), and flow timeout.
 * Returns 0 or VNET_API_ERROR_INVALID_MEMORY_SIZE. */
551 int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address,
552 u32 per_cpu_sticky_buckets, u32 flow_timeout)
554 lb_main_t *lbm = &lb_main;
556 if (!is_pow2(per_cpu_sticky_buckets))
557 return VNET_API_ERROR_INVALID_MEMORY_SIZE;
559 lb_get_writer_lock(); //Not exactly necessary but just a reminder that it exists for my future self
560 lbm->ip4_src_address = *ip4_address;
561 lbm->ip6_src_address = *ip6_address;
562 lbm->per_cpu_sticky_buckets = per_cpu_sticky_buckets;
563 lbm->flow_timeout = flow_timeout;
564 lb_put_writer_lock();
/* Find a VIP by prefix and, depending on lkp_type, by exact
 * protocol/port (SAME_IP_PORT), ignoring ports (ALL_PORT_IP), or by
 * same prefix but *different* protocol/port (DIFF_IP_PORT).
 * Writes the match into *vip_index; returns 0 on success or
 * VNET_API_ERROR_NO_SUCH_ENTRY.  Caller must hold the writer lock. */
571 int lb_vip_port_find_index(ip46_address_t *prefix, u8 plen,
572 u8 protocol, u16 port,
573 lb_lkp_type_t lkp_type,
576 lb_main_t *lbm = &lb_main;
578 /* This must be called with the lock owned */
579 CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock);
580 ip46_prefix_normalize(prefix, plen);
581 pool_foreach (vip, lbm->vips) {
/* NOTE(review): tests LB_AS_FLAGS_USED on a *vip* — works only if the
 * AS and VIP "used" flags share the same bit value; confirm in lb.h. */
582 if ((vip->flags & LB_AS_FLAGS_USED) &&
584 vip->prefix.as_u64[0] == prefix->as_u64[0] &&
585 vip->prefix.as_u64[1] == prefix->as_u64[1])
587 if((lkp_type == LB_LKP_SAME_IP_PORT &&
588 vip->protocol == protocol &&
589 vip->port == port) ||
590 (lkp_type == LB_LKP_ALL_PORT_IP &&
592 (lkp_type == LB_LKP_DIFF_IP_PORT &&
593 (vip->protocol != protocol ||
594 vip->port != port) ) )
596 *vip_index = vip - lbm->vips;
601 return VNET_API_ERROR_NO_SUCH_ENTRY;
/* Exact (prefix, protocol, port) lookup; writer lock must be held. */
605 int lb_vip_port_find_index_with_lock(ip46_address_t *prefix, u8 plen,
606 u8 protocol, u16 port, u32 *vip_index)
608 return lb_vip_port_find_index(prefix, plen, protocol, port,
609 LB_LKP_SAME_IP_PORT, vip_index);
/* Find the all-port VIP (protocol ~0, port 0) covering this prefix;
 * writer lock must be held. */
613 int lb_vip_port_find_all_port_vip(ip46_address_t *prefix, u8 plen,
616 return lb_vip_port_find_index(prefix, plen, ~0, 0,
617 LB_LKP_ALL_PORT_IP, vip_index);
620 /* Find out per-port-vip entry with different protocol and port */
622 int lb_vip_port_find_diff_port(ip46_address_t *prefix, u8 plen,
623 u8 protocol, u16 port, u32 *vip_index)
625 return lb_vip_port_find_index(prefix, plen, protocol, port,
626 LB_LKP_DIFF_IP_PORT, vip_index);
/* Public lookup wrapper: acquires the writer lock around the exact
 * (prefix, protocol, port) search. */
629 int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u8 protocol,
630 u16 port, u32 *vip_index)
633 lb_get_writer_lock();
634 ret = lb_vip_port_find_index_with_lock(prefix, plen,
635 protocol, port, vip_index);
636 lb_put_writer_lock();
/* Find the AS with the given address within one VIP's AS set; writes the
 * global AS pool index into *as_index.  Writer lock must be held. */
640 static int lb_as_find_index_vip(lb_vip_t *vip, ip46_address_t *address, u32 *as_index)
642 lb_main_t *lbm = &lb_main;
643 /* This must be called with the lock owned */
644 CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock);
647 pool_foreach (asi, vip->as_indexes) {
648 as = &lbm->ass[*asi];
649 if (as->vip_index == (vip - lbm->vips) &&
650 as->address.as_u64[0] == address->as_u64[0] &&
651 as->address.as_u64[1] == address->as_u64[1])
653 *as_index = as - lbm->ass;
/* Add n application servers (addresses) to a VIP.  Validates first
 * (duplicates, address family, already-existing ASs), then revives
 * previously-removed ASs and creates new ones: each new AS is tracked
 * as a FIB entry child (so forwarding changes restack the DPO), and for
 * NAT VIPs a SNAT mapping is installed.  Finishes by rebuilding the
 * flow table and opportunistically running GC.
 * Returns 0 or a VNET_API_ERROR_* code; takes the writer lock itself. */
660 int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n)
662 lb_main_t *lbm = &lb_main;
663 lb_get_writer_lock();
665 if (!(vip = lb_vip_get_by_index(vip_index))) {
666 lb_put_writer_lock();
667 return VNET_API_ERROR_NO_SUCH_ENTRY;
670 ip46_type_t type = lb_encap_is_ip4(vip)?IP46_TYPE_IP4:IP46_TYPE_IP6;
671 u32 *to_be_added = 0;
672 u32 *to_be_updated = 0;
675 lb_snat_mapping_t *m;
/* Pass 1: classify each address as "revive existing" or "create new",
 * rejecting exact duplicates and wrong address families up front. */
680 if (!lb_as_find_index_vip(vip, &addresses[n], &i)) {
681 if (lbm->ass[i].flags & LB_AS_FLAGS_USED) {
682 vec_free(to_be_added);
683 vec_free(to_be_updated);
684 lb_put_writer_lock();
685 return VNET_API_ERROR_VALUE_EXIST;
687 vec_add1(to_be_updated, i);
691 if (ip46_address_type(&addresses[n]) != type) {
692 vec_free(to_be_added);
693 vec_free(to_be_updated);
694 lb_put_writer_lock();
695 return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
700 while(n2--) //Check for duplicates
701 if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] &&
702 addresses[n2].as_u64[1] == addresses[n].as_u64[1])
706 vec_add1(to_be_added, n);
/* Pass 2a: revive ASs that were flagged removed but not yet GC'd. */
713 vec_foreach(ip, to_be_updated) {
714 lbm->ass[*ip].flags = LB_AS_FLAGS_USED;
716 vec_free(to_be_updated);
718 //Create those who have to be created
719 vec_foreach(ip, to_be_added) {
722 pool_get(lbm->ass, as);
723 as->address = addresses[*ip];
724 as->flags = LB_AS_FLAGS_USED;
725 as->vip_index = vip_index;
726 pool_get(vip->as_indexes, as_index);
727 *as_index = as - lbm->ass;
730 * become a child of the FIB entry
731 * so we are informed when its forwarding changes
733 fib_prefix_t nh = {};
734 if (lb_encap_is_ip4(vip)) {
735 nh.fp_addr.ip4 = as->address.ip4;
737 nh.fp_proto = FIB_PROTOCOL_IP4;
739 nh.fp_addr.ip6 = as->address.ip6;
741 nh.fp_proto = FIB_PROTOCOL_IP6;
744 as->next_hop_fib_entry_index =
745 fib_table_entry_special_add(0,
748 FIB_ENTRY_FLAG_NONE);
749 as->next_hop_child_index =
750 fib_entry_child_add(as->next_hop_fib_entry_index,
756 if ( lb_vip_is_nat4_port(vip) || lb_vip_is_nat6_port(vip) )
758 /* Add SNAT static mapping */
759 pool_get (lbm->snat_mappings, m);
760 clib_memset (m, 0, sizeof (*m));
761 if (lb_vip_is_nat4_port(vip)) {
762 lb_snat4_key_t m_key4;
763 clib_bihash_kv_8_8_t kv4;
764 m_key4.addr = as->address.ip4;
765 m_key4.port = vip->encap_args.target_port;
767 m_key4.fib_index = 0;
/* CLUSTERIP rewrites source to the VIP itself; NODEPORT to the
 * configured node source address. */
769 if (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)
771 m->src_ip.ip4 = vip->prefix.ip4;
773 else if (vip->encap_args.srv_type == LB_SRV_TYPE_NODEPORT)
775 m->src_ip.ip4 = lbm->ip4_src_address;
777 m->src_ip_is_ipv6 = 0;
778 m->as_ip.ip4 = as->address.ip4;
779 m->as_ip_is_ipv6 = 0;
780 m->src_port = vip->port;
781 m->target_port = vip->encap_args.target_port;
785 kv4.key = m_key4.as_u64;
786 kv4.value = m - lbm->snat_mappings;
787 clib_bihash_add_del_8_8(&lbm->mapping_by_as4, &kv4, 1);
789 lb_snat6_key_t m_key6;
790 clib_bihash_kv_24_8_t kv6;
791 m_key6.addr.as_u64[0] = as->address.ip6.as_u64[0];
792 m_key6.addr.as_u64[1] = as->address.ip6.as_u64[1];
793 m_key6.port = vip->encap_args.target_port;
795 m_key6.fib_index = 0;
797 if (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)
799 m->src_ip.ip6.as_u64[0] = vip->prefix.ip6.as_u64[0];
800 m->src_ip.ip6.as_u64[1] = vip->prefix.ip6.as_u64[1];
802 else if (vip->encap_args.srv_type == LB_SRV_TYPE_NODEPORT)
804 m->src_ip.ip6.as_u64[0] = lbm->ip6_src_address.as_u64[0];
805 m->src_ip.ip6.as_u64[1] = lbm->ip6_src_address.as_u64[1];
807 m->src_ip_is_ipv6 = 1;
808 m->as_ip.ip6.as_u64[0] = as->address.ip6.as_u64[0];
809 m->as_ip.ip6.as_u64[1] = as->address.ip6.as_u64[1];
810 m->as_ip_is_ipv6 = 1;
811 m->src_port = vip->port;
812 m->target_port = vip->encap_args.target_port;
816 kv6.key[0] = m_key6.as_u64[0];
817 kv6.key[1] = m_key6.as_u64[1];
818 kv6.key[2] = m_key6.as_u64[2];
819 kv6.value = m - lbm->snat_mappings;
820 clib_bihash_add_del_24_8(&lbm->mapping_by_as6, &kv6, 1);
824 vec_free(to_be_added);
/* Re-balance buckets now that the AS set changed. */
827 lb_vip_update_new_flow_table(vip);
829 //Garbage collection maybe
830 lb_vip_garbage_collection(vip);
832 lb_put_writer_lock();
/* Flush sticky-hash entries on every worker that match the given
 * (vip_index, as_index); ~0 acts as a wildcard for either argument.
 * Each flushed entry moves its refcount from the real AS to AS 0. */
837 lb_flush_vip_as (u32 vip_index, u32 as_index)
840 vlib_thread_main_t *tm = vlib_get_thread_main();
841 lb_main_t *lbm = &lb_main;
843 for(thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++ ) {
844 lb_hash_t *h = lbm->per_cpu[thread_index].sticky_ht;
849 lb_hash_foreach_entry(h, b, i) {
850 if ((vip_index == ~0)
851 || ((b->vip[i] == vip_index) && (as_index == ~0))
852 || ((b->vip[i] == vip_index) && (b->value[i] == as_index)))
/* Transfer the flow reference to AS 0 so GC can reclaim the real AS. */
854 vlib_refcount_add(&lbm->as_refcount, thread_index, b->value[i], -1);
855 vlib_refcount_add(&lbm->as_refcount, thread_index, 0, 1);
863 lbm->per_cpu[thread_index].sticky_ht = 0;
/* Remove n ASs from a VIP.  Validates every address first, then marks
 * the ASs unused (actual reclamation happens later in GC), stamps
 * last_used, optionally flushes matching sticky flows, and rebuilds the
 * flow table.  Caller must already hold the writer lock. */
871 int lb_vip_del_ass_withlock(u32 vip_index, ip46_address_t *addresses, u32 n,
874 lb_main_t *lbm = &lb_main;
875 u32 now = (u32) vlib_time_now(vlib_get_main());
880 if (!(vip = lb_vip_get_by_index(vip_index))) {
881 return VNET_API_ERROR_NO_SUCH_ENTRY;
/* lb_as_find_index_vip returns non-zero when the AS is not found. */
886 if (lb_as_find_index_vip(vip, &addresses[n], &as_index)) {
888 return VNET_API_ERROR_NO_SUCH_ENTRY;
891 if (n) { //Check for duplicates
894 if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] &&
895 addresses[n2].as_u64[1] == addresses[n].as_u64[1])
900 vec_add1(indexes, as_index);
905 //Garbage collection maybe
906 lb_vip_garbage_collection(vip);
908 if (indexes != NULL) {
909 vec_foreach(ip, indexes) {
/* Soft-delete: clear the used flag; GC reclaims after the timeout. */
910 lbm->ass[*ip].flags &= ~LB_AS_FLAGS_USED;
911 lbm->ass[*ip].last_used = now;
915 /* flush flow table for deleted ASs*/
916 lb_flush_vip_as(vip_index, *ip);
921 lb_vip_update_new_flow_table(vip);
/* Public wrapper: take the writer lock around lb_vip_del_ass_withlock(). */
928 int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n, u8 flush)
930 lb_get_writer_lock();
931 int ret = lb_vip_del_ass_withlock(vip_index, addresses, n, flush);
932 lb_put_writer_lock();
/* Allocate the lowest free per-port VIP prefix index from the bitmap. */
938 lb_vip_prefix_index_alloc (lb_main_t *lbm)
941 * Check for dynamically allocated instance number.
945 bit = clib_bitmap_first_clear (lbm->vip_prefix_indexes);
947 lbm->vip_prefix_indexes = clib_bitmap_set(lbm->vip_prefix_indexes, bit, 1);
/* Return a previously-allocated prefix index to the bitmap; a clear bit
 * means the instance was never allocated. */
953 lb_vip_prefix_index_free (lb_main_t *lbm, u32 instance)
956 if (clib_bitmap_get (lbm->vip_prefix_indexes, instance) == 0)
961 lbm->vip_prefix_indexes = clib_bitmap_set (lbm->vip_prefix_indexes,
968 * Add the VIP adjacency to the ip4 or ip6 fib
/* Install the VIP prefix in FIB 0 as an exclusive route through the LB
 * DPO matching the VIP's encap type.  For per-port VIPs that share a
 * prefix with an existing VIP, the existing adjacency (and its prefix
 * index) is reused instead of adding a second route. */
970 static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip,
971 u32 *vip_prefix_index)
973 dpo_proto_t proto = 0;
974 dpo_type_t dpo_type = 0;
979 /* for per-port vip, if VIP adjacency has been added,
980 * no need to add adjacency. */
981 if (!lb_vip_port_find_diff_port(&(vip->prefix), vip->plen,
982 vip->protocol, vip->port, &vip_idx))
984 lb_vip_t *exists_vip = lb_vip_get_by_index(vip_idx);
985 *vip_prefix_index = exists_vip ? exists_vip->vip_prefix_index : ~0;
989 /* Allocate an index for per-port vip */
990 *vip_prefix_index = lb_vip_prefix_index_alloc(lbm);
/* All-port VIPs just use the vip pool index as the DPO index. */
994 *vip_prefix_index = vip - lbm->vips;
997 dpo_id_t dpo = DPO_INVALID;
998 fib_prefix_t pfx = {};
999 if (lb_vip_is_ip4(vip->type)) {
1000 pfx.fp_addr.ip4 = vip->prefix.ip4;
/* plen is stored in ip46 form (ip4 mapped at +96); convert to ip4 length. */
1001 pfx.fp_len = vip->plen - 96;
1002 pfx.fp_proto = FIB_PROTOCOL_IP4;
1003 proto = DPO_PROTO_IP4;
1005 pfx.fp_addr.ip6 = vip->prefix.ip6;
1006 pfx.fp_len = vip->plen;
1007 pfx.fp_proto = FIB_PROTOCOL_IP6;
1008 proto = DPO_PROTO_IP6;
/* Map the VIP's encap/stickiness variant to its registered DPO type. */
1011 if (lb_vip_is_gre4(vip))
1012 dpo_type = lbm->dpo_gre4_type;
1013 else if (lb_vip_is_gre6(vip))
1014 dpo_type = lbm->dpo_gre6_type;
1015 else if (lb_vip_is_gre4_port(vip))
1016 dpo_type = lbm->dpo_gre4_port_type;
1017 else if (lb_vip_is_gre6_port(vip))
1018 dpo_type = lbm->dpo_gre6_port_type;
1019 else if (lb_vip_is_l3dsr(vip))
1020 dpo_type = lbm->dpo_l3dsr_type;
1021 else if (lb_vip_is_l3dsr_port(vip))
1022 dpo_type = lbm->dpo_l3dsr_port_type;
1023 else if(lb_vip_is_nat4_port(vip))
1024 dpo_type = lbm->dpo_nat4_port_type;
1025 else if (lb_vip_is_nat6_port(vip))
1026 dpo_type = lbm->dpo_nat6_port_type;
1027 else if (lb_vip_is_gre4_sticky (vip))
1028 dpo_type = lbm->dpo_gre4_sticky_type;
1029 else if (lb_vip_is_gre6_sticky (vip))
1030 dpo_type = lbm->dpo_gre6_sticky_type;
1031 else if (lb_vip_is_gre4_port_sticky (vip))
1032 dpo_type = lbm->dpo_gre4_port_sticky_type;
1033 else if (lb_vip_is_gre6_port_sticky (vip))
1034 dpo_type = lbm->dpo_gre6_port_sticky_type;
1035 else if (lb_vip_is_l3dsr_sticky (vip))
1036 dpo_type = lbm->dpo_l3dsr_sticky_type;
1037 else if (lb_vip_is_l3dsr_port_sticky (vip))
1038 dpo_type = lbm->dpo_l3dsr_port_sticky_type;
1039 else if (lb_vip_is_nat4_port_sticky (vip))
1040 dpo_type = lbm->dpo_nat4_port_sticky_type;
1041 else if (lb_vip_is_nat6_port_sticky (vip))
1042 dpo_type = lbm->dpo_nat6_port_sticky_type;
1044 dpo_set(&dpo, dpo_type, proto, *vip_prefix_index);
1045 fib_table_entry_special_dpo_add(0,
1048 FIB_ENTRY_FLAG_EXCLUSIVE,
1054 * Add the VIP filter entry
/* Register (prefix_index, protocol, port) -> vip_idx in the per-port
 * classification table used by the data plane. */
1056 static int lb_vip_add_port_filter(lb_main_t *lbm, lb_vip_t *vip,
1057 u32 vip_prefix_index, u32 vip_idx)
1060 clib_bihash_kv_8_8_t kv;
1062 key.vip_prefix_index = vip_prefix_index;
1063 key.protocol = vip->protocol;
/* port is kept host-order on the vip; the hash key uses network order. */
1064 key.port = clib_host_to_net_u16(vip->port);
1067 kv.key = key.as_u64;
1069 clib_bihash_add_del_8_8(&lbm->vip_index_per_port, &kv, 1);
1077 * Del the VIP filter entry
/* Remove the (prefix_index, protocol, port) entry from the per-port
 * classification table.  Returns VNET_API_ERROR_NO_SUCH_ENTRY if the
 * key is not present. */
1080 clib_bihash_kv_8_8_t kv, value;
1083 key.vip_prefix_index = vip->vip_prefix_index;
1084 key.protocol = vip->protocol;
1085 key.port = clib_host_to_net_u16(vip->port);
1088 kv.key = key.as_u64;
1089 if(clib_bihash_search_8_8(&lbm->vip_index_per_port, &kv, &value) != 0)
1091 clib_warning("looking up vip_index_per_port failed.");
1092 return VNET_API_ERROR_NO_SUCH_ENTRY;
1094 m = pool_elt_at_index (lbm->vips, value.value);
1097 kv.value = m - lbm->vips;
1098 clib_bihash_add_del_8_8(&lbm->vip_index_per_port, &kv, 0);
1104 * Deletes the adjacency associated with the VIP
/* Remove the VIP's FIB route — unless another per-port VIP still shares
 * the same prefix, in which case the adjacency stays. */
1106 static void lb_vip_del_adjacency(lb_main_t *lbm, lb_vip_t *vip)
1108 fib_prefix_t pfx = {};
1113 /* If this vip adjacency is used by other per-port vip,
1114 * no need to del this adjacency. */
1115 if (!lb_vip_port_find_diff_port(&(vip->prefix), vip->plen,
1116 vip->protocol, vip->port, &vip_idx))
/* NOTE(review): this early-exit path releases the writer lock even
 * though the caller (lb_vip_del) also releases it afterwards — looks
 * like a potential double-unlock; verify against the full source. */
1118 lb_put_writer_lock();
1122 /* Return vip_prefix_index for per-port vip */
1123 lb_vip_prefix_index_free(lbm, vip->vip_prefix_index);
1127 if (lb_vip_is_ip4(vip->type)) {
1128 pfx.fp_addr.ip4 = vip->prefix.ip4;
/* ip46 plen -> ip4 plen (ip4 is mapped at +96 bits). */
1129 pfx.fp_len = vip->plen - 96;
1130 pfx.fp_proto = FIB_PROTOCOL_IP4;
1132 pfx.fp_addr.ip6 = vip->prefix.ip6;
1133 pfx.fp_len = vip->plen;
1134 pfx.fp_proto = FIB_PROTOCOL_IP6;
1136 fib_table_entry_special_remove(0, &pfx, lb_fib_src);
/* Create a new VIP.  Validates uniqueness (no exact duplicate, no
 * per-port VIP under an all-port VIP or vice versa, consistent type
 * across per-port VIPs of one prefix), argument sanity (power-of-two
 * flow-table size, matching address family, dscp < 64), then allocates
 * the vip, its counters and flow table, installs the FIB adjacency,
 * registers NAT nodeport handling if needed, and adds the per-port
 * filter entry.  Writes the new index to *vip_index; returns 0 or a
 * VNET_API_ERROR_* code. */
1139 int lb_vip_add(lb_vip_add_args_t args, u32 *vip_index)
1141 lb_main_t *lbm = &lb_main;
1142 vlib_main_t *vm = vlib_get_main();
1144 lb_vip_type_t type = args.type;
1145 u32 vip_prefix_index = 0;
1147 lb_get_writer_lock();
1148 ip46_prefix_normalize(&(args.prefix), args.plen);
1150 if (!lb_vip_port_find_index_with_lock(&(args.prefix), args.plen,
1151 args.protocol, args.port,
1154 lb_put_writer_lock();
1155 return VNET_API_ERROR_VALUE_EXIST;
1158 /* Make sure we can't add a per-port VIP entry
1159 * when there already is an all-port VIP for the same prefix. */
1160 if ((args.port != 0) &&
1161 !lb_vip_port_find_all_port_vip(&(args.prefix), args.plen, vip_index))
1163 lb_put_writer_lock();
1164 return VNET_API_ERROR_VALUE_EXIST;
1167 /* Make sure we can't add a all-port VIP entry
1168 * when there already is an per-port VIP for the same prefix. */
1169 if ((args.port == 0) &&
1170 !lb_vip_port_find_diff_port(&(args.prefix), args.plen,
1171 args.protocol, args.port, vip_index))
1173 lb_put_writer_lock();
1174 return VNET_API_ERROR_VALUE_EXIST;
1177 /* Make sure all VIP for a given prefix (using different ports) have the same type. */
1178 if ((args.port != 0) &&
1179 !lb_vip_port_find_diff_port(&(args.prefix), args.plen,
1180 args.protocol, args.port, vip_index)
1181 && (args.type != lbm->vips[*vip_index].type))
1183 lb_put_writer_lock();
1184 return VNET_API_ERROR_INVALID_ARGUMENT;
/* Flow table is addressed with a mask, so the size must be 2^n. */
1187 if (!is_pow2(args.new_length)) {
1188 lb_put_writer_lock();
1189 return VNET_API_ERROR_INVALID_MEMORY_SIZE;
1192 if (ip46_prefix_is_ip4(&(args.prefix), args.plen) &&
1193 !lb_vip_is_ip4(type)) {
1194 lb_put_writer_lock();
1195 return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
1198 if ((!ip46_prefix_is_ip4(&(args.prefix), args.plen)) &&
1199 !lb_vip_is_ip6(type)) {
1200 lb_put_writer_lock();
1201 return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
/* DSCP is a 6-bit field. */
1204 if ((type == LB_VIP_TYPE_IP4_L3DSR) &&
1205 (args.encap_args.dscp >= 64) )
1207 lb_put_writer_lock();
1208 return VNET_API_ERROR_VALUE_EXIST;
1212 pool_get(lbm->vips, vip);
1215 memcpy (&(vip->prefix), &(args.prefix), sizeof(args.prefix));
1216 vip->plen = args.plen;
1219 vip->protocol = args.protocol;
1220 vip->port = args.port;
/* All-port VIP: protocol is a wildcard. */
1224 vip->protocol = (u8)~0;
1227 vip->last_garbage_collection = (u32) vlib_time_now(vlib_get_main());
1228 vip->type = args.type;
1230 if (args.type == LB_VIP_TYPE_IP4_L3DSR) {
1231 vip->encap_args.dscp = args.encap_args.dscp;
1233 else if ((args.type == LB_VIP_TYPE_IP4_NAT4)
1234 ||(args.type == LB_VIP_TYPE_IP6_NAT6)) {
1235 vip->encap_args.srv_type = args.encap_args.srv_type;
1236 vip->encap_args.target_port =
1237 clib_host_to_net_u16(args.encap_args.target_port);
1240 vip->flags = LB_VIP_FLAGS_USED;
1241 if (args.src_ip_sticky)
1243 vip->flags |= LB_VIP_FLAGS_SRC_IP_STICKY;
1245 vip->as_indexes = 0;
1249 for (i = 0; i < LB_N_VIP_COUNTERS; i++) {
1250 vlib_validate_simple_counter(&lbm->vip_counters[i], vip - lbm->vips);
1251 vlib_zero_simple_counter(&lbm->vip_counters[i], vip - lbm->vips);
1254 //Configure new flow table
1255 vip->new_flow_table_mask = args.new_length - 1;
1256 vip->new_flow_table = 0;
1258 //Update flow hash table
1259 lb_vip_update_new_flow_table(vip);
1261 //Create adjacency to direct traffic
1262 lb_vip_add_adjacency(lbm, vip, &vip_prefix_index);
1264 if ( (lb_vip_is_nat4_port(vip) || lb_vip_is_nat6_port(vip))
1265 && (args.encap_args.srv_type == LB_SRV_TYPE_NODEPORT) )
1270 //Create maping from nodeport to vip_index
1271 key = clib_host_to_net_u16(args.port);
1272 entry = hash_get_mem (lbm->vip_index_by_nodeport, &key);
1274 lb_put_writer_lock();
1275 return VNET_API_ERROR_VALUE_EXIST;
1278 hash_set_mem (lbm->vip_index_by_nodeport, &key, vip - lbm->vips);
1280 /* receive packets destined to NodeIP:NodePort */
1281 udp_register_dst_port (vm, args.port, lb4_nodeport_node.index, 1);
1282 udp_register_dst_port (vm, args.port, lb6_nodeport_node.index, 0);
1285 *vip_index = vip - lbm->vips;
1286 //Create per-port vip filtering table
1289 lb_vip_add_port_filter(lbm, vip, vip_prefix_index, *vip_index);
1290 vip->vip_prefix_index = vip_prefix_index;
1293 lb_put_writer_lock();
/**
 * Delete a VIP by pool index.
 * Detaches all ASes bound to the VIP, removes its adjacency and per-port
 * filter entry, and marks the pool slot unused.
 * Returns a VNET_API_ERROR_* code.
 */
int lb_vip_del(u32 vip_index)
  lb_main_t *lbm = &lb_main;
  /* Does not remove default vip, i.e. vip_index = 0 */
  return VNET_API_ERROR_INVALID_VALUE;

  lb_get_writer_lock();
  if (!(vip = lb_vip_get_by_index(vip_index))) {
    lb_put_writer_lock();
    return VNET_API_ERROR_NO_SUCH_ENTRY;

  //FIXME: This operation is actually not working
  //We will need to remove state before performing this.

  /* Snapshot the addresses of every AS attached to this VIP... */
  ip46_address_t *ass = 0;
  pool_foreach (as_index, vip->as_indexes) {
    as = &lbm->ass[*as_index];
    vec_add1(ass, as->address);
  /* ...then detach them in one call; the writer lock is already held. */
  lb_vip_del_ass_withlock(vip_index, ass, vec_len(ass), 0);

  /* Remove the FIB adjacency that steered traffic to this VIP. */
  lb_vip_del_adjacency(lbm, vip);

  //Delete per-port vip filtering entry
  rv = lb_vip_del_port_filter(lbm, vip);

  //Set the VIP as unused
  vip->flags &= ~LB_VIP_FLAGS_USED;

  lb_put_writer_lock();
/* Register this plugin with VPP's plugin framework. */
VLIB_PLUGIN_REGISTER () = {
  .version = VPP_BUILD_VER,
  .description = "Load Balancer (LB)",
/**
 * DPO format callback (dv_format): prints the VIP that backs the DPO.
 * va_args: (index_t vip_index, u32 indent) per the dpo_vft_t contract.
 */
u8 *format_lb_dpo (u8 * s, va_list * va)
  index_t index = va_arg (*va, index_t);
  /* indent is part of the format signature but unused here */
  CLIB_UNUSED(u32 indent) = va_arg (*va, u32);
  lb_main_t *lbm = &lb_main;
  lb_vip_t *vip = pool_elt_at_index (lbm->vips, index);
  return format (s, "%U", format_lb_vip, vip);
1363 static void lb_dpo_lock (dpo_id_t *dpo) {}
1364 static void lb_dpo_unlock (dpo_id_t *dpo) {}
/* fib_node_vft .fnv_get: map a FIB node index (== AS pool index)
   to the fib_node embedded in the lb_as_t. */
lb_fib_node_get_node (fib_node_index_t index)
  lb_main_t *lbm = &lb_main;
  lb_as_t *as = pool_elt_at_index (lbm->ass, index);
  return (&as->fib_node);
/* fib_node_vft .fnv_last_lock: invoked when the last lock on an LB
   FIB node is released. */
lb_fib_node_last_lock_gone (fib_node_t *node)
/* Container-of: recover the enclosing lb_as_t from its embedded
   fib_node member. */
lb_as_from_fib_node (fib_node_t *node)
  return ((lb_as_t*)(((char*)node) -
                     STRUCT_OFFSET_OF(lb_as_t, fib_node)));
/**
 * Stack the AS's DPO on the IP-forwarding contribution of its next-hop
 * FIB entry, using the DPO type that matches the owning VIP's flavor.
 */
lb_as_stack (lb_as_t *as)
  lb_main_t *lbm = &lb_main;
  lb_vip_t *vip = &lbm->vips[as->vip_index];
  dpo_type_t dpo_type = 0;

  /* Select the DPO type registered in lb_init() for this VIP flavor:
     GRE4/GRE6 (optionally per-port), L3DSR, NAT4/NAT6 per-port, and
     the source-IP-sticky variant of each. */
  if (lb_vip_is_gre4(vip))
    dpo_type = lbm->dpo_gre4_type;
  else if (lb_vip_is_gre6(vip))
    dpo_type = lbm->dpo_gre6_type;
  else if (lb_vip_is_gre4_port(vip))
    dpo_type = lbm->dpo_gre4_port_type;
  else if (lb_vip_is_gre6_port(vip))
    dpo_type = lbm->dpo_gre6_port_type;
  else if (lb_vip_is_l3dsr(vip))
    dpo_type = lbm->dpo_l3dsr_type;
  else if (lb_vip_is_l3dsr_port(vip))
    dpo_type = lbm->dpo_l3dsr_port_type;
  else if(lb_vip_is_nat4_port(vip))
    dpo_type = lbm->dpo_nat4_port_type;
  else if (lb_vip_is_nat6_port(vip))
    dpo_type = lbm->dpo_nat6_port_type;
  else if (lb_vip_is_gre4_sticky (vip))
    dpo_type = lbm->dpo_gre4_sticky_type;
  else if (lb_vip_is_gre6_sticky (vip))
    dpo_type = lbm->dpo_gre6_sticky_type;
  else if (lb_vip_is_gre4_port_sticky (vip))
    dpo_type = lbm->dpo_gre4_port_sticky_type;
  else if (lb_vip_is_gre6_port_sticky (vip))
    dpo_type = lbm->dpo_gre6_port_sticky_type;
  else if (lb_vip_is_l3dsr_sticky (vip))
    dpo_type = lbm->dpo_l3dsr_sticky_type;
  else if (lb_vip_is_l3dsr_port_sticky (vip))
    dpo_type = lbm->dpo_l3dsr_port_sticky_type;
  else if (lb_vip_is_nat4_port_sticky (vip))
    dpo_type = lbm->dpo_nat4_port_sticky_type;
  else if (lb_vip_is_nat6_port_sticky (vip))
    dpo_type = lbm->dpo_nat6_port_sticky_type;

  /* Re-stack on the current forwarding of the next-hop FIB entry,
     choosing the DPO protocol from the VIP's address family. */
  lb_vip_is_ip4(vip->type)?DPO_PROTO_IP4:DPO_PROTO_IP6,
  fib_entry_contribute_ip_forwarding(
      as->next_hop_fib_entry_index));
/* fib_node_vft .fnv_back_walk: the next-hop FIB entry changed, so
   re-stack the AS's DPO on the updated forwarding. */
static fib_node_back_walk_rc_t
lb_fib_node_back_walk_notify (fib_node_t *node,
                              fib_node_back_walk_ctx_t *ctx)
  lb_as_stack(lb_as_from_fib_node(node));
  return (FIB_NODE_BACK_WALK_CONTINUE);
/**
 * Enable or disable the "lb-nat4-in2out" feature on the ip4-unicast
 * arc for sw_if_index; is_del selects which branch runs.
 */
int lb_nat4_interface_add_del (u32 sw_if_index, int is_del)
  /* enable_disable arg = 0: feature off (the is_del path — confirm) */
  vnet_feature_enable_disable ("ip4-unicast", "lb-nat4-in2out",
                               sw_if_index, 0, 0, 0);
  /* enable_disable arg = 1: feature on */
  vnet_feature_enable_disable ("ip4-unicast", "lb-nat4-in2out",
                               sw_if_index, 1, 0, 0);
/**
 * Enable or disable the "lb-nat6-in2out" feature on the ip6-unicast
 * arc for sw_if_index; is_del selects which branch runs.
 */
int lb_nat6_interface_add_del (u32 sw_if_index, int is_del)
  /* enable_disable arg = 0: feature off (the is_del path — confirm) */
  vnet_feature_enable_disable ("ip6-unicast", "lb-nat6-in2out",
                               sw_if_index, 0, 0, 0);
  /* enable_disable arg = 1: feature on */
  vnet_feature_enable_disable ("ip6-unicast", "lb-nat6-in2out",
                               sw_if_index, 1, 0, 0);
/**
 * Plugin init: registers the LB DPO and FIB node types, creates the
 * sentinel default VIP and AS pool entries, and initializes hashes,
 * counters, per-worker state and the writer lock.
 */
lb_init (vlib_main_t * vm)
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  lb_main_t *lbm = &lb_main;
  lbm->vnet_main = vnet_get_main ();
  lbm->vlib_main = vm;

  lb_vip_t *default_vip;
  lb_as_t *default_as;
  /* VFT hooking LB AS objects into the FIB graph (get/last-lock/back-walk). */
  fib_node_vft_t lb_fib_node_vft = {
    .fnv_get = lb_fib_node_get_node,
    .fnv_last_lock = lb_fib_node_last_lock_gone,
    .fnv_back_walk = lb_fib_node_back_walk_notify,
  /* VFT shared by every LB DPO type registered below. */
  dpo_vft_t lb_vft = {
    .dv_lock = lb_dpo_lock,
    .dv_unlock = lb_dpo_unlock,
    .dv_format = format_lb_dpo,

  //Allocate and init default VIP.
  pool_get(lbm->vips, default_vip);
  default_vip->new_flow_table_mask = 0;
  /* All-ones prefix marks the sentinel entry. */
  default_vip->prefix.ip6.as_u64[0] = 0xffffffffffffffffL;
  default_vip->prefix.ip6.as_u64[1] = 0xffffffffffffffffL;
  default_vip->protocol = ~0;
  default_vip->port = 0;
  default_vip->flags = LB_VIP_FLAGS_USED;

  /* Per-worker state, global defaults, and the writer lock. */
  vec_validate(lbm->per_cpu, tm->n_vlib_mains - 1);
  clib_spinlock_init (&lbm->writer_lock);
  lbm->per_cpu_sticky_buckets = LB_DEFAULT_PER_CPU_STICKY_BUCKETS;
  lbm->flow_timeout = LB_DEFAULT_FLOW_TIMEOUT;
  /* NOTE(review): all-ones source addresses look like "unset" sentinels
     to be overwritten by configuration — confirm against the CLI/API. */
  lbm->ip4_src_address.as_u32 = 0xffffffff;
  lbm->ip6_src_address.as_u64[0] = 0xffffffffffffffffL;
  lbm->ip6_src_address.as_u64[1] = 0xffffffffffffffffL;

  /* One DPO type per encap flavor, plus a sticky variant of each. */
  lbm->dpo_gre4_type = dpo_register_new_type(&lb_vft, lb_dpo_gre4_nodes);
  lbm->dpo_gre6_type = dpo_register_new_type(&lb_vft, lb_dpo_gre6_nodes);
  lbm->dpo_gre4_port_type = dpo_register_new_type(&lb_vft,
                                                  lb_dpo_gre4_port_nodes);
  lbm->dpo_gre6_port_type = dpo_register_new_type(&lb_vft,
                                                  lb_dpo_gre6_port_nodes);
  lbm->dpo_l3dsr_type = dpo_register_new_type(&lb_vft,
                                              lb_dpo_l3dsr_nodes);
  lbm->dpo_l3dsr_port_type = dpo_register_new_type(&lb_vft,
                                                   lb_dpo_l3dsr_port_nodes);
  lbm->dpo_nat4_port_type = dpo_register_new_type(&lb_vft,
                                                  lb_dpo_nat4_port_nodes);
  lbm->dpo_nat6_port_type = dpo_register_new_type(&lb_vft,
                                                  lb_dpo_nat6_port_nodes);
  lbm->dpo_gre4_sticky_type =
    dpo_register_new_type (&lb_vft, lb_dpo_gre4_sticky_nodes);
  lbm->dpo_gre6_sticky_type =
    dpo_register_new_type (&lb_vft, lb_dpo_gre6_sticky_nodes);
  lbm->dpo_gre4_port_sticky_type =
    dpo_register_new_type (&lb_vft, lb_dpo_gre4_port_sticky_nodes);
  lbm->dpo_gre6_port_sticky_type =
    dpo_register_new_type (&lb_vft, lb_dpo_gre6_port_sticky_nodes);
  lbm->dpo_l3dsr_sticky_type =
    dpo_register_new_type (&lb_vft, lb_dpo_l3dsr_sticky_nodes);
  lbm->dpo_l3dsr_port_sticky_type =
    dpo_register_new_type (&lb_vft, lb_dpo_l3dsr_port_sticky_nodes);
  lbm->dpo_nat4_port_sticky_type =
    dpo_register_new_type (&lb_vft, lb_dpo_nat4_port_sticky_nodes);
  lbm->dpo_nat6_port_sticky_type =
    dpo_register_new_type (&lb_vft, lb_dpo_nat6_port_sticky_nodes);
  lbm->fib_node_type = fib_node_register_new_type ("lb", &lb_fib_node_vft);

  //Init AS reference counters
  vlib_refcount_init(&lbm->as_refcount);

  //Allocate and init default AS.
  pool_get(lbm->ass, default_as);
  default_as->flags = 0;
  /* Traffic hitting the default AS is dropped. */
  default_as->dpo.dpoi_next_node = LB_NEXT_DROP;
  default_as->vip_index = ~0;
  default_as->address.ip6.as_u64[0] = 0xffffffffffffffffL;
  default_as->address.ip6.as_u64[1] = 0xffffffffffffffffL;

  /* Generate a valid flow table for default VIP */
  default_vip->as_indexes = NULL;
  lb_get_writer_lock();
  lb_vip_update_new_flow_table(default_vip);
  lb_put_writer_lock();

  /* NodePort (u16, network order) -> vip_index mapping. */
  lbm->vip_index_by_nodeport
    = hash_create_mem (0, sizeof(u16), sizeof (uword));

  /* Bihash for per-port VIP filtering. */
  clib_bihash_init_8_8 (&lbm->vip_index_per_port,
                        "vip_index_per_port", LB_VIP_PER_PORT_BUCKETS,
                        LB_VIP_PER_PORT_MEMORY_SIZE);

  /* Bihashes mapping AS addresses (v4 / v6) for NAT lookups. */
  clib_bihash_init_8_8 (&lbm->mapping_by_as4,
                        "mapping_by_as4", LB_MAPPING_BUCKETS,
                        LB_MAPPING_MEMORY_SIZE);

  clib_bihash_init_24_8 (&lbm->mapping_by_as6,
                         "mapping_by_as6", LB_MAPPING_BUCKETS,
                         LB_MAPPING_MEMORY_SIZE);

  /* Name each per-VIP counter from the lb_foreach_vip_counter x-macro. */
#define _(a,b,c) lbm->vip_counters[c].name = b;
  lb_foreach_vip_counter

  /* FIB source under which LB installs VIP routes. */
  lb_fib_src = fib_source_allocate("lb",
                                   FIB_SOURCE_PRIORITY_HI,
                                   FIB_SOURCE_BH_SIMPLE);
/* Run lb_init during VLIB initialization. */
VLIB_INIT_FUNCTION (lb_init);