2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
17 * lb-plugin implements a MagLev-like load balancer.
18 * http://research.google.com/pubs/pub44824.html
20 * It hasn't been tested for interoperability with the original MagLev
21 * but intends to provide similar functionality.
22 * The load-balancer receives traffic destined to VIP (Virtual IP)
23 * addresses from one or multiple (ECMP) routers.
24 * The load-balancer tunnels the traffic toward many application servers
25 * ensuring session stickiness (i.e. that a single session is tunneled
26 * towards a single application server).
30 #ifndef LB_PLUGIN_LB_LB_H_
31 #define LB_PLUGIN_LB_LB_H_
34 #include <lb/refcount.h>
36 #include <vnet/vnet.h>
37 #include <vnet/ip/ip.h>
39 #include <lb/lbhash.h>
/**
 * Default configuration values.
 *
 * LB_DEFAULT_PER_CPU_STICKY_BUCKETS: default bucket count (1024) for the
 * per-CPU sticky hash table. The expansion is parenthesized so the macro
 * behaves as a single value inside larger expressions (e.g. `X - 1`).
 *
 * LB_DEFAULT_FLOW_TIMEOUT: default flow timeout (the flow timeout is
 * expressed in seconds).
 */
#define LB_DEFAULT_PER_CPU_STICKY_BUCKETS (1 << 10)
#define LB_DEFAULT_FLOW_TIMEOUT 40
45 * Each VIP is configured with a set of
50 * Destination address used to tunnel traffic towards
51 * that application server.
52 * The address is also used as ID and pseudo-random
53 * seed for the load-balancing process.
55 ip46_address_t address;
58 * Second ip lookup can be avoided by sending directly the packet
59 * to ip-rewrite with a configured adjacency.
60 * When set to ~0, the packets are sent to ip6-lookup.
65 * ASs are indexed by address and VIP Index.
66 * This means there will be duplicates if the same server
67 * address is used for multiple VIPs.
73 * For now only LB_AS_FLAGS_USED is defined.
77 #define LB_AS_FLAGS_USED 0x1
80 * Rotating timestamp of when LB_AS_FLAGS_USED flag was last set.
82 * AS removal is based on garbage collection and reference counting.
83 * When an AS is removed, there is a race between configuration core
84 * and worker cores which may still add a reference while it should not
85 * be used. This timestamp is used to not remove the AS while a race condition
91 format_function_t format_lb_as;
95 } lb_new_flow_entry_t;
/**
 * X-macro listing the per-VIP counters.
 * Each entry has the form _(NAME, "description", value). The user defines
 * `_` before expanding the list; for instance, the enum that follows expands
 * each entry to `LB_VIP_COUNTER_<NAME> = value,`.
 */
97 #define lb_foreach_vip_counter \
98 _(TRACKED_SESSION, "tracked session", 0) \
99 _(UNTRACKED_PACKET, "untracked packet", 1)
102 #define _(a,b,c) LB_VIP_COUNTER_##a = c,
103 lb_foreach_vip_counter
109 * The load balancer supports IPv4 and IPv6 traffic
110 * and GRE4 and GRE6 encap.
113 LB_VIP_TYPE_IP6_GRE6,
114 LB_VIP_TYPE_IP6_GRE4,
115 LB_VIP_TYPE_IP4_GRE6,
116 LB_VIP_TYPE_IP4_GRE4,
120 format_function_t format_lb_vip_type;
121 unformat_function_t unformat_lb_vip_type;
124 * Load balancing service is provided per VIP.
125 * In this data model, a VIP can be a whole prefix.
126 * But load balancing only
127 * occurs on a per-source-address/port basis, meaning that if a given source
128 * reuses the same port for multiple destinations within the same VIP,
129 * they will be considered as a single flow.
136 * Vector mapping (flow-hash & new_flow_table_mask) to AS index.
137 * This is used for new flows.
139 lb_new_flow_entry_t *new_flow_table;
142 * New flows table length - 1
143 * (length MUST be a power of 2)
145 u32 new_flow_table_mask;
148 * Last time garbage collection was run to free the ASs.
150 u32 last_garbage_collection;
155 * A Virtual IP represents a given service delivered
156 * by a set of application servers. It can be a single
157 * address or a prefix.
158 * IPv4 prefixes are encoded using IPv4-in-IPv6 embedded address
159 * (i.e. ::/96 prefix).
161 ip46_address_t prefix;
164 * The VIP prefix length.
165 * In case of IPv4, plen = 96 + ip4_plen.
170 * The type of traffic for this VIP.
171 * LB_TYPE_UNDEFINED if unknown.
176 * Flags related to this VIP.
177 * LB_VIP_FLAGS_USED means the VIP is active.
178 * When it is not set, the VIP is in the process of being removed.
179 * We cannot immediately remove a VIP because the VIP index still may be stored
180 * in the adjacency index.
185 * Pool of AS indexes used for this VIP.
186 * This also includes ASs that have been removed (but are still referenced).
190 #define LB_VIP_FLAGS_USED 0x1
/**
 * Type predicates taking a pointer to a VIP.
 * lb_vip_is_ip4(vip):  non-zero when vip->type is LB_VIP_TYPE_IP4_GRE6 or
 *                      LB_VIP_TYPE_IP4_GRE4.
 * lb_vip_is_gre4(vip): non-zero when vip->type is LB_VIP_TYPE_IP6_GRE4 or
 *                      LB_VIP_TYPE_IP4_GRE4.
 * Both macros expand `vip` twice, so do not pass an expression with side
 * effects.
 */
194 #define lb_vip_is_ip4(vip) ((vip)->type == LB_VIP_TYPE_IP4_GRE6 || (vip)->type == LB_VIP_TYPE_IP4_GRE4)
195 #define lb_vip_is_gre4(vip) ((vip)->type == LB_VIP_TYPE_IP6_GRE4 || (vip)->type == LB_VIP_TYPE_IP4_GRE4)
196 format_function_t format_lb_vip;
197 format_function_t format_lb_vip_detailed;
201 * Each CPU has its own sticky flow hash table.
202 * One single table is used for all VIPs.
204 lb_hash_t *sticky_ht;
209 * Pool of all Virtual IPs
215 * ASs are referenced by address and vip index.
216 * The first element (index 0) is special and used only to fill
217 * new_flow_tables when no AS has been configured.
222 * Each AS has an associated reference counter.
223 * As ass[0] has a special meaning, its associated counter
224 * starts at 0 and is decremented instead. i.e. do not use it.
226 vlib_refcount_t as_refcount;
229 * Some global data is per-cpu
231 lb_per_cpu_t *per_cpu;
234 * Node next index for IP adjacencies, for each of the traffic types.
236 u32 ip_lookup_next_index[LB_VIP_N_TYPES];
239 * Source address used in IPv6 encapsulated traffic
241 ip6_address_t ip6_src_address;
244 * Source address used for IPv4 encapsulated traffic
246 ip4_address_t ip4_src_address;
249 * Number of buckets in the per-cpu sticky hash table.
251 u32 per_cpu_sticky_buckets;
254 * Flow timeout in seconds.
261 vlib_simple_counter_main_t vip_counters[LB_N_VIP_COUNTERS];
264 * API dynamically registered base ID.
268 volatile u32 *writer_lock;
272 * struct stored in adj->opaque data.
276 * Index of the VIP associated with that IP adjacency.
281 extern lb_main_t lb_main;
282 extern vlib_node_registration_t lb6_node;
283 extern vlib_node_registration_t lb4_node;
286 * Set global load-balancer parameters.
287 * @param ip4_address IPv4 source address used for encapsulated traffic
288 * @param ip6_address IPv6 source address used for encapsulated traffic
289 * @return 0 on success. VNET_LB_ERR_XXX on error
/*
 * NOTE(review): the description of this function documents only the two
 * source addresses. sticky_buckets presumably sets the number of buckets of
 * the per-CPU sticky hash table and flow_timeout the flow timeout in seconds
 * (cf. LB_DEFAULT_PER_CPU_STICKY_BUCKETS and LB_DEFAULT_FLOW_TIMEOUT) --
 * confirm against the implementation.
 */
291 int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address,
292 u32 sticky_buckets, u32 flow_timeout);
/*
 * VIP management.
 *
 * lb_vip_add: takes the VIP prefix and prefix length, the VIP type and
 * new_length, plus a vip_index pointer.
 * NOTE(review): new_length is presumably the length of the new-flow table
 * (which must be a power of 2), and *vip_index presumably receives the index
 * of the created VIP -- confirm against the implementation.
 */
294 int lb_vip_add(ip46_address_t *prefix, u8 plen, lb_vip_type_t type,
295 u32 new_length, u32 *vip_index);
/*
 * lb_vip_del: takes the index of the VIP to delete.
 * NOTE(review): the LB_VIP_FLAGS_USED description says a VIP cannot be
 * removed immediately, so actual release is presumably deferred -- confirm.
 */
296 int lb_vip_del(u32 vip_index);
/*
 * lb_vip_find_index: takes a prefix and prefix length plus a vip_index
 * pointer.
 * NOTE(review): presumably stores the index of the matching VIP in
 * *vip_index and returns 0 on success -- confirm.
 */
298 int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index);
/*
 * Evaluates to NULL when `index` is a free index of the lb_main.vips pool,
 * and to pool_elt_at_index(lb_main.vips, index) otherwise.
 * `index` is expanded twice; avoid arguments with side effects.
 */
300 #define lb_vip_get_by_index(index) (pool_is_free_index(lb_main.vips, index)?NULL:pool_elt_at_index(lb_main.vips, index))
/*
 * Add (lb_vip_add_ass) or remove (lb_vip_del_ass) application servers for
 * the VIP identified by vip_index. `addresses` points to `n` AS addresses.
 * NOTE(review): the return convention and the behavior on partial failure
 * are not visible in this header -- confirm against the implementation.
 */
302 int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n);
303 int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n);
306 * Updates the adjacency index stored in the AS such that the second
307 * IP lookup (after encap) can be bypassed.
/*
 * The AS is identified by its address within the VIP at vip_index.
 * NOTE(review): a non-zero is_disable presumably turns the bypass off again
 * (restoring the regular second IP lookup) -- confirm.
 */
309 int lb_as_lookup_bypass(u32 vip_index, ip46_address_t *address, u8 is_disable);
/*
 * Returns a 32-bit time value for the given vlib main.
 * NOTE(review): presumably the current time in the unit used for flow
 * timeouts in the sticky hash table -- confirm against the implementation.
 */
311 u32 lb_hash_time_now(vlib_main_t * vm);
/**
 * Runs the load-balancer garbage collection. Per the comments in this
 * header, AS removal relies on garbage collection and reference counting,
 * and garbage collection is what frees the ASs.
 *
 * Declared with an explicit (void) parameter list: in C an empty `()` does
 * not form a prototype and disables argument checking at call sites.
 */
void lb_garbage_collection(void);
315 format_function_t format_lb_main;
317 #endif /* LB_PLUGIN_LB_LB_H_ */