/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <lb/lb.h>

#include <vnet/gre/packet.h>
#include <lb/lbhash.h>
/*
 * X-macro list of the error counters used by the lb nodes, as
 * (symbol suffix, description) pairs. NONE must stay first: the datapath
 * initialises its per-packet error to LB_ERROR_NONE.
 */
#define foreach_lb_error \
 _(NONE, "no error") \
 _(PROTO_NOT_SUPPORTED, "protocol not supported") \
 _(NO_SERVER, "no configured application server")

/* One LB_ERROR_<sym> per list entry; LB_N_ERROR is the number of entries. */
typedef enum {
#define _(sym,str) LB_ERROR_##sym,
  foreach_lb_error
#undef _
  LB_N_ERROR,
} lb_error_t;

/* Descriptions of the errors, in the same order as lb_error_t. */
static char *lb_error_strings[] = {
#define _(sym,string) string,
  foreach_lb_error
#undef _
};
45 format_lb_trace (u8 * s, va_list * args)
47 lb_main_t *lbm = &lb_main;
48 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
49 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
50 lb_trace_t *t = va_arg (*args, lb_trace_t *);
51 s = format(s, "lb vip[%d]: %U\n", t->vip_index, format_lb_vip, &lbm->vips[t->vip_index]);
52 s = format(s, "lb as[%d]: %U\n", t->as_index, format_lb_as, &lbm->ass[t->as_index]);
56 lb_hash_t *lb_get_sticky_table(u32 cpu_index)
58 lb_main_t *lbm = &lb_main;
59 lb_hash_t *sticky_ht = lbm->per_cpu[cpu_index].sticky_ht;
60 //Check if size changed
61 if (PREDICT_FALSE(sticky_ht && (lbm->per_cpu_sticky_buckets != lb_hash_nbuckets(sticky_ht)))) {
63 //Dereference everything in there
65 lb_hash_foreach_entry(sticky_ht, e) {
66 vlib_refcount_add(&lbm->as_refcount, cpu_index, e->value, -1);
67 vlib_refcount_add(&lbm->as_refcount, cpu_index, 0, -1);
70 lb_hash_free(sticky_ht);
75 if (PREDICT_FALSE(sticky_ht == NULL)) {
76 lbm->per_cpu[cpu_index].sticky_ht = lb_hash_alloc(lbm->per_cpu_sticky_buckets, lbm->flow_timeout);
77 sticky_ht = lbm->per_cpu[cpu_index].sticky_ht;
78 clib_warning("Regenerated sticky table %p", sticky_ht);
84 sticky_ht->timeout = lbm->flow_timeout;
88 static_always_inline uword
89 lb_node_fn (vlib_main_t * vm,
90 vlib_node_runtime_t * node, vlib_frame_t * frame,
91 u8 is_input_v4, //Compile-time parameter stating that is input is v4 (or v6)
92 u8 is_encap_v4) //Compile-time parameter stating that is GRE encap is v4 (or v6)
94 lb_main_t *lbm = &lb_main;
95 vlib_node_runtime_t *error_node = node;
96 u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
97 u32 cpu_index = os_get_cpu_number();
98 u32 lb_time = lb_hash_time_now(vm);
100 lb_hash_t *sticky_ht = lb_get_sticky_table(cpu_index);
101 from = vlib_frame_vector_args (frame);
102 n_left_from = frame->n_vectors;
103 next_index = node->cached_next_index;
105 while (n_left_from > 0)
107 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
108 while (n_left_from > 0 && n_left_to_next > 0)
116 u32 value0, available_index0, hash0;
118 lb_error_t error0 = LB_ERROR_NONE;
120 if (PREDICT_TRUE(n_left_from > 1))
123 p2 = vlib_get_buffer(vm, from[1]);
124 vlib_prefetch_buffer_header(p2, STORE);
125 /* IPv4 + 8 = 28. possibly plus -40 */
126 CLIB_PREFETCH (vlib_buffer_get_current(p2) - 40, 128, STORE);
129 pi0 = to_next[0] = from[0];
135 p0 = vlib_get_buffer (vm, pi0);
136 vip0 = pool_elt_at_index (lbm->vips,
137 vnet_buffer (p0)->ip.adj_index[VLIB_TX]);
141 ip40 = vlib_buffer_get_current (p0);
142 len0 = clib_net_to_host_u16(ip40->length);
143 key0[0] = (u64) ip40->src_address.as_u32;
144 key0[1] = (u64) ip40->dst_address.as_u32;
147 key0[4] = ((u64)((udp_header_t *)(ip40 + 1))->src_port << 32) |
148 ((u64)((udp_header_t *)(ip40 + 1))->dst_port << 16);
150 hash0 = lb_hash_hash(key0);
153 ip60 = vlib_buffer_get_current (p0);
154 len0 = clib_net_to_host_u16(ip60->payload_length) + sizeof(ip6_header_t);
155 key0[0] = ip60->src_address.as_u64[0];
156 key0[1] = ip60->src_address.as_u64[1];
157 key0[2] = ip60->dst_address.as_u64[0];
158 key0[3] = ip60->dst_address.as_u64[1];
159 key0[4] = ((u64)((udp_header_t *)(ip60 + 1))->src_port << 32) |
160 ((u64)((udp_header_t *)(ip60 + 1))->dst_port << 16);
162 hash0 = lb_hash_hash(key0);
165 //NOTE: This is an ugly trick to not include the VIP index in the hash calculation
166 //but actually use it in the key determination.
167 key0[4] |= ((vip0 - lbm->vips));
169 lb_hash_get(sticky_ht, key0, hash0, lb_time, &available_index0, &value0);
170 if (PREDICT_TRUE(value0 != ~0)) {
171 //Found an existing entry
172 as0 = &lbm->ass[value0];
173 } else if (PREDICT_TRUE(available_index0 != ~0)) {
174 //There is an available slot for a new flow
175 as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index];
176 if (PREDICT_FALSE(as0 == lbm->ass)) { //Special first element
177 error0 = LB_ERROR_NO_SERVER;
179 vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_TRACKED_SESSION],
180 cpu_index, vip0 - lbm->vips, 1);
183 //TODO: There are race conditions with as0 and vip0 manipulation.
184 //Configuration may be changed, vectors resized, etc...
186 //Dereference previously used
187 vlib_refcount_add(&lbm->as_refcount, cpu_index, lb_hash_available_value(sticky_ht, available_index0), -1);
188 vlib_refcount_add(&lbm->as_refcount, cpu_index, as0 - lbm->ass, 1);
191 //Note that when there is no AS configured, an entry is configured anyway.
192 //But no configured AS is not something that should happen
193 lb_hash_put(sticky_ht, key0, as0 - lbm->ass, available_index0, lb_time);
195 //Could not store new entry in the table
196 as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index];
197 vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_UNTRACKED_PACKET],
198 cpu_index, vip0 - lbm->vips, 1);
204 vlib_buffer_advance(p0, - sizeof(ip4_header_t) - sizeof(gre_header_t));
205 ip40 = vlib_buffer_get_current(p0);
206 gre0 = (gre_header_t *)(ip40 + 1);
207 ip40->src_address = lbm->ip4_src_address;
208 ip40->dst_address = as0->address.ip4;
209 ip40->ip_version_and_header_length = 0x45;
211 ip40->length = clib_host_to_net_u16(len0 + sizeof(gre_header_t) + sizeof(ip4_header_t));
212 ip40->protocol = IP_PROTOCOL_GRE;
213 ip40->checksum = ip4_header_checksum (ip40);
216 vlib_buffer_advance(p0, - sizeof(ip6_header_t) - sizeof(gre_header_t));
217 ip60 = vlib_buffer_get_current(p0);
218 gre0 = (gre_header_t *)(ip60 + 1);
219 ip60->dst_address = as0->address.ip6;
220 ip60->src_address = lbm->ip6_src_address;
221 ip60->hop_limit = 128;
222 ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6<<28);
223 ip60->payload_length = clib_host_to_net_u16(len0 + sizeof(gre_header_t));
224 ip60->protocol = IP_PROTOCOL_GRE;
227 gre0->flags_and_version = 0;
228 gre0->protocol = (is_input_v4)?
229 clib_host_to_net_u16(0x0800):
230 clib_host_to_net_u16(0x86DD);
232 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = as0->dpo.dpoi_index;
234 if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
236 lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
237 tr->as_index = as0 - lbm->ass;
238 tr->vip_index = vip0 - lbm->vips;
241 p0->error = error_node->errors[error0];
242 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
244 as0->dpo.dpoi_next_node);
246 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
249 return frame->n_vectors;
253 lb6_gre6_node_fn (vlib_main_t * vm,
254 vlib_node_runtime_t * node, vlib_frame_t * frame)
256 return lb_node_fn(vm, node, frame, 0, 0);
260 lb6_gre4_node_fn (vlib_main_t * vm,
261 vlib_node_runtime_t * node, vlib_frame_t * frame)
263 return lb_node_fn(vm, node, frame, 0, 1);
267 lb4_gre6_node_fn (vlib_main_t * vm,
268 vlib_node_runtime_t * node, vlib_frame_t * frame)
270 return lb_node_fn(vm, node, frame, 1, 0);
274 lb4_gre4_node_fn (vlib_main_t * vm,
275 vlib_node_runtime_t * node, vlib_frame_t * frame)
277 return lb_node_fn(vm, node, frame, 1, 1);
280 VLIB_REGISTER_NODE (lb6_gre6_node) =
282 .function = lb6_gre6_node_fn,
284 .vector_size = sizeof (u32),
285 .format_trace = format_lb_trace,
287 .n_errors = LB_N_ERROR,
288 .error_strings = lb_error_strings,
290 .n_next_nodes = LB_N_NEXT,
293 [LB_NEXT_DROP] = "error-drop"
297 VLIB_REGISTER_NODE (lb6_gre4_node) =
299 .function = lb6_gre4_node_fn,
301 .vector_size = sizeof (u32),
302 .format_trace = format_lb_trace,
304 .n_errors = LB_N_ERROR,
305 .error_strings = lb_error_strings,
307 .n_next_nodes = LB_N_NEXT,
310 [LB_NEXT_DROP] = "error-drop"
314 VLIB_REGISTER_NODE (lb4_gre6_node) =
316 .function = lb4_gre6_node_fn,
318 .vector_size = sizeof (u32),
319 .format_trace = format_lb_trace,
321 .n_errors = LB_N_ERROR,
322 .error_strings = lb_error_strings,
324 .n_next_nodes = LB_N_NEXT,
327 [LB_NEXT_DROP] = "error-drop"
331 VLIB_REGISTER_NODE (lb4_gre4_node) =
333 .function = lb4_gre4_node_fn,
335 .vector_size = sizeof (u32),
336 .format_trace = format_lb_trace,
338 .n_errors = LB_N_ERROR,
339 .error_strings = lb_error_strings,
341 .n_next_nodes = LB_N_NEXT,
344 [LB_NEXT_DROP] = "error-drop"