2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
18 #include <vnet/gre/packet.h>
19 #include <lb/lbhash.h>
/* X-macro table of the lb node error codes: every entry has the form
 * _(SYMBOL, "description"). A user defines `_` to pick the field it needs
 * before expanding foreach_lb_error. */
21 #define foreach_lb_error \
23 _(PROTO_NOT_SUPPORTED, "protocol not supported")
/* Definition of `_` that turns an entry into its LB_ERROR_<SYMBOL> identifier.
 * NOTE(review): presumably sits inside the error enum that ends with
 * LB_N_ERROR; the surrounding lines are not visible in this excerpt. */
26 #define _(sym,str) LB_ERROR_##sym,
/* Description strings of the lb node errors, passed as .error_strings in the
 * node registrations. `_` is redefined here to keep only the string field of
 * each error entry. */
32 static char *lb_error_strings[] = {
33 #define _(sym,string) string,
44 format_lb_trace (u8 * s, va_list * args)
46 lb_main_t *lbm = &lb_main;
47 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
48 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
49 lb_trace_t *t = va_arg (*args, lb_trace_t *);
50 if (pool_is_free_index(lbm->vips, t->vip_index)) {
51 s = format(s, "lb vip[%d]: This VIP was freed since capture\n");
53 s = format(s, "lb vip[%d]: %U\n", t->vip_index, format_lb_vip, &lbm->vips[t->vip_index]);
55 if (pool_is_free_index(lbm->ass, t->as_index)) {
56 s = format(s, "lb as[%d]: This AS was freed since capture\n");
58 s = format(s, "lb as[%d]: %U\n", t->as_index, format_lb_as, &lbm->ass[t->as_index]);
/*
 * Look up the sticky hash table stored in lbm->per_cpu[thread_index].
 *
 * - If a table exists but its bucket count differs from the configured
 *   lbm->per_cpu_sticky_buckets, the references held by its entries are
 *   moved and the table is freed.
 * - If there is no table, one is allocated with the configured bucket count
 *   and flow timeout, and a warning is logged.
 * - The table's timeout is then set from lbm->flow_timeout.
 *
 * NOTE(review): several lines of this function are not visible in this
 * excerpt (declarations of b and i, the closing braces, whatever resets
 * sticky_ht after lb_hash_free, and the return statement); confirm against
 * the full file. The caller in lb_node_fn uses the result as an lb_hash_t *.
 */
63 lb_hash_t *lb_get_sticky_table(u32 thread_index)
65 lb_main_t *lbm = &lb_main;
66 lb_hash_t *sticky_ht = lbm->per_cpu[thread_index].sticky_ht;
67 //Check if size changed
68 if (PREDICT_FALSE(sticky_ht && (lbm->per_cpu_sticky_buckets != lb_hash_nbuckets(sticky_ht))))
70 //Dereference everything in there
// For each entry visited: take one reference away from the AS index stored in
// the entry (b->value[i]) and add one reference to AS index 0.
73 lb_hash_foreach_entry(sticky_ht, b, i) {
74 vlib_refcount_add(&lbm->as_refcount, thread_index, b->value[i], -1);
75 vlib_refcount_add(&lbm->as_refcount, thread_index, 0, 1);
78 lb_hash_free(sticky_ht);
// No table available: allocate one and store it in the per-thread slot.
83 if (PREDICT_FALSE(sticky_ht == NULL)) {
84 lbm->per_cpu[thread_index].sticky_ht = lb_hash_alloc(lbm->per_cpu_sticky_buckets, lbm->flow_timeout);
85 sticky_ht = lbm->per_cpu[thread_index].sticky_ht;
86 clib_warning("Regenerated sticky table %p", sticky_ht);
// Copy the configured flow timeout into the table on every call.
92 sticky_ht->timeout = lbm->flow_timeout;
/* Supplies the "ports" hash input for an IPv4 packet; lb_node_get_hash calls
 * it on the path that does not read TCP/UDP ports.
 * NOTE(review): return type and body are not visible in this excerpt. */
97 lb_node_get_other_ports4(ip4_header_t *ip40)
/* Supplies the "ports" hash input for an IPv6 packet; lb_node_get_hash calls
 * it on the path that does not read TCP/UDP ports.
 * NOTE(review): return type and body are not visible in this excerpt. */
103 lb_node_get_other_ports6(ip6_header_t *ip60)
/*
 * Compute the u32 flow hash of packet p from its IP addresses and a "ports"
 * value, using lb_hash_hash.
 *
 * p            buffer whose current data is read as an IPv4 or IPv6 header
 * is_input_v4  selects the IPv4 or the IPv6 parsing path
 *
 * NOTE(review): the branch on is_input_v4, the `else` lines, the local
 * declarations, the trailing lb_hash_hash arguments and the return statement
 * are not visible in this excerpt.
 */
108 static_always_inline u32
109 lb_node_get_hash(vlib_buffer_t *p, u8 is_input_v4)
// IPv4 path.
116 ip40 = vlib_buffer_get_current (p);
// TCP and UDP: build ports as (src_port << 16) | dst_port. Both protocols are
// read through udp_header_t located at ip40 + 1, and the port fields are used
// as stored in the packet (no byte-order conversion).
// NOTE(review): ip40 + 1 does not appear to account for IPv4 options - confirm.
117 if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_TCP ||
118 ip40->protocol == IP_PROTOCOL_UDP))
119 ports = ((u64)((udp_header_t *)(ip40 + 1))->src_port << 16) |
120 ((u64)((udp_header_t *)(ip40 + 1))->dst_port);
// Otherwise the ports value comes from the helper.
122 ports = lb_node_get_other_ports4(ip40);
// The address pair is read as a single u64 and hashed together with ports.
124 hash = lb_hash_hash(*((u64 *)&ip40->address_pair), ports,
// IPv6 path.
130 ip60 = vlib_buffer_get_current (p);
// Same port extraction as for IPv4, with the L4 header read at ip60 + 1.
// NOTE(review): ip60->protocol is the header's next-header field; extension
// headers do not appear to be skipped - confirm.
132 if (PREDICT_TRUE (ip60->protocol == IP_PROTOCOL_TCP ||
133 ip60->protocol == IP_PROTOCOL_UDP))
134 ports = ((u64)((udp_header_t *)(ip60 + 1))->src_port << 16) |
135 ((u64)((udp_header_t *)(ip60 + 1))->dst_port);
137 ports = lb_node_get_other_ports6(ip60);
// Both 128-bit addresses are passed as two u64 halves each.
139 hash = lb_hash_hash(ip60->src_address.as_u64[0],
140 ip60->src_address.as_u64[1],
141 ip60->dst_address.as_u64[0],
142 ip60->dst_address.as_u64[1],
/*
 * Common packet-processing routine behind the four lb graph nodes.
 *
 * For every buffer in the frame it:
 *   1. takes the VIP index from vnet_buffer(p0)->ip.adj_index[VLIB_TX],
 *   2. looks the flow hash up in the thread's sticky table to find the AS
 *      index, or picks one from the VIP's new_flow_table,
 *   3. increments the matching per-VIP counter,
 *   4. prepends an outer IPv4 or IPv6 header plus a GRE header addressed to
 *      the chosen AS,
 *   5. enqueues the buffer to the next node of the AS's DPO.
 *
 * is_input_v4 / is_encap_v4 are passed as literal 0/1 by the four wrapper
 * functions. Returns frame->n_vectors.
 *
 * NOTE(review): many lines of this function are not visible in this excerpt
 * (local declarations, braces, `else` branches, some call arguments); the
 * comments below describe the visible statements only.
 */
148 static_always_inline uword
149 lb_node_fn (vlib_main_t * vm,
150 vlib_node_runtime_t * node, vlib_frame_t * frame,
151 u8 is_input_v4, //Compile-time parameter: non-zero when the input packets are IPv4, zero when they are IPv6
152 u8 is_encap_v4) //Compile-time parameter: non-zero when the GRE encapsulation is over IPv4, zero when over IPv6
154 lb_main_t *lbm = &lb_main;
155 u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
156 u32 thread_index = vlib_get_thread_index();
157 u32 lb_time = lb_hash_time_now(vm);
159 lb_hash_t *sticky_ht = lb_get_sticky_table(thread_index);
160 from = vlib_frame_vector_args (frame);
161 n_left_from = frame->n_vectors;
162 next_index = node->cached_next_index;
// The hash of the first packet is computed before the loop; inside the loop
// the hash of the following packet is computed one iteration ahead.
165 if (PREDICT_TRUE(n_left_from > 0))
166 nexthash0 = lb_node_get_hash(vlib_get_buffer (vm, from[0]), is_input_v4);
168 while (n_left_from > 0)
170 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
171 while (n_left_from > 0 && n_left_to_next > 0)
178 u32 available_index0;
// Hash of the current packet, computed during the previous iteration
// (or before the loop for the first packet).
180 u32 hash0 = nexthash0;
182 if (PREDICT_TRUE(n_left_from > 1))
184 vlib_buffer_t *p1 = vlib_get_buffer (vm, from[1]);
185 //Compute next hash and prefetch bucket
186 nexthash0 = lb_node_get_hash(p1, is_input_v4);
187 lb_hash_prefetch_bucket(sticky_ht, nexthash0);
188 //Prefetch for encap, next
// The 64 bytes located before the current data are prefetched for writing.
189 CLIB_PREFETCH (vlib_buffer_get_current(p1) - 64, 64, STORE);
192 if (PREDICT_TRUE(n_left_from > 2))
195 p2 = vlib_get_buffer(vm, from[2]);
196 /* prefetch packet header and data */
197 vlib_prefetch_buffer_header(p2, STORE);
198 CLIB_PREFETCH (vlib_buffer_get_current(p2), 64, STORE);
201 pi0 = to_next[0] = from[0];
207 p0 = vlib_get_buffer (vm, pi0);
// On input, ip.adj_index[VLIB_TX] is used as the index of the VIP in lbm->vips.
208 vip0 = pool_elt_at_index (lbm->vips,
209 vnet_buffer (p0)->ip.adj_index[VLIB_TX]);
// len0: length of the incoming IP packet in host byte order.
// IPv4: the header's length field.
214 ip40 = vlib_buffer_get_current (p0);
215 len0 = clib_net_to_host_u16(ip40->length);
// IPv6: payload_length plus the size of ip6_header_t.
220 ip60 = vlib_buffer_get_current (p0);
221 len0 = clib_net_to_host_u16(ip60->payload_length) + sizeof(ip6_header_t);
// Sticky-table lookup keyed by the hash and the VIP index; it reports through
// available_index0 and asindex0, where ~0 is tested below as "none".
224 lb_hash_get(sticky_ht, hash0, vnet_buffer (p0)->ip.adj_index[VLIB_TX],
225 lb_time, &available_index0, &asindex0);
227 if (PREDICT_TRUE(asindex0 != ~0))
229 //Found an existing entry
230 counter = LB_VIP_COUNTER_NEXT_PACKET;
232 else if (PREDICT_TRUE(available_index0 != ~0))
234 //There is an available slot for a new flow
// The AS is chosen from the VIP's new_flow_table, indexed by the masked hash.
235 asindex0 = vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index;
236 counter = LB_VIP_COUNTER_FIRST_PACKET;
// AS index 0 is counted as "no server" rather than as a first packet.
237 counter = (asindex0 == 0)?LB_VIP_COUNTER_NO_SERVER:counter;
239 //TODO: There are race conditions with as0 and vip0 manipulation.
240 //Configuration may be changed, vectors resized, etc...
242 //Dereference previously used
243 vlib_refcount_add(&lbm->as_refcount, thread_index,
244 lb_hash_available_value(sticky_ht, hash0, available_index0), -1);
245 vlib_refcount_add(&lbm->as_refcount, thread_index,
249 //Note that when there is no AS configured, an entry is configured anyway.
250 //But no configured AS is not something that should happen
251 lb_hash_put(sticky_ht, hash0, asindex0,
252 vnet_buffer (p0)->ip.adj_index[VLIB_TX],
253 available_index0, lb_time);
257 //Could not store new entry in the table
// The AS is still taken from new_flow_table, but the flow is not recorded.
258 asindex0 = vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index;
259 counter = LB_VIP_COUNTER_UNTRACKED_PACKET;
262 vlib_increment_simple_counter(&lbm->vip_counters[counter],
264 vnet_buffer (p0)->ip.adj_index[VLIB_TX],
// IPv4 encapsulation: move the buffer start back to make room for an outer
// IPv4 header followed by a GRE header, then fill the outer header.
273 vlib_buffer_advance(p0, - sizeof(ip4_header_t) - sizeof(gre_header_t));
274 ip40 = vlib_buffer_get_current(p0);
275 gre0 = (gre_header_t *)(ip40 + 1);
276 ip40->src_address = lbm->ip4_src_address;
277 ip40->dst_address = lbm->ass[asindex0].address.ip4;
278 ip40->ip_version_and_header_length = 0x45;
// Outer length = inner packet + GRE header + outer IPv4 header.
280 ip40->length = clib_host_to_net_u16(len0 + sizeof(gre_header_t) + sizeof(ip4_header_t));
281 ip40->protocol = IP_PROTOCOL_GRE;
282 ip40->checksum = ip4_header_checksum (ip40);
// IPv6 encapsulation: same layout with an outer IPv6 header.
287 vlib_buffer_advance(p0, - sizeof(ip6_header_t) - sizeof(gre_header_t));
288 ip60 = vlib_buffer_get_current(p0);
289 gre0 = (gre_header_t *)(ip60 + 1);
290 ip60->dst_address = lbm->ass[asindex0].address.ip6;
291 ip60->src_address = lbm->ip6_src_address;
292 ip60->hop_limit = 128;
293 ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6<<28);
// Outer payload length = inner packet + GRE header.
294 ip60->payload_length = clib_host_to_net_u16(len0 + sizeof(gre_header_t));
295 ip60->protocol = IP_PROTOCOL_GRE;
// GRE header: no flags, version 0; protocol is 0x0800 for an IPv4 inner
// packet and 0x86DD for an IPv6 inner packet.
298 gre0->flags_and_version = 0;
299 gre0->protocol = (is_input_v4)?
300 clib_host_to_net_u16(0x0800):
301 clib_host_to_net_u16(0x86DD);
// Record the AS and VIP indices for traced buffers. The VIP index must be
// read here, before adj_index[VLIB_TX] is overwritten below.
304 if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
306 lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
307 tr->as_index = asindex0;
308 tr->vip_index = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
312 //Note that this is going to error if asindex0 == 0
// Hand the buffer over to the AS's DPO: its index replaces the VIP index and
// its next node is the enqueue target.
313 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbm->ass[asindex0].dpo.dpoi_index;
314 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
316 lbm->ass[asindex0].dpo.dpoi_next_node);
318 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
321 return frame->n_vectors;
/* Node function of lb6_gre6_node: runs lb_node_fn with is_input_v4 = 0 and
 * is_encap_v4 = 0 (IPv6 input, GRE over IPv6). */
325 lb6_gre6_node_fn (vlib_main_t * vm,
326 vlib_node_runtime_t * node, vlib_frame_t * frame)
328 return lb_node_fn(vm, node, frame, 0, 0);
/* Node function of lb6_gre4_node: runs lb_node_fn with is_input_v4 = 0 and
 * is_encap_v4 = 1 (IPv6 input, GRE over IPv4). */
332 lb6_gre4_node_fn (vlib_main_t * vm,
333 vlib_node_runtime_t * node, vlib_frame_t * frame)
335 return lb_node_fn(vm, node, frame, 0, 1);
/* Node function of lb4_gre6_node: runs lb_node_fn with is_input_v4 = 1 and
 * is_encap_v4 = 0 (IPv4 input, GRE over IPv6). */
339 lb4_gre6_node_fn (vlib_main_t * vm,
340 vlib_node_runtime_t * node, vlib_frame_t * frame)
342 return lb_node_fn(vm, node, frame, 1, 0);
/* Node function of lb4_gre4_node: runs lb_node_fn with is_input_v4 = 1 and
 * is_encap_v4 = 1 (IPv4 input, GRE over IPv4). */
346 lb4_gre4_node_fn (vlib_main_t * vm,
347 vlib_node_runtime_t * node, vlib_frame_t * frame)
349 return lb_node_fn(vm, node, frame, 1, 1);
/* Graph-node registration for lb6_gre6_node (IPv6 input, GRE over IPv6).
 * It shares the trace formatter, error table and next-node count with the
 * other three lb nodes.
 * NOTE(review): some initializer lines are not visible in this excerpt. */
352 VLIB_REGISTER_NODE (lb6_gre6_node) =
354 .function = lb6_gre6_node_fn,
// Frame elements are u32 values.
356 .vector_size = sizeof (u32),
357 .format_trace = format_lb_trace,
359 .n_errors = LB_N_ERROR,
360 .error_strings = lb_error_strings,
362 .n_next_nodes = LB_N_NEXT,
// Next index LB_NEXT_DROP is bound to the "error-drop" node.
365 [LB_NEXT_DROP] = "error-drop"
/* Graph-node registration for lb6_gre4_node (IPv6 input, GRE over IPv4).
 * It shares the trace formatter, error table and next-node count with the
 * other three lb nodes.
 * NOTE(review): some initializer lines are not visible in this excerpt. */
369 VLIB_REGISTER_NODE (lb6_gre4_node) =
371 .function = lb6_gre4_node_fn,
// Frame elements are u32 values.
373 .vector_size = sizeof (u32),
374 .format_trace = format_lb_trace,
376 .n_errors = LB_N_ERROR,
377 .error_strings = lb_error_strings,
379 .n_next_nodes = LB_N_NEXT,
// Next index LB_NEXT_DROP is bound to the "error-drop" node.
382 [LB_NEXT_DROP] = "error-drop"
/* Graph-node registration for lb4_gre6_node (IPv4 input, GRE over IPv6).
 * It shares the trace formatter, error table and next-node count with the
 * other three lb nodes.
 * NOTE(review): some initializer lines are not visible in this excerpt. */
386 VLIB_REGISTER_NODE (lb4_gre6_node) =
388 .function = lb4_gre6_node_fn,
// Frame elements are u32 values.
390 .vector_size = sizeof (u32),
391 .format_trace = format_lb_trace,
393 .n_errors = LB_N_ERROR,
394 .error_strings = lb_error_strings,
396 .n_next_nodes = LB_N_NEXT,
// Next index LB_NEXT_DROP is bound to the "error-drop" node.
399 [LB_NEXT_DROP] = "error-drop"
/* Graph-node registration for lb4_gre4_node (IPv4 input, GRE over IPv4).
 * It shares the trace formatter, error table and next-node count with the
 * other three lb nodes.
 * NOTE(review): some initializer lines are not visible in this excerpt. */
403 VLIB_REGISTER_NODE (lb4_gre4_node) =
405 .function = lb4_gre4_node_fn,
// Frame elements are u32 values.
407 .vector_size = sizeof (u32),
408 .format_trace = format_lb_trace,
410 .n_errors = LB_N_ERROR,
411 .error_strings = lb_error_strings,
413 .n_next_nodes = LB_N_NEXT,
// Next index LB_NEXT_DROP is bound to the "error-drop" node.
416 [LB_NEXT_DROP] = "error-drop"