3 * Copyright (c) 2016 Cisco and/or its affiliates.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <vnet/vnet.h>
18 #include <vppinfra/xxhash.h>
19 #include <vlib/threads.h>
20 #include <vnet/handoff.h>
/* Per-interface handoff configuration.
 * NOTE(review): the opening lines of these typedefs are not visible in this
 * view of the file; only the struct tails appear below. The type name
 * carries an "inteface" typo; renaming would touch every user of the type. */
23 uword * workers_bitmap;
25 } per_inteface_handoff_data_t;
/* Global state for the handoff feature (single instance: handoff_main). */
28 u32 cached_next_index;
30 u32 first_worker_index;
32 per_inteface_handoff_data_t * if_data;
34 /* convenience variables */
35 vlib_main_t * vlib_main;
36 vnet_main_t * vnet_main;
39 handoff_main_t handoff_main;
/* Per-packet trace record captured by the worker-handoff node. */
43 u32 next_worker_index;
45 } worker_handoff_trace_t;
47 /* packet trace format function */
/* Render a worker_handoff_trace_t record as text for "show trace".
 * Standard vlib trace-formatter signature: (vm, node, trace record) are
 * pulled from the va_list; vm and node are unused here.
 * NOTE(review): the opening brace and the trailing "return s;" are not
 * visible in this view of the file. */
48 static u8 * format_worker_handoff_trace (u8 * s, va_list * args)
50 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
51 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
52 worker_handoff_trace_t * t = va_arg (*args, worker_handoff_trace_t *);
/* One line per packet: RX sw_if_index, zero-based worker index, buffer. */
54 s = format (s, "worker-handoff: sw_if_index %d, next_worker %d, buffer 0x%x",
55 t->sw_if_index, t->next_worker_index, t->buffer_index);
59 vlib_node_registration_t handoff_node;

/*
 * worker-handoff node function.
 *
 * For each buffer in the input frame: look up the per-interface handoff
 * configuration for the RX interface, compute an ethernet-header-based
 * load-balance hash, pick a worker from the interface's worker vector, and
 * enqueue the buffer onto that worker's handoff frame queue.
 * Returns frame->n_vectors (every packet is consumed here).
 *
 * NOTE(review): a number of source lines inside this function are not
 * visible in this view (loop braces, some declarations, parts of the
 * frame-queue flush path); the comments below describe only visible code.
 */
62 worker_handoff_node_fn (vlib_main_t * vm,
63 vlib_node_runtime_t * node,
66 handoff_main_t * hm = &handoff_main;
67 vlib_thread_main_t * tm = vlib_get_thread_main();
68 u32 n_left_from, * from;
/* Per-thread caches: one pending frame-queue element per worker, plus a
 * congestion marker per worker. Lazily sized on first call (below). */
69 static __thread vlib_frame_queue_elt_t ** handoff_queue_elt_by_worker_index;
70 static __thread vlib_frame_queue_t ** congested_handoff_queue_by_worker_index = 0;
71 vlib_frame_queue_elt_t * hf = 0;
73 u32 n_left_to_next_worker = 0, * to_next_worker = 0;
74 u32 next_worker_index = 0;
75 u32 current_worker_index = ~0;
/* First invocation on this thread: size both per-worker vectors. */
77 if (PREDICT_FALSE(handoff_queue_elt_by_worker_index == 0))
79 vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
81 vec_validate_init_empty (congested_handoff_queue_by_worker_index,
82 hm->first_worker_index + hm->num_workers - 1,
83 (vlib_frame_queue_t *)(~0));
86 from = vlib_frame_vector_args (frame);
87 n_left_from = frame->n_vectors;
89 while (n_left_from > 0)
96 per_inteface_handoff_data_t * ihd0;
103 b0 = vlib_get_buffer (vm, bi0);
104 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
/* Handoff data must exist: interface_handoff_enable_disable() populates
 * hm->if_data before redirecting RX traffic to this node. */
105 ASSERT (hm->if_data);
106 ihd0 = vec_elt_at_index (hm->if_data, sw_if_index0);
/* Entries in ihd0->workers are relative to the first worker thread. */
108 next_worker_index = hm->first_worker_index;
111 * Force unknown traffic onto worker 0,
112 * and into ethernet-input. $$$$ add more hashes.
115 /* Compute ingress LB hash */
116 hash_key = eth_get_key ((ethernet_header_t *) b0->data);
117 hash = (u32) clib_xxhash (hash_key);
119 /* if input node did not specify next index, then packet
120 should go to ethernet-input */
121 if (PREDICT_FALSE ((b0->flags & BUFFER_HANDOFF_NEXT_VALID) == 0))
122 vnet_buffer(b0)->handoff.next_index = HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT;
/* L3 next nodes expect the buffer to start at the L3 header, so skip the
 * ethernet header now, before the handoff. */
123 else if (vnet_buffer(b0)->handoff.next_index == HANDOFF_DISPATCH_NEXT_IP4_INPUT ||
124 vnet_buffer(b0)->handoff.next_index == HANDOFF_DISPATCH_NEXT_IP6_INPUT ||
125 vnet_buffer(b0)->handoff.next_index == HANDOFF_DISPATCH_NEXT_MPLS_INPUT)
126 vlib_buffer_advance (b0, (sizeof(ethernet_header_t)));
/* Power-of-two worker counts take the fast mask path instead of modulo. */
128 if (PREDICT_TRUE (is_pow2 (vec_len (ihd0->workers))))
129 index0 = hash & (vec_len (ihd0->workers) - 1);
131 index0 = hash % vec_len (ihd0->workers);
133 next_worker_index += ihd0->workers[index0];
/* Worker changed: record the fill level of the old worker's element and
 * acquire (or create) the queue element for the new worker. */
135 if (next_worker_index != current_worker_index)
138 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
140 hf = dpdk_get_handoff_queue_elt(next_worker_index,
141 handoff_queue_elt_by_worker_index);
143 n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
144 to_next_worker = &hf->buffer_index[hf->n_vectors];
145 current_worker_index = next_worker_index;
148 /* enqueue to correct worker thread */
149 to_next_worker[0] = bi0;
151 n_left_to_next_worker--;
/* Element full: ship it now and force re-acquisition for the next packet. */
153 if (n_left_to_next_worker == 0)
155 hf->n_vectors = VLIB_FRAME_SIZE;
156 vlib_put_handoff_queue_elt(hf);
157 current_worker_index = ~0;
158 handoff_queue_elt_by_worker_index[next_worker_index] = 0;
162 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
163 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
165 worker_handoff_trace_t *t =
166 vlib_add_trace (vm, node, b0, sizeof (*t));
167 t->sw_if_index = sw_if_index0;
/* Trace the worker index zero-based, matching user configuration. */
168 t->next_worker_index = next_worker_index - hm->first_worker_index;
169 t->buffer_index = bi0;
/* Account for the partially-filled element still outstanding. */
175 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
177 /* Ship frames to the worker nodes */
178 for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
180 if (handoff_queue_elt_by_worker_index[i])
182 hf = handoff_queue_elt_by_worker_index[i];
184 * It works better to let the handoff node
185 * rate-adapt, always ship the handoff queue element.
187 if (1 || hf->n_vectors == hf->last_n_vectors)
189 vlib_put_handoff_queue_elt(hf);
190 handoff_queue_elt_by_worker_index[i] = 0;
193 hf->last_n_vectors = hf->n_vectors;
/* ~0 marks "known uncongested" in the per-thread congestion cache. */
195 congested_handoff_queue_by_worker_index[i] = (vlib_frame_queue_t *)(~0);
198 current_worker_index = ~0;
199 return frame->n_vectors;
/* Graph-node registration for "worker-handoff". An internal node: traffic
 * reaches it only via vnet_hw_interface_rx_redirect_to_node(), set up by
 * interface_handoff_enable_disable(). */
202 VLIB_REGISTER_NODE (worker_handoff_node) = {
203 .function = worker_handoff_node_fn,
204 .name = "worker-handoff",
205 .vector_size = sizeof (u32),
206 .format_trace = format_worker_handoff_trace,
207 .type = VLIB_NODE_TYPE_INTERNAL,
/* Generate CPU-variant dispatch wrappers for the node function. */
215 VLIB_NODE_FUNCTION_MULTIARCH (worker_handoff_node, worker_handoff_node_fn)
/*
 * Enable or disable worker handoff for one interface.
 *
 * sw_if_index    - interface to (un)configure; must be a hardware interface.
 * bitmap         - set of zero-based worker indices to balance across.
 * enable_disable - nonzero enables (redirects interface RX to the
 *                  worker-handoff node), zero disables (node index ~0
 *                  clears the redirect).
 *
 * Returns 0 on success or a VNET_API_ERROR_* code.
 * NOTE(review): some lines of this function are not visible in this view.
 */
217 int interface_handoff_enable_disable (vlib_main_t * vm, u32 sw_if_index,
218 uword * bitmap, int enable_disable)
220 handoff_main_t * hm = &handoff_main;
221 vnet_sw_interface_t * sw;
222 vnet_main_t * vnm = vnet_get_main();
223 per_inteface_handoff_data_t * d;
225 u32 node_index = enable_disable ? worker_handoff_node.index : ~0;
227 if (pool_is_free_index (vnm->interface_main.sw_interfaces,
229 return VNET_API_ERROR_INVALID_SW_IF_INDEX;
231 sw = vnet_get_sw_interface (vnm, sw_if_index);
232 if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
233 return VNET_API_ERROR_INVALID_SW_IF_INDEX;
/* Every bit in the worker bitmap must name an existing worker thread. */
235 if (clib_bitmap_last_set(bitmap) >= hm->num_workers)
236 return VNET_API_ERROR_INVALID_WORKER;
238 vec_validate (hm->if_data, sw_if_index);
239 d = vec_elt_at_index(hm->if_data, sw_if_index);
/* Drop any previous configuration for this interface. */
241 vec_free (d->workers);
242 vec_free (d->workers_bitmap);
/* NOTE(review): the caller's bitmap is stored directly (ownership
 * transfer) -- the caller must not free it. Confirm against callers. */
246 d->workers_bitmap = bitmap;
/* Flatten the bitmap into a dense vector of worker indices for the
 * per-packet hash-and-pick in worker_handoff_node_fn. */
247 clib_bitmap_foreach (i, bitmap,
249 vec_add1(d->workers, i);
/* Redirect interface RX to worker-handoff (or restore on disable). */
253 rv = vnet_hw_interface_rx_redirect_to_node (vnm, sw_if_index, node_index);
/* CLI handler for "set interface handoff <if> workers <list> [disable]".
 * Parses the interface name and worker bitmap list, calls
 * interface_handoff_enable_disable(), and maps its VNET_API_ERROR_* codes
 * to user-facing CLI errors.
 * NOTE(review): some lines (several declarations, the unknown-input parse
 * branch, and the success case of the rv switch) are not visible in this
 * view of the file. */
257 static clib_error_t *
258 set_interface_handoff_command_fn (vlib_main_t * vm,
259 unformat_input_t * input,
260 vlib_cli_command_t * cmd)
262 u32 sw_if_index = ~0;
263 int enable_disable = 1;
268 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
269 if (unformat (input, "disable"))
271 else if (unformat (input, "workers %U", unformat_bitmap_list,
274 else if (unformat (input, "%U", unformat_vnet_sw_interface,
275 vnet_get_main(), &sw_if_index))
/* Both an interface and a worker list are mandatory. */
281 if (sw_if_index == ~0)
282 return clib_error_return (0, "Please specify an interface...");
285 return clib_error_return (0, "Please specify list of workers...");
287 rv = interface_handoff_enable_disable (vm, sw_if_index, bitmap, enable_disable);
/* Translate API error codes into CLI messages. */
293 case VNET_API_ERROR_INVALID_SW_IF_INDEX:
294 return clib_error_return (0, "Invalid interface");
297 case VNET_API_ERROR_INVALID_WORKER:
298 return clib_error_return (0, "Invalid worker(s)");
301 case VNET_API_ERROR_UNIMPLEMENTED:
302 return clib_error_return (0, "Device driver doesn't support redirection");
306 return clib_error_return (0, "unknown return value %d", rv);
/* CLI registration for "set interface handoff ...". */
311 VLIB_CLI_COMMAND (set_interface_handoff_command, static) = {
312 .path = "set interface handoff",
314 "set interface handoff <interface-name> workers <workers-list>",
315 .function = set_interface_handoff_command_fn,
322 } handoff_dispatch_trace_t;
324 /* packet trace format function */
/* Render a handoff_dispatch_trace_t record as text for "show trace".
 * NOTE(review): the opening brace, the trailing format arguments, and the
 * "return s;" are not visible in this view of the file. */
325 static u8 * format_handoff_dispatch_trace (u8 * s, va_list * args)
327 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
328 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
329 handoff_dispatch_trace_t * t = va_arg (*args, handoff_dispatch_trace_t *);
331 s = format (s, "handoff-dispatch: sw_if_index %d next_index %d buffer 0x%x",
339 vlib_node_registration_t handoff_dispatch_node;

/* Error counters for the handoff-dispatch node, generated with the usual
 * foreach-macro idiom: one enum value and one counter string per _(sym,str)
 * entry, plus the HANDOFF_DISPATCH_N_ERROR sentinel. */
341 #define foreach_handoff_dispatch_error \
342 _(EXAMPLE, "example packets")
345 #define _(sym,str) HANDOFF_DISPATCH_ERROR_##sym,
346 foreach_handoff_dispatch_error
348 HANDOFF_DISPATCH_N_ERROR,
349 } handoff_dispatch_error_t;
351 static char * handoff_dispatch_error_strings[] = {
352 #define _(sym,string) string,
353 foreach_handoff_dispatch_error
/*
 * handoff-dispatch node function (runs on the receiving worker thread).
 *
 * For each buffer shipped over by worker-handoff, read the next-node index
 * stashed in vnet_buffer(b)->handoff.next_index and enqueue the buffer to
 * that next node. Standard dual-loop with prefetch, then a single-buffer
 * cleanup loop. Returns frame->n_vectors.
 *
 * NOTE(review): several lines (loop braces, some declarations, the
 * from/to_next pointer advances) are not visible in this view of the file;
 * the comments below describe only visible code.
 */
358 handoff_dispatch_node_fn (vlib_main_t * vm,
359 vlib_node_runtime_t * node,
360 vlib_frame_t * frame)
362 u32 n_left_from, * from, * to_next;
363 handoff_dispatch_next_t next_index;
365 from = vlib_frame_vector_args (frame);
366 n_left_from = frame->n_vectors;
367 next_index = node->cached_next_index;
369 while (n_left_from > 0)
373 vlib_get_next_frame (vm, node, next_index,
374 to_next, n_left_to_next);
/* Dual loop: two buffers per iteration, prefetching the following two. */
376 while (n_left_from >= 4 && n_left_to_next >= 2)
379 vlib_buffer_t * b0, * b1;
381 u32 sw_if_index0, sw_if_index1;
383 /* Prefetch next iteration. */
385 vlib_buffer_t * p2, * p3;
387 p2 = vlib_get_buffer (vm, from[2]);
388 p3 = vlib_get_buffer (vm, from[3]);
390 vlib_prefetch_buffer_header (p2, LOAD);
391 vlib_prefetch_buffer_header (p3, LOAD);
394 /* speculatively enqueue b0 and b1 to the current next frame */
395 to_next[0] = bi0 = from[0];
396 to_next[1] = bi1 = from[1];
402 b0 = vlib_get_buffer (vm, bi0);
403 b1 = vlib_get_buffer (vm, bi1);
/* Per-packet next node was chosen by worker-handoff on the RX thread. */
405 next0 = vnet_buffer(b0)->handoff.next_index;
406 next1 = vnet_buffer(b1)->handoff.next_index;
/* Tracing is gated on the cheap global hint before per-buffer checks. */
408 if (PREDICT_FALSE(vm->trace_main.trace_active_hint))
410 if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
412 vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
413 handoff_dispatch_trace_t *t =
414 vlib_add_trace (vm, node, b0, sizeof (*t));
415 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
416 t->sw_if_index = sw_if_index0;
417 t->next_index = next0;
418 t->buffer_index = bi0;
420 if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
422 vlib_trace_buffer (vm, node, next1, b1, /* follow_chain */ 0);
423 handoff_dispatch_trace_t *t =
424 vlib_add_trace (vm, node, b1, sizeof (*t));
425 sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
426 t->sw_if_index = sw_if_index1;
427 t->next_index = next1;
428 t->buffer_index = bi1;
432 /* verify speculative enqueues, maybe switch current next frame */
433 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
434 to_next, n_left_to_next,
435 bi0, bi1, next0, next1);
/* Single loop: remaining buffers one at a time. */
438 while (n_left_from > 0 && n_left_to_next > 0)
445 /* speculatively enqueue b0 to the current next frame */
453 b0 = vlib_get_buffer (vm, bi0);
455 next0 = vnet_buffer(b0)->handoff.next_index;
457 if (PREDICT_FALSE(vm->trace_main.trace_active_hint))
459 if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
461 vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
462 handoff_dispatch_trace_t *t =
463 vlib_add_trace (vm, node, b0, sizeof (*t));
464 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
465 t->sw_if_index = sw_if_index0;
466 t->next_index = next0;
467 t->buffer_index = bi0;
471 /* verify speculative enqueue, maybe switch current next frame */
472 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
473 to_next, n_left_to_next,
477 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
480 return frame->n_vectors;
/* Graph-node registration for "handoff-dispatch": the entry point on worker
 * threads for frames shipped by worker-handoff. The next-node table below
 * must stay in sync with the HANDOFF_DISPATCH_NEXT_* enum used by both
 * node functions above. */
483 VLIB_REGISTER_NODE (handoff_dispatch_node) = {
484 .function = handoff_dispatch_node_fn,
485 .name = "handoff-dispatch",
486 .vector_size = sizeof (u32),
487 .format_trace = format_handoff_dispatch_trace,
488 .type = VLIB_NODE_TYPE_INTERNAL,
489 .flags = VLIB_NODE_FLAG_IS_HANDOFF,
491 .n_errors = ARRAY_LEN(handoff_dispatch_error_strings),
492 .error_strings = handoff_dispatch_error_strings,
494 .n_next_nodes = HANDOFF_DISPATCH_N_NEXT,
497 [HANDOFF_DISPATCH_NEXT_DROP] = "error-drop",
498 [HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT] = "ethernet-input",
499 [HANDOFF_DISPATCH_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
500 [HANDOFF_DISPATCH_NEXT_IP6_INPUT] = "ip6-input",
501 [HANDOFF_DISPATCH_NEXT_MPLS_INPUT] = "mpls-gre-input",
/* Generate CPU-variant dispatch wrappers for the node function. */
505 VLIB_NODE_FUNCTION_MULTIARCH (handoff_dispatch_node, handoff_dispatch_node_fn)
/* Init function: cache the worker-thread layout (count and first index)
 * from the standard "workers" thread registration and publish the
 * handoff-dispatch node index in the thread main.
 * NOTE(review): several lines (the error return after threads_init, the
 * hash-lookup guard, the vlib_main assignment, and the final return) are
 * not visible in this view of the file. */
507 clib_error_t *handoff_init (vlib_main_t *vm)
509 handoff_main_t * hm = &handoff_main;
510 vlib_thread_main_t * tm = vlib_get_thread_main();
511 clib_error_t * error;
/* Thread layout must be established before we read it. */
514 if ((error = vlib_call_init_function (vm, threads_init)))
517 vlib_thread_registration_t * tr;
518 /* Only the standard vnet worker threads are supported */
519 p = hash_get_mem (tm->thread_registrations_by_name, "workers");
522 tr = (vlib_thread_registration_t *) p[0];
525 hm->num_workers = tr->count;
526 hm->first_worker_index = tr->first_index;
531 hm->vnet_main = &vnet_main;
/* Register handoff-dispatch as THE handoff node; must not be set twice. */
533 ASSERT (tm->handoff_dispatch_node_index == ~0);
534 tm->handoff_dispatch_node_index = handoff_dispatch_node.index;

539 VLIB_INIT_FUNCTION (handoff_init);