2 * Copyright (c) 2017 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
18 * @brief IPv4 Shallow Virtual Reassembly.
20 * This file contains the source code for IPv4 Shallow Virtual reassembly.
23 #include <vppinfra/vec.h>
24 #include <vnet/vnet.h>
25 #include <vnet/ip/ip.h>
26 #include <vnet/ip/ip4_to_ip6.h>
27 #include <vppinfra/fifo.h>
28 #include <vppinfra/bihash_16_8.h>
29 #include <vnet/ip/reass/ip4_sv_reass.h>
// Tunables and default configuration for IPv4 shallow virtual (SV)
// reassembly. SV reassembly tracks fragment metadata (proto + L4 ports)
// only - it does not rebuild the full payload.
#define MSEC_PER_SEC 1000
#define IP4_SV_REASS_TIMEOUT_DEFAULT_MS 100
#define IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default
#define IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT 1024
#define IP4_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
#define IP4_SV_REASS_HT_LOAD_FACTOR (0.75)

// Return codes of ip4_sv_reass_update (enum header elided from this view)
  IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS,
  IP4_SV_REASS_RC_UNSUPP_IP_PROTO,

// Lookup key/value pair for the 16_8 bihash (16-byte key, 8-byte value)
  clib_bihash_kv_16_8_t kv;

// Per-reassembly context (struct header elided from this view)
  ip4_sv_reass_key_t key;
  // time when last packet was received
  // internal id of this reassembly
  // trace operation counter
  // minimum fragment length for this reassembly - used to estimate MTU
  u16 min_fragment_length;
  // buffer indexes of buffers in this reassembly in chronological order -
  // including overlaps and duplicate fragments
  // set to true when this reassembly is completed

// Per-worker-thread state: pool of reassemblies guarded by a spinlock
  ip4_sv_reass_t *pool;
  clib_spinlock_t lock;
} ip4_sv_reass_per_thread_t;

// Global module state
  u32 expire_walk_interval_ms;
  // maximum number of fragments in one reassembly
  // maximum number of reassemblies
  // key -> (thread_index, reass_index); see ip4_sv_reass_find_or_create
  clib_bihash_16_8_t hash;
  ip4_sv_reass_per_thread_t *per_thread_data;
  // convenience pointers
  vlib_main_t *vlib_main;
  vnet_main_t *vnet_main;
  // node index of ip4-drop node
  // process node signalled on configuration change
  u32 ip4_sv_reass_expire_node_idx;
  /** Worker handoff */
  u32 fq_feature_index;
  // reference count for enabling/disabling feature - per interface
  u32 *feature_use_refcount_per_intf;
} ip4_sv_reass_main_t;

extern ip4_sv_reass_main_t ip4_sv_reass_main;

// single definition of the global, compiled once (not per march variant)
#ifndef CLIB_MARCH_VARIANT
ip4_sv_reass_main_t ip4_sv_reass_main;
#endif /* CLIB_MARCH_VARIANT */

// next-node indices used by the reassembly graph nodes
  IP4_SV_REASSEMBLY_NEXT_INPUT,
  IP4_SV_REASSEMBLY_NEXT_DROP,
  IP4_SV_REASSEMBLY_NEXT_HANDOFF,
  IP4_SV_REASSEMBLY_N_NEXT,
} ip4_sv_reass_next_t;

// operations recorded in the packet trace
  REASS_FRAGMENT_CACHE,
  REASS_FRAGMENT_FORWARD,
} ip4_sv_reass_trace_operation_e;

// per-packet trace record
  ip4_sv_reass_trace_operation_e action;
} ip4_sv_reass_trace_t;
// format_ip4_sv_reass_trace: packet-trace formatter - prints the reassembly
// id, the trace-operation id, and per-action details (switch scaffolding
// elided from this view).
format_ip4_sv_reass_trace (u8 * s, va_list * args)
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  ip4_sv_reass_trace_t *t = va_arg (*args, ip4_sv_reass_trace_t *);
  s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id);
    case REASS_FRAGMENT_CACHE:
      s = format (s, "[cached]");
      // finish: first fragment seen, L4 ports resolved
	format (s, "[finish, ip proto=%u, src_port=%u, dst_port=%u]",
		t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
		clib_net_to_host_u16 (t->l4_dst_port));
    case REASS_FRAGMENT_FORWARD:
	format (s, "[forward, ip proto=%u, src_port=%u, dst_port=%u]",
		t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
		clib_net_to_host_u16 (t->l4_dst_port));

// ip4_sv_reass_add_trace: append one trace record for buffer bi and bump
// the reassembly's per-operation trace counter.
ip4_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
			ip4_sv_reass_main_t * rm, ip4_sv_reass_t * reass,
			u32 bi, ip4_sv_reass_trace_operation_e action,
			u32 ip_proto, u16 l4_src_port, u16 l4_dst_port)
  vlib_buffer_t *b = vlib_get_buffer (vm, bi);
  ip4_sv_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0]));
  t->reass_id = reass->id;
  t->op_id = reass->trace_op_counter;
  t->ip_proto = ip_proto;
  t->l4_src_port = l4_src_port;
  t->l4_dst_port = l4_dst_port;
  ++reass->trace_op_counter;
  // debug print of the formatted trace (presumably compiled only in debug
  // builds; the surrounding conditional is elided from this view)
  s = format (s, "%U", format_ip4_sv_reass_trace, NULL, NULL, t);
  printf ("%.*s\n", vec_len (s), s);
  vec_reset_length (s);
// ip4_sv_reass_free: tear down one reassembly - delete its bihash entry,
// free all cached buffers, unlink it from the per-thread LRU list and
// return it to the pool. Caller is expected to hold rt->lock.
ip4_sv_reass_free (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
		   ip4_sv_reass_per_thread_t * rt, ip4_sv_reass_t * reass)
  clib_bihash_kv_16_8_t kv;
  kv.key[0] = reass->key.as_u64[0];
  kv.key[1] = reass->key.as_u64[1];
  // is_add == 0 -> delete from hash
  clib_bihash_add_del_16_8 (&rm->hash, &kv, 0);
  vlib_buffer_free (vm, reass->cached_buffers,
		    vec_len (reass->cached_buffers));
  vec_free (reass->cached_buffers);
  reass->cached_buffers = NULL;
  // unlink from the doubly-linked LRU list (~0 means "no neighbour")
  if (~0 != reass->lru_prev)
      ip4_sv_reass_t *lru_prev =
	pool_elt_at_index (rt->pool, reass->lru_prev);
      lru_prev->lru_next = reass->lru_next;
  if (~0 != reass->lru_next)
      ip4_sv_reass_t *lru_next =
	pool_elt_at_index (rt->pool, reass->lru_next);
      lru_next->lru_prev = reass->lru_prev;
  // fix up list head/tail if this element was first/last
  if (rt->lru_first == reass - rt->pool)
      rt->lru_first = reass->lru_next;
  if (rt->lru_last == reass - rt->pool)
      rt->lru_last = reass->lru_prev;
  pool_put (rt->pool, reass);
// ip4_sv_reass_init: reset per-reassembly fields for (re)use.
ip4_sv_reass_init (ip4_sv_reass_t * reass)
  reass->cached_buffers = NULL;
  reass->is_complete = false;
// ip4_sv_reass_find_or_create: look up the reassembly matching *kv in the
// global bihash. On hit: request handoff if another thread owns it (sets
// *do_handoff), expire it when stale, otherwise refresh its timestamp and
// return it. On miss: allocate a new reassembly (evicting the LRU-last
// entry when the pool is full), link it at the LRU tail, record owner
// thread/index in the hash value and insert it.
// Caller holds rt->lock. NOTE(review): several structural lines (braces,
// return statements) are elided from this view.
always_inline ip4_sv_reass_t *
ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
			     ip4_sv_reass_per_thread_t * rt,
			     ip4_sv_reass_kv_t * kv, u8 * do_handoff)
  ip4_sv_reass_t *reass = NULL;
  f64 now = vlib_time_now (rm->vlib_main);
  // on hit the search fills kv's value part (thread_index, reass_index)
  if (!clib_bihash_search_16_8
      (&rm->hash, (clib_bihash_kv_16_8_t *) kv, (clib_bihash_kv_16_8_t *) kv))
      // owned by another worker - must be handed off
      if (vm->thread_index != kv->v.thread_index)
      reass = pool_elt_at_index (rt->pool, kv->v.reass_index);
      // stale entry - expire it instead of reusing
      if (now > reass->last_heard + rm->timeout)
	  ip4_sv_reass_free (vm, rm, rt, reass);
      reass->last_heard = now;
  // pool full -> evict the least recently used entry (LRU tail)
  if (rt->reass_n >= rm->max_reass_n && rm->max_reass_n)
      reass = pool_elt_at_index (rt->pool, rt->lru_last);
      ip4_sv_reass_free (vm, rm, rt, reass);
  pool_get (rt->pool, reass);
  clib_memset (reass, 0, sizeof (*reass));
  // id unique across threads: thread_index * 1e9 + per-thread counter
  reass->id = ((u64) vm->thread_index * 1000000000) + rt->id_counter;
  ip4_sv_reass_init (reass);
  // link new entry at the tail of the LRU list
  reass->lru_prev = reass->lru_next = ~0;
  if (~0 != rt->lru_last)
      ip4_sv_reass_t *lru_last = pool_elt_at_index (rt->pool, rt->lru_last);
      reass->lru_prev = rt->lru_last;
      lru_last->lru_next = rt->lru_last = reass - rt->pool;
  if (~0 == rt->lru_first)
      rt->lru_first = rt->lru_last = reass - rt->pool;
  reass->key.as_u64[0] = ((clib_bihash_kv_16_8_t *) kv)->key[0];
  reass->key.as_u64[1] = ((clib_bihash_kv_16_8_t *) kv)->key[1];
  kv->v.reass_index = (reass - rt->pool);
  kv->v.thread_index = vm->thread_index;
  reass->last_heard = now;
  // insert; on failure the fresh reassembly is freed again
  if (clib_bihash_add_del_16_8 (&rm->hash, (clib_bihash_kv_16_8_t *) kv, 1))
      ip4_sv_reass_free (vm, rm, rt, reass);
// ip4_sv_reass_update: process one fragment. A first fragment (offset 0)
// carries the L4 header: proto and ports are extracted from it and the
// reassembly is marked complete. Every fragment is appended to
// cached_buffers. Returns IP4_SV_REASS_RC_UNSUPP_IP_PROTO when ports
// cannot be extracted and IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS when the
// cache grows past rm->max_reass_len.
always_inline ip4_sv_reass_rc_t
ip4_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
		     ip4_sv_reass_main_t * rm, ip4_sv_reass_per_thread_t * rt,
		     ip4_sv_reass_t * reass, u32 bi0)
  vlib_buffer_t *fb = vlib_get_buffer (vm, bi0);
  ip4_sv_reass_rc_t rc = IP4_SV_REASS_RC_OK;
  ip4_header_t *fip = vlib_buffer_get_current (fb);
  const u32 fragment_first = ip4_get_fragment_offset_bytes (fip);
  if (0 == fragment_first)
      // first fragment: L4 header is here, grab proto + ports
      reass->ip_proto = fip->protocol;
      reass->l4_src_port = ip4_get_port (fip, 1);
      reass->l4_dst_port = ip4_get_port (fip, 0);
      // zero port means extraction failed (unsupported protocol)
      if (!reass->l4_src_port || !reass->l4_dst_port)
	return IP4_SV_REASS_RC_UNSUPP_IP_PROTO;
      reass->is_complete = true;
      vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
	  ip4_sv_reass_add_trace (vm, node, rm, reass, bi0, REASS_FINISH,
				  reass->ip_proto, reass->l4_src_port,
  // cache this fragment until the first fragment completes the reassembly
  vec_add1 (reass->cached_buffers, bi0);
  if (!reass->is_complete)
      if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
	  ip4_sv_reass_add_trace (vm, node, rm, reass, bi0,
				  REASS_FRAGMENT_CACHE, ~0, ~0, ~0);
      if (vec_len (reass->cached_buffers) > rm->max_reass_len)
	  rc = IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS;
// ip4_sv_reass_inline: main packet-processing loop shared by the plain /
// feature / custom variants. Non-fragments pass straight through with
// proto + L4 ports recorded in the buffer opaque; fragments are matched to
// (or create) a reassembly and are either cached, handed off to the owning
// worker, or forwarded once the first fragment supplied the L4 info - at
// which point all cached fragments are flushed too.
// NOTE(review): many structural lines (braces, switch headers, parts of
// the dual-loop scaffolding) are elided from this view.
ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
		     vlib_frame_t * frame, bool is_feature, bool is_custom)
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from, n_left_to_next, *to_next, next_index;
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
  // one lock around the whole frame - serializes with the expiry walk
  clib_spinlock_lock (&rt->lock);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  while (n_left_from > 0)
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
      while (n_left_from > 0 && n_left_to_next > 0)
	  u32 error0 = IP4_ERROR_NONE;
	  b0 = vlib_get_buffer (vm, bi0);
	  ip4_header_t *ip0 = vlib_buffer_get_current (b0);
	  if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
	      // this is a regular packet - no fragmentation
	      vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
	      vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
	      vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
	      next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
	  // sanity-check fragment geometry before touching reassembly state
	  const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
	  const u32 fragment_length =
	    clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
	  const u32 fragment_last = fragment_first + fragment_length - 1;
	  if (fragment_first > fragment_last || fragment_first + fragment_length > UINT16_MAX - 20 || (fragment_length < 8 && ip4_get_fragment_more (ip0)))	// 8 is minimum frag length per RFC 791
	      next0 = IP4_SV_REASSEMBLY_NEXT_DROP;
	      error0 = IP4_ERROR_REASS_MALFORMED_PACKET;
	  // lookup key: (fib_index, src) | (dst, frag_id, proto)
	  ip4_sv_reass_kv_t kv;
	    (u64) vec_elt (ip4_main.fib_index_by_sw_if_index,
			   vnet_buffer (b0)->sw_if_index[VLIB_RX]) |
	    (u64) ip0->src_address.as_u32 << 32;
	    (u64) ip0->dst_address.
	    as_u32 | (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48;
	  ip4_sv_reass_t *reass =
	    ip4_sv_reass_find_or_create (vm, rm, rt, &kv, &do_handoff);
	  if (PREDICT_FALSE (do_handoff))
	      // fragment belongs to another worker - hand it off
	      next0 = IP4_SV_REASSEMBLY_NEXT_HANDOFF;
	      vnet_buffer (b0)->ip.reass.owner_thread_index =
	      // reass == NULL without handoff -> reassembly limit reached
	      next0 = IP4_SV_REASSEMBLY_NEXT_DROP;
	      error0 = IP4_ERROR_REASS_LIMIT_REACHED;
	  if (reass->is_complete)
	      // L4 info already known - annotate and forward immediately
	      vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
	      vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
	      vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
	      next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
	      error0 = IP4_ERROR_NONE;
	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
		  ip4_sv_reass_add_trace (vm, node, rm, reass, bi0,
					  REASS_FRAGMENT_FORWARD,
	  ip4_sv_reass_rc_t rc =
	    ip4_sv_reass_update (vm, node, rm, rt, reass, bi0);
	    case IP4_SV_REASS_RC_OK:
	      /* nothing to do here */
	    case IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS:
	      vlib_node_increment_counter (vm, node->node_index,
					   IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
	      ip4_sv_reass_free (vm, rm, rt, reass);
	      // NOTE(review): this case increments the FRAGMENT_CHAIN_TOO_LONG
	      // counter although the cause is an unsupported IP protocol -
	      // looks like a copy/paste of the previous case; confirm the
	      // intended counter
	    case IP4_SV_REASS_RC_UNSUPP_IP_PROTO:
	      vlib_node_increment_counter (vm, node->node_index,
					   IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
	      ip4_sv_reass_free (vm, rm, rt, reass);
	  if (reass->is_complete)
	      // first fragment just arrived - flush all cached fragments
	      vec_foreach_index (idx, reass->cached_buffers)
		  u32 bi0 = vec_elt (reass->cached_buffers, idx);
		  vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
		  u32 next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
		  vnet_feature_next (&next0, b0);
		  next0 = vnet_buffer (b0)->ip.reass.next_index;
		  // current next-frame exhausted mid-flush - get a fresh one
		  if (0 == n_left_to_next)
		      vlib_put_next_frame (vm, node, next_index,
		      vlib_get_next_frame (vm, node, next_index, to_next,
		  vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
		  vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
		  vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
		  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
		      ip4_sv_reass_add_trace (vm, node, rm, reass, bi0,
					      REASS_FRAGMENT_FORWARD,
		  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
						   to_next, n_left_to_next, bi0,
	      _vec_len (reass->cached_buffers) = 0;	// buffers are owned by frame now
	  b0->error = node->errors[error0];
	  if (is_feature && IP4_ERROR_NONE == error0)
	      b0 = vlib_get_buffer (vm, bi0);
	      vnet_feature_next (&next0, b0);
	  next0 = vnet_buffer (b0)->ip.reass.next_index;
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next,
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
  clib_spinlock_unlock (&rt->lock);
  return frame->n_vectors;
// per-node error strings generated from the error #define list
static char *ip4_sv_reass_error_strings[] = {
#define _(sym, string) string,

// plain (non-feature, non-custom) variant of the node
VLIB_NODE_FN (ip4_sv_reass_node) (vlib_main_t * vm,
				  vlib_node_runtime_t * node,
				  vlib_frame_t * frame)
  return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
			      false /* is_custom */ );

VLIB_REGISTER_NODE (ip4_sv_reass_node) = {
  .name = "ip4-sv-reassembly",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_sv_reass_trace,
  .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
  .error_strings = ip4_sv_reass_error_strings,
  .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
    [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
    [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
    [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reassembly-handoff",

// feature-arc variant - forwards via vnet_feature_next
VLIB_NODE_FN (ip4_sv_reass_node_feature) (vlib_main_t * vm,
					  vlib_node_runtime_t * node,
					  vlib_frame_t * frame)
  return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ ,
			      false /* is_custom */ );

VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = {
  .name = "ip4-sv-reassembly-feature",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_sv_reass_trace,
  .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
  .error_strings = ip4_sv_reass_error_strings,
  .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
    [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
    [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
    [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff",

// hook the feature node into the ip4-unicast arc before ip4-lookup
VNET_FEATURE_INIT (ip4_sv_reass_feature) = {
  .arc_name = "ip4-unicast",
  .node_name = "ip4-sv-reassembly-feature",
  .runs_before = VNET_FEATURES ("ip4-lookup"),

// custom variant - presumably takes next from buffer metadata
// (ip.reass.next_index); the is_custom guards are elided from this view
VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = {
  .name = "ip4-sv-reassembly-custom-next",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_sv_reass_trace,
  .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
  .error_strings = ip4_sv_reass_error_strings,
  .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
    [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
    [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
    [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reassembly-handoff",

VLIB_NODE_FN (ip4_sv_reass_custom_node) (vlib_main_t * vm,
					 vlib_node_runtime_t * node,
					 vlib_frame_t * frame)
  return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
			      true /* is_custom */ );
#ifndef CLIB_MARCH_VARIANT
// ip4_sv_reass_get_nbuckets: derive a power-of-two bihash bucket count
// from max_reass_n and the configured load factor.
ip4_sv_reass_get_nbuckets ()
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  nbuckets = (u32) (rm->max_reass_n / IP4_SV_REASS_HT_LOAD_FACTOR);
  // round up to the next power of two
  for (i = 0; i < 31; i++)
    if ((1 << i) >= nbuckets)
#endif /* CLIB_MARCH_VARIANT */

// events understood by the expiry-walk process node
  IP4_EVENT_CONFIG_CHANGED = 1,
} ip4_sv_reass_event_t;

// context for rehashing entries into a bigger table
  clib_bihash_16_8_t *new_hash;

#ifndef CLIB_MARCH_VARIANT
// ip4_rehash_cb: bihash walk callback - copy each key/value pair into the
// new (larger) hash table.
ip4_rehash_cb (clib_bihash_kv_16_8_t * kv, void *_ctx)
  ip4_rehash_cb_ctx *ctx = _ctx;
  if (clib_bihash_add_del_16_8 (ctx->new_hash, kv, 1))
  return (BIHASH_WALK_CONTINUE);
// ip4_sv_reass_set_params: store new configuration into the module state;
// timeout is kept both in ms and as seconds (f64) for fast comparison.
ip4_sv_reass_set_params (u32 timeout_ms, u32 max_reassemblies,
			 u32 max_reassembly_length,
			 u32 expire_walk_interval_ms)
  ip4_sv_reass_main.timeout_ms = timeout_ms;
  ip4_sv_reass_main.timeout = (f64) timeout_ms / (f64) MSEC_PER_SEC;
  ip4_sv_reass_main.max_reass_n = max_reassemblies;
  ip4_sv_reass_main.max_reass_len = max_reassembly_length;
  ip4_sv_reass_main.expire_walk_interval_ms = expire_walk_interval_ms;

// ip4_sv_reass_set: apply new parameters, wake the expiry-walk process,
// and grow + rehash the bihash when the new maximum implies more buckets.
ip4_sv_reass_set (u32 timeout_ms, u32 max_reassemblies,
		  u32 max_reassembly_length, u32 expire_walk_interval_ms)
  u32 old_nbuckets = ip4_sv_reass_get_nbuckets ();
  ip4_sv_reass_set_params (timeout_ms, max_reassemblies,
			   max_reassembly_length, expire_walk_interval_ms);
  vlib_process_signal_event (ip4_sv_reass_main.vlib_main,
			     ip4_sv_reass_main.ip4_sv_reass_expire_node_idx,
			     IP4_EVENT_CONFIG_CHANGED, 0);
  u32 new_nbuckets = ip4_sv_reass_get_nbuckets ();
  if (ip4_sv_reass_main.max_reass_n > 0 && new_nbuckets > old_nbuckets)
      // build a bigger hash, copy all entries, then swap it in
      clib_bihash_16_8_t new_hash;
      clib_memset (&new_hash, 0, sizeof (new_hash));
      ip4_rehash_cb_ctx ctx;
      ctx.new_hash = &new_hash;
      clib_bihash_init_16_8 (&new_hash, "ip4-dr", new_nbuckets,
			     new_nbuckets * 1024);
      clib_bihash_foreach_key_value_pair_16_8 (&ip4_sv_reass_main.hash,
					       ip4_rehash_cb, &ctx);
      // error path (guard elided from this view): discard the new table
      clib_bihash_free_16_8 (&new_hash);
      // success path: replace the old table with the new one
      clib_bihash_free_16_8 (&ip4_sv_reass_main.hash);
      clib_memcpy_fast (&ip4_sv_reass_main.hash, &new_hash,
			sizeof (ip4_sv_reass_main.hash));
      clib_bihash_copied (&ip4_sv_reass_main.hash, &new_hash);

// ip4_sv_reass_get: report the currently configured parameters.
ip4_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies,
		  u32 * max_reassembly_length, u32 * expire_walk_interval_ms)
  *timeout_ms = ip4_sv_reass_main.timeout_ms;
  *max_reassemblies = ip4_sv_reass_main.max_reass_n;
  *max_reassembly_length = ip4_sv_reass_main.max_reass_len;
  *expire_walk_interval_ms = ip4_sv_reass_main.expire_walk_interval_ms;
// ip4_sv_reass_init_function: module init - create per-thread pools and
// locks, apply default parameters, initialize the bihash and the worker
// hand-off frame queues.
static clib_error_t *
ip4_sv_reass_init_function (vlib_main_t * vm)
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  clib_error_t *error = 0;
  rm->vnet_main = vnet_get_main ();
  // one per-thread slot for main + each worker
  vec_validate (rm->per_thread_data, vlib_num_workers ());
  ip4_sv_reass_per_thread_t *rt;
  vec_foreach (rt, rm->per_thread_data)
    clib_spinlock_init (&rt->lock);
    // NOTE(review): pool_alloc reads rm->max_reass_n before
    // ip4_sv_reass_set_params below sets the defaults - confirm ordering
    pool_alloc (rt->pool, rm->max_reass_n);
    rt->lru_first = rt->lru_last = ~0;
  node = vlib_get_node_by_name (vm, (u8 *) "ip4-sv-reassembly-expire-walk");
  rm->ip4_sv_reass_expire_node_idx = node->index;
  ip4_sv_reass_set_params (IP4_SV_REASS_TIMEOUT_DEFAULT_MS,
			   IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT,
			   IP4_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT,
			   IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS);
  nbuckets = ip4_sv_reass_get_nbuckets ();
  clib_bihash_init_16_8 (&rm->hash, "ip4-dr", nbuckets, nbuckets * 1024);
  node = vlib_get_node_by_name (vm, (u8 *) "ip4-drop");
  rm->ip4_drop_idx = node->index;
  // frame queues used to hand packets off to their owning workers
  rm->fq_index = vlib_frame_queue_main_init (ip4_sv_reass_node.index, 0);
  rm->fq_feature_index =
    vlib_frame_queue_main_init (ip4_sv_reass_node_feature.index, 0);
  rm->feature_use_refcount_per_intf = NULL;

VLIB_INIT_FUNCTION (ip4_sv_reass_init_function);
#endif /* CLIB_MARCH_VARIANT */
// ip4_sv_reass_walk_expired: process node - wakes up every
// expire_walk_interval_ms (or on IP4_EVENT_CONFIG_CHANGED) and frees every
// reassembly whose last_heard is older than the configured timeout, across
// all threads.
ip4_sv_reass_walk_expired (vlib_main_t * vm,
			   vlib_node_runtime_t * node, vlib_frame_t * f)
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  uword event_type, *event_data = 0;
      vlib_process_wait_for_event_or_clock (vm,
					    rm->expire_walk_interval_ms /
      event_type = vlib_process_get_events (vm, &event_data);
	case ~0:		/* no events => timeout */
	  /* nothing to do here */
	case IP4_EVENT_CONFIG_CHANGED:
	  clib_warning ("BUG: event type 0x%wx", event_type);
      f64 now = vlib_time_now (vm);
      ip4_sv_reass_t *reass;
      int *pool_indexes_to_free = NULL;
      uword thread_index = 0;
      const uword nthreads = vlib_num_workers () + 1;
      for (thread_index = 0; thread_index < nthreads; ++thread_index)
	  ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
	  clib_spinlock_lock (&rt->lock);
	  // collect indexes first - freeing while iterating the pool is
	  // not safe
	  vec_reset_length (pool_indexes_to_free);
	  pool_foreach_index (index, rt->pool, ({
	    reass = pool_elt_at_index (rt->pool, index);
	    if (now > reass->last_heard + rm->timeout)
	      vec_add1 (pool_indexes_to_free, index);
	  vec_foreach (i, pool_indexes_to_free)
	      ip4_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
	      ip4_sv_reass_free (vm, rm, rt, reass);
	  clib_spinlock_unlock (&rt->lock);
      vec_free (pool_indexes_to_free);
      _vec_len (event_data) = 0;

// registration of the expiry process node
VLIB_REGISTER_NODE (ip4_sv_reass_expire_node) = {
  .function = ip4_sv_reass_walk_expired,
  .type = VLIB_NODE_TYPE_PROCESS,
  .name = "ip4-sv-reassembly-expire-walk",
  .format_trace = format_ip4_sv_reass_trace,
  .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
  .error_strings = ip4_sv_reass_error_strings,
// format_ip4_sv_reass_key: pretty-print one reassembly key.
format_ip4_sv_reass_key (u8 * s, va_list * args)
  ip4_sv_reass_key_t *key = va_arg (*args, ip4_sv_reass_key_t *);
	      "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
	      key->xx_id, format_ip4_address, &key->src, format_ip4_address,
	      &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);

// format_ip4_sv_reass: pretty-print one reassembly context and walk its
// cached buffers (including any chained buffers via NEXT_PRESENT).
format_ip4_sv_reass (u8 * s, va_list * args)
  vlib_main_t *vm = va_arg (*args, vlib_main_t *);
  ip4_sv_reass_t *reass = va_arg (*args, ip4_sv_reass_t *);
  s = format (s, "ID: %lu, key: %U trace_op_counter: %u\n",
	      reass->id, format_ip4_sv_reass_key, &reass->key,
	      reass->trace_op_counter);
  vec_foreach (bip, reass->cached_buffers)
      b = vlib_get_buffer (vm, bi);
      s = format (s, " #%03u: bi: %u, ", counter, bi);
      // follow the buffer chain
      while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
// show_ip4_reass: CLI handler for "show ip4-sv-reassembly [details]" -
// prints per-thread reassembly state and summary counters.
static clib_error_t *
show_ip4_reass (vlib_main_t * vm,
		unformat_input_t * input,
		CLIB_UNUSED (vlib_cli_command_t * lmd))
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  vlib_cli_output (vm, "---------------------");
  vlib_cli_output (vm, "IP4 reassembly status");
  vlib_cli_output (vm, "---------------------");
  bool details = false;
  if (unformat (input, "details"))
  ip4_sv_reass_t *reass;
  const uword nthreads = vlib_num_workers () + 1;
  for (thread_index = 0; thread_index < nthreads; ++thread_index)
      ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
      clib_spinlock_lock (&rt->lock);
      // dump each reassembly (presumably only when "details" was given;
      // guard elided from this view)
      pool_foreach (reass, rt->pool, {
        vlib_cli_output (vm, "%U", format_ip4_sv_reass, vm, reass);
      sum_reass_n += rt->reass_n;
      clib_spinlock_unlock (&rt->lock);
  vlib_cli_output (vm, "---------------------");
  vlib_cli_output (vm, "Current IP4 reassemblies count: %lu\n",
		   (long unsigned) sum_reass_n);
		   "Maximum configured concurrent IP4 reassemblies per worker-thread: %lu\n",
		   (long unsigned) rm->max_reass_n);

// CLI command registration
VLIB_CLI_COMMAND (show_ip4_sv_reass_cmd, static) = {
  .path = "show ip4-sv-reassembly",
  .short_help = "show ip4-sv-reassembly [details]",
  .function = show_ip4_reass,
#ifndef CLIB_MARCH_VARIANT
// ip4_sv_reass_enable_disable: toggle the reassembly feature on the
// ip4-unicast arc for one interface (no reference counting - see
// ip4_sv_reass_enable_disable_with_refcnt for the refcounted variant).
ip4_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable)
  return vnet_feature_enable_disable ("ip4-unicast",
				      "ip4-sv-reassembly-feature",
				      sw_if_index, enable_disable, 0, 0);
#endif /* CLIB_MARCH_VARIANT */

// hand-off node error and trace plumbing
#define foreach_ip4_sv_reass_handoff_error \
_(CONGESTION_DROP, "congestion drop")

#define _(sym,str) IP4_SV_REASSEMBLY_HANDOFF_ERROR_##sym,
  foreach_ip4_sv_reass_handoff_error
  IP4_SV_REASSEMBLY_HANDOFF_N_ERROR,
} ip4_sv_reass_handoff_error_t;

static char *ip4_sv_reass_handoff_error_strings[] = {
#define _(sym,string) string,
  foreach_ip4_sv_reass_handoff_error

// trace record: which worker the packet was handed to
  u32 next_worker_index;
} ip4_sv_reass_handoff_trace_t;

// format_ip4_sv_reass_handoff_trace: trace formatter for the hand-off node
format_ip4_sv_reass_handoff_trace (u8 * s, va_list * args)
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  ip4_sv_reass_handoff_trace_t *t =
    va_arg (*args, ip4_sv_reass_handoff_trace_t *);
    format (s, "ip4-sv-reassembly-handoff: next-worker %d",
	    t->next_worker_index);
// ip4_sv_reass_handoff_node_inline: enqueue every buffer to the frame
// queue of its owning worker (recorded earlier in
// ip.reass.owner_thread_index); buffers that cannot be enqueued are
// counted as congestion drops.
ip4_sv_reass_handoff_node_inline (vlib_main_t * vm,
				  vlib_node_runtime_t * node,
				  vlib_frame_t * frame, bool is_feature)
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  u32 n_enq, n_left_from, *from;
  u16 thread_indices[VLIB_FRAME_SIZE], *ti;
  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  vlib_get_buffers (vm, from, bufs, n_left_from);
  ti = thread_indices;
  // feature and non-feature paths use distinct frame queues
  fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index;
  while (n_left_from > 0)
      // destination worker for this buffer
      ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index;
	  ((node->flags & VLIB_NODE_FLAG_TRACE)
	   && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
	  ip4_sv_reass_handoff_trace_t *t =
	    vlib_add_trace (vm, node, b[0], sizeof (*t));
	  t->next_worker_index = ti[0];
  vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices,
				 frame->n_vectors, 1);
  // n_enq short of n_vectors means some buffers were dropped
  if (n_enq < frame->n_vectors)
    vlib_node_increment_counter (vm, node->node_index,
				 IP4_SV_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP,
				 frame->n_vectors - n_enq);
  return frame->n_vectors;
// non-feature hand-off node
VLIB_NODE_FN (ip4_sv_reass_handoff_node) (vlib_main_t * vm,
					  vlib_node_runtime_t * node,
					  vlib_frame_t * frame)
  return ip4_sv_reass_handoff_node_inline (vm, node, frame,
					   false /* is_feature */ );

VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = {
  .name = "ip4-sv-reassembly-handoff",
  .vector_size = sizeof (u32),
  .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings),
  .error_strings = ip4_sv_reass_handoff_error_strings,
  .format_trace = format_ip4_sv_reass_handoff_trace,

// feature-arc hand-off node
VLIB_NODE_FN (ip4_sv_reass_feature_handoff_node) (vlib_main_t * vm,
						  vlib_node_runtime_t *
						  vlib_frame_t * frame)
  return ip4_sv_reass_handoff_node_inline (vm, node, frame,
					   true /* is_feature */ );

VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = {
  .name = "ip4-sv-reass-feature-hoff",
  .vector_size = sizeof (u32),
  .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings),
  .error_strings = ip4_sv_reass_handoff_error_strings,
  .format_trace = format_ip4_sv_reass_handoff_trace,

#ifndef CLIB_MARCH_VARIANT
// ip4_sv_reass_enable_disable_with_refcnt: reference-counted per-interface
// enable/disable - the feature is enabled only for the first user and
// disabled again when the last user goes away.
ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  vec_validate (rm->feature_use_refcount_per_intf, sw_if_index);
      // enable path: first user on this interface enables the feature
      if (!rm->feature_use_refcount_per_intf[sw_if_index])
	  ++rm->feature_use_refcount_per_intf[sw_if_index];
	  return vnet_feature_enable_disable ("ip4-unicast",
					      "ip4-sv-reassembly-feature",
					      sw_if_index, 1, 0, 0);
      ++rm->feature_use_refcount_per_intf[sw_if_index];
      // disable path: drop the refcount, disable on last user
      // NOTE(review): no guard against decrementing a zero refcount is
      // visible here - confirm callers never disable before enabling
      --rm->feature_use_refcount_per_intf[sw_if_index];
      if (!rm->feature_use_refcount_per_intf[sw_if_index])
	return vnet_feature_enable_disable ("ip4-unicast",
					    "ip4-sv-reassembly-feature",
					    sw_if_index, 0, 0, 0);

// ip4_sv_reass_custom_register_next_node: register a user-supplied next
// node for the custom variant; returns the allocated next-node index.
ip4_sv_reass_custom_register_next_node (uword node_index)
  return vlib_node_add_next (vlib_get_main (), ip4_sv_reass_custom_node.index,
1198 * fd.io coding-style-patch-verification: ON
1201 * eval: (c-set-style "gnu")