2 * Copyright (c) 2017 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
18 * @brief IPv4 Shallow Virtual Reassembly.
20 * This file contains the source code for IPv4 Shallow Virtual reassembly.
23 #include <vppinfra/vec.h>
24 #include <vnet/vnet.h>
25 #include <vnet/ip/ip.h>
26 #include <vnet/ip/ip4_to_ip6.h>
27 #include <vppinfra/fifo.h>
28 #include <vppinfra/bihash_16_8.h>
29 #include <vnet/ip/reass/ip4_sv_reass.h>
// Unit conversion and default tuning knobs for shallow-virtual reassembly.
31 #define MSEC_PER_SEC 1000
32 #define IP4_SV_REASS_TIMEOUT_DEFAULT_MS 100
33 #define IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default
34 #define IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT 1024
35 #define IP4_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
// Hash table is sized to max_reass_n / load factor buckets
// (see ip4_sv_reass_get_nbuckets below).
36 #define IP4_SV_REASS_HT_LOAD_FACTOR (0.75)
// Return codes of ip4_sv_reass_update (enum fragment; the enum header,
// the OK member and the closing lines are not visible in this extract).
41 IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS,
42 IP4_SV_REASS_RC_UNSUPP_IP_PROTO,
// --- type definitions (fragments; several struct/union headers and fields
// are missing from this extract, as shown by the gaps in the embedded
// original line numbers) ---
// Bihash key/value overlay used to look up reassemblies by 5-ish-tuple.
79 clib_bihash_kv_16_8_t kv;
// Per-reassembly context record.
85 ip4_sv_reass_key_t key;
86 // time when last packet was received
88 // internal id of this reassembly
90 // trace operation counter
92 // minimum fragment length for this reassembly - used to estimate MTU
93 u16 min_fragment_length;
94 // buffer indexes of buffers in this reassembly in chronological order -
95 // including overlaps and duplicate fragments
97 // set to true when this reassembly is completed
// Per-worker state: pool of reassemblies guarded by a spinlock; LRU list
// fields (lru_first/lru_last, referenced later) are not visible here.
113 ip4_sv_reass_t *pool;
116 clib_spinlock_t lock;
121 } ip4_sv_reass_per_thread_t;
// Global module state.
128 u32 expire_walk_interval_ms;
129 // maximum number of fragments in one reassembly
131 // maximum number of reassemblies
// Shared lookup table mapping reassembly key -> (thread, pool index).
135 clib_bihash_16_8_t hash;
137 ip4_sv_reass_per_thread_t *per_thread_data;
// convenience
140 vlib_main_t *vlib_main;
141 vnet_main_t *vnet_main;
143 // node index of ip4-drop node
145 u32 ip4_sv_reass_expire_node_idx;
147 /** Worker handoff */
149 u32 fq_feature_index;
151 // reference count for enabling/disabling feature
152 u32 feature_use_refcount;
154 } ip4_sv_reass_main_t;
155
156 extern ip4_sv_reass_main_t ip4_sv_reass_main;
// Single definition of the global instance; extern declaration above is
// what march-variant translation units see.
158 #ifndef CLIB_MARCH_VARIANT
159 ip4_sv_reass_main_t ip4_sv_reass_main;
160 #endif /* CLIB_MARCH_VARIANT */
// Next-node indices shared by all three reassembly graph nodes.
164 IP4_SV_REASSEMBLY_NEXT_INPUT,
165 IP4_SV_REASSEMBLY_NEXT_DROP,
166 IP4_SV_REASSEMBLY_NEXT_HANDOFF,
167 IP4_SV_REASSEMBLY_N_NEXT,
168 } ip4_sv_reass_next_t;
// Trace operations: CACHE = fragment buffered awaiting first fragment,
// FORWARD = fragment forwarded with L4 info filled in (a FINISH member,
// used later as REASS_FINISH, is not visible in this extract).
172 REASS_FRAGMENT_CACHE,
174 REASS_FRAGMENT_FORWARD,
175 } ip4_sv_reass_trace_operation_e;
// Packet-trace record (fields reass_id/op_id/ip_proto/ports referenced by
// the formatter below are on lines missing from this extract).
179 ip4_sv_reass_trace_operation_e action;
185 } ip4_sv_reass_trace_t;
186
187 extern vlib_node_registration_t ip4_sv_reass_node;
188 extern vlib_node_registration_t ip4_sv_reass_node_feature;
// Trace formatter: renders one ip4_sv_reass_trace_t for "show trace".
// (The switch statement header on t->action and the function epilogue are
// not visible in this extract.)
191 format_ip4_sv_reass_trace (u8 * s, va_list * args)
193 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
194 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
195 ip4_sv_reass_trace_t *t = va_arg (*args, ip4_sv_reass_trace_t *);
196 s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id);
199 case REASS_FRAGMENT_CACHE:
200 s = format (s, "[cached]");
// FINISH / FORWARD print the resolved L4 info; ports are stored in network
// byte order and converted for display.
204 format (s, "[finish, ip proto=%u, src_port=%u, dst_port=%u]",
205 t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
206 clib_net_to_host_u16 (t->l4_dst_port));
208 case REASS_FRAGMENT_FORWARD:
210 format (s, "[forward, ip proto=%u, src_port=%u, dst_port=%u]",
211 t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
212 clib_net_to_host_u16 (t->l4_dst_port));
// Record one trace entry against buffer bi and bump the reassembly's
// per-reassembly operation counter.
// NOTE(review): the `action` parameter has no visible store into t->action
// in this extract — presumably done on a missing line; verify.
219 ip4_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
220 ip4_sv_reass_main_t * rm, ip4_sv_reass_t * reass,
221 u32 bi, ip4_sv_reass_trace_operation_e action,
222 u32 ip_proto, u16 l4_src_port, u16 l4_dst_port)
224 vlib_buffer_t *b = vlib_get_buffer (vm, bi);
225 ip4_sv_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0]));
226 t->reass_id = reass->id;
228 t->op_id = reass->trace_op_counter;
229 t->ip_proto = ip_proto;
230 t->l4_src_port = l4_src_port;
231 t->l4_dst_port = l4_dst_port;
232 ++reass->trace_op_counter;
// Debug print of the formatted trace to stdout; in upstream VPP this block
// is normally compiled out — the guarding preprocessor lines are not
// visible in this extract.
235 s = format (s, "%U", format_ip4_sv_reass_trace, NULL, NULL, t);
236 printf ("%.*s\n", vec_len (s), s);
238 vec_reset_length (s);
// Tear down one reassembly: remove its bihash entry, free all cached
// buffers, unlink it from the per-thread LRU list and return it to the
// pool. Caller must hold rt->lock.
244 ip4_sv_reass_free (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
245 ip4_sv_reass_per_thread_t * rt, ip4_sv_reass_t * reass)
247 clib_bihash_kv_16_8_t kv;
248 kv.key[0] = reass->key.as_u64[0];
249 kv.key[1] = reass->key.as_u64[1];
// is_add = 0 -> delete from hash.
250 clib_bihash_add_del_16_8 (&rm->hash, &kv, 0);
251 vlib_buffer_free (vm, reass->cached_buffers,
252 vec_len (reass->cached_buffers));
253 vec_free (reass->cached_buffers);
254 reass->cached_buffers = NULL;
// Doubly-linked LRU unlink; ~0 marks list ends.
255 if (~0 != reass->lru_prev)
257 ip4_sv_reass_t *lru_prev =
258 pool_elt_at_index (rt->pool, reass->lru_prev);
259 lru_prev->lru_next = reass->lru_next;
261 if (~0 != reass->lru_next)
263 ip4_sv_reass_t *lru_next =
264 pool_elt_at_index (rt->pool, reass->lru_next);
265 lru_next->lru_prev = reass->lru_prev;
267 if (rt->lru_first == reass - rt->pool)
269 rt->lru_first = reass->lru_next;
271 if (rt->lru_last == reass - rt->pool)
273 rt->lru_last = reass->lru_prev;
// NOTE(review): no decrement of rt->reass_n is visible in this extract —
// presumably on a missing line; verify it stays in sync with the pool.
275 pool_put (rt->pool, reass);
// Reset the per-reassembly fields that matter for a fresh reassembly.
280 ip4_sv_reass_init (ip4_sv_reass_t * reass)
282 reass->cached_buffers = NULL;
283 reass->is_complete = false;
// Look up the reassembly for key *kv; if it lives on another worker set
// *do_handoff. If found but expired, free it and fall through to create a
// new one. On create: evict the LRU victim when at capacity, link the new
// entry at the LRU tail and insert it into the shared bihash.
// Returns NULL on handoff or bihash insertion failure.
286 always_inline ip4_sv_reass_t *
287 ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
288 ip4_sv_reass_per_thread_t * rt,
289 ip4_sv_reass_kv_t * kv, u8 * do_handoff)
291 ip4_sv_reass_t *reass = NULL;
292 f64 now = vlib_time_now (rm->vlib_main);
// Search-in-place: on hit kv is overwritten with the stored value
// (thread_index + reass_index).
294 if (!clib_bihash_search_16_8
295 (&rm->hash, (clib_bihash_kv_16_8_t *) kv, (clib_bihash_kv_16_8_t *) kv))
297 if (vm->thread_index != kv->v.thread_index)
302 reass = pool_elt_at_index (rt->pool, kv->v.reass_index);
// Stale entry: expire and recreate below.
304 if (now > reass->last_heard + rm->timeout)
306 ip4_sv_reass_free (vm, rm, rt, reass);
313 reass->last_heard = now;
// At capacity: evict the least-recently-used reassembly (rt->lru_last).
317 if (rt->reass_n >= rm->max_reass_n && rm->max_reass_n)
319 reass = pool_elt_at_index (rt->pool, rt->lru_last);
320 ip4_sv_reass_free (vm, rm, rt, reass);
323 pool_get (rt->pool, reass);
324 clib_memset (reass, 0, sizeof (*reass));
// Globally-unique-ish id: thread index scaled out of the per-thread
// counter range. (The ++rt->id_counter line is not visible in this
// extract — verify it exists.)
325 reass->id = ((u64) vm->thread_index * 1000000000) + rt->id_counter;
327 ip4_sv_reass_init (reass);
329 reass->lru_prev = reass->lru_next = ~0;
// Append to LRU tail.
331 if (~0 != rt->lru_last)
333 ip4_sv_reass_t *lru_last = pool_elt_at_index (rt->pool, rt->lru_last);
334 reass->lru_prev = rt->lru_last;
335 lru_last->lru_next = rt->lru_last = reass - rt->pool;
338 if (~0 == rt->lru_first)
340 rt->lru_first = rt->lru_last = reass - rt->pool;
343 reass->key.as_u64[0] = ((clib_bihash_kv_16_8_t *) kv)->key[0];
344 reass->key.as_u64[1] = ((clib_bihash_kv_16_8_t *) kv)->key[1];
345 kv->v.reass_index = (reass - rt->pool);
346 kv->v.thread_index = vm->thread_index;
347 reass->last_heard = now;
// is_add = 1; failure means a concurrent insert or table trouble — undo.
349 if (clib_bihash_add_del_16_8 (&rm->hash, (clib_bihash_kv_16_8_t *) kv, 1))
351 ip4_sv_reass_free (vm, rm, rt, reass);
// Feed one fragment (bi0) into the reassembly. Shallow-virtual reassembly
// only needs the first fragment (offset 0) to learn protocol + L4 ports;
// it never reassembles payload. Non-first fragments are cached until the
// first fragment arrives.
358 always_inline ip4_sv_reass_rc_t
359 ip4_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
360 ip4_sv_reass_main_t * rm, ip4_sv_reass_per_thread_t * rt,
361 ip4_sv_reass_t * reass, u32 bi0)
363 vlib_buffer_t *fb = vlib_get_buffer (vm, bi0);
364 ip4_sv_reass_rc_t rc = IP4_SV_REASS_RC_OK;
365 ip4_header_t *fip = vlib_buffer_get_current (fb);
366 const u32 fragment_first = ip4_get_fragment_offset_bytes (fip);
367 if (0 == fragment_first)
369 reass->ip_proto = fip->protocol;
// ip4_get_port(ip, 1) = source port, (ip, 0) = destination port; returns 0
// for protocols it cannot parse.
370 reass->l4_src_port = ip4_get_port (fip, 1);
371 reass->l4_dst_port = ip4_get_port (fip, 0);
372 if (!reass->l4_src_port || !reass->l4_dst_port)
373 return IP4_SV_REASS_RC_UNSUPP_IP_PROTO;
374 reass->is_complete = true;
375 vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
376 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
378 ip4_sv_reass_add_trace (vm, node, rm, reass, bi0, REASS_FINISH,
379 reass->ip_proto, reass->l4_src_port,
// Not complete yet: cache this fragment for later release.
383 vec_add1 (reass->cached_buffers, bi0);
384 if (!reass->is_complete)
386 if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
388 ip4_sv_reass_add_trace (vm, node, rm, reass, bi0,
389 REASS_FRAGMENT_CACHE, ~0, ~0, ~0);
391 if (vec_len (reass->cached_buffers) > rm->max_reass_len)
393 rc = IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS;
// Main dual-purpose worker: classifies each packet; non-fragments pass
// straight through with L4 metadata filled in, fragments are matched to a
// reassembly (with cross-worker handoff when owned elsewhere). Once the
// first fragment resolves the L4 info, all cached fragments are flushed.
// is_feature selects the feature-arc variant, is_custom the
// caller-supplied next_index variant.
400 ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
401 vlib_frame_t * frame, bool is_feature, bool is_custom)
403 u32 *from = vlib_frame_vector_args (frame);
404 u32 n_left_from, n_left_to_next, *to_next, next_index;
405 ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
406 ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
// Per-thread lock serializes against the expiry walker process.
407 clib_spinlock_lock (&rt->lock);
409 n_left_from = frame->n_vectors;
410 next_index = node->cached_next_index;
412 while (n_left_from > 0)
414 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
416 while (n_left_from > 0 && n_left_to_next > 0)
421 u32 error0 = IP4_ERROR_NONE;
424 b0 = vlib_get_buffer (vm, bi0);
426 ip4_header_t *ip0 = vlib_buffer_get_current (b0);
427 if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
429 // this is a regular packet - no fragmentation
430 vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
431 vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
432 vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
433 next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
// Fragment sanity checks per RFC 791 (min 8-byte fragments except last).
436 const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
437 const u32 fragment_length =
438 clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
439 const u32 fragment_last = fragment_first + fragment_length - 1;
440 if (fragment_first > fragment_last || fragment_first + fragment_length > UINT16_MAX - 20 || (fragment_length < 8 && ip4_get_fragment_more (ip0))) // 8 is minimum frag length per RFC 791
442 next0 = IP4_SV_REASSEMBLY_NEXT_DROP;
443 error0 = IP4_ERROR_REASS_MALFORMED_PACKET;
// Lookup key: fib index + src addr | dst addr + frag id + protocol.
446 ip4_sv_reass_kv_t kv;
450 (u64) vec_elt (ip4_main.fib_index_by_sw_if_index,
451 vnet_buffer (b0)->sw_if_index[VLIB_RX]) |
452 (u64) ip0->src_address.as_u32 << 32;
454 (u64) ip0->dst_address.
455 as_u32 | (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48;
457 ip4_sv_reass_t *reass =
458 ip4_sv_reass_find_or_create (vm, rm, rt, &kv, &do_handoff);
// Reassembly owned by another worker: hand the buffer off.
460 if (PREDICT_FALSE (do_handoff))
462 next0 = IP4_SV_REASSEMBLY_NEXT_HANDOFF;
463 vnet_buffer (b0)->ip.reass.owner_thread_index =
// NULL reass (and not handoff) means allocation/insert failed.
470 next0 = IP4_SV_REASSEMBLY_NEXT_DROP;
471 error0 = IP4_ERROR_REASS_LIMIT_REACHED;
// Already complete: stamp L4 info and forward immediately.
475 if (reass->is_complete)
477 vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
478 vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
479 vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
480 next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
481 error0 = IP4_ERROR_NONE;
482 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
484 ip4_sv_reass_add_trace (vm, node, rm, reass, bi0,
485 REASS_FRAGMENT_FORWARD,
493 ip4_sv_reass_rc_t rc =
494 ip4_sv_reass_update (vm, node, rm, rt, reass, bi0);
497 case IP4_SV_REASS_RC_OK:
498 /* nothing to do here */
500 case IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS:
501 vlib_node_increment_counter (vm, node->node_index,
502 IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
504 ip4_sv_reass_free (vm, rm, rt, reass);
507 case IP4_SV_REASS_RC_UNSUPP_IP_PROTO:
// NOTE(review): this case increments the FRAGMENT_CHAIN_TOO_LONG counter,
// identical to the case above — an unsupported-protocol counter
// (IP4_ERROR_REASS_UNSUPP_IP_PROT) looks intended; verify against the
// ip4 error list and fix.
508 vlib_node_increment_counter (vm, node->node_index,
509 IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
511 ip4_sv_reass_free (vm, rm, rt, reass);
// First fragment just completed the reassembly: flush every cached
// fragment to its next node with the resolved L4 metadata.
515 if (reass->is_complete)
518 vec_foreach_index (idx, reass->cached_buffers)
520 u32 bi0 = vec_elt (reass->cached_buffers, idx);
521 vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
522 u32 next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
525 vnet_feature_next (&next0, b0);
529 next0 = vnet_buffer (b0)->ip.reass.next_index;
// Current frame may fill mid-flush; rotate frames as needed.
531 if (0 == n_left_to_next)
533 vlib_put_next_frame (vm, node, next_index,
535 vlib_get_next_frame (vm, node, next_index, to_next,
541 vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
542 vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
543 vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
544 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
546 ip4_sv_reass_add_trace (vm, node, rm, reass, bi0,
547 REASS_FRAGMENT_FORWARD,
552 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
553 to_next, n_left_to_next, bi0,
556 _vec_len (reass->cached_buffers) = 0; // buffers are owned by frame now
561 b0->error = node->errors[error0];
// Feature variant resolves the next node via the feature arc.
566 if (is_feature && IP4_ERROR_NONE == error0)
568 b0 = vlib_get_buffer (vm, bi0);
569 vnet_feature_next (&next0, b0);
573 next0 = vnet_buffer (b0)->ip.reass.next_index;
575 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
576 to_next, n_left_to_next,
584 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
587 clib_spinlock_unlock (&rt->lock);
588 return frame->n_vectors;
// Error-counter strings, generated from the foreach error macro (the
// foreach invocation line is not visible in this extract).
591 static char *ip4_sv_reass_error_strings[] = {
592 #define _(sym, string) string,
// Plain (non-feature, non-custom) graph-node entry point.
597 VLIB_NODE_FN (ip4_sv_reass_node) (vlib_main_t * vm,
598 vlib_node_runtime_t * node,
599 vlib_frame_t * frame)
601 return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
602 false /* is_custom */ );
// Registration of the plain reassembly node.
606 VLIB_REGISTER_NODE (ip4_sv_reass_node) = {
607 .name = "ip4-sv-reassembly",
608 .vector_size = sizeof (u32),
609 .format_trace = format_ip4_sv_reass_trace,
610 .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
611 .error_strings = ip4_sv_reass_error_strings,
612 .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
// next-node table (the .next_nodes = { opener is not visible here).
615 [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
616 [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
617 [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reassembly-handoff",
// Feature-arc variant entry point (next node chosen by vnet_feature_next).
623 VLIB_NODE_FN (ip4_sv_reass_node_feature) (vlib_main_t * vm,
624 vlib_node_runtime_t * node,
625 vlib_frame_t * frame)
627 return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ ,
628 false /* is_custom */ );
// Registration of the feature-arc node; note it hands off to the
// feature-specific handoff node.
632 VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = {
633 .name = "ip4-sv-reassembly-feature",
634 .vector_size = sizeof (u32),
635 .format_trace = format_ip4_sv_reass_trace,
636 .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
637 .error_strings = ip4_sv_reass_error_strings,
638 .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
641 [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
642 [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
643 [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff",
// Attach the feature node to the ip4-unicast arc, before ip4-lookup.
649 VNET_FEATURE_INIT (ip4_sv_reass_feature) = {
650 .arc_name = "ip4-unicast",
651 .node_name = "ip4-sv-reassembly-feature",
652 .runs_before = VNET_FEATURES ("ip4-lookup"),
// Custom-next variant: callers register their own next nodes via
// ip4_sv_reass_custom_register_next_node and select them through
// vnet_buffer()->ip.reass.next_index.
658 VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = {
659 .name = "ip4-sv-reassembly-custom-next",
660 .vector_size = sizeof (u32),
661 .format_trace = format_ip4_sv_reass_trace,
662 .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
663 .error_strings = ip4_sv_reass_error_strings,
664 .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
667 [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
668 [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
669 [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reassembly-handoff",
// Entry point for the custom-next node (is_custom = true).
675 VLIB_NODE_FN (ip4_sv_reass_custom_node) (vlib_main_t * vm,
676 vlib_node_runtime_t * node,
677 vlib_frame_t * frame)
679 return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
680 true /* is_custom */ );
// Compute the bihash bucket count: max_reass_n / load factor, rounded up
// to the next power of two (the rounding/return lines are not fully
// visible in this extract).
683 #ifndef CLIB_MARCH_VARIANT
685 ip4_sv_reass_get_nbuckets ()
687 ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
691 nbuckets = (u32) (rm->max_reass_n / IP4_SV_REASS_HT_LOAD_FACTOR);
693 for (i = 0; i < 31; i++)
694 if ((1 << i) >= nbuckets)
700 #endif /* CLIB_MARCH_VARIANT */
// Process events delivered to the expiry-walk process node.
704 IP4_EVENT_CONFIG_CHANGED = 1,
705 } ip4_sv_reass_event_t;
// Context for rehashing: destination table (part of a struct whose header
// is not visible in this extract).
710 clib_bihash_16_8_t *new_hash;
// Bihash foreach callback: copy each key/value pair into the new table.
713 #ifndef CLIB_MARCH_VARIANT
715 ip4_rehash_cb (clib_bihash_kv_16_8_t * kv, void *_ctx)
717 ip4_rehash_cb_ctx *ctx = _ctx;
718 if (clib_bihash_add_del_16_8 (ctx->new_hash, kv, 1))
// Store module parameters; timeout is kept both in ms (for reporting) and
// as f64 seconds (for comparisons against vlib_time_now).
725 ip4_sv_reass_set_params (u32 timeout_ms, u32 max_reassemblies,
726 u32 max_reassembly_length,
727 u32 expire_walk_interval_ms)
729 ip4_sv_reass_main.timeout_ms = timeout_ms;
730 ip4_sv_reass_main.timeout = (f64) timeout_ms / (f64) MSEC_PER_SEC;
731 ip4_sv_reass_main.max_reass_n = max_reassemblies;
732 ip4_sv_reass_main.max_reass_len = max_reassembly_length;
733 ip4_sv_reass_main.expire_walk_interval_ms = expire_walk_interval_ms;
// Public API: update parameters, wake the expiry process, and grow the
// bihash (rehash into a bigger table) when the new bucket count exceeds
// the old one. Shrinking is never done.
737 ip4_sv_reass_set (u32 timeout_ms, u32 max_reassemblies,
738 u32 max_reassembly_length, u32 expire_walk_interval_ms)
740 u32 old_nbuckets = ip4_sv_reass_get_nbuckets ();
741 ip4_sv_reass_set_params (timeout_ms, max_reassemblies,
742 max_reassembly_length, expire_walk_interval_ms);
// Signal the expiry-walk process so it picks up the new interval.
743 vlib_process_signal_event (ip4_sv_reass_main.vlib_main,
744 ip4_sv_reass_main.ip4_sv_reass_expire_node_idx,
745 IP4_EVENT_CONFIG_CHANGED, 0);
746 u32 new_nbuckets = ip4_sv_reass_get_nbuckets ();
747 if (ip4_sv_reass_main.max_reass_n > 0 && new_nbuckets > old_nbuckets)
749 clib_bihash_16_8_t new_hash;
750 clib_memset (&new_hash, 0, sizeof (new_hash));
751 ip4_rehash_cb_ctx ctx;
753 ctx.new_hash = &new_hash;
754 clib_bihash_init_16_8 (&new_hash, "ip4-dr", new_nbuckets,
755 new_nbuckets * 1024);
756 clib_bihash_foreach_key_value_pair_16_8 (&ip4_sv_reass_main.hash,
757 ip4_rehash_cb, &ctx);
// Failure path: drop the partially-built table (the failure check itself
// is on lines not visible in this extract).
760 clib_bihash_free_16_8 (&new_hash);
// Success path: replace the old table with the new one.
765 clib_bihash_free_16_8 (&ip4_sv_reass_main.hash);
766 clib_memcpy_fast (&ip4_sv_reass_main.hash, &new_hash,
767 sizeof (ip4_sv_reass_main.hash));
768 clib_bihash_copied (&ip4_sv_reass_main.hash, &new_hash);
// Public API: read back the current module parameters.
775 ip4_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies,
776 u32 * max_reassembly_length, u32 * expire_walk_interval_ms)
778 *timeout_ms = ip4_sv_reass_main.timeout_ms;
779 *max_reassemblies = ip4_sv_reass_main.max_reass_n;
780 *max_reassembly_length = ip4_sv_reass_main.max_reass_len;
781 *expire_walk_interval_ms = ip4_sv_reass_main.expire_walk_interval_ms;
// Plugin init: allocate per-worker state, apply default parameters, size
// and create the bihash, and cache node / frame-queue indices.
785 static clib_error_t *
786 ip4_sv_reass_init_function (vlib_main_t * vm)
788 ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
789 clib_error_t *error = 0;
794 rm->vnet_main = vnet_get_main ();
// One entry per worker plus the main thread (index 0).
796 vec_validate (rm->per_thread_data, vlib_num_workers ());
797 ip4_sv_reass_per_thread_t *rt;
798 vec_foreach (rt, rm->per_thread_data)
800 clib_spinlock_init (&rt->lock);
801 pool_alloc (rt->pool, rm->max_reass_n);
802 rt->lru_first = rt->lru_last = ~0;
// Cache the expiry-walk process node index for event signaling.
805 node = vlib_get_node_by_name (vm, (u8 *) "ip4-sv-reassembly-expire-walk");
807 rm->ip4_sv_reass_expire_node_idx = node->index;
809 ip4_sv_reass_set_params (IP4_SV_REASS_TIMEOUT_DEFAULT_MS,
810 IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT,
811 IP4_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT,
812 IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS);
814 nbuckets = ip4_sv_reass_get_nbuckets ();
815 clib_bihash_init_16_8 (&rm->hash, "ip4-dr", nbuckets, nbuckets * 1024);
817 node = vlib_get_node_by_name (vm, (u8 *) "ip4-drop");
819 rm->ip4_drop_idx = node->index;
// Frame queues used by the two handoff nodes.
821 rm->fq_index = vlib_frame_queue_main_init (ip4_sv_reass_node.index, 0);
822 rm->fq_feature_index =
823 vlib_frame_queue_main_init (ip4_sv_reass_node_feature.index, 0);
828 VLIB_INIT_FUNCTION (ip4_sv_reass_init_function);
829 #endif /* CLIB_MARCH_VARIANT */
// Process node: wakes every expire_walk_interval_ms (or on config-change
// event) and frees every reassembly whose last_heard is older than the
// timeout, one worker at a time under that worker's lock.
832 ip4_sv_reass_walk_expired (vlib_main_t * vm,
833 vlib_node_runtime_t * node, vlib_frame_t * f)
835 ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
836 uword event_type, *event_data = 0;
840 vlib_process_wait_for_event_or_clock (vm,
842 rm->expire_walk_interval_ms /
844 event_type = vlib_process_get_events (vm, &event_data);
848 case ~0: /* no events => timeout */
849 /* nothing to do here */
851 case IP4_EVENT_CONFIG_CHANGED:
854 clib_warning ("BUG: event type 0x%wx", event_type);
857 f64 now = vlib_time_now (vm);
859 ip4_sv_reass_t *reass;
// Collect indices first, then free — freeing while iterating a pool with
// pool_foreach_index would invalidate the iteration.
860 int *pool_indexes_to_free = NULL;
862 uword thread_index = 0;
864 const uword nthreads = vlib_num_workers () + 1;
865 for (thread_index = 0; thread_index < nthreads; ++thread_index)
867 ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
868 clib_spinlock_lock (&rt->lock);
870 vec_reset_length (pool_indexes_to_free);
872 pool_foreach_index (index, rt->pool, ({
873 reass = pool_elt_at_index (rt->pool, index);
874 if (now > reass->last_heard + rm->timeout)
876 vec_add1 (pool_indexes_to_free, index);
882 vec_foreach (i, pool_indexes_to_free)
884 ip4_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
885 ip4_sv_reass_free (vm, rm, rt, reass);
889 clib_spinlock_unlock (&rt->lock);
892 vec_free (pool_indexes_to_free);
895 _vec_len (event_data) = 0;
// Registration of the expiry-walk process node.
// NOTE(review): format_trace/error_strings on a process node are unusual
// (a process never adds packet traces) — presumably copy-paste; harmless.
903 VLIB_REGISTER_NODE (ip4_sv_reass_expire_node) = {
904 .function = ip4_sv_reass_walk_expired,
905 .type = VLIB_NODE_TYPE_PROCESS,
906 .name = "ip4-sv-reassembly-expire-walk",
907 .format_trace = format_ip4_sv_reass_trace,
908 .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
909 .error_strings = ip4_sv_reass_error_strings,
// Render a reassembly key for CLI output; frag_id is stored in network
// byte order and converted for display.
915 format_ip4_sv_reass_key (u8 * s, va_list * args)
917 ip4_sv_reass_key_t *key = va_arg (*args, ip4_sv_reass_key_t *);
920 "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
921 key->xx_id, format_ip4_address, &key->src, format_ip4_address,
922 &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
// Render one reassembly (header line + each cached buffer chain) for the
// "show ip4-sv-reassembly details" CLI.
927 format_ip4_sv_reass (u8 * s, va_list * args)
929 vlib_main_t *vm = va_arg (*args, vlib_main_t *);
930 ip4_sv_reass_t *reass = va_arg (*args, ip4_sv_reass_t *);
932 s = format (s, "ID: %lu, key: %U trace_op_counter: %u\n",
933 reass->id, format_ip4_sv_reass_key, &reass->key,
934 reass->trace_op_counter);
939 vec_foreach (bip, reass->cached_buffers)
944 b = vlib_get_buffer (vm, bi);
945 s = format (s, " #%03u: bi: %u, ", counter, bi);
// Walk chained buffers of this fragment.
949 while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
// CLI handler for "show ip4-sv-reassembly [details]": prints per-thread
// reassembly counts and, with "details", every active reassembly.
954 static clib_error_t *
955 show_ip4_reass (vlib_main_t * vm,
956 unformat_input_t * input,
957 CLIB_UNUSED (vlib_cli_command_t * lmd))
959 ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
961 vlib_cli_output (vm, "---------------------");
962 vlib_cli_output (vm, "IP4 reassembly status");
963 vlib_cli_output (vm, "---------------------");
964 bool details = false;
965 if (unformat (input, "details"))
971 ip4_sv_reass_t *reass;
973 const uword nthreads = vlib_num_workers () + 1;
974 for (thread_index = 0; thread_index < nthreads; ++thread_index)
976 ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
// Lock out the datapath/expiry while walking this worker's pool.
977 clib_spinlock_lock (&rt->lock);
981 pool_foreach (reass, rt->pool, {
982 vlib_cli_output (vm, "%U", format_ip4_sv_reass, vm, reass);
986 sum_reass_n += rt->reass_n;
987 clib_spinlock_unlock (&rt->lock);
989 vlib_cli_output (vm, "---------------------");
990 vlib_cli_output (vm, "Current IP4 reassemblies count: %lu\n",
991 (long unsigned) sum_reass_n);
993 "Maximum configured concurrent IP4 reassemblies per worker-thread: %lu\n",
994 (long unsigned) rm->max_reass_n);
// CLI command registration.
999 VLIB_CLI_COMMAND (show_ip4_sv_reass_cmd, static) = {
1000 .path = "show ip4-sv-reassembly",
1001 .short_help = "show ip4-sv-reassembly [details]",
1002 .function = show_ip4_reass,
// Public API: enable/disable the feature-arc reassembly node on one
// interface (no reference counting — see the refcounted variant below).
1006 #ifndef CLIB_MARCH_VARIANT
1008 ip4_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable)
1010 return vnet_feature_enable_disable ("ip4-unicast",
1011 "ip4-sv-reassembly-feature",
1012 sw_if_index, enable_disable, 0, 0);
1014 #endif /* CLIB_MARCH_VARIANT */
// Handoff-node error counters (only congestion drop).
1017 #define foreach_ip4_sv_reass_handoff_error \
1018 _(CONGESTION_DROP, "congestion drop")
1023 #define _(sym,str) IP4_SV_REASSEMBLY_HANDOFF_ERROR_##sym,
1024 foreach_ip4_sv_reass_handoff_error
1026 IP4_SV_REASSEMBLY_HANDOFF_N_ERROR,
1027 } ip4_sv_reass_handoff_error_t;
1029 static char *ip4_sv_reass_handoff_error_strings[] = {
1030 #define _(sym,string) string,
1031 foreach_ip4_sv_reass_handoff_error
// Handoff trace: which worker the buffer was sent to.
1037 u32 next_worker_index;
1038 } ip4_sv_reass_handoff_trace_t;
1041 format_ip4_sv_reass_handoff_trace (u8 * s, va_list * args)
1043 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1044 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1045 ip4_sv_reass_handoff_trace_t *t =
1046 va_arg (*args, ip4_sv_reass_handoff_trace_t *);
1049 format (s, "ip4-sv-reassembly-handoff: next-worker %d",
1050 t->next_worker_index);
// Hand each buffer to the worker that owns its reassembly, using the
// owner_thread_index stamped by ip4_sv_reass_inline. Buffers that cannot
// be enqueued (queue congestion) are counted as drops.
1056 ip4_sv_reass_handoff_node_inline (vlib_main_t * vm,
1057 vlib_node_runtime_t * node,
1058 vlib_frame_t * frame, bool is_feature)
1060 ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
1062 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1063 u32 n_enq, n_left_from, *from;
1064 u16 thread_indices[VLIB_FRAME_SIZE], *ti;
1067 from = vlib_frame_vector_args (frame);
1068 n_left_from = frame->n_vectors;
1069 vlib_get_buffers (vm, from, bufs, n_left_from);
1072 ti = thread_indices;
// Each variant has its own frame queue.
1074 fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index;
1076 while (n_left_from > 0)
1078 ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index;
1081 ((node->flags & VLIB_NODE_FLAG_TRACE)
1082 && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
1084 ip4_sv_reass_handoff_trace_t *t =
1085 vlib_add_trace (vm, node, b[0], sizeof (*t));
1086 t->next_worker_index = ti[0];
1094 vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices,
1095 frame->n_vectors, 1);
1097 if (n_enq < frame->n_vectors)
1098 vlib_node_increment_counter (vm, node->node_index,
1099 IP4_SV_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP,
1100 frame->n_vectors - n_enq);
1101 return frame->n_vectors;
// Non-feature handoff node entry point + registration.
1104 VLIB_NODE_FN (ip4_sv_reass_handoff_node) (vlib_main_t * vm,
1105 vlib_node_runtime_t * node,
1106 vlib_frame_t * frame)
1108 return ip4_sv_reass_handoff_node_inline (vm, node, frame,
1109 false /* is_feature */ );
1114 VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = {
1115 .name = "ip4-sv-reassembly-handoff",
1116 .vector_size = sizeof (u32),
1117 .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings),
1118 .error_strings = ip4_sv_reass_handoff_error_strings,
1119 .format_trace = format_ip4_sv_reass_handoff_trace,
// Feature-arc handoff node entry point + registration.
1131 VLIB_NODE_FN (ip4_sv_reass_feature_handoff_node) (vlib_main_t * vm,
1132 vlib_node_runtime_t *
1134 vlib_frame_t * frame)
1136 return ip4_sv_reass_handoff_node_inline (vm, node, frame,
1137 true /* is_feature */ );
1143 VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = {
1144 .name = "ip4-sv-reass-feature-hoff",
1145 .vector_size = sizeof (u32),
1146 .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings),
1147 .error_strings = ip4_sv_reass_handoff_error_strings,
1148 .format_trace = format_ip4_sv_reass_handoff_trace,
// Reference-counted enable/disable of the feature: only the 0->1 and 1->0
// refcount transitions actually toggle the vnet feature.
// NOTE(review): the refcount is global while enable/disable is
// per-interface (sw_if_index) — multiple interfaces share one counter;
// verify this matches callers' expectations.
1158 #ifndef CLIB_MARCH_VARIANT
1160 ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
1162 ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
1165 if (!rm->feature_use_refcount)
1167 ++rm->feature_use_refcount;
1168 return vnet_feature_enable_disable ("ip4-unicast",
1169 "ip4-sv-reassembly-feature",
1170 sw_if_index, 1, 0, 0);
1172 ++rm->feature_use_refcount;
// disable path
1176 --rm->feature_use_refcount;
1177 if (!rm->feature_use_refcount)
1178 return vnet_feature_enable_disable ("ip4-unicast",
1179 "ip4-sv-reassembly-feature",
1180 sw_if_index, 0, 0, 0);
// Register a caller-supplied next node for the custom-next variant
// (remainder of this call is not visible in this extract).
1186 ip4_sv_reass_custom_register_next_node (uword node_index)
1188 return vlib_node_add_next (vlib_get_main (), ip4_sv_reass_custom_node.index,
1194 * fd.io coding-style-patch-verification: ON
1197 * eval: (c-set-style "gnu")