2 * Copyright (c) 2017 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
18 * @brief IPv4 Full Reassembly.
20 * This file contains the source code for IPv4 full reassembly.
23 #include <vppinfra/vec.h>
24 #include <vnet/vnet.h>
25 #include <vnet/ip/ip.h>
26 #include <vppinfra/fifo.h>
27 #include <vppinfra/bihash_16_8.h>
28 #include <vnet/ip/reass/ip4_full_reass.h>
31 #define MSEC_PER_SEC 1000
32 #define IP4_REASS_TIMEOUT_DEFAULT_MS 100
33 #define IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default
34 #define IP4_REASS_MAX_REASSEMBLIES_DEFAULT 1024
35 #define IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
36 #define IP4_REASS_HT_LOAD_FACTOR (0.75)
38 #define IP4_REASS_DEBUG_BUFFERS 0
39 #if IP4_REASS_DEBUG_BUFFERS
40 #define IP4_REASS_DEBUG_BUFFER(bi, what) \
44 printf (#what "buffer %u", _bi); \
45 vlib_buffer_t *_b = vlib_get_buffer (vm, _bi); \
46 while (_b->flags & VLIB_BUFFER_NEXT_PRESENT) \
48 _bi = _b->next_buffer; \
49 printf ("[%u]", _bi); \
50 _b = vlib_get_buffer (vm, _bi); \
57 #define IP4_REASS_DEBUG_BUFFER(...)
63 IP4_REASS_RC_TOO_MANY_FRAGMENTS,
64 IP4_REASS_RC_INTERNAL_ERROR,
67 } ip4_full_reass_rc_t;
84 } ip4_full_reass_key_t;
91 u32 memory_owner_thread_index;
94 } ip4_full_reass_val_t;
100 ip4_full_reass_key_t k;
101 ip4_full_reass_val_t v;
103 clib_bihash_kv_16_8_t kv;
104 } ip4_full_reass_kv_t;
// Number of leading payload bytes in this fragment buffer that are already
// covered by previously-accepted ranges: range_first is advanced past any
// overlap, fragment_first is the fragment's original first octet, so the
// difference is the offset into the fragment where usable data begins.
107 ip4_full_reass_buffer_get_data_offset (vlib_buffer_t * b)
109   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
110   return vnb->ip.reass.range_first - vnb->ip.reass.fragment_first;
// Usable (non-overlapping) payload length contributed by this fragment
// buffer: from the post-overlap start (fragment_first + data_offset) to the
// effective end of the range (range_last clamped to fragment_last),
// inclusive — hence the trailing "+ 1".
114 ip4_full_reass_buffer_get_data_len (vlib_buffer_t * b)
116   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
117   return clib_min (vnb->ip.reass.range_last, vnb->ip.reass.fragment_last) -
118     (vnb->ip.reass.fragment_first +
119      ip4_full_reass_buffer_get_data_offset (b)) + 1;
125 ip4_full_reass_key_t key;
126 // time when last packet was received
128 // internal id of this reassembly
130 // buffer index of first buffer in this reassembly context
132 // last octet of packet, ~0 until fragment without more_fragments arrives
133 u32 last_packet_octet;
134 // length of data collected so far
136 // trace operation counter
137 u32 trace_op_counter;
138 // next index - used by non-feature node
140 // error next index - used by custom apps (~0 if not used)
141 u32 error_next_index;
142 // minimum fragment length for this reassembly - used to estimate MTU
143 u16 min_fragment_length;
144 // number of fragments in this reassembly
146 // thread owning memory for this context (whose pool contains this ctx)
147 u32 memory_owner_thread_index;
148 // thread which received fragment with offset 0 and which sends out the
149 // completed reassembly
150 u32 sendout_thread_index;
155 ip4_full_reass_t *pool;
158 clib_spinlock_t lock;
159 } ip4_full_reass_per_thread_t;
166 u32 expire_walk_interval_ms;
167 // maximum number of fragments in one reassembly
169 // maximum number of reassemblies
173 clib_bihash_16_8_t hash;
175 ip4_full_reass_per_thread_t *per_thread_data;
178 vlib_main_t *vlib_main;
180 u32 ip4_full_reass_expire_node_idx;
182 /** Worker handoff */
185 u32 fq_feature_index;
188 // reference count for enabling/disabling feature - per interface
189 u32 *feature_use_refcount_per_intf;
191 // whether local fragmented packets are reassembled or not
192 int is_local_reass_enabled;
193 } ip4_full_reass_main_t;
195 extern ip4_full_reass_main_t ip4_full_reass_main;
197 #ifndef CLIB_MARCH_VARIANT
198 ip4_full_reass_main_t ip4_full_reass_main;
199 #endif /* CLIB_MARCH_VARIANT */
203 IP4_FULL_REASS_NEXT_INPUT,
204 IP4_FULL_REASS_NEXT_DROP,
205 IP4_FULL_REASS_NEXT_HANDOFF,
206 IP4_FULL_REASS_N_NEXT,
207 } ip4_full_reass_next_t;
214 } ip4_full_reass_node_type_t;
225 } ip4_full_reass_trace_operation_e;
235 } ip4_full_reass_range_trace_t;
239 ip4_full_reass_trace_operation_e action;
241 ip4_full_reass_range_trace_t trace_range;
249 bool is_after_handoff;
250 ip4_header_t ip4_header;
251 } ip4_full_reass_trace_t;
253 extern vlib_node_registration_t ip4_full_reass_node;
254 extern vlib_node_registration_t ip4_full_reass_node_feature;
255 extern vlib_node_registration_t ip4_full_reass_node_custom;
// Snapshot the per-range bookkeeping of buffer bi into *trace for packet
// tracing: range boundaries, the derived data offset/length, and the buffer
// index itself.
258 ip4_full_reass_trace_details (vlib_main_t * vm, u32 bi,
259 			      ip4_full_reass_range_trace_t * trace)
261   vlib_buffer_t *b = vlib_get_buffer (vm, bi);
262   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
263   trace->range_first = vnb->ip.reass.range_first;
264   trace->range_last = vnb->ip.reass.range_last;
265   trace->data_offset = ip4_full_reass_buffer_get_data_offset (b);
266   trace->data_len = ip4_full_reass_buffer_get_data_len (b);
267   trace->range_bi = bi;
// vlib format callback: render one reassembly range trace as
// "range: [first, last], off O, len L, bi B".
271 format_ip4_full_reass_range_trace (u8 * s, va_list * args)
273   ip4_full_reass_range_trace_t *trace =
274     va_arg (*args, ip4_full_reass_range_trace_t *);
276     format (s, "range: [%u, %u], off %d, len %u, bi %u", trace->range_first,
277 	    trace->range_last, trace->data_offset, trace->data_len,
// vlib format callback for a full-reassembly trace record.  Prints the
// captured IPv4 header when the record was taken after a worker handoff,
// then one action-specific line (new/shrink/discard/overlap/finalize/
// handoff/passthrough) — the action switch itself is not fully visible in
// this listing.
283 format_ip4_full_reass_trace (u8 * s, va_list * args)
285   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
286   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
287   ip4_full_reass_trace_t *t = va_arg (*args, ip4_full_reass_trace_t *);
  // reass_id == ~0 marks records with no reassembly context (e.g. handoff
  // or passthrough traces)
289   if (~0 != t->reass_id)
291       if (t->is_after_handoff)
294 	    format (s, "%U\n", format_ip4_header, &t->ip4_header,
295 		    sizeof (t->ip4_header));
299 	format (s, "%Ureass id: %u, op id: %u, ", format_white_space, indent,
300 		t->reass_id, t->op_id);
301       indent = format_get_indent (s);
304 		  "first bi: %u, data len: %u, ip/fragment[%u, %u]",
305 		  t->trace_range.first_bi, t->total_data_len, t->fragment_first,
311       s = format (s, "\n%Ushrink %U by %u", format_white_space, indent,
312 		  format_ip4_full_reass_range_trace, &t->trace_range,
316       s = format (s, "\n%Udiscard %U", format_white_space, indent,
317 		  format_ip4_full_reass_range_trace, &t->trace_range);
320       s = format (s, "\n%Unew %U", format_white_space, indent,
321 		  format_ip4_full_reass_range_trace, &t->trace_range);
324       s = format (s, "\n%Uoverlapping/ignored %U", format_white_space, indent,
325 		  format_ip4_full_reass_range_trace, &t->trace_range);
328       s = format (s, "\n%Ufinalize reassembly", format_white_space, indent);
332 	format (s, "handoff from thread #%u to thread #%u", t->thread_id,
336       s = format (s, "passthrough - not a fragment");
// Record a trace entry for buffer bi describing `action` on reassembly
// `reass` (reass may be NULL for passthrough/handoff records — see the
// else-branch defaults).  size_diff carries the shrink amount for
// RANGE_SHRINK; thread_id_to the destination worker for HANDOFF.
343 ip4_full_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
344 			  ip4_full_reass_t * reass, u32 bi,
345 			  ip4_full_reass_trace_operation_e action,
346 			  u32 size_diff, u32 thread_id_to)
348   vlib_buffer_t *b = vlib_get_buffer (vm, bi);
349   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
  // The trace entry referenced by this buffer may have been reclaimed
  // (e.g. after a handoff); drop the traced flag instead of touching a
  // freed pool element.
350   if (pool_is_free_index
351       (vm->trace_main.trace_buffer_pool, vlib_buffer_get_trace_index (b)))
353       // this buffer's trace is gone
354       b->flags &= ~VLIB_BUFFER_IS_TRACED;
  // A trace-thread mismatch means the buffer was handed off from another
  // worker; capture the IPv4 header so the trace is readable on this side.
357   bool is_after_handoff = false;
358   if (vlib_buffer_get_trace_thread (b) != vm->thread_index)
360       is_after_handoff = true;
362   ip4_full_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0]));
363   t->is_after_handoff = is_after_handoff;
364   if (t->is_after_handoff)
366       clib_memcpy (&t->ip4_header, vlib_buffer_get_current (b),
367 		   clib_min (sizeof (t->ip4_header), b->current_length));
371       t->reass_id = reass->id;
372       t->op_id = reass->trace_op_counter;
373       t->trace_range.first_bi = reass->first_bi;
374       t->total_data_len = reass->data_len;
375       ++reass->trace_op_counter;
  // No reassembly context: zero the fields the formatter reads.
381       t->trace_range.first_bi = 0;
382       t->total_data_len = 0;
385   ip4_full_reass_trace_details (vm, bi, &t->trace_range);
386   t->size_diff = size_diff;
387   t->thread_id = vm->thread_index;
388   t->thread_id_to = thread_id_to;
389   t->fragment_first = vnb->ip.reass.fragment_first;
390   t->fragment_last = vnb->ip.reass.fragment_last;
  // Debug-only stdout dump of the freshly formatted trace record
  // (presumably inside an #if block not visible in this listing — confirm).
393       s = format (s, "%U", format_ip4_full_reass_trace, NULL, NULL, t);
394       printf ("%.*s\n", vec_len (s), s);
396       vec_reset_length (s);
// Return the reassembly context to the owning thread's pool.  Does NOT
// remove the bihash entry — callers that need that use
// ip4_full_reass_free() instead.
401 ip4_full_reass_free_ctx (ip4_full_reass_per_thread_t * rt,
402 			 ip4_full_reass_t * reass)
404   pool_put (rt->pool, reass);
// Fully dispose of a reassembly: delete its key from the global bihash
// (add_del with is_add = 0), then free the pool context.  Buffers held by
// the reassembly are NOT freed here — see ip4_full_reass_drop_all().
409 ip4_full_reass_free (ip4_full_reass_main_t * rm,
410 		     ip4_full_reass_per_thread_t * rt,
411 		     ip4_full_reass_t * reass)
413   clib_bihash_kv_16_8_t kv;
414   kv.key[0] = reass->key.as_u64[0];
415   kv.key[1] = reass->key.as_u64[1];
416   clib_bihash_add_del_16_8 (&rm->hash, &kv, 0);
417   return ip4_full_reass_free_ctx (rt, reass);
// Drop every buffer chained into `reass`, plus the optional offending
// fragment `offending_bi` (~0 if none).  If the app configured an
// error_next_index, the buffers are enqueued to that next node instead of
// being freed outright.
421 ip4_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node,
422 			 ip4_full_reass_t *reass, u32 offending_bi)
424   u32 range_bi = reass->first_bi;
425   vlib_buffer_t *range_b;
426   vnet_buffer_opaque_t *range_vnb;
  // Walk the range list; collect every buffer (including chained
  // next-buffers, handled in lines not visible here) into to_free.
428   while (~0 != range_bi)
430       range_b = vlib_get_buffer (vm, range_bi);
431       range_vnb = vnet_buffer (range_b);
435 	  vec_add1 (to_free, bi);
  // If the offending buffer is already part of the chain, remember that so
  // it is not added a second time below — TODO confirm, the flag update is
  // outside this listing.
436 	  if (offending_bi == bi)
440 	      vlib_buffer_t *b = vlib_get_buffer (vm, bi);
  // Break buffer chains so each element is freed exactly once.
441 	      if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
444 		  b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
451       range_bi = range_vnb->ip.reass.next_range_bi;
453   if (~0 != offending_bi)
455       vec_add1 (to_free, offending_bi);
457   /* send to next_error_index */
458   if (~0 != reass->error_next_index)
460       u32 n_left_to_next, *to_next, next_index;
462       next_index = reass->error_next_index;
  // Standard vlib enqueue loop: drain to_free into frames for the error
  // next node.
465       while (vec_len (to_free) > 0)
467 	  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
469 	  while (vec_len (to_free) > 0 && n_left_to_next > 0)
471 	      bi = vec_pop (to_free);
480 	  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
  // No error path configured: free the collected buffers directly.
485       vlib_buffer_free (vm, to_free, vec_len (to_free));
// Reset the per-reassembly sentinel fields: ~0 means "not yet known" for
// the first-buffer index, the total packet length (set once the fragment
// without more_fragments arrives), and the (error_)next_index overrides.
491 ip4_full_reass_init (ip4_full_reass_t * reass)
493   reass->first_bi = ~0;
494   reass->last_packet_octet = ~0;
496   reass->next_index = ~0;
497   reass->error_next_index = ~0;
// Look up the reassembly for kv's 5-tuple key in the global hash, or create
// a new one.  Sets *do_handoff when the context is owned by a different
// worker's pool (memory owner), in which case the caller must hand the
// fragment off instead of processing it here.  Returns NULL when the
// per-thread reassembly limit is hit (or after an expired context is
// reclaimed — exact flow partly outside this listing).
500 always_inline ip4_full_reass_t *
501 ip4_full_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
502 			       ip4_full_reass_main_t * rm,
503 			       ip4_full_reass_per_thread_t * rt,
504 			       ip4_full_reass_kv_t * kv, u8 * do_handoff)
506   ip4_full_reass_t *reass;
512   now = vlib_time_now (vm);
  // Hit: clib_bihash_search returns 0 on success and overwrites kv with the
  // stored value (reass_index + memory owner thread).
513   if (!clib_bihash_search_16_8 (&rm->hash, &kv->kv, &kv->kv))
515       if (vm->thread_index != kv->v.memory_owner_thread_index)
521 	pool_elt_at_index (rm->per_thread_data
522 			   [kv->v.memory_owner_thread_index].pool,
  // Stale context past the configured timeout: drop its buffers and free
  // it, then fall through to create a fresh one.
525       if (now > reass->last_heard + rm->timeout)
527 	  ip4_full_reass_drop_all (vm, node, reass, ~0);
528 	  ip4_full_reass_free (rm, rt, reass);
535       reass->last_heard = now;
  // Miss: enforce the per-thread cap before allocating a new context.
539   if (rt->reass_n >= rm->max_reass_n)
546       pool_get (rt->pool, reass);
547       clib_memset (reass, 0, sizeof (*reass));
  // id is globally unique across workers: thread index scaled out of the
  // per-thread counter range.
548       reass->id = ((u64) vm->thread_index * 1000000000) + rt->id_counter;
549       reass->memory_owner_thread_index = vm->thread_index;
551       ip4_full_reass_init (reass);
555   reass->key.as_u64[0] = kv->kv.key[0];
556   reass->key.as_u64[1] = kv->kv.key[1];
557   kv->v.reass_index = (reass - rt->pool);
558   kv->v.memory_owner_thread_index = vm->thread_index;
559   reass->last_heard = now;
  // is_add = 2: add, but fail if the key already exists (lost the race to
  // another worker).
561   int rv = clib_bihash_add_del_16_8 (&rm->hash, &kv->kv, 2);
564       ip4_full_reass_free_ctx (rt, reass);
566       // if other worker created a context already work with the other copy
// All fragments have arrived: splice the per-range buffer sub-chains into a
// single buffer chain, trimming overlap bytes from the front of each range
// and padding/garbage from the tail, rewrite the IPv4 header of the first
// buffer (length, cleared fragment fields, recomputed checksum), linearize,
// and emit the finished packet via *bi0/*next0.  Frees the reassembly
// context on success.  Returns IP4_REASS_RC_INTERNAL_ERROR on invariant
// violations and IP4_REASS_RC_NO_BUF if linearization fails.
574 always_inline ip4_full_reass_rc_t
575 ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
576 			 ip4_full_reass_main_t * rm,
577 			 ip4_full_reass_per_thread_t * rt,
578 			 ip4_full_reass_t * reass, u32 * bi0,
579 			 u32 * next0, u32 * error0, bool is_custom)
581   vlib_buffer_t *first_b = vlib_get_buffer (vm, reass->first_bi);
582   vlib_buffer_t *last_b = NULL;
583   u32 sub_chain_bi = reass->first_bi;
584   u32 total_length = 0;
  // Outer do/while: one iteration per range sub-chain (loop brackets are
  // outside this listing).
588       u32 tmp_bi = sub_chain_bi;
589       vlib_buffer_t *tmp = vlib_get_buffer (vm, tmp_bi);
590       ip4_header_t *ip = vlib_buffer_get_current (tmp);
591       vnet_buffer_opaque_t *vnb = vnet_buffer (tmp);
  // Sanity: range must start at/after the fragment start and end after it.
592       if (!(vnb->ip.reass.range_first >= vnb->ip.reass.fragment_first) &&
593 	  !(vnb->ip.reass.range_last > vnb->ip.reass.fragment_first))
595 	  return IP4_REASS_RC_INTERNAL_ERROR;
598       u32 data_len = ip4_full_reass_buffer_get_data_len (tmp);
  // trim_front skips the IP header plus any overlap already covered by a
  // previous range; trim_end drops trailing bytes beyond this range's data.
600 	ip4_header_bytes (ip) + ip4_full_reass_buffer_get_data_offset (tmp);
602 	vlib_buffer_length_in_chain (vm, tmp) - trim_front - data_len;
603       if (tmp_bi == reass->first_bi)
605 	  /* first buffer - keep ip4 header */
606 	  if (0 != ip4_full_reass_buffer_get_data_offset (tmp))
608 	      return IP4_REASS_RC_INTERNAL_ERROR;
611 	  trim_end = vlib_buffer_length_in_chain (vm, tmp) - data_len -
612 	    ip4_header_bytes (ip);
613 	  if (!(vlib_buffer_length_in_chain (vm, tmp) - trim_end > 0))
615 	      return IP4_REASS_RC_INTERNAL_ERROR;
619 	vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end;
  // Front-trim loop: whole leading buffers that are entirely overlap are
  // unlinked and freed; a partial overlap is consumed by advancing the
  // buffer's current_data.
625 	      if (trim_front > tmp->current_length)
627 		  /* drop whole buffer */
628 		  u32 to_be_freed_bi = tmp_bi;
629 		  trim_front -= tmp->current_length;
630 		  if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT))
632 		      return IP4_REASS_RC_INTERNAL_ERROR;
634 		  tmp->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
635 		  tmp_bi = tmp->next_buffer;
636 		  tmp->next_buffer = 0;
637 		  tmp = vlib_get_buffer (vm, tmp_bi);
638 		  vlib_buffer_free_one (vm, to_be_freed_bi);
643 		  vlib_buffer_advance (tmp, trim_front);
  // Link this range's first kept buffer onto the tail of the assembled
  // chain.
651 	      last_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
652 	      last_b->next_buffer = tmp_bi;
  // keep_data walk: truncate the last useful buffer of the range, then
  // free any leftover buffers beyond it.
655 	  if (keep_data <= tmp->current_length)
657 	      tmp->current_length = keep_data;
662 	      keep_data -= tmp->current_length;
663 	      if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT))
665 		  return IP4_REASS_RC_INTERNAL_ERROR;
668 	  total_length += tmp->current_length;
669 	  if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT)
671 	      tmp_bi = tmp->next_buffer;
672 	      tmp = vlib_get_buffer (vm, tmp->next_buffer);
  // Free trailing buffers past keep_data; freeing the first buffer here
  // would be a bug, hence the guard.
681 	      u32 to_be_freed_bi = tmp_bi;
682 	      if (reass->first_bi == tmp_bi)
684 		  return IP4_REASS_RC_INTERNAL_ERROR;
686 	      if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT)
688 		  tmp->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
689 		  tmp_bi = tmp->next_buffer;
690 		  tmp->next_buffer = 0;
691 		  tmp = vlib_get_buffer (vm, tmp_bi);
692 		  vlib_buffer_free_one (vm, to_be_freed_bi);
696 		  tmp->next_buffer = 0;
697 		  vlib_buffer_free_one (vm, to_be_freed_bi);
  // Advance to the next range sub-chain.
703 	vnet_buffer (vlib_get_buffer (vm, sub_chain_bi))->ip.
706   while (~0 != sub_chain_bi);
710       return IP4_REASS_RC_INTERNAL_ERROR;
712   last_b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
714   if (total_length < first_b->current_length)
716       return IP4_REASS_RC_INTERNAL_ERROR;
718   total_length -= first_b->current_length;
719   first_b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
720   first_b->total_length_not_including_first_buffer = total_length;
  // Rewrite the header: no fragment flags/offset, full datagram length,
  // fresh checksum.
721   ip4_header_t *ip = vlib_buffer_get_current (first_b);
722   ip->flags_and_fragment_offset = 0;
723   ip->length = clib_host_to_net_u16 (first_b->current_length + total_length);
724   ip->checksum = ip4_header_checksum (ip);
725   if (!vlib_buffer_chain_linearize (vm, first_b))
727       return IP4_REASS_RC_NO_BUF;
729   // reset to reconstruct the mbuf linking
730   first_b->flags &= ~VLIB_BUFFER_EXT_HDR_VALID;
731   if (PREDICT_FALSE (first_b->flags & VLIB_BUFFER_IS_TRACED))
733       ip4_full_reass_add_trace (vm, node, reass, reass->first_bi, FINALIZE, 0,
736       // following code does a hexdump of packet fragments to stdout ...
739       u32 bi = reass->first_bi;
743 	  vlib_buffer_t *b = vlib_get_buffer (vm, bi);
744 	  s = format (s, "%u: %U\n", bi, format_hexdump,
745 		      vlib_buffer_get_current (b), b->current_length);
746 	  if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
755       printf ("%.*s\n", vec_len (s), s);
  // Emit the reassembled packet; custom apps get their registered
  // next_index, others go back to ip4-input.
762   *bi0 = reass->first_bi;
765       *next0 = IP4_FULL_REASS_NEXT_INPUT;
769       *next0 = reass->next_index;
771   vnet_buffer (first_b)->ip.reass.estimated_mtu = reass->min_fragment_length;
772   *error0 = IP4_ERROR_NONE;
773   ip4_full_reass_free (rm, rt, reass);
775   return IP4_REASS_RC_OK;
// Insert buffer new_next_bi into the reassembly's sorted range list after
// prev_range_bi (~0 = insert at head, becoming the new first_bi), and add
// its usable payload length to reass->data_len.
778 always_inline ip4_full_reass_rc_t
779 ip4_full_reass_insert_range_in_chain (vlib_main_t * vm,
780 				      ip4_full_reass_t * reass,
781 				      u32 prev_range_bi, u32 new_next_bi)
783   vlib_buffer_t *new_next_b = vlib_get_buffer (vm, new_next_bi);
784   vnet_buffer_opaque_t *new_next_vnb = vnet_buffer (new_next_b);
785   if (~0 != prev_range_bi)
787       vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_range_bi);
788       vnet_buffer_opaque_t *prev_vnb = vnet_buffer (prev_b);
789       new_next_vnb->ip.reass.next_range_bi = prev_vnb->ip.reass.next_range_bi;
790       prev_vnb->ip.reass.next_range_bi = new_next_bi;
  // Head insertion: the new range points at the old head (if any).
794       if (~0 != reass->first_bi)
796 	  new_next_vnb->ip.reass.next_range_bi = reass->first_bi;
798       reass->first_bi = new_next_bi;
  // Same range/fragment invariant check as in finalize.
800   vnet_buffer_opaque_t *vnb = vnet_buffer (new_next_b);
801   if (!(vnb->ip.reass.range_first >= vnb->ip.reass.fragment_first) &&
802       !(vnb->ip.reass.range_last > vnb->ip.reass.fragment_first))
804       return IP4_REASS_RC_INTERNAL_ERROR;
806   reass->data_len += ip4_full_reass_buffer_get_data_len (new_next_b);
807   return IP4_REASS_RC_OK;
// Unlink range discard_bi from the range list (prev_range_bi ~0 means it is
// the head), subtract its payload from reass->data_len, then free every
// buffer in the discarded range's chain.
810 always_inline ip4_full_reass_rc_t
811 ip4_full_reass_remove_range_from_chain (vlib_main_t * vm,
812 					vlib_node_runtime_t * node,
813 					ip4_full_reass_t * reass,
814 					u32 prev_range_bi, u32 discard_bi)
816   vlib_buffer_t *discard_b = vlib_get_buffer (vm, discard_bi);
817   vnet_buffer_opaque_t *discard_vnb = vnet_buffer (discard_b);
818   if (~0 != prev_range_bi)
820       vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_range_bi);
821       vnet_buffer_opaque_t *prev_vnb = vnet_buffer (prev_b);
  // Caller must pass the true predecessor; anything else is a list
  // corruption.
822       if (!(prev_vnb->ip.reass.next_range_bi == discard_bi))
824 	  return IP4_REASS_RC_INTERNAL_ERROR;
826       prev_vnb->ip.reass.next_range_bi = discard_vnb->ip.reass.next_range_bi;
830       reass->first_bi = discard_vnb->ip.reass.next_range_bi;
832   vnet_buffer_opaque_t *vnb = vnet_buffer (discard_b);
833   if (!(vnb->ip.reass.range_first >= vnb->ip.reass.fragment_first) &&
834       !(vnb->ip.reass.range_last > vnb->ip.reass.fragment_first))
836       return IP4_REASS_RC_INTERNAL_ERROR;
838   reass->data_len -= ip4_full_reass_buffer_get_data_len (discard_b);
  // Free loop: walk the buffer chain, tracing each discard, breaking
  // next-links before freeing each element.
841       u32 to_be_freed_bi = discard_bi;
842       if (PREDICT_FALSE (discard_b->flags & VLIB_BUFFER_IS_TRACED))
844 	  ip4_full_reass_add_trace (vm, node, reass, discard_bi, RANGE_DISCARD,
847       if (discard_b->flags & VLIB_BUFFER_NEXT_PRESENT)
849 	  discard_b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
850 	  discard_bi = discard_b->next_buffer;
851 	  discard_b->next_buffer = 0;
852 	  discard_b = vlib_get_buffer (vm, discard_bi);
853 	  vlib_buffer_free_one (vm, to_be_freed_bi);
857 	  discard_b->next_buffer = 0;
858 	  vlib_buffer_free_one (vm, to_be_freed_bi);
862   return IP4_REASS_RC_OK;
// Core per-fragment state machine: place the fragment *bi0 into reass's
// sorted, non-overlapping range list (trimming/discarding overlapping
// ranges per the "new fragment wins" policy), and finalize the reassembly
// when all octets up to last_packet_octet are present.  On
// IP4_REASS_RC_HANDOFF, *handoff_thread_idx names the worker that must send
// the finished packet.  Duplicates fall through to the "duplicate fragment"
// drop at the end.
865 always_inline ip4_full_reass_rc_t
866 ip4_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
867 		       ip4_full_reass_main_t * rm,
868 		       ip4_full_reass_per_thread_t * rt,
869 		       ip4_full_reass_t * reass, u32 * bi0, u32 * next0,
870 		       u32 * error0, bool is_custom, u32 * handoff_thread_idx)
872   vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
873   vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
876   // store (error_)next_index before it's overwritten
877   reass->next_index = fvnb->ip.reass.next_index;
878   reass->error_next_index = fvnb->ip.reass.error_next_index;
880   ip4_full_reass_rc_t rc = IP4_REASS_RC_OK;
  // Derive this fragment's byte interval [fragment_first, fragment_last]
  // from the IPv4 header and cache it in the buffer opaque.
882   ip4_header_t *fip = vlib_buffer_get_current (fb);
883   const u32 fragment_first = ip4_get_fragment_offset_bytes (fip);
884   const u32 fragment_length =
885     clib_net_to_host_u16 (fip->length) - ip4_header_bytes (fip);
886   const u32 fragment_last = fragment_first + fragment_length - 1;
887   fvnb->ip.reass.fragment_first = fragment_first;
888   fvnb->ip.reass.fragment_last = fragment_last;
889   int more_fragments = ip4_get_fragment_more (fip);
890   u32 candidate_range_bi = reass->first_bi;
891   u32 prev_range_bi = ~0;
892   fvnb->ip.reass.range_first = fragment_first;
893   fvnb->ip.reass.range_last = fragment_last;
894   fvnb->ip.reass.next_range_bi = ~0;
  // Fragment with MF clear fixes the total datagram length (guard is in a
  // line not visible here — presumably !more_fragments).
897       reass->last_packet_octet = fragment_last;
899   if (~0 == reass->first_bi)
901       // starting a new reassembly
903 	ip4_full_reass_insert_range_in_chain (vm, reass, prev_range_bi, *bi0);
904       if (IP4_REASS_RC_OK != rc)
908       if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
910 	  ip4_full_reass_add_trace (vm, node, reass, *bi0, RANGE_NEW, 0, ~0);
  // First fragment seeds the MTU estimate with its total length.
913       reass->min_fragment_length = clib_net_to_host_u16 (fip->length);
914       reass->fragments_n = 1;
915       return IP4_REASS_RC_OK;
917   reass->min_fragment_length =
918     clib_min (clib_net_to_host_u16 (fip->length),
919 	      fvnb->ip.reass.estimated_mtu);
  // Walk the sorted ranges to find where this fragment fits.
920   while (~0 != candidate_range_bi)
922       vlib_buffer_t *candidate_b = vlib_get_buffer (vm, candidate_range_bi);
923       vnet_buffer_opaque_t *candidate_vnb = vnet_buffer (candidate_b);
924       if (fragment_first > candidate_vnb->ip.reass.range_last)
926 	  // this fragments starts after candidate range
927 	  prev_range_bi = candidate_range_bi;
928 	  candidate_range_bi = candidate_vnb->ip.reass.next_range_bi;
929 	  if (candidate_vnb->ip.reass.range_last < fragment_last &&
930 	      ~0 == candidate_range_bi)
932 	      // special case - this fragment falls beyond all known ranges
933 	      rc = ip4_full_reass_insert_range_in_chain (vm, reass,
934 							 prev_range_bi, *bi0);
935 	      if (IP4_REASS_RC_OK != rc)
944       if (fragment_last < candidate_vnb->ip.reass.range_first)
946 	  // this fragment ends before candidate range without any overlap
947 	  rc = ip4_full_reass_insert_range_in_chain (vm, reass, prev_range_bi,
949 	  if (IP4_REASS_RC_OK != rc)
957       if (fragment_first >= candidate_vnb->ip.reass.range_first &&
958 	  fragment_last <= candidate_vnb->ip.reass.range_last)
960 	  // this fragment is a (sub)part of existing range, ignore it
961 	  if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
963 	      ip4_full_reass_add_trace (vm, node, reass, *bi0,
964 					RANGE_OVERLAP, 0, ~0);
968       int discard_candidate = 0;
  // Partial overlap on the candidate's left edge: shrink the candidate by
  // the overlap and insert the new fragment before it; if the overlap
  // swallows the candidate entirely, discard it instead.
969       if (fragment_first < candidate_vnb->ip.reass.range_first)
972 	    fragment_last - candidate_vnb->ip.reass.range_first + 1;
973 	  if (overlap < ip4_full_reass_buffer_get_data_len (candidate_b))
975 	      candidate_vnb->ip.reass.range_first += overlap;
976 	      if (reass->data_len < overlap)
978 		  return IP4_REASS_RC_INTERNAL_ERROR;
980 	      reass->data_len -= overlap;
981 	      if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
983 		  ip4_full_reass_add_trace (vm, node, reass,
985 					    RANGE_SHRINK, 0, ~0);
987 	      rc = ip4_full_reass_insert_range_in_chain (
988 		vm, reass, prev_range_bi, *bi0);
989 	      if (IP4_REASS_RC_OK != rc)
997 	      discard_candidate = 1;
  // Partial overlap on the candidate's right edge: shrink the NEW
  // fragment's range instead and keep probing subsequent ranges.
1000       else if (fragment_last > candidate_vnb->ip.reass.range_last)
1003 	    candidate_vnb->ip.reass.range_last - fragment_first + 1;
1004 	  if (overlap < ip4_full_reass_buffer_get_data_len (candidate_b))
1006 	      fvnb->ip.reass.range_first += overlap;
1007 	      if (~0 != candidate_vnb->ip.reass.next_range_bi)
1009 		  prev_range_bi = candidate_range_bi;
1010 		  candidate_range_bi =
1011 		    candidate_vnb->ip.reass.next_range_bi;
1016 		  // special case - last range discarded
1017 		  rc = ip4_full_reass_insert_range_in_chain (
1018 		    vm, reass, candidate_range_bi, *bi0);
1019 		  if (IP4_REASS_RC_OK != rc)
1028 	      discard_candidate = 1;
1033 	  discard_candidate = 1;
1035       if (discard_candidate)
1037 	  u32 next_range_bi = candidate_vnb->ip.reass.next_range_bi;
1038 	  // discard candidate range, probe next range
1039 	  rc = ip4_full_reass_remove_range_from_chain (
1040 	    vm, node, reass, prev_range_bi, candidate_range_bi);
1041 	  if (IP4_REASS_RC_OK != rc)
1045 	  if (~0 != next_range_bi)
1047 	      candidate_range_bi = next_range_bi;
1052 	      // special case - last range discarded
1053 	      rc = ip4_full_reass_insert_range_in_chain (
1054 		vm, reass, prev_range_bi, *bi0);
1055 	      if (IP4_REASS_RC_OK != rc)
1065   ++reass->fragments_n;
1068   if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
1070       ip4_full_reass_add_trace (vm, node, reass, *bi0, RANGE_NEW, 0, ~0);
  // Complete when the total length is known and every octet is accounted
  // for; finalize here (possibly flagging a handoff to the sendout thread).
1073   if (~0 != reass->last_packet_octet &&
1074       reass->data_len == reass->last_packet_octet + 1)
1076       *handoff_thread_idx = reass->sendout_thread_index;
1078 	reass->memory_owner_thread_index != reass->sendout_thread_index;
1080 	ip4_full_reass_finalize (vm, node, rm, rt, reass, bi0, next0, error0,
1082       if (IP4_REASS_RC_OK == rc && handoff)
1084 	  rc = IP4_REASS_RC_HANDOFF;
  // Not complete yet: bound the fragment count per reassembly.
1092       if (reass->fragments_n > rm->max_reass_len)
1094 	  rc = IP4_REASS_RC_TOO_MANY_FRAGMENTS;
  // Duplicate-fragment exit path (reached from the overlap-ignore case).
1099   *next0 = IP4_FULL_REASS_NEXT_DROP;
1100   *error0 = IP4_ERROR_REASS_DUPLICATE_FRAGMENT;
// Shared dual-loop worker for all four reassembly node flavors (NORMAL /
// FEATURE / CUSTOM, plus the is_local variant).  Non-fragments pass
// straight through; malformed fragments are dropped; otherwise the fragment
// is matched to (or creates) a reassembly context, possibly handed off to
// the owning worker, and fed through ip4_full_reass_update().  The whole
// frame runs under the per-thread spinlock.
1107 ip4_full_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
1108 		       vlib_frame_t *frame, ip4_full_reass_node_type_t type,
1111   u32 *from = vlib_frame_vector_args (frame);
1112   u32 n_left_from, n_left_to_next, *to_next, next_index;
1113   ip4_full_reass_main_t *rm = &ip4_full_reass_main;
1114   ip4_full_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
1115   clib_spinlock_lock (&rt->lock);
1117   n_left_from = frame->n_vectors;
1118   next_index = node->cached_next_index;
1119   while (n_left_from > 0)
1121       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1123       while (n_left_from > 0 && n_left_to_next > 0)
1128 	  u32 error0 = IP4_ERROR_NONE;
1131 	  b0 = vlib_get_buffer (vm, bi0);
1133 	  ip4_header_t *ip0 = vlib_buffer_get_current (b0);
1134 	  if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
1136 	      // this is a whole packet - no fragmentation
1139 		  next0 = IP4_FULL_REASS_NEXT_INPUT;
  // CUSTOM nodes forward unfragmented packets to the app-chosen next.
1143 		  next0 = vnet_buffer (b0)->ip.reass.next_index;
1145 		ip4_full_reass_add_trace (vm, node, NULL, bi0, PASSTHROUGH, 0,
1147 	      goto packet_enqueue;
  // ip4-local fragments are dropped outright when local reassembly is
  // administratively disabled.
1150 	  if (is_local && !rm->is_local_reass_enabled)
1152 	      next0 = IP4_FULL_REASS_NEXT_DROP;
1153 	      goto packet_enqueue;
  // Sanity-check fragment geometry before any state is touched.
1156 	  const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
1157 	  const u32 fragment_length =
1158 	    clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
1159 	  const u32 fragment_last = fragment_first + fragment_length - 1;
1160 	  if (fragment_first > fragment_last || fragment_first + fragment_length > UINT16_MAX - 20 || (fragment_length < 8 && ip4_get_fragment_more (ip0))) // 8 is minimum frag length per RFC 791
1162 	      next0 = IP4_FULL_REASS_NEXT_DROP;
1163 	      error0 = IP4_ERROR_REASS_MALFORMED_PACKET;
1164 	      goto packet_enqueue;
  // Reassembly key: fib index + src (low 64 bits), dst + fragment id +
  // protocol (high 64 bits).
1166 	  ip4_full_reass_kv_t kv;
1170 	    (u64) vec_elt (ip4_main.fib_index_by_sw_if_index,
1171 			   vnet_buffer (b0)->sw_if_index[VLIB_RX]) |
1172 	    (u64) ip0->src_address.as_u32 << 32;
1174 	    (u64) ip0->dst_address.
1175 	    as_u32 | (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48;
1177 	  ip4_full_reass_t *reass =
1178 	    ip4_full_reass_find_or_create (vm, node, rm, rt, &kv,
  // The worker that sees offset-0 becomes the sendout thread for the
  // finished packet.
1183 	      const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
1184 	      if (0 == fragment_first)
1186 		  reass->sendout_thread_index = vm->thread_index;
1190 	  if (PREDICT_FALSE (do_handoff))
1192 	      next0 = IP4_FULL_REASS_NEXT_HANDOFF;
1193 	      vnet_buffer (b0)->ip.reass.owner_thread_index =
1194 		kv.v.memory_owner_thread_index;
1198 	      u32 handoff_thread_idx;
1200 	      switch (ip4_full_reass_update
1201 		      (vm, node, rm, rt, reass, &bi0, &next0,
1202 		       &error0, CUSTOM == type, &handoff_thread_idx))
1204 		case IP4_REASS_RC_OK:
1205 		  /* nothing to do here */
1207 		case IP4_REASS_RC_HANDOFF:
1208 		  next0 = IP4_FULL_REASS_NEXT_HANDOFF;
1209 		  b0 = vlib_get_buffer (vm, bi0);
1210 		  vnet_buffer (b0)->ip.reass.owner_thread_index =
  // All error return codes bump a counter, drop the whole reassembly and
  // free its context.
1213 		case IP4_REASS_RC_TOO_MANY_FRAGMENTS:
1214 		  counter = IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
1216 		case IP4_REASS_RC_NO_BUF:
1217 		  counter = IP4_ERROR_REASS_NO_BUF;
1219 		case IP4_REASS_RC_INTERNAL_ERROR:
1220 		  counter = IP4_ERROR_REASS_INTERNAL_ERROR;
1226 		  vlib_node_increment_counter (vm, node->node_index, counter,
1228 		  ip4_full_reass_drop_all (vm, node, reass, bi0);
1229 		  ip4_full_reass_free (rm, rt, reass);
  // find_or_create returned NULL: per-thread reassembly limit reached.
1235 	      next0 = IP4_FULL_REASS_NEXT_DROP;
1236 	      error0 = IP4_ERROR_REASS_LIMIT_REACHED;
1246 	  n_left_to_next -= 1;
1248 	  /* bi0 might have been updated by reass_finalize, reload */
1249 	  b0 = vlib_get_buffer (vm, bi0);
1250 	  if (IP4_ERROR_NONE != error0)
1252 	      b0->error = node->errors[error0];
1255 	  if (next0 == IP4_FULL_REASS_NEXT_HANDOFF)
1257 	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
1259 		  ip4_full_reass_add_trace (
1260 		    vm, node, NULL, bi0, HANDOFF, 0,
1261 		    vnet_buffer (b0)->ip.reass.owner_thread_index);
  // Feature nodes advance along the ip4-unicast arc on success.
1264 	  else if (FEATURE == type && IP4_ERROR_NONE == error0)
1266 	      vnet_feature_next (&next0, b0);
1268 	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1269 					   to_next, n_left_to_next,
1271 	  IP4_REASS_DEBUG_BUFFER (bi0, enqueue_next);
1279       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1282   clib_spinlock_unlock (&rt->lock);
1283   return frame->n_vectors;
1286 static char *ip4_full_reass_error_strings[] = {
1287 #define _(sym, string) string,
1292 VLIB_NODE_FN (ip4_full_reass_node) (vlib_main_t * vm,
1293 vlib_node_runtime_t * node,
1294 vlib_frame_t * frame)
1296 return ip4_full_reass_inline (vm, node, frame, NORMAL, false /* is_local */);
1299 VLIB_REGISTER_NODE (ip4_full_reass_node) = {
1300 .name = "ip4-full-reassembly",
1301 .vector_size = sizeof (u32),
1302 .format_trace = format_ip4_full_reass_trace,
1303 .n_errors = ARRAY_LEN (ip4_full_reass_error_strings),
1304 .error_strings = ip4_full_reass_error_strings,
1305 .n_next_nodes = IP4_FULL_REASS_N_NEXT,
1308 [IP4_FULL_REASS_NEXT_INPUT] = "ip4-input",
1309 [IP4_FULL_REASS_NEXT_DROP] = "ip4-drop",
1310 [IP4_FULL_REASS_NEXT_HANDOFF] = "ip4-full-reassembly-handoff",
1315 VLIB_NODE_FN (ip4_local_full_reass_node)
1316 (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1318 return ip4_full_reass_inline (vm, node, frame, NORMAL, true /* is_local */);
1321 VLIB_REGISTER_NODE (ip4_local_full_reass_node) = {
1322 .name = "ip4-local-full-reassembly",
1323 .vector_size = sizeof (u32),
1324 .format_trace = format_ip4_full_reass_trace,
1325 .n_errors = ARRAY_LEN (ip4_full_reass_error_strings),
1326 .error_strings = ip4_full_reass_error_strings,
1327 .n_next_nodes = IP4_FULL_REASS_N_NEXT,
1330 [IP4_FULL_REASS_NEXT_INPUT] = "ip4-input",
1331 [IP4_FULL_REASS_NEXT_DROP] = "ip4-drop",
1332 [IP4_FULL_REASS_NEXT_HANDOFF] = "ip4-local-full-reassembly-handoff",
1337 VLIB_NODE_FN (ip4_full_reass_node_feature) (vlib_main_t * vm,
1338 vlib_node_runtime_t * node,
1339 vlib_frame_t * frame)
1341 return ip4_full_reass_inline (vm, node, frame, FEATURE,
1342 false /* is_local */);
1345 VLIB_REGISTER_NODE (ip4_full_reass_node_feature) = {
1346 .name = "ip4-full-reassembly-feature",
1347 .vector_size = sizeof (u32),
1348 .format_trace = format_ip4_full_reass_trace,
1349 .n_errors = ARRAY_LEN (ip4_full_reass_error_strings),
1350 .error_strings = ip4_full_reass_error_strings,
1351 .n_next_nodes = IP4_FULL_REASS_N_NEXT,
1354 [IP4_FULL_REASS_NEXT_INPUT] = "ip4-input",
1355 [IP4_FULL_REASS_NEXT_DROP] = "ip4-drop",
1356 [IP4_FULL_REASS_NEXT_HANDOFF] = "ip4-full-reass-feature-hoff",
1360 VNET_FEATURE_INIT (ip4_full_reass_feature, static) = {
1361 .arc_name = "ip4-unicast",
1362 .node_name = "ip4-full-reassembly-feature",
1363 .runs_before = VNET_FEATURES ("ip4-lookup",
1364 "ipsec4-input-feature"),
1368 VLIB_NODE_FN (ip4_full_reass_node_custom) (vlib_main_t * vm,
1369 vlib_node_runtime_t * node,
1370 vlib_frame_t * frame)
1372 return ip4_full_reass_inline (vm, node, frame, CUSTOM, false /* is_local */);
1375 VLIB_REGISTER_NODE (ip4_full_reass_node_custom) = {
1376 .name = "ip4-full-reassembly-custom",
1377 .vector_size = sizeof (u32),
1378 .format_trace = format_ip4_full_reass_trace,
1379 .n_errors = ARRAY_LEN (ip4_full_reass_error_strings),
1380 .error_strings = ip4_full_reass_error_strings,
1381 .n_next_nodes = IP4_FULL_REASS_N_NEXT,
1384 [IP4_FULL_REASS_NEXT_INPUT] = "ip4-input",
1385 [IP4_FULL_REASS_NEXT_DROP] = "ip4-drop",
1386 [IP4_FULL_REASS_NEXT_HANDOFF] = "ip4-full-reass-custom-hoff",
// NOTE(review): this registration duplicates ip4_full_reass_feature above
// byte-for-byte (same arc, same node_name "ip4-full-reassembly-feature",
// same runs_before), despite being named for the custom node.  Looks like a
// copy-paste error — confirm whether node_name should reference the custom
// node or this init should be removed.
1390 VNET_FEATURE_INIT (ip4_full_reass_custom, static) = {
1391     .arc_name = "ip4-unicast",
1392     .node_name = "ip4-full-reassembly-feature",
1393     .runs_before = VNET_FEATURES ("ip4-lookup",
1394 				  "ipsec4-input-feature"),
1399 #ifndef CLIB_MARCH_VARIANT
// Public API: register an additional next node for the custom reassembly
// node and return its next index (apps pass it in
// vnet_buffer()->ip.reass.next_index).
1401 ip4_full_reass_custom_register_next_node (uword node_index)
1403   return vlib_node_add_next (vlib_get_main (),
1404 			     ip4_full_reass_node_custom.index, node_index);
// Size the bihash: target load factor applied to max_reass_n, then rounded
// up to the next power of two (31-bit search loop).
1408 ip4_full_reass_get_nbuckets ()
1410   ip4_full_reass_main_t *rm = &ip4_full_reass_main;
1414   nbuckets = (u32) (rm->max_reass_n / IP4_REASS_HT_LOAD_FACTOR);
1416   for (i = 0; i < 31; i++)
1417     if ((1 << i) >= nbuckets)
1427 IP4_EVENT_CONFIG_CHANGED = 1,
1428 } ip4_full_reass_event_t;
/* Context passed to ip4_rehash_cb while copying entries into a resized hash. */
1433 clib_bihash_16_8_t *new_hash;
1434 } ip4_rehash_cb_ctx;
#ifndef CLIB_MARCH_VARIANT
/* bihash walk callback: copy one key/value pair into the new (resized)
 * table; a non-zero add_del return indicates the insert failed. */
1438 ip4_rehash_cb (clib_bihash_kv_16_8_t * kv, void *_ctx)
1440 ip4_rehash_cb_ctx *ctx = _ctx;
1441 if (clib_bihash_add_del_16_8 (ctx->new_hash, kv, 1))
1445 return (BIHASH_WALK_CONTINUE);
/* Store the runtime tunables in the main struct. timeout is cached both in
 * ms (for show/get) and in seconds (f64, used by the expire walk). */
1449 ip4_full_reass_set_params (u32 timeout_ms, u32 max_reassemblies,
1450 u32 max_reassembly_length,
1451 u32 expire_walk_interval_ms)
1453 ip4_full_reass_main.timeout_ms = timeout_ms;
1454 ip4_full_reass_main.timeout = (f64) timeout_ms / (f64) MSEC_PER_SEC;
1455 ip4_full_reass_main.max_reass_n = max_reassemblies;
1456 ip4_full_reass_main.max_reass_len = max_reassembly_length;
1457 ip4_full_reass_main.expire_walk_interval_ms = expire_walk_interval_ms;
/* Public setter: apply new parameters, wake the expire-walk process so it
 * picks up the new interval, and grow the lookup hash if the new
 * max_reassemblies requires more buckets (entries are copied via
 * ip4_rehash_cb; on copy failure the new table is freed and the old one
 * kept -- see the branch around the clib_bihash_free below). */
1464 u32 old_nbuckets = ip4_full_reass_get_nbuckets ();
1465 ip4_full_reass_set_params (timeout_ms, max_reassemblies,
1466 max_reassembly_length, expire_walk_interval_ms);
1467 vlib_process_signal_event (ip4_full_reass_main.vlib_main,
1468 ip4_full_reass_main.ip4_full_reass_expire_node_idx,
1469 IP4_EVENT_CONFIG_CHANGED, 0);
1470 u32 new_nbuckets = ip4_full_reass_get_nbuckets ();
1471 if (ip4_full_reass_main.max_reass_n > 0 && new_nbuckets > old_nbuckets)
1473 clib_bihash_16_8_t new_hash;
1474 clib_memset (&new_hash, 0, sizeof (new_hash));
1475 ip4_rehash_cb_ctx ctx;
1477 ctx.new_hash = &new_hash;
1478 clib_bihash_init_16_8 (&new_hash, "ip4-dr", new_nbuckets,
1479 new_nbuckets * 1024);
1480 clib_bihash_foreach_key_value_pair_16_8 (&ip4_full_reass_main.hash,
1481 ip4_rehash_cb, &ctx);
/* failure path: discard the half-built table, keep the old hash */
1484 clib_bihash_free_16_8 (&new_hash);
/* success path: swap the new table in place of the old one */
1489 clib_bihash_free_16_8 (&ip4_full_reass_main.hash);
1490 clib_memcpy_fast (&ip4_full_reass_main.hash, &new_hash,
1491 sizeof (ip4_full_reass_main.hash));
1492 clib_bihash_copied (&ip4_full_reass_main.hash, &new_hash);
/* Public getter: copy the current tunables into the caller's out params. */
1499 ip4_full_reass_get (u32 * timeout_ms, u32 * max_reassemblies,
1500 u32 * max_reassembly_length,
1501 u32 * expire_walk_interval_ms)
1503 *timeout_ms = ip4_full_reass_main.timeout_ms;
1504 *max_reassemblies = ip4_full_reass_main.max_reass_n;
1505 *max_reassembly_length = ip4_full_reass_main.max_reass_len;
1506 *expire_walk_interval_ms = ip4_full_reass_main.expire_walk_interval_ms;
/* Plugin init: allocate per-thread reassembly pools (one per worker plus
 * main), apply default tunables, create the lookup hash, and set up one
 * frame queue per reassembly node flavor for thread handoff. */
static clib_error_t *
1511 ip4_full_reass_init_function (vlib_main_t * vm)
1513 ip4_full_reass_main_t *rm = &ip4_full_reass_main;
1514 clib_error_t *error = 0;
/* index 0 is the main thread; workers are 1..vlib_num_workers() */
1520 vec_validate (rm->per_thread_data, vlib_num_workers ());
1521 ip4_full_reass_per_thread_t *rt;
1522 vec_foreach (rt, rm->per_thread_data)
1524 clib_spinlock_init (&rt->lock);
1525 pool_alloc (rt->pool, rm->max_reass_n);
1528 node = vlib_get_node_by_name (vm, (u8 *) "ip4-full-reassembly-expire-walk");
1530 rm->ip4_full_reass_expire_node_idx = node->index;
1532 ip4_full_reass_set_params (IP4_REASS_TIMEOUT_DEFAULT_MS,
1533 IP4_REASS_MAX_REASSEMBLIES_DEFAULT,
1534 IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT,
1535 IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS);
1537 nbuckets = ip4_full_reass_get_nbuckets ();
1538 clib_bihash_init_16_8 (&rm->hash, "ip4-dr", nbuckets, nbuckets * 1024);
/* one handoff frame queue per node flavor (normal/local/feature/custom) */
1540 rm->fq_index = vlib_frame_queue_main_init (ip4_full_reass_node.index, 0);
1541 rm->fq_local_index =
1542 vlib_frame_queue_main_init (ip4_local_full_reass_node.index, 0);
1543 rm->fq_feature_index =
1544 vlib_frame_queue_main_init (ip4_full_reass_node_feature.index, 0);
1545 rm->fq_custom_index =
1546 vlib_frame_queue_main_init (ip4_full_reass_node_custom.index, 0);
1548 rm->feature_use_refcount_per_intf = NULL;
/* reassembly of ip4-local traffic is on by default */
1549 rm->is_local_reass_enabled = 1;
1554 VLIB_INIT_FUNCTION (ip4_full_reass_init_function);
1555 #endif /* CLIB_MARCH_VARIANT */
/* Process node body: sleep for the configured walk interval (or until a
 * config-changed event), then for every thread drop and free all
 * reassemblies whose last_heard is older than the timeout. Indexes to free
 * are collected first, then freed, so the pool is not mutated while
 * pool_foreach_index iterates it. */
1558 ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
1559 CLIB_UNUSED (vlib_frame_t *f))
1561 ip4_full_reass_main_t *rm = &ip4_full_reass_main;
1562 uword event_type, *event_data = 0;
1566 vlib_process_wait_for_event_or_clock (vm,
1568 rm->expire_walk_interval_ms /
1569 (f64) MSEC_PER_SEC);
1570 event_type = vlib_process_get_events (vm, &event_data);
1575 /* no events => timeout */
1577 case IP4_EVENT_CONFIG_CHANGED:
1578 /* nothing to do here */
1581 clib_warning ("BUG: event type 0x%wx", event_type);
1584 f64 now = vlib_time_now (vm);
1586 ip4_full_reass_t *reass;
1587 int *pool_indexes_to_free = NULL;
1589 uword thread_index = 0;
/* walk main thread (0) plus every worker */
1591 const uword nthreads = vlib_num_workers () + 1;
1592 for (thread_index = 0; thread_index < nthreads; ++thread_index)
1594 ip4_full_reass_per_thread_t *rt =
1595 &rm->per_thread_data[thread_index];
1596 clib_spinlock_lock (&rt->lock);
1598 vec_reset_length (pool_indexes_to_free);
1599 pool_foreach_index (index, rt->pool) {
1600 reass = pool_elt_at_index (rt->pool, index);
1601 if (now > reass->last_heard + rm->timeout)
1603 vec_add1 (pool_indexes_to_free, index);
1607 vec_foreach (i, pool_indexes_to_free)
1609 ip4_full_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
1610 ip4_full_reass_drop_all (vm, node, reass, ~0);
1611 ip4_full_reass_free (rm, rt, reass);
1614 clib_spinlock_unlock (&rt->lock);
1617 vec_free (pool_indexes_to_free);
1620 vec_set_len (event_data, 0);
/* Registration of the expire walk as a process node.
 * NOTE(review): format_trace on a process node is unusual (process nodes do
 * not trace buffers) -- presumably harmless, but worth confirming. */
1627 VLIB_REGISTER_NODE (ip4_full_reass_expire_node) = {
1628 .function = ip4_full_reass_walk_expired,
1629 .type = VLIB_NODE_TYPE_PROCESS,
1630 .name = "ip4-full-reassembly-expire-walk",
1631 .format_trace = format_ip4_full_reass_trace,
1632 .n_errors = ARRAY_LEN (ip4_full_reass_error_strings),
1633 .error_strings = ip4_full_reass_error_strings,
/* format() helper: render a reassembly key (addresses, frag id in host
 * order, protocol) for CLI/trace output. */
1638 format_ip4_full_reass_key (u8 * s, va_list * args)
1640 ip4_full_reass_key_t *key = va_arg (*args, ip4_full_reass_key_t *);
1643 "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
1644 key->xx_id, format_ip4_address, &key->src, format_ip4_address,
1645 &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
/* format() helper: render one reassembly context, then walk its buffer
 * chain from first_bi via next_buffer, printing each fragment's range. */
1650 format_ip4_reass (u8 * s, va_list * args)
1652 vlib_main_t *vm = va_arg (*args, vlib_main_t *);
1653 ip4_full_reass_t *reass = va_arg (*args, ip4_full_reass_t *);
1655 s = format (s, "ID: %lu, key: %U\n first_bi: %u, data_len: %u, "
1656 "last_packet_octet: %u, trace_op_counter: %u\n",
1657 reass->id, format_ip4_full_reass_key, &reass->key,
1658 reass->first_bi, reass->data_len,
1659 reass->last_packet_octet, reass->trace_op_counter);
1661 u32 bi = reass->first_bi;
1665 vlib_buffer_t *b = vlib_get_buffer (vm, bi);
1666 vnet_buffer_opaque_t *vnb = vnet_buffer (b);
1669 " #%03u: range: [%u, %u], bi: %u, off: %d, len: %u, "
1670 "fragment[%u, %u]\n", counter, vnb->ip.reass.range_first,
1671 vnb->ip.reass.range_last, bi,
1672 ip4_full_reass_buffer_get_data_offset (b),
1673 ip4_full_reass_buffer_get_data_len (b),
1674 vnb->ip.reass.fragment_first, vnb->ip.reass.fragment_last);
/* follow the chain while more buffers are linked */
1675 if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
1677 bi = b->next_buffer;
/* CLI handler for "show ip4-full-reassembly [details]": print the configured
 * limits and the current reassembly count summed over all threads; with
 * "details", also dump every in-progress reassembly (under each thread's
 * spinlock). */
static clib_error_t *
1688 show_ip4_reass (vlib_main_t * vm,
1689 unformat_input_t * input,
1690 CLIB_UNUSED (vlib_cli_command_t * lmd))
1692 ip4_full_reass_main_t *rm = &ip4_full_reass_main;
1694 vlib_cli_output (vm, "---------------------");
1695 vlib_cli_output (vm, "IP4 reassembly status");
1696 vlib_cli_output (vm, "---------------------");
1697 bool details = false;
1698 if (unformat (input, "details"))
1703 u32 sum_reass_n = 0;
1704 ip4_full_reass_t *reass;
1706 const uword nthreads = vlib_num_workers () + 1;
1707 for (thread_index = 0; thread_index < nthreads; ++thread_index)
1709 ip4_full_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
1710 clib_spinlock_lock (&rt->lock);
1713 pool_foreach (reass, rt->pool) {
1714 vlib_cli_output (vm, "%U", format_ip4_reass, vm, reass);
1717 sum_reass_n += rt->reass_n;
1718 clib_spinlock_unlock (&rt->lock);
1720 vlib_cli_output (vm, "---------------------");
1721 vlib_cli_output (vm, "Current full IP4 reassemblies count: %lu\n",
1722 (long unsigned) sum_reass_n);
1723 vlib_cli_output (vm,
1724 "Maximum configured concurrent full IP4 reassemblies per worker-thread: %lu\n",
1725 (long unsigned) rm->max_reass_n);
1726 vlib_cli_output (vm,
1727 "Maximum configured amount of fragments "
1728 "per full IP4 reassembly: %lu\n",
1729 (long unsigned) rm->max_reass_len);
1730 vlib_cli_output (vm,
1731 "Maximum configured full IP4 reassembly timeout: %lums\n",
1732 (long unsigned) rm->timeout_ms);
1733 vlib_cli_output (vm,
1734 "Maximum configured full IP4 reassembly expire walk interval: %lums\n",
1735 (long unsigned) rm->expire_walk_interval_ms);
/* CLI command registration for the handler above. */
1739 VLIB_CLI_COMMAND (show_ip4_full_reass_cmd, static) = {
1740 .path = "show ip4-full-reassembly",
1741 .short_help = "show ip4-full-reassembly [details]",
1742 .function = show_ip4_reass,
#ifndef CLIB_MARCH_VARIANT
/* Unconditionally enable/disable the reassembly feature on an interface
 * (no refcounting -- see ip4_full_reass_enable_disable_with_refcnt). */
1747 ip4_full_reass_enable_disable (u32 sw_if_index, u8 enable_disable)
1749 return vnet_feature_enable_disable ("ip4-unicast",
1750 "ip4-full-reassembly-feature",
1751 sw_if_index, enable_disable, 0, 0);
1753 #endif /* CLIB_MARCH_VARIANT */
/* Error counters for the handoff nodes, generated from one X-macro list so
 * enum values and strings stay in sync. */
#define foreach_ip4_full_reass_handoff_error \
_(CONGESTION_DROP, "congestion drop")
#define _(sym,str) IP4_FULL_REASS_HANDOFF_ERROR_##sym,
1763 foreach_ip4_full_reass_handoff_error
1765 IP4_FULL_REASS_HANDOFF_N_ERROR,
1766 } ip4_full_reass_handoff_error_t;
1768 static char *ip4_full_reass_handoff_error_strings[] = {
#define _(sym,string) string,
1770 foreach_ip4_full_reass_handoff_error
/* Per-packet trace record for handoff: which worker the buffer went to. */
1776 u32 next_worker_index;
1777 } ip4_full_reass_handoff_trace_t;
/* format() helper for handoff trace records. */
1780 format_ip4_full_reass_handoff_trace (u8 * s, va_list * args)
1782 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1783 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1784 ip4_full_reass_handoff_trace_t *t =
1785 va_arg (*args, ip4_full_reass_handoff_trace_t *);
1788 format (s, "ip4-full-reassembly-handoff: next-worker %d",
1789 t->next_worker_index);
/* Shared handoff implementation: pick the frame queue matching the node
 * flavor (normal/local/feature/custom), stamp each buffer's destination
 * thread from the opaque owner_thread_index, and enqueue the whole frame to
 * the owning workers. Buffers that cannot be enqueued are counted as
 * congestion drops. */
1795 ip4_full_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
1796 vlib_frame_t *frame,
1797 ip4_full_reass_node_type_t type,
1800 ip4_full_reass_main_t *rm = &ip4_full_reass_main;
1802 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1803 u32 n_enq, n_left_from, *from;
1804 u16 thread_indices[VLIB_FRAME_SIZE], *ti;
1807 from = vlib_frame_vector_args (frame);
1808 n_left_from = frame->n_vectors;
1809 vlib_get_buffers (vm, from, bufs, n_left_from);
1812 ti = thread_indices;
/* select the frame queue for this node flavor */
1819 fq_index = rm->fq_local_index;
1823 fq_index = rm->fq_index;
1827 fq_index = rm->fq_feature_index;
1830 fq_index = rm->fq_custom_index;
1833 clib_warning ("Unexpected `type' (%d)!", type);
1836 while (n_left_from > 0)
1838 ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index;
1841 ((node->flags & VLIB_NODE_FLAG_TRACE)
1842 && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
1844 ip4_full_reass_handoff_trace_t *t =
1845 vlib_add_trace (vm, node, b[0], sizeof (*t));
1846 t->next_worker_index = ti[0];
1853 n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from,
1854 thread_indices, frame->n_vectors, 1);
/* anything not enqueued was dropped by the frame queue: count it */
1856 if (n_enq < frame->n_vectors)
1857 vlib_node_increment_counter (vm, node->node_index,
1858 IP4_FULL_REASS_HANDOFF_ERROR_CONGESTION_DROP,
1859 frame->n_vectors - n_enq);
1860 return frame->n_vectors;
/* Handoff entry point for the normal (non-feature) reassembly path. */
1863 VLIB_NODE_FN (ip4_full_reass_handoff_node) (vlib_main_t * vm,
1864 vlib_node_runtime_t * node,
1865 vlib_frame_t * frame)
1867 return ip4_full_reass_handoff_node_inline (vm, node, frame, NORMAL,
1868 false /* is_local */);
/* Node registration for the normal-path handoff node. */
1872 VLIB_REGISTER_NODE (ip4_full_reass_handoff_node) = {
1873 .name = "ip4-full-reassembly-handoff",
1874 .vector_size = sizeof (u32),
1875 .n_errors = ARRAY_LEN(ip4_full_reass_handoff_error_strings),
1876 .error_strings = ip4_full_reass_handoff_error_strings,
1877 .format_trace = format_ip4_full_reass_handoff_trace,
/* Handoff entry point for the ip4-local reassembly path (NORMAL type, but
 * flagged is_local so the local frame queue is used). */
1886 VLIB_NODE_FN (ip4_local_full_reass_handoff_node)
1887 (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1889 return ip4_full_reass_handoff_node_inline (vm, node, frame, NORMAL,
1890 true /* is_local */);
/* Node registration for the local-path handoff node. */
1893 VLIB_REGISTER_NODE (ip4_local_full_reass_handoff_node) = {
1894 .name = "ip4-local-full-reassembly-handoff",
1895 .vector_size = sizeof (u32),
1896 .n_errors = ARRAY_LEN(ip4_full_reass_handoff_error_strings),
1897 .error_strings = ip4_full_reass_handoff_error_strings,
1898 .format_trace = format_ip4_full_reass_handoff_trace,
/* Handoff entry point for the feature-arc reassembly path. */
1907 VLIB_NODE_FN (ip4_full_reass_feature_handoff_node) (vlib_main_t * vm,
1908 vlib_node_runtime_t *
1910 vlib_frame_t * frame)
1912 return ip4_full_reass_handoff_node_inline (vm, node, frame, FEATURE,
1913 false /* is_local */);
/* Node registration for the feature-path handoff node. */
1916 VLIB_REGISTER_NODE (ip4_full_reass_feature_handoff_node) = {
1917 .name = "ip4-full-reass-feature-hoff",
1918 .vector_size = sizeof (u32),
1919 .n_errors = ARRAY_LEN(ip4_full_reass_handoff_error_strings),
1920 .error_strings = ip4_full_reass_handoff_error_strings,
1921 .format_trace = format_ip4_full_reass_handoff_trace,
/* Handoff entry point for the custom reassembly path. */
1930 VLIB_NODE_FN (ip4_full_reass_custom_handoff_node) (vlib_main_t * vm,
1931 vlib_node_runtime_t *
1933 vlib_frame_t * frame)
1935 return ip4_full_reass_handoff_node_inline (vm, node, frame, CUSTOM,
1936 false /* is_local */);
/* Node registration for the custom-path handoff node. */
1939 VLIB_REGISTER_NODE (ip4_full_reass_custom_handoff_node) = {
1940 .name = "ip4-full-reass-custom-hoff",
1941 .vector_size = sizeof (u32),
1942 .n_errors = ARRAY_LEN(ip4_full_reass_handoff_error_strings),
1943 .error_strings = ip4_full_reass_handoff_error_strings,
1944 .format_trace = format_ip4_full_reass_handoff_trace,
#ifndef CLIB_MARCH_VARIANT
/* Refcounted enable/disable: the vnet feature is only toggled on the
 * 0 -> 1 transition when enabling and the 1 -> 0 transition when disabling,
 * so multiple subsystems can share the feature on one interface. */
1955 ip4_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
1957 ip4_full_reass_main_t *rm = &ip4_full_reass_main;
1958 vec_validate (rm->feature_use_refcount_per_intf, sw_if_index);
/* enable path: first user turns the feature on */
1961 if (!rm->feature_use_refcount_per_intf[sw_if_index])
1963 ++rm->feature_use_refcount_per_intf[sw_if_index];
1964 return vnet_feature_enable_disable ("ip4-unicast",
1965 "ip4-full-reassembly-feature",
1966 sw_if_index, 1, 0, 0);
1968 ++rm->feature_use_refcount_per_intf[sw_if_index];
/* disable path: last user turns the feature off */
1972 --rm->feature_use_refcount_per_intf[sw_if_index];
1973 if (!rm->feature_use_refcount_per_intf[sw_if_index])
1974 return vnet_feature_enable_disable ("ip4-unicast",
1975 "ip4-full-reassembly-feature",
1976 sw_if_index, 0, 0, 0);
/* Toggle reassembly of ip4-local traffic (stores a boolean flag only). */
1982 ip4_local_full_reass_enable_disable (int enable)
1986 ip4_full_reass_main.is_local_reass_enabled = 1;
1990 ip4_full_reass_main.is_local_reass_enabled = 0;
/* Query whether ip4-local reassembly is currently enabled. */
1995 ip4_local_full_reass_enabled ()
1997 return ip4_full_reass_main.is_local_reass_enabled;
2003 * fd.io coding-style-patch-verification: ON
2006 * eval: (c-set-style "gnu")