2 * Copyright (c) 2017 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
18 * @brief IPv4 Shallow Virtual Reassembly.
20 * This file contains the source code for IPv4 Shallow Virtual reassembly.
23 #include <vppinfra/vec.h>
24 #include <vnet/vnet.h>
25 #include <vnet/ip/ip.h>
26 #include <vnet/ip/ip4_to_ip6.h>
27 #include <vppinfra/fifo.h>
28 #include <vppinfra/bihash_16_8.h>
29 #include <vnet/ip/reass/ip4_sv_reass.h>
/* Tunable defaults for the shallow-virtual (SV) reassembly feature:
 * reassembly context timeout, expiry-walk period, context pool sizing
 * and bihash load factor used to derive the bucket count. */
31 #define MSEC_PER_SEC 1000
32 #define IP4_SV_REASS_TIMEOUT_DEFAULT_MS 100
33 #define IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default
34 #define IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT 1024
35 #define IP4_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
36 #define IP4_SV_REASS_HT_LOAD_FACTOR (0.75)
/* Return codes produced by ip4_sv_reass_update(); the error branches map
 * to drop counters in the node dispatch function.
 * NOTE(review): the enum's opening lines are elided in this listing. */
41 IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS,
42 IP4_SV_REASS_RC_UNSUPP_IP_PROTO,
/* 16-byte bihash key/value pair used to look up an in-progress reassembly. */
79 clib_bihash_kv_16_8_t kv;
/* Per-flow reassembly context (one per fragmented 5-ish-tuple). */
85 ip4_sv_reass_key_t key;
86 // time when last packet was received
88 // internal id of this reassembly
90 // trace operation counter
92 // minimum fragment length for this reassembly - used to estimate MTU
93 u16 min_fragment_length;
94 // buffer indexes of buffers in this reassembly in chronological order -
95 // including overlaps and duplicate fragments
97 // set to true when this reassembly is completed
/* L4 metadata captured from the first fragment: ICMP type or TCP flags. */
101 u8 icmp_type_or_tcp_flags;
/* Per-worker state: context pool, spinlock guarding it, and (per the free/
 * find_or_create code below) LRU list heads for context eviction. */
116 ip4_sv_reass_t *pool;
119 clib_spinlock_t lock;
124 } ip4_sv_reass_per_thread_t;
/* Module-global state shared by all workers. */
131 u32 expire_walk_interval_ms;
132 // maximum number of fragments in one reassembly
134 // maximum number of reassemblies
138 clib_bihash_16_8_t hash;
140 ip4_sv_reass_per_thread_t *per_thread_data;
143 vlib_main_t *vlib_main;
144 vnet_main_t *vnet_main;
146 // node index of ip4-drop node
148 u32 ip4_sv_reass_expire_node_idx;
150 /** Worker handoff */
152 u32 fq_feature_index;
153 u32 fq_custom_context_index;
155 // reference count for enabling/disabling feature - per interface
156 u32 *feature_use_refcount_per_intf;
158 // reference count for enabling/disabling feature - per interface
159 u32 *output_feature_use_refcount_per_intf;
161 } ip4_sv_reass_main_t;
/* Single global module instance; the definition is emitted only in the
 * default (non-march-variant) translation unit to avoid duplicate symbols. */
163 extern ip4_sv_reass_main_t ip4_sv_reass_main;
165 #ifndef CLIB_MARCH_VARIANT
166 ip4_sv_reass_main_t ip4_sv_reass_main;
167 #endif /* CLIB_MARCH_VARIANT */
/* Next-node indices shared by all SV reassembly graph nodes. */
171 IP4_SV_REASSEMBLY_NEXT_INPUT,
172 IP4_SV_REASSEMBLY_NEXT_DROP,
173 IP4_SV_REASSEMBLY_NEXT_HANDOFF,
174 IP4_SV_REASSEMBLY_N_NEXT,
175 } ip4_sv_reass_next_t;
/* Operations recorded in the packet trace (see format_ip4_sv_reass_trace). */
179 REASS_FRAGMENT_CACHE,
181 REASS_FRAGMENT_FORWARD,
183 } ip4_sv_reass_trace_operation_e;
/* Per-packet trace record written by ip4_sv_reass_add_trace(). */
187 ip4_sv_reass_trace_operation_e action;
193 int l4_layer_truncated;
194 } ip4_sv_reass_trace_t;
196 extern vlib_node_registration_t ip4_sv_reass_node;
197 extern vlib_node_registration_t ip4_sv_reass_node_feature;
/* vlib trace formatter: renders an ip4_sv_reass_trace_t into a readable
 * one-line description of what happened to the packet. */
200 format_ip4_sv_reass_trace (u8 * s, va_list * args)
202 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
203 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
204 ip4_sv_reass_trace_t *t = va_arg (*args, ip4_sv_reass_trace_t *);
/* reass/op ids only exist for packets that belonged to a reassembly */
205 if (REASS_PASSTHROUGH != t->action)
207 s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id);
211 case REASS_FRAGMENT_CACHE:
212 s = format (s, "[cached]");
216 format (s, "[finish, ip proto=%u, src_port=%u, dst_port=%u]",
217 t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
218 clib_net_to_host_u16 (t->l4_dst_port));
220 case REASS_FRAGMENT_FORWARD:
222 format (s, "[forward, ip proto=%u, src_port=%u, dst_port=%u]",
223 t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
224 clib_net_to_host_u16 (t->l4_dst_port));
226 case REASS_PASSTHROUGH:
227 s = format (s, "[not-fragmented]");
/* truncation flag is appended regardless of the action taken */
230 if (t->l4_layer_truncated)
232 s = format (s, " [l4-layer-truncated]");
/* Record a trace entry for buffer bi describing the given reassembly
 * action; bumps the per-reassembly trace op counter. Skips (and clears
 * the traced flag) when the buffer's trace record was already recycled. */
238 ip4_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
239 ip4_sv_reass_t *reass, u32 bi,
240 ip4_sv_reass_trace_operation_e action, u32 ip_proto,
241 u16 l4_src_port, u16 l4_dst_port,
242 int l4_layer_truncated)
244 vlib_buffer_t *b = vlib_get_buffer (vm, bi);
245 if (pool_is_free_index
246 (vm->trace_main.trace_buffer_pool, vlib_buffer_get_trace_index (b)))
248 // this buffer's trace is gone
249 b->flags &= ~VLIB_BUFFER_IS_TRACED;
252 ip4_sv_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0]));
255 t->reass_id = reass->id;
256 t->op_id = reass->trace_op_counter;
257 ++reass->trace_op_counter;
260 t->ip_proto = ip_proto;
261 t->l4_src_port = l4_src_port;
262 t->l4_dst_port = l4_dst_port;
263 t->l4_layer_truncated = l4_layer_truncated;
/* NOTE(review): this printf-based dump looks like debug-only code guarded
 * by elided preprocessor lines - confirm against the full source. */
266 s = format (s, "%U", format_ip4_sv_reass_trace, NULL, NULL, t);
267 printf ("%.*s\n", vec_len (s), s);
269 vec_reset_length (s);
/* Tear down one reassembly context: remove its bihash entry, free all
 * cached buffers, unlink it from the per-thread LRU list (fixing up the
 * first/last heads), and return it to the pool.
 * Caller must hold rt->lock. */
275 ip4_sv_reass_free (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
276 ip4_sv_reass_per_thread_t * rt, ip4_sv_reass_t * reass)
278 clib_bihash_kv_16_8_t kv;
279 kv.key[0] = reass->key.as_u64[0];
280 kv.key[1] = reass->key.as_u64[1];
/* is_add=0 deletes the hash entry */
281 clib_bihash_add_del_16_8 (&rm->hash, &kv, 0);
282 vlib_buffer_free (vm, reass->cached_buffers,
283 vec_len (reass->cached_buffers));
284 vec_free (reass->cached_buffers);
285 reass->cached_buffers = NULL;
/* splice this context out of the doubly-linked LRU list (~0 == no link) */
286 if (~0 != reass->lru_prev)
288 ip4_sv_reass_t *lru_prev =
289 pool_elt_at_index (rt->pool, reass->lru_prev);
290 lru_prev->lru_next = reass->lru_next;
292 if (~0 != reass->lru_next)
294 ip4_sv_reass_t *lru_next =
295 pool_elt_at_index (rt->pool, reass->lru_next);
296 lru_next->lru_prev = reass->lru_prev;
298 if (rt->lru_first == reass - rt->pool)
300 rt->lru_first = reass->lru_next;
302 if (rt->lru_last == reass - rt->pool)
304 rt->lru_last = reass->lru_prev;
306 pool_put (rt->pool, reass);
/* Reset the mutable per-flow fields of a freshly allocated context. */
311 ip4_sv_reass_init (ip4_sv_reass_t * reass)
313 reass->cached_buffers = NULL;
314 reass->is_complete = false;
/* Look up the reassembly context for key *kv, or create one.
 * - If the entry belongs to another worker, sets *do_handoff (handoff path).
 * - An existing-but-expired context is freed and recreated.
 * - When the per-thread pool is full, the LRU-first context is evicted.
 * Returns the context (or NULL on paths elided from this listing).
 * Caller must hold rt->lock. */
317 always_inline ip4_sv_reass_t *
318 ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
319 ip4_sv_reass_per_thread_t * rt,
320 ip4_sv_reass_kv_t * kv, u8 * do_handoff)
322 ip4_sv_reass_t *reass = NULL;
323 f64 now = vlib_time_now (vm);
327 if (!clib_bihash_search_16_8 (&rm->hash, &kv->kv, &kv->kv))
329 if (vm->thread_index != kv->v.thread_index)
334 reass = pool_elt_at_index (rt->pool, kv->v.reass_index);
336 if (now > reass->last_heard + rm->timeout)
338 ip4_sv_reass_free (vm, rm, rt, reass)
345 reass->last_heard = now;
/* pool full: evict the least-recently-used context to make room */
349 if (rt->reass_n >= rm->max_reass_n && rm->max_reass_n)
351 reass = pool_elt_at_index (rt->pool, rt->lru_first);
352 ip4_sv_reass_free (vm, rm, rt, reass);
355 pool_get (rt->pool, reass);
356 clib_memset (reass, 0, sizeof (*reass));
/* ids are partitioned per thread so they stay globally unique */
357 reass->id = ((u64) vm->thread_index * 1000000000) + rt->id_counter;
359 ip4_sv_reass_init (reass);
/* append the new context at the tail of the LRU list */
361 reass->lru_prev = reass->lru_next = ~0;
363 if (~0 != rt->lru_last)
365 ip4_sv_reass_t *lru_last = pool_elt_at_index (rt->pool, rt->lru_last);
366 reass->lru_prev = rt->lru_last;
367 lru_last->lru_next = rt->lru_last = reass - rt->pool;
370 if (~0 == rt->lru_first)
372 rt->lru_first = rt->lru_last = reass - rt->pool;
375 reass->key.as_u64[0] = kv->kv.key[0];
376 reass->key.as_u64[1] = kv->kv.key[1];
377 kv->v.reass_index = (reass - rt->pool);
378 kv->v.thread_index = vm->thread_index;
379 reass->last_heard = now;
/* is_add=2: add-but-fail-if-present, detects a concurrent creator */
381 int rv = clib_bihash_add_del_16_8 (&rm->hash, &kv->kv, 2);
384 ip4_sv_reass_free (vm, rm, rt, reass);
386 // if other worker created a context already work with the other copy
/* Feed one fragment into the reassembly. A first fragment (offset 0)
 * supplies the L4 metadata (proto, ports, TCP flags/seq/ack or ICMP type)
 * and completes the "shallow virtual" reassembly; non-first fragments are
 * cached until completion. Returns an ip4_sv_reass_rc_t. */
394 always_inline ip4_sv_reass_rc_t
395 ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
396 ip4_sv_reass_main_t *rm, ip4_header_t *ip0,
397 ip4_sv_reass_t *reass, u32 bi0)
399 vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
400 ip4_sv_reass_rc_t rc = IP4_SV_REASS_RC_OK;
401 const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
402 if (0 == fragment_first)
404 reass->ip_proto = ip0->protocol;
405 reass->l4_src_port = ip4_get_port (ip0, 1);
406 reass->l4_dst_port = ip4_get_port (ip0, 0);
/* ip4_get_port returning 0 means the ports couldn't be extracted */
407 if (!reass->l4_src_port || !reass->l4_dst_port)
408 return IP4_SV_REASS_RC_UNSUPP_IP_PROTO;
409 if (IP_PROTOCOL_TCP == reass->ip_proto)
411 reass->icmp_type_or_tcp_flags = ((tcp_header_t *) (ip0 + 1))->flags;
412 reass->tcp_ack_number = ((tcp_header_t *) (ip0 + 1))->ack_number;
413 reass->tcp_seq_number = ((tcp_header_t *) (ip0 + 1))->seq_number;
415 else if (IP_PROTOCOL_ICMP == reass->ip_proto)
417 reass->icmp_type_or_tcp_flags =
418 ((icmp46_header_t *) (ip0 + 1))->type;
/* seeing the first fragment is enough for shallow-virtual completion */
420 reass->is_complete = true;
421 vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
422 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
424 ip4_sv_reass_add_trace (
425 vm, node, reass, bi0, REASS_FINISH, reass->ip_proto,
426 reass->l4_src_port, reass->l4_dst_port,
427 vnet_buffer (b0)->ip.reass.l4_layer_truncated);
/* not complete yet: cache this buffer until the first fragment arrives */
430 vec_add1 (reass->cached_buffers, bi0);
431 if (!reass->is_complete)
433 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
435 ip4_sv_reass_add_trace (
436 vm, node, reass, bi0, REASS_FRAGMENT_CACHE, ~0, ~0, ~0,
437 vnet_buffer (b0)->ip.reass.l4_layer_truncated);
439 if (vec_len (reass->cached_buffers) > rm->max_reass_len)
441 rc = IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS;
/* True when the packet's total length (per the IP header) is too short to
 * contain the full L4 header for TCP/UDP/ICMP (0 expected length for other
 * protocols, so they are never considered truncated). */
448 l4_layer_truncated (ip4_header_t *ip)
450 static const int l4_layer_length[256] = {
451 [IP_PROTOCOL_TCP] = sizeof (tcp_header_t),
452 [IP_PROTOCOL_UDP] = sizeof (udp_header_t),
453 [IP_PROTOCOL_ICMP] = sizeof (icmp46_header_t),
456 return ((u8 *) ip + ip4_header_bytes (ip) + l4_layer_length[ip->protocol] >
457 (u8 *) ip + clib_net_to_host_u16 (ip->length));
/* Main dispatch for all SV reassembly node variants. Template parameters:
 *   is_feature           - running as an ip4-unicast feature-arc node
 *   is_output_feature    - running on the ip4-output arc (IP header sits
 *                          after the saved rewrite)
 *   is_custom            - caller supplies next_index via buffer metadata
 *   with_custom_context  - per-packet aux context travels with the frame
 * Fast path: if no packet in the frame is fragmented, just fill in the
 * reass metadata (proto/ports/flags) and forward the whole frame. On the
 * first fragment seen, falls through to the per-packet slow path which
 * creates/updates reassembly contexts and flushes cached buffers once the
 * reassembly completes. Returns the number of vectors processed. */
461 ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
462 vlib_frame_t *frame, bool is_feature,
463 bool is_output_feature, bool is_custom,
464 bool with_custom_context)
466 u32 *from = vlib_frame_vector_args (frame);
467 u32 n_left_from, n_left_to_next, *to_next, *to_next_aux, next_index;
468 ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
469 ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
471 if (with_custom_context)
472 context = vlib_frame_aux_args (frame);
/* serializes access to this worker's reassembly pool/LRU */
474 clib_spinlock_lock (&rt->lock);
476 n_left_from = frame->n_vectors;
477 next_index = node->cached_next_index;
479 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
480 vlib_get_buffers (vm, from, bufs, n_left_from);
481 u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
484 /* optimistic case first - no fragments */
485 while (n_left_from >= 2)
487 vlib_buffer_t *b0, *b1;
494 /* Prefetch next iteration. */
495 if (PREDICT_TRUE (n_left_from >= 4))
497 vlib_buffer_t *p2, *p3;
502 vlib_prefetch_buffer_header (p2, LOAD);
503 vlib_prefetch_buffer_header (p3, LOAD);
505 clib_prefetch_load (p2->data);
506 clib_prefetch_load (p3->data);
/* on the output arc the IP header follows the saved L2 rewrite */
510 (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b0),
511 (is_output_feature ? 1 : 0) *
513 ip.save_rewrite_length);
515 (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b1),
516 (is_output_feature ? 1 : 0) *
518 ip.save_rewrite_length);
521 (ip4_get_fragment_more (ip0) || ip4_get_fragment_offset (ip0))
522 || (ip4_get_fragment_more (ip1) || ip4_get_fragment_offset (ip1)))
524 // fragment found, go slow path
528 vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
535 vnet_feature_next (&next0, b0);
539 next0 = is_custom ? vnet_buffer (b0)->ip.reass.next_index :
540 IP4_SV_REASSEMBLY_NEXT_INPUT;
542 vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
543 vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
/* truncated L4 header: publish zero ports rather than garbage */
544 if (l4_layer_truncated (ip0))
546 vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1;
547 vnet_buffer (b0)->ip.reass.l4_src_port = 0;
548 vnet_buffer (b0)->ip.reass.l4_dst_port = 0;
552 vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0;
553 if (IP_PROTOCOL_TCP == ip0->protocol)
555 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
556 ((tcp_header_t *) (ip0 + 1))->flags;
557 vnet_buffer (b0)->ip.reass.tcp_ack_number =
558 ((tcp_header_t *) (ip0 + 1))->ack_number;
559 vnet_buffer (b0)->ip.reass.tcp_seq_number =
560 ((tcp_header_t *) (ip0 + 1))->seq_number;
562 else if (IP_PROTOCOL_ICMP == ip0->protocol)
564 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
565 ((icmp46_header_t *) (ip0 + 1))->type;
567 vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
568 vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
570 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
572 ip4_sv_reass_add_trace (
573 vm, node, NULL, from[(b - 2) - bufs], REASS_PASSTHROUGH,
574 vnet_buffer (b0)->ip.reass.ip_proto,
575 vnet_buffer (b0)->ip.reass.l4_src_port,
576 vnet_buffer (b0)->ip.reass.l4_dst_port,
577 vnet_buffer (b0)->ip.reass.l4_layer_truncated);
/* same handling for the second packet of the pair */
581 vnet_feature_next (&next1, b1);
585 next1 = is_custom ? vnet_buffer (b1)->ip.reass.next_index :
586 IP4_SV_REASSEMBLY_NEXT_INPUT;
588 vnet_buffer (b1)->ip.reass.is_non_first_fragment = 0;
589 vnet_buffer (b1)->ip.reass.ip_proto = ip1->protocol;
590 if (l4_layer_truncated (ip1))
592 vnet_buffer (b1)->ip.reass.l4_layer_truncated = 1;
593 vnet_buffer (b1)->ip.reass.l4_src_port = 0;
594 vnet_buffer (b1)->ip.reass.l4_dst_port = 0;
598 vnet_buffer (b1)->ip.reass.l4_layer_truncated = 0;
599 if (IP_PROTOCOL_TCP == ip1->protocol)
601 vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags =
602 ((tcp_header_t *) (ip1 + 1))->flags;
603 vnet_buffer (b1)->ip.reass.tcp_ack_number =
604 ((tcp_header_t *) (ip1 + 1))->ack_number;
605 vnet_buffer (b1)->ip.reass.tcp_seq_number =
606 ((tcp_header_t *) (ip1 + 1))->seq_number;
608 else if (IP_PROTOCOL_ICMP == ip1->protocol)
610 vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags =
611 ((icmp46_header_t *) (ip1 + 1))->type;
613 vnet_buffer (b1)->ip.reass.l4_src_port = ip4_get_port (ip1, 1);
614 vnet_buffer (b1)->ip.reass.l4_dst_port = ip4_get_port (ip1, 0);
616 if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
618 ip4_sv_reass_add_trace (
619 vm, node, NULL, from[(b - 1) - bufs], REASS_PASSTHROUGH,
620 vnet_buffer (b1)->ip.reass.ip_proto,
621 vnet_buffer (b1)->ip.reass.l4_src_port,
622 vnet_buffer (b1)->ip.reass.l4_dst_port,
623 vnet_buffer (b1)->ip.reass.l4_layer_truncated);
630 if (with_custom_context)
/* single-packet tail of the optimistic loop */
634 while (n_left_from > 0)
642 (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b0),
643 (is_output_feature ? 1 : 0) *
645 ip.save_rewrite_length);
647 (ip4_get_fragment_more (ip0) || ip4_get_fragment_offset (ip0)))
649 // fragment found, go slow path
653 vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
660 vnet_feature_next (&next0, b0);
665 is_custom ? vnet_buffer (b0)->ip.
666 reass.next_index : IP4_SV_REASSEMBLY_NEXT_INPUT;
668 vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
669 vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
670 if (l4_layer_truncated (ip0))
672 vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1;
676 vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0;
677 if (IP_PROTOCOL_TCP == ip0->protocol)
679 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
680 ((tcp_header_t *) (ip0 + 1))->flags;
681 vnet_buffer (b0)->ip.reass.tcp_ack_number =
682 ((tcp_header_t *) (ip0 + 1))->ack_number;
683 vnet_buffer (b0)->ip.reass.tcp_seq_number =
684 ((tcp_header_t *) (ip0 + 1))->seq_number;
686 else if (IP_PROTOCOL_ICMP == ip0->protocol)
688 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
689 ((icmp46_header_t *) (ip0 + 1))->type;
691 vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
692 vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
694 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
696 ip4_sv_reass_add_trace (
697 vm, node, NULL, from[(b - 1) - bufs], REASS_PASSTHROUGH,
698 vnet_buffer (b0)->ip.reass.ip_proto,
699 vnet_buffer (b0)->ip.reass.l4_src_port,
700 vnet_buffer (b0)->ip.reass.l4_dst_port,
701 vnet_buffer (b0)->ip.reass.l4_layer_truncated);
707 if (with_custom_context)
711 vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
/* slow path: at least one fragment in the frame, process per packet */
720 while (n_left_from > 0)
722 if (with_custom_context)
723 vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next,
724 to_next_aux, n_left_to_next);
726 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
728 while (n_left_from > 0 && n_left_to_next > 0)
733 u32 error0 = IP4_ERROR_NONE;
734 u8 forward_context = 0;
737 b0 = vlib_get_buffer (vm, bi0);
740 (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b0),
741 (is_output_feature ? 1 : 0) *
743 ip.save_rewrite_length);
744 if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
746 // this is a regular packet - no fragmentation
749 next0 = vnet_buffer (b0)->ip.reass.next_index;
753 next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
755 vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
756 vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
757 if (l4_layer_truncated (ip0))
759 vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1;
760 vnet_buffer (b0)->ip.reass.l4_src_port = 0;
761 vnet_buffer (b0)->ip.reass.l4_dst_port = 0;
765 vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0;
766 if (IP_PROTOCOL_TCP == ip0->protocol)
768 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
769 ((tcp_header_t *) (ip0 + 1))->flags;
770 vnet_buffer (b0)->ip.reass.tcp_ack_number =
771 ((tcp_header_t *) (ip0 + 1))->ack_number;
772 vnet_buffer (b0)->ip.reass.tcp_seq_number =
773 ((tcp_header_t *) (ip0 + 1))->seq_number;
775 else if (IP_PROTOCOL_ICMP == ip0->protocol)
777 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
778 ((icmp46_header_t *) (ip0 + 1))->type;
780 vnet_buffer (b0)->ip.reass.l4_src_port =
781 ip4_get_port (ip0, 1);
782 vnet_buffer (b0)->ip.reass.l4_dst_port =
783 ip4_get_port (ip0, 0);
785 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
787 ip4_sv_reass_add_trace (
788 vm, node, NULL, bi0, REASS_PASSTHROUGH,
789 vnet_buffer (b0)->ip.reass.ip_proto,
790 vnet_buffer (b0)->ip.reass.l4_src_port,
791 vnet_buffer (b0)->ip.reass.l4_dst_port,
792 vnet_buffer (b0)->ip.reass.l4_layer_truncated);
/* sanity-check fragment geometry before touching the hash */
796 const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
797 const u32 fragment_length =
798 clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
799 const u32 fragment_last = fragment_first + fragment_length - 1;
800 if (fragment_first > fragment_last || fragment_first + fragment_length > UINT16_MAX - 20 || (fragment_length < 8 && ip4_get_fragment_more (ip0))) // 8 is minimum frag length per RFC 791
802 next0 = IP4_SV_REASSEMBLY_NEXT_DROP;
803 error0 = IP4_ERROR_REASS_MALFORMED_PACKET;
804 b0->error = node->errors[error0];
/* build the lookup key: fib/context + src, then dst + frag id + proto */
807 ip4_sv_reass_kv_t kv;
810 if (with_custom_context)
811 kv.k.as_u64[0] = (u64) *context | (u64) ip0->src_address.as_u32
815 (u64) vec_elt (ip4_main.fib_index_by_sw_if_index,
816 vnet_buffer (b0)->sw_if_index[VLIB_RX]) |
817 (u64) ip0->src_address.as_u32 << 32;
818 kv.k.as_u64[1] = (u64) ip0->dst_address.as_u32 |
819 (u64) ip0->fragment_id << 32 |
820 (u64) ip0->protocol << 48;
822 ip4_sv_reass_t *reass =
823 ip4_sv_reass_find_or_create (vm, rm, rt, &kv, &do_handoff);
825 if (PREDICT_FALSE (do_handoff))
827 next0 = IP4_SV_REASSEMBLY_NEXT_HANDOFF;
828 vnet_buffer (b0)->ip.reass.owner_thread_index =
830 if (with_custom_context)
837 next0 = IP4_SV_REASSEMBLY_NEXT_DROP;
838 error0 = IP4_ERROR_REASS_LIMIT_REACHED;
839 b0->error = node->errors[error0];
/* reassembly already complete: copy cached L4 metadata and forward */
843 if (reass->is_complete)
847 next0 = vnet_buffer (b0)->ip.reass.next_index;
851 next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
853 vnet_buffer (b0)->ip.reass.is_non_first_fragment =
855 vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
856 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
857 reass->icmp_type_or_tcp_flags;
858 vnet_buffer (b0)->ip.reass.tcp_ack_number =
859 reass->tcp_ack_number;
860 vnet_buffer (b0)->ip.reass.tcp_seq_number =
861 reass->tcp_seq_number;
862 vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
863 vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
864 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
866 ip4_sv_reass_add_trace (
867 vm, node, reass, bi0, REASS_FRAGMENT_FORWARD,
868 reass->ip_proto, reass->l4_src_port, reass->l4_dst_port,
869 vnet_buffer (b0)->ip.reass.l4_layer_truncated);
/* otherwise feed the fragment into the reassembly state machine */
874 ip4_sv_reass_rc_t rc =
875 ip4_sv_reass_update (vm, node, rm, ip0, reass, bi0);
879 case IP4_SV_REASS_RC_OK:
880 /* nothing to do here */
882 case IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS:
883 counter = IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
885 case IP4_SV_REASS_RC_UNSUPP_IP_PROTO:
886 counter = IP4_ERROR_REASS_UNSUPP_IP_PROT;
891 vlib_node_increment_counter (vm, node->node_index, counter, 1);
892 ip4_sv_reass_free (vm, rm, rt, reass);
/* reassembly just completed: flush every cached buffer downstream */
895 if (reass->is_complete)
898 vec_foreach_index (idx, reass->cached_buffers)
900 u32 bi0 = vec_elt (reass->cached_buffers, idx);
901 vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
903 (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b0),
904 (is_output_feature ? 1 : 0) *
906 ip.save_rewrite_length);
907 u32 next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
910 vnet_feature_next (&next0, b0);
914 next0 = vnet_buffer (b0)->ip.reass.next_index;
916 if (0 == n_left_to_next)
918 vlib_put_next_frame (vm, node, next_index,
920 vlib_get_next_frame (vm, node, next_index, to_next,
926 vnet_buffer (b0)->ip.reass.is_non_first_fragment =
927 ! !ip4_get_fragment_offset (ip0);
928 vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
929 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
930 reass->icmp_type_or_tcp_flags;
931 vnet_buffer (b0)->ip.reass.tcp_ack_number =
932 reass->tcp_ack_number;
933 vnet_buffer (b0)->ip.reass.tcp_seq_number =
934 reass->tcp_seq_number;
935 vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
936 vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
937 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
939 ip4_sv_reass_add_trace (
940 vm, node, reass, bi0, REASS_FRAGMENT_FORWARD,
941 reass->ip_proto, reass->l4_src_port, reass->l4_dst_port,
942 vnet_buffer (b0)->ip.reass.l4_layer_truncated);
944 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
945 to_next, n_left_to_next, bi0,
948 vec_set_len (reass->cached_buffers,
949 0); // buffers are owned by frame now
957 if (is_feature && IP4_ERROR_NONE == error0)
959 b0 = vlib_get_buffer (vm, bi0);
960 vnet_feature_next (&next0, b0);
962 if (with_custom_context && forward_context)
966 to_next_aux[0] = *context;
969 vlib_validate_buffer_enqueue_with_aux_x1 (
970 vm, node, next_index, to_next, to_next_aux, n_left_to_next,
971 bi0, *context, next0);
974 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
975 n_left_to_next, bi0, next0);
980 if (with_custom_context)
984 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
988 clib_spinlock_unlock (&rt->lock);
989 return frame->n_vectors;
/* Standalone (non-feature) SV reassembly node: all template flags off. */
992 VLIB_NODE_FN (ip4_sv_reass_node) (vlib_main_t * vm,
993 vlib_node_runtime_t * node,
994 vlib_frame_t * frame)
996 return ip4_sv_reass_inline (
997 vm, node, frame, false /* is_feature */, false /* is_output_feature */,
998 false /* is_custom */, false /* with_custom_context */);
/* Graph registration for "ip4-sv-reassembly". */
1002 VLIB_REGISTER_NODE (ip4_sv_reass_node) = {
1003 .name = "ip4-sv-reassembly",
1004 .vector_size = sizeof (u32),
1005 .format_trace = format_ip4_sv_reass_trace,
1006 .n_errors = IP4_N_ERROR,
1007 .error_counters = ip4_error_counters,
1008 .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
1011 [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
1012 [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
1013 [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reassembly-handoff",
/* Feature-arc variant running on ip4-unicast before ip4-lookup. */
1019 VLIB_NODE_FN (ip4_sv_reass_node_feature) (vlib_main_t * vm,
1020 vlib_node_runtime_t * node,
1021 vlib_frame_t * frame)
1023 return ip4_sv_reass_inline (
1024 vm, node, frame, true /* is_feature */, false /* is_output_feature */,
1025 false /* is_custom */, false /* with_custom_context */);
/* Graph registration for "ip4-sv-reassembly-feature". */
1029 VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = {
1030 .name = "ip4-sv-reassembly-feature",
1031 .vector_size = sizeof (u32),
1032 .format_trace = format_ip4_sv_reass_trace,
1033 .n_errors = IP4_N_ERROR,
1034 .error_counters = ip4_error_counters,
1035 .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
1038 [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
1039 [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
1040 [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff",
/* Hook the feature node into the ip4-unicast arc. */
1046 VNET_FEATURE_INIT (ip4_sv_reass_feature) = {
1047 .arc_name = "ip4-unicast",
1048 .node_name = "ip4-sv-reassembly-feature",
1049 .runs_before = VNET_FEATURES ("ip4-lookup"),
/* Output-arc variant: is_output_feature makes the dispatcher skip the
 * saved L2 rewrite before the IP header. */
1054 VLIB_NODE_FN (ip4_sv_reass_node_output_feature) (vlib_main_t * vm,
1055 vlib_node_runtime_t * node,
1056 vlib_frame_t * frame)
1058 return ip4_sv_reass_inline (
1059 vm, node, frame, true /* is_feature */, true /* is_output_feature */,
1060 false /* is_custom */, false /* with_custom_context */);
/* Graph registration for "ip4-sv-reassembly-output-feature". */
1065 VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = {
1066 .name = "ip4-sv-reassembly-output-feature",
1067 .vector_size = sizeof (u32),
1068 .format_trace = format_ip4_sv_reass_trace,
1069 .n_errors = IP4_N_ERROR,
1070 .error_counters = ip4_error_counters,
1071 .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
1074 [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
1075 [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
1076 [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff",
/* Hook the output-feature node into the ip4-output arc. */
1082 VNET_FEATURE_INIT (ip4_sv_reass_output_feature) = {
1083 .arc_name = "ip4-output",
1084 .node_name = "ip4-sv-reassembly-output-feature",
/* Custom-next variant: callers pre-set next_index in buffer metadata. */
1090 VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = {
1091 .name = "ip4-sv-reassembly-custom-next",
1092 .vector_size = sizeof (u32),
1093 .format_trace = format_ip4_sv_reass_trace,
1094 .n_errors = IP4_N_ERROR,
1095 .error_counters = ip4_error_counters,
1096 .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
1099 [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
1100 [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
1101 [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reassembly-handoff",
1106 VLIB_NODE_FN (ip4_sv_reass_custom_node) (vlib_main_t * vm,
1107 vlib_node_runtime_t * node,
1108 vlib_frame_t * frame)
1110 return ip4_sv_reass_inline (
1111 vm, node, frame, false /* is_feature */, false /* is_output_feature */,
1112 true /* is_custom */, false /* with_custom_context */);
/* Custom-context variant: a u32 aux value travels with each buffer
 * (aux_size) and is folded into the reassembly hash key. */
1115 VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_node) = {
1116 .name = "ip4-sv-reassembly-custom-context",
1117 .vector_size = sizeof (u32),
1118 .aux_size = sizeof(u32),
1119 .format_trace = format_ip4_sv_reass_trace,
1120 .n_errors = IP4_N_ERROR,
1121 .error_counters = ip4_error_counters,
1122 .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
1125 [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
1126 [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
1127 [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reassembly-custom-context-handoff",
1132 VLIB_NODE_FN (ip4_sv_reass_custom_context_node)
1133 (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1135 return ip4_sv_reass_inline (
1136 vm, node, frame, false /* is_feature */, false /* is_output_feature */,
1137 true /* is_custom */, true /* with_custom_context */);
1140 #ifndef CLIB_MARCH_VARIANT
/* Compute the bihash bucket count: max_reass_n scaled by the load factor,
 * rounded up to the next power of two. */
1142 ip4_sv_reass_get_nbuckets ()
1144 ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
1148 nbuckets = (u32) (rm->max_reass_n / IP4_SV_REASS_HT_LOAD_FACTOR);
1150 for (i = 0; i < 31; i++)
1151 if ((1 << i) >= nbuckets)
1157 #endif /* CLIB_MARCH_VARIANT */
/* Process events understood by the expire-walk process node. */
1161 IP4_EVENT_CONFIG_CHANGED = 1,
1162 } ip4_sv_reass_event_t;
/* Context for copying entries into a resized hash table. */
1167 clib_bihash_16_8_t *new_hash;
1168 } ip4_rehash_cb_ctx;
1170 #ifndef CLIB_MARCH_VARIANT
/* bihash walk callback: re-insert each key/value pair into the new table. */
1172 ip4_rehash_cb (clib_bihash_kv_16_8_t * kv, void *_ctx)
1174 ip4_rehash_cb_ctx *ctx = _ctx;
1175 if (clib_bihash_add_del_16_8 (ctx->new_hash, kv, 1))
1179 return (BIHASH_WALK_CONTINUE);
/* Store the configurable parameters in the module globals; timeout is
 * kept both in ms and as an f64 in seconds for fast comparisons. */
1183 ip4_sv_reass_set_params (u32 timeout_ms, u32 max_reassemblies,
1184 u32 max_reassembly_length,
1185 u32 expire_walk_interval_ms)
1187 ip4_sv_reass_main.timeout_ms = timeout_ms;
1188 ip4_sv_reass_main.timeout = (f64) timeout_ms / (f64) MSEC_PER_SEC;
1189 ip4_sv_reass_main.max_reass_n = max_reassemblies;
1190 ip4_sv_reass_main.max_reass_len = max_reassembly_length;
1191 ip4_sv_reass_main.expire_walk_interval_ms = expire_walk_interval_ms;
/* Public API: apply new parameters, notify the expire-walk process, and
 * grow the hash table (rehash into a bigger one) if the new maximum
 * requires more buckets. */
1195 ip4_sv_reass_set (u32 timeout_ms, u32 max_reassemblies,
1196 u32 max_reassembly_length, u32 expire_walk_interval_ms)
1198 u32 old_nbuckets = ip4_sv_reass_get_nbuckets ();
1199 ip4_sv_reass_set_params (timeout_ms, max_reassemblies,
1200 max_reassembly_length, expire_walk_interval_ms);
1201 vlib_process_signal_event (ip4_sv_reass_main.vlib_main,
1202 ip4_sv_reass_main.ip4_sv_reass_expire_node_idx,
1203 IP4_EVENT_CONFIG_CHANGED, 0);
1204 u32 new_nbuckets = ip4_sv_reass_get_nbuckets ();
1205 if (ip4_sv_reass_main.max_reass_n > 0 && new_nbuckets > old_nbuckets)
1207 clib_bihash_16_8_t new_hash;
1208 clib_memset (&new_hash, 0, sizeof (new_hash));
1209 ip4_rehash_cb_ctx ctx;
1211 ctx.new_hash = &new_hash;
1212 clib_bihash_init_16_8 (&new_hash, "ip4-dr", new_nbuckets,
1213 new_nbuckets * 1024);
1214 clib_bihash_foreach_key_value_pair_16_8 (&ip4_sv_reass_main.hash,
1215 ip4_rehash_cb, &ctx);
/* NOTE(review): this free appears to be the rehash-failure branch; the
 * guarding condition is elided from this listing - confirm. */
1218 clib_bihash_free_16_8 (&new_hash);
/* success: replace the old table with the rehashed one */
1223 clib_bihash_free_16_8 (&ip4_sv_reass_main.hash);
1224 clib_memcpy_fast (&ip4_sv_reass_main.hash, &new_hash,
1225 sizeof (ip4_sv_reass_main.hash));
1226 clib_bihash_copied (&ip4_sv_reass_main.hash, &new_hash);
/* Public API: read back the current configuration into the out-params. */
1233 ip4_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies,
1234 u32 * max_reassembly_length, u32 * expire_walk_interval_ms)
1236 *timeout_ms = ip4_sv_reass_main.timeout_ms;
1237 *max_reassemblies = ip4_sv_reass_main.max_reass_n;
1238 *max_reassembly_length = ip4_sv_reass_main.max_reass_len;
1239 *expire_walk_interval_ms = ip4_sv_reass_main.expire_walk_interval_ms;
1243 static clib_error_t *
/* Module init: allocate per-worker state (lock, pool, LRU heads), apply
 * default parameters, size and create the bihash, cache graph node
 * indices, and set up worker-handoff frame queues. */
1244 ip4_sv_reass_init_function (vlib_main_t * vm)
1246 ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
1247 clib_error_t *error = 0;
1252 rm->vnet_main = vnet_get_main ();
/* one slot per worker plus the main thread */
1254 vec_validate (rm->per_thread_data, vlib_num_workers ())
1255 ip4_sv_reass_per_thread_t *rt;
1256 vec_foreach (rt, rm->per_thread_data)
1258 clib_spinlock_init (&rt->lock);
1259 pool_alloc (rt->pool, rm->max_reass_n);
1260 rt->lru_first = rt->lru_last = ~0;
1263 node = vlib_get_node_by_name (vm, (u8 *) "ip4-sv-reassembly-expire-walk");
1265 rm->ip4_sv_reass_expire_node_idx = node->index;
1267 ip4_sv_reass_set_params (IP4_SV_REASS_TIMEOUT_DEFAULT_MS,
1268 IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT,
1269 IP4_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT,
1270 IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS);
1272 nbuckets = ip4_sv_reass_get_nbuckets ();
1273 clib_bihash_init_16_8 (&rm->hash, "ip4-dr", nbuckets, nbuckets * 1024);
1275 node = vlib_get_node_by_name (vm, (u8 *) "ip4-drop");
1277 rm->ip4_drop_idx = node->index;
/* frame queues used by the handoff nodes for cross-worker transfer */
1279 rm->fq_index = vlib_frame_queue_main_init (ip4_sv_reass_node.index, 0);
1280 rm->fq_feature_index =
1281 vlib_frame_queue_main_init (ip4_sv_reass_node_feature.index, 0);
1282 rm->fq_custom_context_index =
1283 vlib_frame_queue_main_init (ip4_sv_reass_custom_context_node.index, 0);
1285 rm->feature_use_refcount_per_intf = NULL;
1286 rm->output_feature_use_refcount_per_intf = NULL;
1291 VLIB_INIT_FUNCTION (ip4_sv_reass_init_function);
1292 #endif /* CLIB_MARCH_VARIANT */
/* Process node: periodically (every expire_walk_interval_ms, or on a
 * config-changed event) walks every worker's reassembly pool and frees
 * contexts whose last_heard is older than the timeout. */
1295 ip4_sv_reass_walk_expired (vlib_main_t *vm,
1296 CLIB_UNUSED (vlib_node_runtime_t *node),
1297 CLIB_UNUSED (vlib_frame_t *f))
1299 ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
1300 uword event_type, *event_data = 0;
1304 vlib_process_wait_for_event_or_clock (vm,
1306 rm->expire_walk_interval_ms /
1307 (f64) MSEC_PER_SEC);
1308 event_type = vlib_process_get_events (vm, &event_data);
1313 /* no events => timeout */
1315 case IP4_EVENT_CONFIG_CHANGED:
1316 /* nothing to do here */
1319 clib_warning ("BUG: event type 0x%wx", event_type);
1322 f64 now = vlib_time_now (vm);
1324 ip4_sv_reass_t *reass;
1325 int *pool_indexes_to_free = NULL;
1327 uword thread_index = 0;
1329 const uword nthreads = vlib_num_workers () + 1;
1330 for (thread_index = 0; thread_index < nthreads; ++thread_index)
1332 ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
1333 clib_spinlock_lock (&rt->lock);
/* collect indexes first - freeing while iterating the pool is unsafe */
1335 vec_reset_length (pool_indexes_to_free);
1337 pool_foreach_index (index, rt->pool) {
1338 reass = pool_elt_at_index (rt->pool, index);
1339 if (now > reass->last_heard + rm->timeout)
1341 vec_add1 (pool_indexes_to_free, index);
1347 vec_foreach (i, pool_indexes_to_free)
1349 ip4_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
1350 ip4_sv_reass_free (vm, rm, rt, reass);
1354 clib_spinlock_unlock (&rt->lock);
1357 vec_free (pool_indexes_to_free);
1360 vec_set_len (event_data, 0);
// Registration of the expiration walker as a VPP process node; it runs
// ip4_sv_reass_walk_expired in the main-thread process scheduler.
VLIB_REGISTER_NODE (ip4_sv_reass_expire_node) = {
  .function = ip4_sv_reass_walk_expired,
  .type = VLIB_NODE_TYPE_PROCESS,
  .name = "ip4-sv-reassembly-expire-walk",
  .format_trace = format_ip4_sv_reass_trace,
  .n_errors = IP4_N_ERROR,
  .error_counters = ip4_error_counters,
// format() callback: renders a reassembly hash key (fib index + IPv4
// src/dst + fragment id + protocol) in human-readable form.
format_ip4_sv_reass_key (u8 * s, va_list * args)
  ip4_sv_reass_key_t *key = va_arg (*args, ip4_sv_reass_key_t *);
    // frag_id is kept in network byte order - convert for display
    format (s, "fib_index: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
	    key->fib_index, format_ip4_address, &key->src, format_ip4_address,
	    &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
// format() callback: dumps one reassembly context - its id, key and trace
// counter, followed by every cached buffer (walking each buffer chain via
// next_buffer while VLIB_BUFFER_NEXT_PRESENT is set).
format_ip4_sv_reass (u8 * s, va_list * args)
  vlib_main_t *vm = va_arg (*args, vlib_main_t *);
  ip4_sv_reass_t *reass = va_arg (*args, ip4_sv_reass_t *);

  s = format (s, "ID: %lu, key: %U trace_op_counter: %u\n",
	      reass->id, format_ip4_sv_reass_key, &reass->key,
	      reass->trace_op_counter);

  // iterate over buffer indexes cached for this reassembly
  vec_foreach (bip, reass->cached_buffers)
      b = vlib_get_buffer (vm, bi);
      s = format (s, " #%03u: bi: %u, ", counter, bi);
      // follow the chain to the next buffer in this packet
      bi = b->next_buffer;
    while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
// CLI handler for "show ip4-sv-reassembly [details]": prints the configured
// limits and the current reassembly count summed over all threads; with
// "details" it also dumps every active reassembly context.
static clib_error_t *
show_ip4_reass (vlib_main_t * vm,
		unformat_input_t * input,
		CLIB_UNUSED (vlib_cli_command_t * lmd))
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;

  vlib_cli_output (vm, "---------------------");
  vlib_cli_output (vm, "IP4 reassembly status");
  vlib_cli_output (vm, "---------------------");
  bool details = false;
  if (unformat (input, "details"))

  u32 sum_reass_n = 0;
  ip4_sv_reass_t *reass;
  // main thread + all workers
  const uword nthreads = vlib_num_workers () + 1;
  for (thread_index = 0; thread_index < nthreads; ++thread_index)
      ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
      // lock the per-thread pool while reading it
      clib_spinlock_lock (&rt->lock);
      pool_foreach (reass, rt->pool) {
	vlib_cli_output (vm, "%U", format_ip4_sv_reass, vm, reass);
      sum_reass_n += rt->reass_n;
      clib_spinlock_unlock (&rt->lock);
  vlib_cli_output (vm, "---------------------");
  vlib_cli_output (vm, "Current IP4 reassemblies count: %lu\n",
		   (long unsigned) sum_reass_n);
  vlib_cli_output (vm,
		   "Maximum configured concurrent shallow virtual IP4 reassemblies per worker-thread: %lu\n",
		   (long unsigned) rm->max_reass_n);
  vlib_cli_output (vm,
		   "Maximum configured amount of fragments per shallow "
		   "virtual IP4 reassembly: %lu\n",
		   (long unsigned) rm->max_reass_len);
  vlib_cli_output (vm,
		   "Maximum configured shallow virtual IP4 reassembly timeout: %lums\n",
		   (long unsigned) rm->timeout_ms);
  vlib_cli_output (vm,
		   "Maximum configured shallow virtual IP4 reassembly expire walk interval: %lums\n",
		   (long unsigned) rm->expire_walk_interval_ms);
// CLI registration for "show ip4-sv-reassembly [details]".
VLIB_CLI_COMMAND (show_ip4_sv_reass_cmd, static) = {
    .path = "show ip4-sv-reassembly",
    .short_help = "show ip4-sv-reassembly [details]",
    .function = show_ip4_reass,
#ifndef CLIB_MARCH_VARIANT
// Public API wrapper: enable/disable shallow virtual reassembly on an
// interface; the actual work (including reference counting) is done by
// ip4_sv_reass_enable_disable_with_refcnt.
ip4_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable)
  return ip4_sv_reass_enable_disable_with_refcnt (sw_if_index,
#endif /* CLIB_MARCH_VARIANT */
// Errors reported by the handoff nodes (X-macro list).
#define foreach_ip4_sv_reass_handoff_error \
_(CONGESTION_DROP, "congestion drop")

// error codes generated from the list above
#define _(sym,str) IP4_SV_REASSEMBLY_HANDOFF_ERROR_##sym,
  foreach_ip4_sv_reass_handoff_error
  IP4_SV_REASSEMBLY_HANDOFF_N_ERROR,
} ip4_sv_reass_handoff_error_t;

// human-readable strings matching the error codes, same X-macro
static char *ip4_sv_reass_handoff_error_strings[] = {
#define _(sym,string) string,
  foreach_ip4_sv_reass_handoff_error

// per-packet trace record for the handoff nodes: which worker the
// buffer was handed off to
  u32 next_worker_index;
} ip4_sv_reass_handoff_trace_t;
// format() callback for handoff trace records: prints the worker thread
// index the packet was enqueued to.
format_ip4_sv_reass_handoff_trace (u8 * s, va_list * args)
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  ip4_sv_reass_handoff_trace_t *t =
    va_arg (*args, ip4_sv_reass_handoff_trace_t *);

    format (s, "ip4-sv-reassembly-handoff: next-worker %d",
	    t->next_worker_index);
// Shared implementation of all three handoff node flavours: for each buffer
// in the frame, reads the owner thread stamped during reassembly lookup and
// enqueues the buffer to that thread's frame queue. Buffers that cannot be
// enqueued (queue congestion) are counted as CONGESTION_DROP.
ip4_sv_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
				  vlib_frame_t *frame, bool is_feature,
				  bool is_custom_context)
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  u32 n_enq, n_left_from, *from, *context;
  u16 thread_indices[VLIB_FRAME_SIZE], *ti;

  from = vlib_frame_vector_args (frame);
  if (is_custom_context)
    // aux args carry the per-packet custom context alongside buffer indexes
    context = vlib_frame_aux_args (frame);

  n_left_from = frame->n_vectors;
  vlib_get_buffers (vm, from, bufs, n_left_from);

  ti = thread_indices;

  // select the frame queue matching this node flavour
  fq_index = (is_feature) ? rm->fq_feature_index :
    (is_custom_context ? rm->fq_custom_context_index :

  while (n_left_from > 0)
      // destination thread was stamped by the reassembly code
      ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index;

      ((node->flags & VLIB_NODE_FLAG_TRACE)
       && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
	  ip4_sv_reass_handoff_trace_t *t =
	    vlib_add_trace (vm, node, b[0], sizeof (*t));
	  t->next_worker_index = ti[0];

  // hand the whole frame off; the _with_aux variant also moves the
  // custom context values
  if (is_custom_context)
    n_enq = vlib_buffer_enqueue_to_thread_with_aux (
      vm, node, fq_index, from, context, thread_indices, frame->n_vectors, 1);
    n_enq = vlib_buffer_enqueue_to_thread (
      vm, node, fq_index, from, thread_indices, frame->n_vectors, 1);

  // anything not enqueued was dropped due to congestion
  if (n_enq < frame->n_vectors)
    vlib_node_increment_counter (vm, node->node_index,
				 IP4_SV_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP,
				 frame->n_vectors - n_enq);
  return frame->n_vectors;
// Plain handoff node: neither feature-arc nor custom-context flavour.
VLIB_NODE_FN (ip4_sv_reass_handoff_node) (vlib_main_t * vm,
					  vlib_node_runtime_t * node,
					  vlib_frame_t * frame)
  return ip4_sv_reass_handoff_node_inline (
    vm, node, frame, false /* is_feature */, false /* is_custom_context */);
// Node registration for the plain handoff node.
VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = {
  .name = "ip4-sv-reassembly-handoff",
  .vector_size = sizeof (u32),
  .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings),
  .error_strings = ip4_sv_reass_handoff_error_strings,
  .format_trace = format_ip4_sv_reass_handoff_trace,
// Custom-context handoff node: carries a per-packet aux value along
// with each buffer.
VLIB_NODE_FN (ip4_sv_reass_custom_context_handoff_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
  return ip4_sv_reass_handoff_node_inline (
    vm, node, frame, false /* is_feature */, true /* is_custom_context */);
// Node registration for the custom-context handoff node; aux_size
// reserves room for the per-packet custom context value.
VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_handoff_node) = {
  .name = "ip4-sv-reassembly-custom-context-handoff",
  .vector_size = sizeof (u32),
  .aux_size = sizeof (u32),
  .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings),
  .error_strings = ip4_sv_reass_handoff_error_strings,
  .format_trace = format_ip4_sv_reass_handoff_trace,
// Feature-arc flavour of the handoff node.
VLIB_NODE_FN (ip4_sv_reass_feature_handoff_node) (vlib_main_t * vm,
						  vlib_node_runtime_t *
						  vlib_frame_t * frame)
  return ip4_sv_reass_handoff_node_inline (
    vm, node, frame, true /* is_feature */, false /* is_custom_context */);
// Node registration for the feature-arc handoff node.
VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = {
  .name = "ip4-sv-reass-feature-hoff",
  .vector_size = sizeof (u32),
  .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings),
  .error_strings = ip4_sv_reass_handoff_error_strings,
  .format_trace = format_ip4_sv_reass_handoff_trace,
#ifndef CLIB_MARCH_VARIANT
// Reference-counted enable/disable of "ip4-sv-reassembly-feature" on the
// ip4-unicast arc: the vnet feature is only toggled on the 0->1 (enable)
// and 1->0 (disable) refcount transitions.
ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  // grow the refcount vector on demand so sw_if_index is always valid
  vec_validate (rm->feature_use_refcount_per_intf, sw_if_index);

      // 0 -> 1 transition: actually enable the vnet feature
      if (!rm->feature_use_refcount_per_intf[sw_if_index])
	  ++rm->feature_use_refcount_per_intf[sw_if_index];
	  return vnet_feature_enable_disable ("ip4-unicast",
					      "ip4-sv-reassembly-feature",
					      sw_if_index, 1, 0, 0);
      // already enabled - just bump the refcount
      ++rm->feature_use_refcount_per_intf[sw_if_index];

      if (rm->feature_use_refcount_per_intf[sw_if_index])
	--rm->feature_use_refcount_per_intf[sw_if_index];
      // NOTE(review): a disable call with refcount already at 0 still
      // reaches vnet_feature_enable_disable(..., 0) - presumably a
      // harmless no-op, but verify against the feature arc code
      if (!rm->feature_use_refcount_per_intf[sw_if_index])
	return vnet_feature_enable_disable ("ip4-unicast",
					    "ip4-sv-reassembly-feature",
					    sw_if_index, 0, 0, 0);
// Register node_index as a next node of the custom reassembly node and
// return the next-index callers should use when redirecting packets to it.
ip4_sv_reass_custom_register_next_node (uword node_index)
  return vlib_node_add_next (vlib_get_main (), ip4_sv_reass_custom_node.index,
// Same as ip4_sv_reass_custom_register_next_node, but for the
// custom-context flavour of the reassembly node.
ip4_sv_reass_custom_context_register_next_node (uword node_index)
  return vlib_node_add_next (
    vlib_get_main (), ip4_sv_reass_custom_context_node.index, node_index);
// Reference-counted enable/disable of "ip4-sv-reassembly-output-feature"
// on the ip4-output arc; mirrors ip4_sv_reass_enable_disable_with_refcnt
// but uses the separate output-feature refcount vector.
ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index,
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  // grow the refcount vector on demand so sw_if_index is always valid
  vec_validate (rm->output_feature_use_refcount_per_intf, sw_if_index);

      // 0 -> 1 transition: actually enable the vnet feature
      if (!rm->output_feature_use_refcount_per_intf[sw_if_index])
	  ++rm->output_feature_use_refcount_per_intf[sw_if_index];
	  return vnet_feature_enable_disable ("ip4-output",
					      "ip4-sv-reassembly-output-feature",
					      sw_if_index, 1, 0, 0);
      // already enabled - just bump the refcount
      ++rm->output_feature_use_refcount_per_intf[sw_if_index];

      if (rm->output_feature_use_refcount_per_intf[sw_if_index])
	--rm->output_feature_use_refcount_per_intf[sw_if_index];
      // NOTE(review): as in the input-feature variant, disabling with
      // refcount already 0 still invokes the vnet disable call - verify
      // this is a safe no-op
      if (!rm->output_feature_use_refcount_per_intf[sw_if_index])
	return vnet_feature_enable_disable ("ip4-output",
					    "ip4-sv-reassembly-output-feature",
					    sw_if_index, 0, 0, 0);
1733 * fd.io coding-style-patch-verification: ON
1736 * eval: (c-set-style "gnu")