2 * Copyright (c) 2017 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
17 * @brief NAT plugin virtual fragmentation reassembly
20 #include <vnet/vnet.h>
21 #include <nat/nat_reass.h>
22 #include <nat/nat_ipfix_logging.h>
/* Single global instance of the reassembly state; every function in this
   file reaches it through &nat_reass_main.  */
24 nat_reass_main_t nat_reass_main;
/* Compute the bucket count used to size a reassembly hash table.

   The visible lines divide ip6_max_reass or ip4_max_reass by
   NAT_REASS_HT_LOAD_FACTOR and then scan i = 0..30 for the first power of
   two (1 << i) that is not below that quotient.

   NOTE(review): the branch choosing between the two quotients and the
   final assignment/return are not visible in this listing; presumably the
   choice tests is_ip6 -- confirm against the full file.  */
27 nat_reass_get_nbuckets (u8 is_ip6)
29 nat_reass_main_t *srm = &nat_reass_main;
34 nbuckets = (u32) (srm->ip6_max_reass / NAT_REASS_HT_LOAD_FACTOR);
36 nbuckets = (u32) (srm->ip4_max_reass / NAT_REASS_HT_LOAD_FACTOR);
/* Look for the smallest power of two that covers the quotient.  */
38 for (i = 0; i < 31; i++)
39 if ((1 << i) >= nbuckets)
/* Move the cached fragments of an IPv4 reassembly into the vector *bi.

   Elements are taken from the head of the list anchored at
   reass->frags_per_reass_list_head_index; each element's value is appended
   to *bi and the list element is returned to ip4_frags_list_pool.

   This helper takes no lock itself; nat_ip4_reass_get_frags calls it
   between lock/unlock of ip4_reass_lock.

   NOTE(review): the loop keyword and the sentinel the removed index is
   compared against are not visible in this listing.  */
46 static_always_inline void
47 nat_ip4_reass_get_frags_inline (nat_reass_ip4_t * reass, u32 ** bi)
49 nat_reass_main_t *srm = &nat_reass_main;
54 clib_dlist_remove_head (srm->ip4_frags_list_pool,
55 reass->frags_per_reass_list_head_index)) !=
58 elt = pool_elt_at_index (srm->ip4_frags_list_pool, elt_index);
/* Hand the stored value over to the caller's vector.  */
59 vec_add1 (*bi, elt->value);
61 pool_put_index (srm->ip4_frags_list_pool, elt_index);
/* Move the cached fragments of an IPv6 reassembly into the vector *bi.

   Elements are taken from the head of the list anchored at
   reass->frags_per_reass_list_head_index; each element's value is appended
   to *bi and the list element is returned to ip6_frags_list_pool.

   This helper takes no lock itself; nat_ip6_reass_get_frags calls it
   between lock/unlock of ip6_reass_lock.

   NOTE(review): the loop keyword and the sentinel the removed index is
   compared against are not visible in this listing.  */
65 static_always_inline void
66 nat_ip6_reass_get_frags_inline (nat_reass_ip6_t * reass, u32 ** bi)
68 nat_reass_main_t *srm = &nat_reass_main;
73 clib_dlist_remove_head (srm->ip6_frags_list_pool,
74 reass->frags_per_reass_list_head_index)) !=
77 elt = pool_elt_at_index (srm->ip6_frags_list_pool, elt_index);
/* Hand the stored value over to the caller's vector.  */
78 vec_add1 (*bi, elt->value);
80 pool_put_index (srm->ip6_frags_list_pool, elt_index);
/* Update the virtual fragmentation reassembly configuration of one address
   family.

   timeout, max_frag and drop_frag are stored into the ip6_* or ip4_* fields
   of nat_reass_main.  When max_reass differs from the stored limit, the
   reassembly pool and the hash table of that family are freed and created
   again for the new limit while that family's spinlock is held.

   NOTE(review): the statement selecting the IPv6 or the IPv4 half, the
   last parameter of the signature and the return statement are not visible
   in this listing -- confirm against the full file.  */
85 nat_reass_set (u32 timeout, u16 max_reass, u8 max_frag, u8 drop_frag,
88 nat_reass_main_t *srm = &nat_reass_main;
/* IPv6 half: rebuild pool and hash only when the limit really changes.  */
93 if (srm->ip6_max_reass != max_reass)
95 clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
97 srm->ip6_max_reass = max_reass;
98 pool_free (srm->ip6_reass_pool);
/* Size the IPv6 pool and hash from the IPv6 limit; this previously used
   ip4_max_reass and nat_reass_get_nbuckets (0), i.e. the IPv4 limit.  */
99 pool_alloc (srm->ip6_reass_pool, srm->ip6_max_reass);
100 nbuckets = nat_reass_get_nbuckets (1);
101 clib_bihash_free_48_8 (&srm->ip6_reass_hash);
102 clib_bihash_init_48_8 (&srm->ip6_reass_hash, "nat-ip6-reass",
103 nbuckets, nbuckets * 1024);
105 clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
107 srm->ip6_timeout = timeout;
108 srm->ip6_max_frag = max_frag;
109 srm->ip6_drop_frag = drop_frag;
/* IPv4 half: same sequence on the ip4_* pool, hash and lock.  */
113 if (srm->ip4_max_reass != max_reass)
115 clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
117 srm->ip4_max_reass = max_reass;
118 pool_free (srm->ip4_reass_pool);
119 pool_alloc (srm->ip4_reass_pool, srm->ip4_max_reass);
120 nbuckets = nat_reass_get_nbuckets (0);
121 clib_bihash_free_16_8 (&srm->ip4_reass_hash);
122 clib_bihash_init_16_8 (&srm->ip4_reass_hash, "nat-ip4-reass",
123 nbuckets, nbuckets * 1024);
124 clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
126 srm->ip4_timeout = timeout;
127 srm->ip4_max_frag = max_frag;
128 srm->ip4_drop_frag = drop_frag;
/* Return the configured reassembly timeout.
   The visible lines return ip6_timeout on one path and ip4_timeout on the
   other; presumably is_ip6 selects between them (the test itself is not
   visible in this listing).  */
135 nat_reass_get_timeout (u8 is_ip6)
137 nat_reass_main_t *srm = &nat_reass_main;
140 return srm->ip6_timeout;
142 return srm->ip4_timeout;
/* Return the configured maximum number of concurrent reassemblies.
   The visible lines return ip6_max_reass on one path and ip4_max_reass on
   the other; presumably is_ip6 selects between them (the test itself is
   not visible in this listing).  */
146 nat_reass_get_max_reass (u8 is_ip6)
148 nat_reass_main_t *srm = &nat_reass_main;
151 return srm->ip6_max_reass;
153 return srm->ip4_max_reass;
/* Return the configured maximum number of fragments per reassembly.
   The visible lines return ip6_max_frag on one path and ip4_max_frag on
   the other; presumably is_ip6 selects between them (the test itself is
   not visible in this listing).  */
157 nat_reass_get_max_frag (u8 is_ip6)
159 nat_reass_main_t *srm = &nat_reass_main;
162 return srm->ip6_max_frag;
164 return srm->ip4_max_frag;
/* Return the drop_frag setting.  The "show" command in this file prints
   "DISABLED" for virtual reassembly when this value is non-zero.
   The visible lines return ip6_drop_frag on one path and ip4_drop_frag on
   the other; presumably is_ip6 selects between them (the test itself is
   not visible in this listing).  */
168 nat_reass_is_drop_frag (u8 is_ip6)
170 nat_reass_main_t *srm = &nat_reass_main;
173 return srm->ip6_drop_frag;
175 return srm->ip4_drop_frag;
/* Look up an IPv4 reassembly by key in ip4_reass_hash.

   The two 64-bit words of the key are copied into a bihash kv.  On a hit,
   value.value is used as an index into ip4_reass_pool and the entry's
   last_heard + ip4_timeout is compared with now.

   No lock is taken here; the callers in this file (nat_ip4_reass_find and
   nat_ip4_reass_find_or_create) hold ip4_reass_lock around the call.

   NOTE(review): the return statements are not visible in this listing;
   presumably a miss or an expired entry yields a null pointer -- confirm.  */
178 static_always_inline nat_reass_ip4_t *
179 nat_ip4_reass_lookup (nat_reass_ip4_key_t * k, f64 now)
181 nat_reass_main_t *srm = &nat_reass_main;
182 clib_bihash_kv_16_8_t kv, value;
183 nat_reass_ip4_t *reass;
185 kv.key[0] = k->as_u64[0];
186 kv.key[1] = k->as_u64[1];
/* A non-zero search result is the miss case.  */
188 if (clib_bihash_search_16_8 (&srm->ip4_reass_hash, &kv, &value))
191 reass = pool_elt_at_index (srm->ip4_reass_pool, value.value);
/* Entry is still within its timeout window.  */
192 if (now < reass->last_heard + (f64) srm->ip4_timeout)
/* Find an existing IPv4 reassembly without creating one.

   Builds a lookup key from the arguments, takes the current time from
   vlib_time_now and calls nat_ip4_reass_lookup while holding
   ip4_reass_lock.

   NOTE(review): only the src/dst key assignments are visible; the
   remaining key fields, the rest of the signature and the return statement
   are not visible in this listing.  */
199 nat_ip4_reass_find (ip4_address_t src, ip4_address_t dst, u16 frag_id,
202 nat_reass_main_t *srm = &nat_reass_main;
203 nat_reass_ip4_t *reass = 0;
204 nat_reass_ip4_key_t k;
205 f64 now = vlib_time_now (srm->vlib_main);
207 k.src.as_u32 = src.as_u32;
208 k.dst.as_u32 = dst.as_u32;
212 clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
213 reass = nat_ip4_reass_lookup (&k, now);
214 clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
/* Find the IPv4 reassembly for the given key, or set one up.

   Visible flow, all between lock and unlock of ip4_reass_lock:
   1. Look the key up.  An existing entry gets last_heard refreshed and is
      moved to the tail of the LRU list (presumably only when reset_timeout
      is set -- the guarding tests are not visible in this listing).
   2. If ip4_reass_n has reached ip4_max_reass, the head of the LRU list is
      taken.  If it has not timed out yet it is put back at the head and a
      warning is logged.  Otherwise it is moved to the tail, its hash entry
      is deleted (only when the hash still maps that key to this pool slot)
      and its cached fragments are appended to *bi_to_drop.
   3. Otherwise a reassembly, an LRU element and a fragment-list head are
      taken from their pools and initialised.
   4. Key, sess_index, thread_index and last_heard are (re)initialised and
      the key is added to the hash.

   NOTE(review): bi_to_drop is declared on a signature line that is not
   visible here; error paths and the return statement are not visible
   either.  */
220 nat_ip4_reass_find_or_create (ip4_address_t src, ip4_address_t dst,
221 u16 frag_id, u8 proto, u8 reset_timeout,
224 nat_reass_main_t *srm = &nat_reass_main;
225 nat_reass_ip4_t *reass = 0;
226 nat_reass_ip4_key_t k;
227 f64 now = vlib_time_now (srm->vlib_main);
228 dlist_elt_t *oldest_elt, *elt;
229 dlist_elt_t *per_reass_list_head_elt;
230 u32 oldest_index, elt_index;
231 clib_bihash_kv_16_8_t kv, value;
233 k.src.as_u32 = src.as_u32;
234 k.dst.as_u32 = dst.as_u32;
238 clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
240 reass = nat_ip4_reass_lookup (&k, now);
/* Existing entry: refresh the timestamp and make it most recently used.  */
245 reass->last_heard = now;
246 clib_dlist_remove (srm->ip4_reass_lru_list_pool,
247 reass->lru_list_index);
248 clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
249 srm->ip4_reass_head_index,
250 reass->lru_list_index);
/* Limit reached: try to recycle the least recently used entry.  */
255 if (srm->ip4_reass_n >= srm->ip4_max_reass)
258 clib_dlist_remove_head (srm->ip4_reass_lru_list_pool,
259 srm->ip4_reass_head_index);
260 ASSERT (oldest_index != ~0);
262 pool_elt_at_index (srm->ip4_reass_lru_list_pool, oldest_index);
263 reass = pool_elt_at_index (srm->ip4_reass_pool, oldest_elt->value);
/* Oldest entry is still live: put it back at the head and give up.  */
264 if (now < reass->last_heard + (f64) srm->ip4_timeout)
266 clib_dlist_addhead (srm->ip4_reass_lru_list_pool,
267 srm->ip4_reass_head_index, oldest_index);
268 clib_warning ("no free resassembly slot");
/* Oldest entry expired: it becomes the most recently used slot.  */
273 clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
274 srm->ip4_reass_head_index, oldest_index);
/* Remove the old key from the hash, but only if the hash still points at
   this pool slot.  */
276 kv.key[0] = reass->key.as_u64[0];
277 kv.key[1] = reass->key.as_u64[1];
278 if (!clib_bihash_search_16_8 (&srm->ip4_reass_hash, &kv, &value))
280 if (value.value == (reass - srm->ip4_reass_pool))
282 if (clib_bihash_add_del_16_8 (&srm->ip4_reass_hash, &kv, 0))
/* Fragments cached for the recycled entry are handed to the caller.  */
290 nat_ip4_reass_get_frags_inline (reass, bi_to_drop);
/* Below the limit: allocate the reassembly, its LRU element and the head
   of its per-reassembly fragment list.  */
294 pool_get (srm->ip4_reass_pool, reass);
295 pool_get (srm->ip4_reass_lru_list_pool, elt);
296 reass->lru_list_index = elt_index = elt - srm->ip4_reass_lru_list_pool;
297 clib_dlist_init (srm->ip4_reass_lru_list_pool, elt_index);
298 elt->value = reass - srm->ip4_reass_pool;
299 clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
300 srm->ip4_reass_head_index, elt_index);
301 pool_get (srm->ip4_frags_list_pool, per_reass_list_head_elt);
302 reass->frags_per_reass_list_head_index =
303 per_reass_list_head_elt - srm->ip4_frags_list_pool;
304 clib_dlist_init (srm->ip4_frags_list_pool,
305 reass->frags_per_reass_list_head_index);
/* Common tail: store the new key in the entry and publish it in the hash
   with the pool index as value.  */
309 reass->key.as_u64[0] = kv.key[0] = k.as_u64[0];
310 reass->key.as_u64[1] = kv.key[1] = k.as_u64[1];
311 kv.value = reass - srm->ip4_reass_pool;
312 reass->sess_index = (u32) ~ 0;
313 reass->thread_index = (u32) ~ 0;
314 reass->last_heard = now;
317 if (clib_bihash_add_del_16_8 (&srm->ip4_reass_hash, &kv, 1))
324 clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
/* Cache one more fragment on an IPv4 reassembly.

   When reass->frag_n has already reached ip4_max_frag, the
   max-fragments IPFIX logging hook is called instead.  Otherwise a list
   element is taken from ip4_frags_list_pool and appended to the tail of
   the reassembly's fragment list while ip4_reass_lock is held.

   NOTE(review): the lines storing bi into the element, updating frag_n and
   returning a status are not visible in this listing.  */
329 nat_ip4_reass_add_fragment (nat_reass_ip4_t * reass, u32 bi)
331 nat_reass_main_t *srm = &nat_reass_main;
/* Per-reassembly fragment limit check.  */
335 if (reass->frag_n >= srm->ip4_max_frag)
337 nat_ipfix_logging_max_fragments_ip4 (srm->ip4_max_frag,
342 clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
344 pool_get (srm->ip4_frags_list_pool, elt);
345 elt_index = elt - srm->ip4_frags_list_pool;
346 clib_dlist_init (srm->ip4_frags_list_pool, elt_index);
348 clib_dlist_addtail (srm->ip4_frags_list_pool,
349 reass->frags_per_reass_list_head_index, elt_index);
352 clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
/* Locked wrapper around nat_ip4_reass_get_frags_inline: moves the cached
   fragments of reass into *bi while holding ip4_reass_lock.  */
358 nat_ip4_reass_get_frags (nat_reass_ip4_t * reass, u32 ** bi)
360 nat_reass_main_t *srm = &nat_reass_main;
362 clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
364 nat_ip4_reass_get_frags_inline (reass, bi);
366 clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
/* Walk ip4_reass_pool and consider only entries that have not timed out
   (now < last_heard + ip4_timeout).  The visible lines take no lock.

   NOTE(review): the invocation of fn with ctx is not visible in this
   listing; presumably fn is called for each live entry -- confirm.  */
370 nat_ip4_reass_walk (nat_ip4_reass_walk_fn_t fn, void *ctx)
372 nat_reass_ip4_t *reass;
373 nat_reass_main_t *srm = &nat_reass_main;
374 f64 now = vlib_time_now (srm->vlib_main);
377 pool_foreach (reass, srm->ip4_reass_pool,
379 if (now < reass->last_heard + (f64) srm->ip4_timeout)
/* Look up an IPv6 reassembly by key in ip6_reass_hash.

   All six 64-bit words of the key are copied into a bihash kv.  On a hit,
   value.value is used as an index into ip6_reass_pool and the entry's
   last_heard + ip6_timeout is compared with now.

   No lock is taken here; nat_ip6_reass_find_or_create holds
   ip6_reass_lock around the call.

   NOTE(review): the return statements are not visible in this listing;
   presumably a miss or an expired entry yields a null pointer -- confirm.  */
388 static_always_inline nat_reass_ip6_t *
389 nat_ip6_reass_lookup (nat_reass_ip6_key_t * k, f64 now)
391 nat_reass_main_t *srm = &nat_reass_main;
392 clib_bihash_kv_48_8_t kv, value;
393 nat_reass_ip6_t *reass;
396 kv.key[0] = k->as_u64[0];
397 kv.key[1] = k->as_u64[1];
398 kv.key[2] = k->as_u64[2];
399 kv.key[3] = k->as_u64[3];
400 kv.key[4] = k->as_u64[4];
401 kv.key[5] = k->as_u64[5];
/* A non-zero search result is the miss case.  */
403 if (clib_bihash_search_48_8 (&srm->ip6_reass_hash, &kv, &value))
406 reass = pool_elt_at_index (srm->ip6_reass_pool, value.value);
/* Entry is still within its timeout window.  */
407 if (now < reass->last_heard + (f64) srm->ip6_timeout)
414 nat_ip6_reass_find_or_create (ip6_address_t src, ip6_address_t dst,
415 u32 frag_id, u8 proto, u8 reset_timeout,
418 nat_reass_main_t *srm = &nat_reass_main;
419 nat_reass_ip6_t *reass = 0;
420 nat_reass_ip6_key_t k;
421 f64 now = vlib_time_now (srm->vlib_main);
422 dlist_elt_t *oldest_elt, *elt;
423 dlist_elt_t *per_reass_list_head_elt;
424 u32 oldest_index, elt_index;
425 clib_bihash_kv_48_8_t kv;
427 k.src.as_u64[0] = src.as_u64[0];
428 k.src.as_u64[1] = src.as_u64[1];
429 k.dst.as_u64[0] = dst.as_u64[0];
430 k.dst.as_u64[1] = dst.as_u64[1];
435 clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
437 reass = nat_ip6_reass_lookup (&k, now);
442 reass->last_heard = now;
443 clib_dlist_remove (srm->ip6_reass_lru_list_pool,
444 reass->lru_list_index);
445 clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
446 srm->ip6_reass_head_index,
447 reass->lru_list_index);
452 if (srm->ip6_reass_n >= srm->ip6_max_reass)
455 clib_dlist_remove_head (srm->ip6_reass_lru_list_pool,
456 srm->ip6_reass_head_index);
457 ASSERT (oldest_index != ~0);
459 pool_elt_at_index (srm->ip4_reass_lru_list_pool, oldest_index);
460 reass = pool_elt_at_index (srm->ip6_reass_pool, oldest_elt->value);
461 if (now < reass->last_heard + (f64) srm->ip6_timeout)
463 clib_dlist_addhead (srm->ip6_reass_lru_list_pool,
464 srm->ip6_reass_head_index, oldest_index);
465 clib_warning ("no free resassembly slot");
470 clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
471 srm->ip6_reass_head_index, oldest_index);
473 kv.key[0] = k.as_u64[0];
474 kv.key[1] = k.as_u64[1];
475 kv.key[2] = k.as_u64[2];
476 kv.key[3] = k.as_u64[4];
477 kv.key[4] = k.as_u64[5];
478 if (clib_bihash_add_del_48_8 (&srm->ip6_reass_hash, &kv, 0))
484 nat_ip6_reass_get_frags_inline (reass, bi_to_drop);
488 pool_get (srm->ip6_reass_pool, reass);
489 pool_get (srm->ip6_reass_lru_list_pool, elt);
490 reass->lru_list_index = elt_index = elt - srm->ip6_reass_lru_list_pool;
491 clib_dlist_init (srm->ip6_reass_lru_list_pool, elt_index);
492 elt->value = reass - srm->ip6_reass_pool;
493 clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
494 srm->ip6_reass_head_index, elt_index);
495 pool_get (srm->ip6_frags_list_pool, per_reass_list_head_elt);
496 reass->frags_per_reass_list_head_index =
497 per_reass_list_head_elt - srm->ip6_frags_list_pool;
498 clib_dlist_init (srm->ip6_frags_list_pool,
499 reass->frags_per_reass_list_head_index);
503 reass->key.as_u64[0] = kv.key[0] = k.as_u64[0];
504 reass->key.as_u64[1] = kv.key[1] = k.as_u64[1];
505 reass->key.as_u64[2] = kv.key[2] = k.as_u64[2];
506 reass->key.as_u64[3] = kv.key[3] = k.as_u64[3];
507 reass->key.as_u64[4] = kv.key[4] = k.as_u64[4];
508 reass->key.as_u64[5] = kv.key[5] = k.as_u64[5];
509 kv.value = reass - srm->ip6_reass_pool;
510 reass->sess_index = (u32) ~ 0;
511 reass->last_heard = now;
513 if (clib_bihash_add_del_48_8 (&srm->ip6_reass_hash, &kv, 1))
520 clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
/* Cache one more fragment on an IPv6 reassembly.

   When reass->frag_n has already reached ip6_max_frag, the
   max-fragments IPFIX logging hook is called instead.  Otherwise a list
   element is taken from ip6_frags_list_pool and appended to the tail of
   the reassembly's fragment list while ip6_reass_lock is held.

   NOTE(review): the lines storing bi into the element, updating frag_n and
   returning a status are not visible in this listing.  */
525 nat_ip6_reass_add_fragment (nat_reass_ip6_t * reass, u32 bi)
527 nat_reass_main_t *srm = &nat_reass_main;
/* Per-reassembly fragment limit check.  */
531 if (reass->frag_n >= srm->ip6_max_frag)
533 nat_ipfix_logging_max_fragments_ip6 (srm->ip6_max_frag,
538 clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
540 pool_get (srm->ip6_frags_list_pool, elt);
541 elt_index = elt - srm->ip6_frags_list_pool;
542 clib_dlist_init (srm->ip6_frags_list_pool, elt_index);
544 clib_dlist_addtail (srm->ip6_frags_list_pool,
545 reass->frags_per_reass_list_head_index, elt_index);
548 clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
/* Locked wrapper around nat_ip6_reass_get_frags_inline: moves the cached
   fragments of reass into *bi while holding ip6_reass_lock.  */
554 nat_ip6_reass_get_frags (nat_reass_ip6_t * reass, u32 ** bi)
556 nat_reass_main_t *srm = &nat_reass_main;
558 clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
560 nat_ip6_reass_get_frags_inline (reass, bi);
562 clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
/* Walk ip6_reass_pool and consider only entries that have not timed out
   (now < last_heard + ip6_timeout).  The visible lines take no lock.

   NOTE(review): the invocation of fn with ctx is not visible in this
   listing; presumably fn is called for each live entry -- confirm.  */
566 nat_ip6_reass_walk (nat_ip6_reass_walk_fn_t fn, void *ctx)
568 nat_reass_ip6_t *reass;
569 nat_reass_main_t *srm = &nat_reass_main;
570 f64 now = vlib_time_now (srm->vlib_main);
573 pool_foreach (reass, srm->ip6_reass_pool,
/* IPv6 entries expire after the IPv6 timeout, matching
   nat_ip6_reass_lookup (this previously tested ip4_timeout).  */
575 if (now < reass->last_heard + (f64) srm->ip6_timeout)
/* Initialise the reassembly state for both address families.

   For IPv4 and then IPv6 the visible lines: load the default timeout,
   reassembly limit and fragment limit; clear drop_frag and the reassembly
   counter; initialise the per-family spinlock only when more than one
   vlib main exists; pre-allocate the reassembly pool; size and create the
   hash table; and allocate the head element of the LRU list.

   NOTE(review): the assignment of srm->vlib_main, the declaration of head
   and the return of error are not visible in this listing.  */
585 nat_reass_init (vlib_main_t * vm)
587 nat_reass_main_t *srm = &nat_reass_main;
588 vlib_thread_main_t *tm = vlib_get_thread_main ();
589 clib_error_t *error = 0;
591 u32 nbuckets, head_index;
594 srm->vnet_main = vnet_get_main ();
/* IPv4 defaults.  */
597 srm->ip4_timeout = NAT_REASS_TIMEOUT_DEFAULT;
598 srm->ip4_max_reass = NAT_MAX_REASS_DEAFULT;
599 srm->ip4_max_frag = NAT_MAX_FRAG_DEFAULT;
600 srm->ip4_drop_frag = 0;
601 srm->ip4_reass_n = 0;
/* The lock is only set up when there is more than one vlib main; the
   *_if_init lock helpers used elsewhere in this file rely on that.  */
603 if (tm->n_vlib_mains > 1)
604 clib_spinlock_init (&srm->ip4_reass_lock);
606 pool_alloc (srm->ip4_reass_pool, srm->ip4_max_reass);
608 nbuckets = nat_reass_get_nbuckets (0);
609 clib_bihash_init_16_8 (&srm->ip4_reass_hash, "nat-ip4-reass", nbuckets,
/* Head element of the IPv4 LRU list.  */
612 pool_get (srm->ip4_reass_lru_list_pool, head);
613 srm->ip4_reass_head_index = head_index =
614 head - srm->ip4_reass_lru_list_pool;
615 clib_dlist_init (srm->ip4_reass_lru_list_pool, head_index);
/* IPv6 defaults.  */
618 srm->ip6_timeout = NAT_REASS_TIMEOUT_DEFAULT;
619 srm->ip6_max_reass = NAT_MAX_REASS_DEAFULT;
620 srm->ip6_max_frag = NAT_MAX_FRAG_DEFAULT;
621 srm->ip6_drop_frag = 0;
622 srm->ip6_reass_n = 0;
624 if (tm->n_vlib_mains > 1)
625 clib_spinlock_init (&srm->ip6_reass_lock);
627 pool_alloc (srm->ip6_reass_pool, srm->ip6_max_reass);
629 nbuckets = nat_reass_get_nbuckets (1);
630 clib_bihash_init_48_8 (&srm->ip6_reass_hash, "nat-ip6-reass", nbuckets,
/* Head element of the IPv6 LRU list.  */
633 pool_get (srm->ip6_reass_lru_list_pool, head);
634 srm->ip6_reass_head_index = head_index =
635 head - srm->ip6_reass_lru_list_pool;
636 clib_dlist_init (srm->ip6_reass_lru_list_pool, head_index);
/* CLI handler for "nat virtual-reassembly".

   Parses the keywords max-reassemblies, max-fragments, timeout, enable,
   disable, ip4 and ip6 from one line of input; anything else produces an
   "unknown input" error.  Values are then completed from the current
   configuration via the nat_reass_get_* accessors and applied with
   nat_reass_set.

   NOTE(review): the tests guarding the timeout/max_reass/max_frag
   fallbacks are not visible in this listing; presumably a value left at
   its initial 0 means "not given" -- confirm.  The effect of the
   enable/disable keywords on drop_frag is not visible either.

   NOTE(review): max_reass and max_frag are parsed as u32 and narrowed with
   (u16)/(u8) casts in the nat_reass_set call; no range check is visible.  */
641 static clib_error_t *
642 nat_reass_command_fn (vlib_main_t * vm, unformat_input_t * input,
643 vlib_cli_command_t * cmd)
645 clib_error_t *error = 0;
646 unformat_input_t _line_input, *line_input = &_line_input;
647 u32 timeout = 0, max_reass = 0, max_frag = 0;
/* (u8) ~0 marks drop_frag as "not specified on the command line".  */
648 u8 drop_frag = (u8) ~ 0, is_ip6 = 0;
651 /* Get a line of input. */
652 if (!unformat_user (input, unformat_line_input, line_input))
655 while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
657 if (unformat (line_input, "max-reassemblies %u", &max_reass))
659 else if (unformat (line_input, "max-fragments %u", &max_frag))
661 else if (unformat (line_input, "timeout %u", &timeout))
663 else if (unformat (line_input, "enable"))
665 else if (unformat (line_input, "disable"))
667 else if (unformat (line_input, "ip4"))
669 else if (unformat (line_input, "ip6"))
673 error = clib_error_return (0, "unknown input '%U'",
674 format_unformat_error, line_input);
/* Fill in settings from the current configuration of the chosen family.  */
680 timeout = nat_reass_get_timeout (is_ip6);
682 max_reass = nat_reass_get_max_reass (is_ip6);
684 max_frag = nat_reass_get_max_frag (is_ip6);
685 if (drop_frag == (u8) ~ 0)
686 drop_frag = nat_reass_is_drop_frag (is_ip6);
689 nat_reass_set (timeout, (u16) max_reass, (u8) max_frag, drop_frag,
693 error = clib_error_return (0, "nat_set_reass return %d", rv);
698 unformat_free (line_input);
/* Walk callback used by the "show" command: prints one IPv4 reassembly.
   ctx carries the vlib_main_t used for CLI output.  The fragment id is
   converted from network to host byte order before printing.  */
704 nat_ip4_reass_walk_cli (nat_reass_ip4_t * reass, void *ctx)
706 vlib_main_t *vm = ctx;
708 vlib_cli_output (vm, " src %U dst %U proto %u id 0x%04x cached %u",
709 format_ip4_address, &reass->key.src,
710 format_ip4_address, &reass->key.dst,
712 clib_net_to_host_u16 (reass->key.frag_id), reass->frag_n);
/* Walk callback used by the "show" command: prints one IPv6 reassembly.
   ctx carries the vlib_main_t used for CLI output.  The 32-bit fragment id
   is converted from network to host byte order before printing.  */
718 nat_ip6_reass_walk_cli (nat_reass_ip6_t * reass, void *ctx)
720 vlib_main_t *vm = ctx;
722 vlib_cli_output (vm, " src %U dst %U proto %u id 0x%08x cached %u",
723 format_ip6_address, &reass->key.src,
724 format_ip6_address, &reass->key.dst,
726 clib_net_to_host_u32 (reass->key.frag_id), reass->frag_n);
/* CLI handler for "show nat virtual-reassembly".

   For IPv4 and then IPv6 it prints whether virtual reassembly is enabled
   (a non-zero drop_frag setting is shown as "DISABLED"), the configured
   limits and timeout, and then walks the reassemblies with the matching
   *_walk_cli callback, passing vm as the walk context.  */
731 static clib_error_t *
732 show_nat_reass_command_fn (vlib_main_t * vm, unformat_input_t * input,
733 vlib_cli_command_t * cmd)
735 vlib_cli_output (vm, "NAT IPv4 virtual fragmentation reassembly is %s",
736 nat_reass_is_drop_frag (0) ? "DISABLED" : "ENABLED");
737 vlib_cli_output (vm, " max-reassemblies %u", nat_reass_get_max_reass (0));
738 vlib_cli_output (vm, " max-fragments %u", nat_reass_get_max_frag (0));
739 vlib_cli_output (vm, " timeout %usec", nat_reass_get_timeout (0));
740 vlib_cli_output (vm, " reassemblies:");
741 nat_ip4_reass_walk (nat_ip4_reass_walk_cli, vm);
743 vlib_cli_output (vm, "NAT IPv6 virtual fragmentation reassembly is %s",
744 nat_reass_is_drop_frag (1) ? "DISABLED" : "ENABLED");
745 vlib_cli_output (vm, " max-reassemblies %u", nat_reass_get_max_reass (1));
746 vlib_cli_output (vm, " max-fragments %u", nat_reass_get_max_frag (1));
747 vlib_cli_output (vm, " timeout %usec", nat_reass_get_timeout (1));
748 vlib_cli_output (vm, " reassemblies:");
749 nat_ip6_reass_walk (nat_ip6_reass_walk_cli, vm);
/* Registers the configuration command "nat virtual-reassembly" and binds
   it to nat_reass_command_fn.  */
755 VLIB_CLI_COMMAND (nat_reass_command, static) =
757 .path = "nat virtual-reassembly",
758 .short_help = "nat virtual-reassembly ip4|ip6 [max-reassemblies <n>] "
759 "[max-fragments <n>] [timeout <sec>] [enable|disable]",
760 .function = nat_reass_command_fn,
/* Registers the command "show nat virtual-reassembly" and binds it to
   show_nat_reass_command_fn.  */
763 VLIB_CLI_COMMAND (show_nat_reass_command, static) =
765 .path = "show nat virtual-reassembly",
766 .short_help = "show nat virtual-reassembly",
767 .function = show_nat_reass_command_fn,
772 * fd.io coding-style-patch-verification: ON
775 * eval: (c-set-style "gnu")