2 * Copyright (c) 2017 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
17 * @brief NAT plugin virtual fragmentation reassembly
20 #include <vnet/vnet.h>
21 #include <nat/nat_reass.h>
22 #include <nat/nat_ipfix_logging.h>
/* Global NAT reassembly state.  The functions in this file reach it through
   a local `srm` pointer initialised to &nat_reass_main. */
24 nat_reass_main_t nat_reass_main;
/* Compute a bucket count for a reassembly hash table.
   The starting value is a max_reass limit divided by
   NAT_REASS_HT_LOAD_FACTOR; the IPv6 and the IPv4 limit each have their own
   assignment, presumably selected by is_ip6 (the selecting branch is not
   part of this listing -- confirm). */
27 nat_reass_get_nbuckets (u8 is_ip6)
29 nat_reass_main_t *srm = &nat_reass_main;
34 nbuckets = (u32) (srm->ip6_max_reass / NAT_REASS_HT_LOAD_FACTOR);
36 nbuckets = (u32) (srm->ip4_max_reass / NAT_REASS_HT_LOAD_FACTOR);
/* Scan i = 0..30 for a power of two (1 << i) that is >= nbuckets. */
38 for (i = 0; i < 31; i++)
39 if ((1 << i) >= nbuckets)
/* Hand the fragments cached for an IPv4 reassembly over to the caller.
   An element is removed from the head of the reassembly's fragment list
   (list head: reass->frags_per_reass_list_head_index), its value is
   appended to the vector *bi, and the element is returned to
   ip4_frags_list_pool.  The remove result is compared against a terminator
   value, so this presumably repeats until the list is empty -- confirm. */
46 static_always_inline void
47 nat_ip4_reass_get_frags_inline (nat_reass_ip4_t * reass, u32 ** bi)
49 nat_reass_main_t *srm = &nat_reass_main;
54 clib_dlist_remove_head (srm->ip4_frags_list_pool,
55 reass->frags_per_reass_list_head_index)) !=
58 elt = pool_elt_at_index (srm->ip4_frags_list_pool, elt_index);
59 vec_add1 (*bi, elt->value);
61 pool_put_index (srm->ip4_frags_list_pool, elt_index);
/* IPv6 counterpart of nat_ip4_reass_get_frags_inline.
   An element is removed from the head of the reassembly's fragment list,
   its value is appended to the vector *bi, and the element is returned to
   ip6_frags_list_pool.  The remove result is compared against a terminator
   value, so this presumably repeats until the list is empty -- confirm. */
65 static_always_inline void
66 nat_ip6_reass_get_frags_inline (nat_reass_ip6_t * reass, u32 ** bi)
68 nat_reass_main_t *srm = &nat_reass_main;
73 clib_dlist_remove_head (srm->ip6_frags_list_pool,
74 reass->frags_per_reass_list_head_index)) !=
77 elt = pool_elt_at_index (srm->ip6_frags_list_pool, elt_index);
78 vec_add1 (*bi, elt->value);
80 pool_put_index (srm->ip6_frags_list_pool, elt_index);
/* Apply new virtual-reassembly settings to the IPv6 or the IPv4 state.
   timeout, max_frag and drop_frag are stored as given.  When max_reass
   differs from the stored limit, the reassembly pool and the lookup hash of
   that address family are freed and re-created while holding the family's
   spinlock (if it was initialised).
   NOTE(review): the rebuild does not visibly reset the reassembly counter
   or the LRU / fragment lists -- verify that changing the limit while
   reassemblies are active is safe. */
85 nat_reass_set (u32 timeout, u16 max_reass, u8 max_frag, u8 drop_frag,
88 nat_reass_main_t *srm = &nat_reass_main;
/* IPv6: rebuild pool and hash only when the limit actually changes. */
93 if (srm->ip6_max_reass != max_reass)
95 clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
97 srm->ip6_max_reass = max_reass;
98 pool_free (srm->ip6_reass_pool);
/* Size the IPv6 pool from the IPv6 limit so the two address families stay
   independent of each other. */
99 pool_alloc (srm->ip6_reass_pool, srm->ip6_max_reass);
/* The argument is is_ip6: request the bucket count for the IPv6 limit. */
100 nbuckets = nat_reass_get_nbuckets (1);
101 clib_bihash_free_48_8 (&srm->ip6_reass_hash);
102 clib_bihash_init_48_8 (&srm->ip6_reass_hash, "nat-ip6-reass",
103 nbuckets, nbuckets * 1024);
105 clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
107 srm->ip6_timeout = timeout;
108 srm->ip6_max_frag = max_frag;
109 srm->ip6_drop_frag = drop_frag;
/* IPv4: same procedure on the IPv4 pool, hash and lock. */
113 if (srm->ip4_max_reass != max_reass)
115 clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
117 srm->ip4_max_reass = max_reass;
118 pool_free (srm->ip4_reass_pool);
119 pool_alloc (srm->ip4_reass_pool, srm->ip4_max_reass);
120 nbuckets = nat_reass_get_nbuckets (0);
121 clib_bihash_free_16_8 (&srm->ip4_reass_hash);
122 clib_bihash_init_16_8 (&srm->ip4_reass_hash, "nat-ip4-reass",
123 nbuckets, nbuckets * 1024);
124 clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
126 srm->ip4_timeout = timeout;
127 srm->ip4_max_frag = max_frag;
128 srm->ip4_drop_frag = drop_frag;
/* Return the configured reassembly timeout: ip6_timeout or ip4_timeout,
   presumably chosen by is_ip6 (the selecting branch is not part of this
   listing). */
135 nat_reass_get_timeout (u8 is_ip6)
137 nat_reass_main_t *srm = &nat_reass_main;
140 return srm->ip6_timeout;
142 return srm->ip4_timeout;
/* Return the configured reassembly limit: ip6_max_reass or ip4_max_reass,
   presumably chosen by is_ip6 (the selecting branch is not part of this
   listing). */
146 nat_reass_get_max_reass (u8 is_ip6)
148 nat_reass_main_t *srm = &nat_reass_main;
151 return srm->ip6_max_reass;
153 return srm->ip4_max_reass;
/* Return the configured per-reassembly fragment limit: ip6_max_frag or
   ip4_max_frag, presumably chosen by is_ip6 (the selecting branch is not
   part of this listing). */
157 nat_reass_get_max_frag (u8 is_ip6)
159 nat_reass_main_t *srm = &nat_reass_main;
162 return srm->ip6_max_frag;
164 return srm->ip4_max_frag;
/* Return the drop_frag setting: ip6_drop_frag or ip4_drop_frag, presumably
   chosen by is_ip6 (the selecting branch is not part of this listing).
   The show command in this file prints "DISABLED" when the value is
   non-zero. */
168 nat_reass_is_drop_frag (u8 is_ip6)
170 nat_reass_main_t *srm = &nat_reass_main;
173 return srm->ip6_drop_frag;
175 return srm->ip4_drop_frag;
/* Look up an IPv4 reassembly by key.
   The two 64-bit words of *k are copied into a bihash key and searched in
   ip4_reass_hash.  On a hit the stored value is used as an index into
   ip4_reass_pool, and the entry is tested against the IPv4 timeout
   (now < last_heard + ip4_timeout).  The return statements are not part of
   this listing; presumably a live entry is returned and everything else
   yields 0 -- confirm. */
178 static_always_inline nat_reass_ip4_t *
179 nat_ip4_reass_lookup (nat_reass_ip4_key_t * k, f64 now)
181 nat_reass_main_t *srm = &nat_reass_main;
182 clib_bihash_kv_16_8_t kv, value;
183 nat_reass_ip4_t *reass;
185 kv.key[0] = k->as_u64[0];
186 kv.key[1] = k->as_u64[1];
188 if (clib_bihash_search_16_8 (&srm->ip4_reass_hash, &kv, &value))
191 reass = pool_elt_at_index (srm->ip4_reass_pool, value.value);
192 if (now < reass->last_heard + (f64) srm->ip4_timeout)
/* Find an existing IPv4 reassembly without creating one.
   A lookup key is filled from src and dst (frag_id is presumably added as
   well; that assignment is not part of this listing), and
   nat_ip4_reass_lookup is called with the current vlib time while holding
   ip4_reass_lock (if initialised). */
199 nat_ip4_reass_find (ip4_address_t src, ip4_address_t dst, u16 frag_id,
202 nat_reass_main_t *srm = &nat_reass_main;
203 nat_reass_ip4_t *reass = 0;
204 nat_reass_ip4_key_t k;
205 f64 now = vlib_time_now (srm->vlib_main);
207 k.src.as_u32 = src.as_u32;
208 k.dst.as_u32 = dst.as_u32;
212 clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
213 reass = nat_ip4_reass_lookup (&k, now);
214 clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
/* Find the IPv4 reassembly for (src, dst, frag_id, proto) or set one up.
   Everything between the lock and unlock calls runs under ip4_reass_lock
   (if initialised).
   - Existing entry: last_heard is refreshed and the entry is moved to the
     tail of the LRU list (presumably only when reset_timeout is set; that
     test is not part of this listing).
   - Table full (ip4_reass_n >= ip4_max_reass): the LRU head is examined.
     If it has not timed out yet it is put back at the head and
     "no free resassembly slot" is logged.  Otherwise the slot is recycled:
     its LRU element moves to the tail, its hash entry is deleted and its
     cached fragments are appended to *bi_to_drop.
   - Otherwise a new pool element, LRU element and fragment-list head are
     allocated.
   Finally the key, session/thread indices and last_heard are (re)initialised
   and the key is added to ip4_reass_hash. */
220 nat_ip4_reass_find_or_create (ip4_address_t src, ip4_address_t dst,
221 u16 frag_id, u8 proto, u8 reset_timeout,
224 nat_reass_main_t *srm = &nat_reass_main;
225 nat_reass_ip4_t *reass = 0;
226 nat_reass_ip4_key_t k;
227 f64 now = vlib_time_now (srm->vlib_main);
228 dlist_elt_t *oldest_elt, *elt;
229 dlist_elt_t *per_reass_list_head_elt;
230 u32 oldest_index, elt_index;
231 clib_bihash_kv_16_8_t kv;
233 k.src.as_u32 = src.as_u32;
234 k.dst.as_u32 = dst.as_u32;
238 clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
240 reass = nat_ip4_reass_lookup (&k, now);
/* Hit: refresh the timestamp and make the entry most recently used. */
245 reass->last_heard = now;
246 clib_dlist_remove (srm->ip4_reass_lru_list_pool,
247 reass->lru_list_index);
248 clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
249 srm->ip4_reass_head_index,
250 reass->lru_list_index);
/* Miss with a full table: try to recycle the least recently used entry. */
255 if (srm->ip4_reass_n >= srm->ip4_max_reass)
258 clib_dlist_remove_head (srm->ip4_reass_lru_list_pool,
259 srm->ip4_reass_head_index);
260 ASSERT (oldest_index != ~0);
262 pool_elt_at_index (srm->ip4_reass_lru_list_pool, oldest_index);
263 reass = pool_elt_at_index (srm->ip4_reass_pool, oldest_elt->value);
264 if (now < reass->last_heard + (f64) srm->ip4_timeout)
266 clib_dlist_addhead (srm->ip4_reass_lru_list_pool,
267 srm->ip4_reass_head_index, oldest_index);
268 clib_warning ("no free resassembly slot");
273 clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
274 srm->ip4_reass_head_index, oldest_index);
/* Delete the hash entry of the entry being evicted.  It is stored under the
   evicted entry's own key (reass->key), not under the new key k; deleting k
   would leave a stale mapping to this pool slot in the hash. */
276 kv.key[0] = reass->key.as_u64[0];
277 kv.key[1] = reass->key.as_u64[1];
278 if (clib_bihash_add_del_16_8 (&srm->ip4_reass_hash, &kv, 0))
/* Return the evicted entry's cached fragments to the caller. */
284 nat_ip4_reass_get_frags_inline (reass, bi_to_drop);
/* Miss with room left: allocate the entry, its LRU element (queued at the
   tail) and the head element of its fragment list. */
288 pool_get (srm->ip4_reass_pool, reass);
289 pool_get (srm->ip4_reass_lru_list_pool, elt);
290 reass->lru_list_index = elt_index = elt - srm->ip4_reass_lru_list_pool;
291 clib_dlist_init (srm->ip4_reass_lru_list_pool, elt_index);
292 elt->value = reass - srm->ip4_reass_pool;
293 clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
294 srm->ip4_reass_head_index, elt_index);
295 pool_get (srm->ip4_frags_list_pool, per_reass_list_head_elt);
296 reass->frags_per_reass_list_head_index =
297 per_reass_list_head_elt - srm->ip4_frags_list_pool;
298 clib_dlist_init (srm->ip4_frags_list_pool,
299 reass->frags_per_reass_list_head_index);
/* (Re)initialise the entry for the new key and publish it in the hash;
   the hash value is the entry's index in ip4_reass_pool. */
303 reass->key.as_u64[0] = kv.key[0] = k.as_u64[0];
304 reass->key.as_u64[1] = kv.key[1] = k.as_u64[1];
305 kv.value = reass - srm->ip4_reass_pool;
306 reass->sess_index = (u32) ~ 0;
307 reass->thread_index = (u32) ~ 0;
308 reass->last_heard = now;
310 if (clib_bihash_add_del_16_8 (&srm->ip4_reass_hash, &kv, 1))
317 clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
/* Cache one more fragment on an IPv4 reassembly.
   When the reassembly already holds ip4_max_frag fragments the event is
   reported through nat_ipfix_logging_max_fragments_ip4 (the fragment is
   presumably rejected at that point; the return is not part of this
   listing).  Otherwise, under ip4_reass_lock, a new list element is taken
   from ip4_frags_list_pool, initialised and appended to the tail of the
   reassembly's fragment list.  Storing bi in the element is presumably done
   on a line not shown here -- confirm.
   NOTE(review): the frag_n check runs before the lock is taken. */
322 nat_ip4_reass_add_fragment (nat_reass_ip4_t * reass, u32 bi)
324 nat_reass_main_t *srm = &nat_reass_main;
328 if (reass->frag_n >= srm->ip4_max_frag)
330 nat_ipfix_logging_max_fragments_ip4 (srm->ip4_max_frag,
335 clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
337 pool_get (srm->ip4_frags_list_pool, elt);
338 elt_index = elt - srm->ip4_frags_list_pool;
339 clib_dlist_init (srm->ip4_frags_list_pool, elt_index);
341 clib_dlist_addtail (srm->ip4_frags_list_pool,
342 reass->frags_per_reass_list_head_index, elt_index);
345 clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
/* Locked wrapper: calls nat_ip4_reass_get_frags_inline (reass, bi) while
   holding ip4_reass_lock (if initialised). */
351 nat_ip4_reass_get_frags (nat_reass_ip4_t * reass, u32 ** bi)
353 nat_reass_main_t *srm = &nat_reass_main;
355 clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
357 nat_ip4_reass_get_frags_inline (reass, bi);
359 clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
/* Walk the IPv4 reassembly pool.
   Every element of ip4_reass_pool is tested against the IPv4 timeout
   (now < last_heard + ip4_timeout); fn and ctx are presumably applied to the
   elements that pass (the call is not part of this listing).
   NOTE(review): no lock is taken in the visible lines. */
363 nat_ip4_reass_walk (nat_ip4_reass_walk_fn_t fn, void *ctx)
365 nat_reass_ip4_t *reass;
366 nat_reass_main_t *srm = &nat_reass_main;
367 f64 now = vlib_time_now (srm->vlib_main);
370 pool_foreach (reass, srm->ip4_reass_pool,
372 if (now < reass->last_heard + (f64) srm->ip4_timeout)
/* Look up an IPv6 reassembly by key.
   The six 64-bit words of *k are copied into a bihash key and searched in
   ip6_reass_hash.  On a hit the stored value is used as an index into
   ip6_reass_pool, and the entry is tested against the IPv6 timeout
   (now < last_heard + ip6_timeout).  The return statements are not part of
   this listing; presumably a live entry is returned and everything else
   yields 0 -- confirm. */
381 static_always_inline nat_reass_ip6_t *
382 nat_ip6_reass_lookup (nat_reass_ip6_key_t * k, f64 now)
384 nat_reass_main_t *srm = &nat_reass_main;
385 clib_bihash_kv_48_8_t kv, value;
386 nat_reass_ip6_t *reass;
389 kv.key[0] = k->as_u64[0];
390 kv.key[1] = k->as_u64[1];
391 kv.key[2] = k->as_u64[2];
392 kv.key[3] = k->as_u64[3];
393 kv.key[4] = k->as_u64[4];
394 kv.key[5] = k->as_u64[5];
396 if (clib_bihash_search_48_8 (&srm->ip6_reass_hash, &kv, &value))
399 reass = pool_elt_at_index (srm->ip6_reass_pool, value.value);
400 if (now < reass->last_heard + (f64) srm->ip6_timeout)
407 nat_ip6_reass_find_or_create (ip6_address_t src, ip6_address_t dst,
408 u32 frag_id, u8 proto, u8 reset_timeout,
411 nat_reass_main_t *srm = &nat_reass_main;
412 nat_reass_ip6_t *reass = 0;
413 nat_reass_ip6_key_t k;
414 f64 now = vlib_time_now (srm->vlib_main);
415 dlist_elt_t *oldest_elt, *elt;
416 dlist_elt_t *per_reass_list_head_elt;
417 u32 oldest_index, elt_index;
418 clib_bihash_kv_48_8_t kv;
420 k.src.as_u64[0] = src.as_u64[0];
421 k.src.as_u64[1] = src.as_u64[1];
422 k.dst.as_u64[0] = dst.as_u64[0];
423 k.dst.as_u64[1] = dst.as_u64[1];
428 clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
430 reass = nat_ip6_reass_lookup (&k, now);
435 reass->last_heard = now;
436 clib_dlist_remove (srm->ip6_reass_lru_list_pool,
437 reass->lru_list_index);
438 clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
439 srm->ip6_reass_head_index,
440 reass->lru_list_index);
445 if (srm->ip6_reass_n >= srm->ip6_max_reass)
448 clib_dlist_remove_head (srm->ip6_reass_lru_list_pool,
449 srm->ip6_reass_head_index);
450 ASSERT (oldest_index != ~0);
452 pool_elt_at_index (srm->ip4_reass_lru_list_pool, oldest_index);
453 reass = pool_elt_at_index (srm->ip6_reass_pool, oldest_elt->value);
454 if (now < reass->last_heard + (f64) srm->ip6_timeout)
456 clib_dlist_addhead (srm->ip6_reass_lru_list_pool,
457 srm->ip6_reass_head_index, oldest_index);
458 clib_warning ("no free resassembly slot");
463 clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
464 srm->ip6_reass_head_index, oldest_index);
466 kv.key[0] = k.as_u64[0];
467 kv.key[1] = k.as_u64[1];
468 kv.key[2] = k.as_u64[2];
469 kv.key[3] = k.as_u64[4];
470 kv.key[4] = k.as_u64[5];
471 if (clib_bihash_add_del_48_8 (&srm->ip6_reass_hash, &kv, 0))
477 nat_ip6_reass_get_frags_inline (reass, bi_to_drop);
481 pool_get (srm->ip6_reass_pool, reass);
482 pool_get (srm->ip6_reass_lru_list_pool, elt);
483 reass->lru_list_index = elt_index = elt - srm->ip6_reass_lru_list_pool;
484 clib_dlist_init (srm->ip6_reass_lru_list_pool, elt_index);
485 elt->value = reass - srm->ip6_reass_pool;
486 clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
487 srm->ip6_reass_head_index, elt_index);
488 pool_get (srm->ip6_frags_list_pool, per_reass_list_head_elt);
489 reass->frags_per_reass_list_head_index =
490 per_reass_list_head_elt - srm->ip6_frags_list_pool;
491 clib_dlist_init (srm->ip6_frags_list_pool,
492 reass->frags_per_reass_list_head_index);
496 reass->key.as_u64[0] = kv.key[0] = k.as_u64[0];
497 reass->key.as_u64[1] = kv.key[1] = k.as_u64[1];
498 reass->key.as_u64[2] = kv.key[2] = k.as_u64[2];
499 reass->key.as_u64[3] = kv.key[3] = k.as_u64[3];
500 reass->key.as_u64[4] = kv.key[4] = k.as_u64[4];
501 reass->key.as_u64[5] = kv.key[5] = k.as_u64[5];
502 kv.value = reass - srm->ip6_reass_pool;
503 reass->sess_index = (u32) ~ 0;
504 reass->last_heard = now;
506 if (clib_bihash_add_del_48_8 (&srm->ip6_reass_hash, &kv, 1))
513 clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
/* Cache one more fragment on an IPv6 reassembly.
   When the reassembly already holds ip6_max_frag fragments the event is
   reported through nat_ipfix_logging_max_fragments_ip6 (the fragment is
   presumably rejected at that point; the return is not part of this
   listing).  Otherwise, under ip6_reass_lock, a new list element is taken
   from ip6_frags_list_pool, initialised and appended to the tail of the
   reassembly's fragment list.  Storing bi in the element is presumably done
   on a line not shown here -- confirm.
   NOTE(review): the frag_n check runs before the lock is taken. */
518 nat_ip6_reass_add_fragment (nat_reass_ip6_t * reass, u32 bi)
520 nat_reass_main_t *srm = &nat_reass_main;
524 if (reass->frag_n >= srm->ip6_max_frag)
526 nat_ipfix_logging_max_fragments_ip6 (srm->ip6_max_frag,
531 clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
533 pool_get (srm->ip6_frags_list_pool, elt);
534 elt_index = elt - srm->ip6_frags_list_pool;
535 clib_dlist_init (srm->ip6_frags_list_pool, elt_index);
537 clib_dlist_addtail (srm->ip6_frags_list_pool,
538 reass->frags_per_reass_list_head_index, elt_index);
541 clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
/* Locked wrapper: calls nat_ip6_reass_get_frags_inline (reass, bi) while
   holding ip6_reass_lock (if initialised). */
547 nat_ip6_reass_get_frags (nat_reass_ip6_t * reass, u32 ** bi)
549 nat_reass_main_t *srm = &nat_reass_main;
551 clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
553 nat_ip6_reass_get_frags_inline (reass, bi);
555 clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
/* Walk the IPv6 reassembly pool.
   Every element of ip6_reass_pool is tested against the IPv6 timeout
   (now < last_heard + ip6_timeout), the same expiry rule that
   nat_ip6_reass_lookup applies; fn and ctx are presumably applied to the
   elements that pass (the call is not part of this listing).
   NOTE(review): no lock is taken in the visible lines. */
559 nat_ip6_reass_walk (nat_ip6_reass_walk_fn_t fn, void *ctx)
561 nat_reass_ip6_t *reass;
562 nat_reass_main_t *srm = &nat_reass_main;
563 f64 now = vlib_time_now (srm->vlib_main);
566 pool_foreach (reass, srm->ip6_reass_pool,
568 if (now < reass->last_heard + (f64) srm->ip6_timeout)
/* Initialise the NAT reassembly state for both address families.
   For IPv4 and then IPv6 the same steps are performed: load the default
   timeout / max_reass / max_frag, clear drop_frag and the reassembly
   counter, initialise the spinlock only when more than one vlib main
   exists, pre-allocate the reassembly pool, create the lookup hash and
   allocate the head element of the LRU list. */
578 nat_reass_init (vlib_main_t * vm)
580 nat_reass_main_t *srm = &nat_reass_main;
581 vlib_thread_main_t *tm = vlib_get_thread_main ();
582 clib_error_t *error = 0;
584 u32 nbuckets, head_index;
587 srm->vnet_main = vnet_get_main ();
/* IPv4 defaults. */
590 srm->ip4_timeout = NAT_REASS_TIMEOUT_DEFAULT;
591 srm->ip4_max_reass = NAT_MAX_REASS_DEAFULT;
592 srm->ip4_max_frag = NAT_MAX_FRAG_DEFAULT;
593 srm->ip4_drop_frag = 0;
594 srm->ip4_reass_n = 0;
/* The lock is left uninitialised with a single vlib main; the
   *_if_init lock helpers used elsewhere in this file depend on that. */
596 if (tm->n_vlib_mains > 1)
597 clib_spinlock_init (&srm->ip4_reass_lock);
599 pool_alloc (srm->ip4_reass_pool, srm->ip4_max_reass);
601 nbuckets = nat_reass_get_nbuckets (0);
602 clib_bihash_init_16_8 (&srm->ip4_reass_hash, "nat-ip4-reass", nbuckets,
/* Head element of the IPv4 LRU list. */
605 pool_get (srm->ip4_reass_lru_list_pool, head);
606 srm->ip4_reass_head_index = head_index =
607 head - srm->ip4_reass_lru_list_pool;
608 clib_dlist_init (srm->ip4_reass_lru_list_pool, head_index);
/* IPv6 defaults. */
611 srm->ip6_timeout = NAT_REASS_TIMEOUT_DEFAULT;
612 srm->ip6_max_reass = NAT_MAX_REASS_DEAFULT;
613 srm->ip6_max_frag = NAT_MAX_FRAG_DEFAULT;
614 srm->ip6_drop_frag = 0;
615 srm->ip6_reass_n = 0;
617 if (tm->n_vlib_mains > 1)
618 clib_spinlock_init (&srm->ip6_reass_lock);
620 pool_alloc (srm->ip6_reass_pool, srm->ip6_max_reass);
622 nbuckets = nat_reass_get_nbuckets (1);
623 clib_bihash_init_48_8 (&srm->ip6_reass_hash, "nat-ip6-reass", nbuckets,
/* Head element of the IPv6 LRU list. */
626 pool_get (srm->ip6_reass_lru_list_pool, head);
627 srm->ip6_reass_head_index = head_index =
628 head - srm->ip6_reass_lru_list_pool;
629 clib_dlist_init (srm->ip6_reass_lru_list_pool, head_index);
/* CLI handler for "nat virtual-reassembly".
   Parses max-reassemblies, max-fragments, timeout, enable, disable, ip4 and
   ip6 from one line of input, reports any other token as "unknown input",
   fills in settings from the current configuration and applies the result
   through nat_reass_set. */
634 static clib_error_t *
635 nat_reass_command_fn (vlib_main_t * vm, unformat_input_t * input,
636 vlib_cli_command_t * cmd)
638 clib_error_t *error = 0;
639 unformat_input_t _line_input, *line_input = &_line_input;
/* drop_frag starts as (u8) ~0, the "not given" marker tested further down;
   the numeric settings start at 0. */
640 u32 timeout = 0, max_reass = 0, max_frag = 0;
641 u8 drop_frag = (u8) ~ 0, is_ip6 = 0;
644 /* Get a line of input. */
645 if (!unformat_user (input, unformat_line_input, line_input))
648 while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
650 if (unformat (line_input, "max-reassemblies %u", &max_reass))
652 else if (unformat (line_input, "max-fragments %u", &max_frag))
654 else if (unformat (line_input, "timeout %u", &timeout))
656 else if (unformat (line_input, "enable"))
658 else if (unformat (line_input, "disable"))
660 else if (unformat (line_input, "ip4"))
662 else if (unformat (line_input, "ip6"))
666 error = clib_error_return (0, "unknown input '%U'",
667 format_unformat_error, line_input);
/* Fall back to the currently configured values (presumably only for
   settings the user left out; for drop_frag that test is visible below). */
673 timeout = nat_reass_get_timeout (is_ip6);
675 max_reass = nat_reass_get_max_reass (is_ip6);
677 max_frag = nat_reass_get_max_frag (is_ip6);
678 if (drop_frag == (u8) ~ 0)
679 drop_frag = nat_reass_is_drop_frag (is_ip6);
/* NOTE(review): max_reass and max_frag are parsed as u32 and narrowed to
   u16 / u8 here; larger input is truncated -- confirm whether a range check
   is wanted. */
682 nat_reass_set (timeout, (u16) max_reass, (u8) max_frag, drop_frag,
686 error = clib_error_return (0, "nat_set_reass return %d", rv);
691 unformat_free (line_input);
/* Walk callback used by the show command: prints one IPv4 reassembly
   (source, destination, protocol, fragment id converted from network byte
   order, and frag_n as the "cached" count).  ctx carries the vlib_main_t
   used for CLI output. */
697 nat_ip4_reass_walk_cli (nat_reass_ip4_t * reass, void *ctx)
699 vlib_main_t *vm = ctx;
701 vlib_cli_output (vm, " src %U dst %U proto %u id 0x%04x cached %u",
702 format_ip4_address, &reass->key.src,
703 format_ip4_address, &reass->key.dst,
705 clib_net_to_host_u16 (reass->key.frag_id), reass->frag_n);
/* Walk callback used by the show command: prints one IPv6 reassembly
   (source, destination, protocol, fragment id converted from network byte
   order, and frag_n as the "cached" count).  ctx carries the vlib_main_t
   used for CLI output. */
711 nat_ip6_reass_walk_cli (nat_reass_ip6_t * reass, void *ctx)
713 vlib_main_t *vm = ctx;
715 vlib_cli_output (vm, " src %U dst %U proto %u id 0x%08x cached %u",
716 format_ip6_address, &reass->key.src,
717 format_ip6_address, &reass->key.dst,
719 clib_net_to_host_u32 (reass->key.frag_id), reass->frag_n);
/* CLI handler for "show nat virtual-reassembly".
   For IPv4 and then IPv6 it prints whether reassembly is enabled, the
   configured limits and timeout, and then the reassemblies reported by the
   family's walk function. */
724 static clib_error_t *
725 show_nat_reass_command_fn (vlib_main_t * vm, unformat_input_t * input,
726 vlib_cli_command_t * cmd)
/* A non-zero drop_frag setting is reported as "DISABLED". */
728 vlib_cli_output (vm, "NAT IPv4 virtual fragmentation reassembly is %s",
729 nat_reass_is_drop_frag (0) ? "DISABLED" : "ENABLED");
730 vlib_cli_output (vm, " max-reassemblies %u", nat_reass_get_max_reass (0));
731 vlib_cli_output (vm, " max-fragments %u", nat_reass_get_max_frag (0));
732 vlib_cli_output (vm, " timeout %usec", nat_reass_get_timeout (0));
733 vlib_cli_output (vm, " reassemblies:");
734 nat_ip4_reass_walk (nat_ip4_reass_walk_cli, vm);
736 vlib_cli_output (vm, "NAT IPv6 virtual fragmentation reassembly is %s",
737 nat_reass_is_drop_frag (1) ? "DISABLED" : "ENABLED");
738 vlib_cli_output (vm, " max-reassemblies %u", nat_reass_get_max_reass (1));
739 vlib_cli_output (vm, " max-fragments %u", nat_reass_get_max_frag (1));
740 vlib_cli_output (vm, " timeout %usec", nat_reass_get_timeout (1));
741 vlib_cli_output (vm, " reassemblies:");
742 nat_ip6_reass_walk (nat_ip6_reass_walk_cli, vm);
/* Registers the "nat virtual-reassembly" CLI command, handled by
   nat_reass_command_fn. */
748 VLIB_CLI_COMMAND (nat_reass_command, static) =
750 .path = "nat virtual-reassembly",
751 .short_help = "nat virtual-reassembly ip4|ip6 [max-reassemblies <n>] "
752 "[max-fragments <n>] [timeout <sec>] [enable|disable]",
753 .function = nat_reass_command_fn,
/* Registers the "show nat virtual-reassembly" CLI command, handled by
   show_nat_reass_command_fn. */
756 VLIB_CLI_COMMAND (show_nat_reass_command, static) =
758 .path = "show nat virtual-reassembly",
759 .short_help = "show nat virtual-reassembly",
760 .function = show_nat_reass_command_fn,
765 * fd.io coding-style-patch-verification: ON
768 * eval: (c-set-style "gnu")