/*
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * @file
 * @brief NAT plugin virtual fragmentation reassembly
 */
20 #include <vnet/vnet.h>
21 #include <nat/nat_reass.h>
/* Global reassembly state; the functions in this file reach it through a
 * local `srm` pointer initialised to &nat_reass_main. */
23 nat_reass_main_t nat_reass_main;
26 nat_reass_get_nbuckets (u8 is_ip6)
28 nat_reass_main_t *srm = &nat_reass_main;
33 nbuckets = (u32) (srm->ip6_max_reass / NAT_REASS_HT_LOAD_FACTOR);
35 nbuckets = (u32) (srm->ip4_max_reass / NAT_REASS_HT_LOAD_FACTOR);
37 for (i = 0; i < 31; i++)
38 if ((1 << i) >= nbuckets)
45 static_always_inline void
46 nat_ip4_reass_get_frags_inline (nat_reass_ip4_t * reass, u32 ** bi)
48 nat_reass_main_t *srm = &nat_reass_main;
53 clib_dlist_remove_head (srm->ip4_frags_list_pool,
54 reass->frags_per_reass_list_head_index)) !=
57 elt = pool_elt_at_index (srm->ip4_frags_list_pool, elt_index);
58 vec_add1 (*bi, elt->value);
60 pool_put_index (srm->ip4_frags_list_pool, elt_index);
64 static_always_inline void
65 nat_ip6_reass_get_frags_inline (nat_reass_ip6_t * reass, u32 ** bi)
67 nat_reass_main_t *srm = &nat_reass_main;
72 clib_dlist_remove_head (srm->ip6_frags_list_pool,
73 reass->frags_per_reass_list_head_index)) !=
76 elt = pool_elt_at_index (srm->ip6_frags_list_pool, elt_index);
77 vec_add1 (*bi, elt->value);
79 pool_put_index (srm->ip6_frags_list_pool, elt_index);
84 nat_reass_set (u32 timeout, u16 max_reass, u8 max_frag, u8 drop_frag,
87 nat_reass_main_t *srm = &nat_reass_main;
92 if (srm->ip6_max_reass != max_reass)
94 clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
96 srm->ip6_max_reass = max_reass;
97 pool_free (srm->ip6_reass_pool);
98 pool_alloc (srm->ip6_reass_pool, srm->ip4_max_reass);
99 nbuckets = nat_reass_get_nbuckets (0);
100 clib_bihash_free_48_8 (&srm->ip6_reass_hash);
101 clib_bihash_init_48_8 (&srm->ip6_reass_hash, "nat-ip6-reass",
102 nbuckets, nbuckets * 1024);
104 clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
106 srm->ip6_timeout = timeout;
107 srm->ip6_max_frag = max_frag;
108 srm->ip6_drop_frag = drop_frag;
112 if (srm->ip4_max_reass != max_reass)
114 clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
116 srm->ip4_max_reass = max_reass;
117 pool_free (srm->ip4_reass_pool);
118 pool_alloc (srm->ip4_reass_pool, srm->ip4_max_reass);
119 nbuckets = nat_reass_get_nbuckets (0);
120 clib_bihash_free_16_8 (&srm->ip4_reass_hash);
121 clib_bihash_init_16_8 (&srm->ip4_reass_hash, "nat-ip4-reass",
122 nbuckets, nbuckets * 1024);
123 clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
125 srm->ip4_timeout = timeout;
126 srm->ip4_max_frag = max_frag;
127 srm->ip4_drop_frag = drop_frag;
134 nat_reass_get_timeout (u8 is_ip6)
136 nat_reass_main_t *srm = &nat_reass_main;
139 return srm->ip6_timeout;
141 return srm->ip4_timeout;
145 nat_reass_get_max_reass (u8 is_ip6)
147 nat_reass_main_t *srm = &nat_reass_main;
150 return srm->ip6_max_reass;
152 return srm->ip4_max_reass;
156 nat_reass_get_max_frag (u8 is_ip6)
158 nat_reass_main_t *srm = &nat_reass_main;
161 return srm->ip6_max_frag;
163 return srm->ip4_max_frag;
167 nat_reass_is_drop_frag (u8 is_ip6)
169 nat_reass_main_t *srm = &nat_reass_main;
172 return srm->ip6_drop_frag;
174 return srm->ip4_drop_frag;
177 static_always_inline nat_reass_ip4_t *
178 nat_ip4_reass_lookup (nat_reass_ip4_key_t * k, f64 now)
180 nat_reass_main_t *srm = &nat_reass_main;
181 clib_bihash_kv_16_8_t kv, value;
182 nat_reass_ip4_t *reass;
184 kv.key[0] = k->as_u64[0];
185 kv.key[1] = k->as_u64[1];
187 if (clib_bihash_search_16_8 (&srm->ip4_reass_hash, &kv, &value))
190 reass = pool_elt_at_index (srm->ip4_reass_pool, value.value);
191 if (now < reass->last_heard + (f64) srm->ip4_timeout)
198 nat_ip4_reass_find (ip4_address_t src, ip4_address_t dst, u16 frag_id,
201 nat_reass_main_t *srm = &nat_reass_main;
202 nat_reass_ip4_t *reass = 0;
203 nat_reass_ip4_key_t k;
204 f64 now = vlib_time_now (srm->vlib_main);
206 k.src.as_u32 = src.as_u32;
207 k.dst.as_u32 = dst.as_u32;
211 clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
212 reass = nat_ip4_reass_lookup (&k, now);
213 clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
219 nat_ip4_reass_find_or_create (ip4_address_t src, ip4_address_t dst,
220 u16 frag_id, u8 proto, u8 reset_timeout,
223 nat_reass_main_t *srm = &nat_reass_main;
224 nat_reass_ip4_t *reass = 0;
225 nat_reass_ip4_key_t k;
226 f64 now = vlib_time_now (srm->vlib_main);
227 dlist_elt_t *oldest_elt, *elt;
228 dlist_elt_t *per_reass_list_head_elt;
229 u32 oldest_index, elt_index;
230 clib_bihash_kv_16_8_t kv;
232 k.src.as_u32 = src.as_u32;
233 k.dst.as_u32 = dst.as_u32;
237 clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
239 reass = nat_ip4_reass_lookup (&k, now);
244 reass->last_heard = now;
245 clib_dlist_remove (srm->ip4_reass_lru_list_pool,
246 reass->lru_list_index);
247 clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
248 srm->ip4_reass_head_index,
249 reass->lru_list_index);
254 if (srm->ip4_reass_n >= srm->ip4_max_reass)
257 clib_dlist_remove_head (srm->ip4_reass_lru_list_pool,
258 srm->ip4_reass_head_index);
259 ASSERT (oldest_index != ~0);
261 pool_elt_at_index (srm->ip4_reass_lru_list_pool, oldest_index);
262 reass = pool_elt_at_index (srm->ip4_reass_pool, oldest_elt->value);
263 if (now < reass->last_heard + (f64) srm->ip4_timeout)
265 clib_dlist_addhead (srm->ip4_reass_lru_list_pool,
266 srm->ip4_reass_head_index, oldest_index);
267 clib_warning ("no free resassembly slot");
272 clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
273 srm->ip4_reass_head_index, oldest_index);
275 kv.key[0] = k.as_u64[0];
276 kv.key[1] = k.as_u64[1];
277 if (clib_bihash_add_del_16_8 (&srm->ip4_reass_hash, &kv, 0))
283 nat_ip4_reass_get_frags_inline (reass, bi_to_drop);
287 pool_get (srm->ip4_reass_pool, reass);
288 pool_get (srm->ip4_reass_lru_list_pool, elt);
289 reass->lru_list_index = elt_index = elt - srm->ip4_reass_lru_list_pool;
290 clib_dlist_init (srm->ip4_reass_lru_list_pool, elt_index);
291 elt->value = reass - srm->ip4_reass_pool;
292 clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
293 srm->ip4_reass_head_index, elt_index);
294 pool_get (srm->ip4_frags_list_pool, per_reass_list_head_elt);
295 reass->frags_per_reass_list_head_index =
296 per_reass_list_head_elt - srm->ip4_frags_list_pool;
297 clib_dlist_init (srm->ip4_frags_list_pool,
298 reass->frags_per_reass_list_head_index);
302 reass->key.as_u64[0] = kv.key[0] = k.as_u64[0];
303 reass->key.as_u64[1] = kv.key[1] = k.as_u64[1];
304 kv.value = reass - srm->ip4_reass_pool;
305 reass->sess_index = (u32) ~ 0;
306 reass->thread_index = (u32) ~ 0;
307 reass->last_heard = now;
309 if (clib_bihash_add_del_16_8 (&srm->ip4_reass_hash, &kv, 1))
316 clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
321 nat_ip4_reass_add_fragment (nat_reass_ip4_t * reass, u32 bi)
323 nat_reass_main_t *srm = &nat_reass_main;
327 if (reass->frag_n >= srm->ip4_max_frag)
330 clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
332 pool_get (srm->ip4_frags_list_pool, elt);
333 elt_index = elt - srm->ip4_frags_list_pool;
334 clib_dlist_init (srm->ip4_frags_list_pool, elt_index);
336 clib_dlist_addtail (srm->ip4_frags_list_pool,
337 reass->frags_per_reass_list_head_index, elt_index);
340 clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
346 nat_ip4_reass_get_frags (nat_reass_ip4_t * reass, u32 ** bi)
348 nat_reass_main_t *srm = &nat_reass_main;
350 clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
352 nat_ip4_reass_get_frags_inline (reass, bi);
354 clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
358 nat_ip4_reass_walk (nat_ip4_reass_walk_fn_t fn, void *ctx)
360 nat_reass_ip4_t *reass;
361 nat_reass_main_t *srm = &nat_reass_main;
362 f64 now = vlib_time_now (srm->vlib_main);
365 pool_foreach (reass, srm->ip4_reass_pool,
367 if (now < reass->last_heard + (f64) srm->ip4_timeout)
376 static_always_inline nat_reass_ip6_t *
377 nat_ip6_reass_lookup (nat_reass_ip6_key_t * k, f64 now)
379 nat_reass_main_t *srm = &nat_reass_main;
380 clib_bihash_kv_48_8_t kv, value;
381 nat_reass_ip6_t *reass;
384 kv.key[0] = k->as_u64[0];
385 kv.key[1] = k->as_u64[1];
386 kv.key[2] = k->as_u64[2];
387 kv.key[3] = k->as_u64[3];
388 kv.key[4] = k->as_u64[4];
389 kv.key[5] = k->as_u64[5];
391 if (clib_bihash_search_48_8 (&srm->ip6_reass_hash, &kv, &value))
394 reass = pool_elt_at_index (srm->ip6_reass_pool, value.value);
395 if (now < reass->last_heard + (f64) srm->ip6_timeout)
402 nat_ip6_reass_find_or_create (ip6_address_t src, ip6_address_t dst,
403 u32 frag_id, u8 proto, u8 reset_timeout,
406 nat_reass_main_t *srm = &nat_reass_main;
407 nat_reass_ip6_t *reass = 0;
408 nat_reass_ip6_key_t k;
409 f64 now = vlib_time_now (srm->vlib_main);
410 dlist_elt_t *oldest_elt, *elt;
411 dlist_elt_t *per_reass_list_head_elt;
412 u32 oldest_index, elt_index;
413 clib_bihash_kv_48_8_t kv;
415 k.src.as_u64[0] = src.as_u64[0];
416 k.src.as_u64[1] = src.as_u64[1];
417 k.dst.as_u64[0] = dst.as_u64[0];
418 k.dst.as_u64[1] = dst.as_u64[1];
423 clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
425 reass = nat_ip6_reass_lookup (&k, now);
430 reass->last_heard = now;
431 clib_dlist_remove (srm->ip6_reass_lru_list_pool,
432 reass->lru_list_index);
433 clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
434 srm->ip6_reass_head_index,
435 reass->lru_list_index);
440 if (srm->ip6_reass_n >= srm->ip6_max_reass)
443 clib_dlist_remove_head (srm->ip6_reass_lru_list_pool,
444 srm->ip6_reass_head_index);
445 ASSERT (oldest_index != ~0);
447 pool_elt_at_index (srm->ip4_reass_lru_list_pool, oldest_index);
448 reass = pool_elt_at_index (srm->ip6_reass_pool, oldest_elt->value);
449 if (now < reass->last_heard + (f64) srm->ip6_timeout)
451 clib_dlist_addhead (srm->ip6_reass_lru_list_pool,
452 srm->ip6_reass_head_index, oldest_index);
453 clib_warning ("no free resassembly slot");
458 clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
459 srm->ip6_reass_head_index, oldest_index);
461 kv.key[0] = k.as_u64[0];
462 kv.key[1] = k.as_u64[1];
463 kv.key[2] = k.as_u64[2];
464 kv.key[3] = k.as_u64[4];
465 kv.key[4] = k.as_u64[5];
466 if (clib_bihash_add_del_48_8 (&srm->ip6_reass_hash, &kv, 0))
472 nat_ip6_reass_get_frags_inline (reass, bi_to_drop);
476 pool_get (srm->ip6_reass_pool, reass);
477 pool_get (srm->ip6_reass_lru_list_pool, elt);
478 reass->lru_list_index = elt_index = elt - srm->ip6_reass_lru_list_pool;
479 clib_dlist_init (srm->ip6_reass_lru_list_pool, elt_index);
480 elt->value = reass - srm->ip6_reass_pool;
481 clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
482 srm->ip6_reass_head_index, elt_index);
483 pool_get (srm->ip6_frags_list_pool, per_reass_list_head_elt);
484 reass->frags_per_reass_list_head_index =
485 per_reass_list_head_elt - srm->ip6_frags_list_pool;
486 clib_dlist_init (srm->ip6_frags_list_pool,
487 reass->frags_per_reass_list_head_index);
491 reass->key.as_u64[0] = kv.key[0] = k.as_u64[0];
492 reass->key.as_u64[1] = kv.key[1] = k.as_u64[1];
493 reass->key.as_u64[2] = kv.key[2] = k.as_u64[2];
494 reass->key.as_u64[3] = kv.key[3] = k.as_u64[3];
495 reass->key.as_u64[4] = kv.key[4] = k.as_u64[4];
496 reass->key.as_u64[5] = kv.key[5] = k.as_u64[5];
497 kv.value = reass - srm->ip6_reass_pool;
498 reass->sess_index = (u32) ~ 0;
499 reass->last_heard = now;
501 if (clib_bihash_add_del_48_8 (&srm->ip6_reass_hash, &kv, 1))
508 clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
513 nat_ip6_reass_add_fragment (nat_reass_ip6_t * reass, u32 bi)
515 nat_reass_main_t *srm = &nat_reass_main;
519 if (reass->frag_n >= srm->ip6_max_frag)
522 clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
524 pool_get (srm->ip6_frags_list_pool, elt);
525 elt_index = elt - srm->ip6_frags_list_pool;
526 clib_dlist_init (srm->ip6_frags_list_pool, elt_index);
528 clib_dlist_addtail (srm->ip6_frags_list_pool,
529 reass->frags_per_reass_list_head_index, elt_index);
532 clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
538 nat_ip6_reass_get_frags (nat_reass_ip6_t * reass, u32 ** bi)
540 nat_reass_main_t *srm = &nat_reass_main;
542 clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
544 nat_ip6_reass_get_frags_inline (reass, bi);
546 clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
550 nat_ip6_reass_walk (nat_ip6_reass_walk_fn_t fn, void *ctx)
552 nat_reass_ip6_t *reass;
553 nat_reass_main_t *srm = &nat_reass_main;
554 f64 now = vlib_time_now (srm->vlib_main);
557 pool_foreach (reass, srm->ip6_reass_pool,
559 if (now < reass->last_heard + (f64) srm->ip4_timeout)
569 nat_reass_init (vlib_main_t * vm)
571 nat_reass_main_t *srm = &nat_reass_main;
572 vlib_thread_main_t *tm = vlib_get_thread_main ();
573 clib_error_t *error = 0;
575 u32 nbuckets, head_index;
578 srm->vnet_main = vnet_get_main ();
581 srm->ip4_timeout = NAT_REASS_TIMEOUT_DEFAULT;
582 srm->ip4_max_reass = NAT_MAX_REASS_DEAFULT;
583 srm->ip4_max_frag = NAT_MAX_FRAG_DEFAULT;
584 srm->ip4_drop_frag = 0;
585 srm->ip4_reass_n = 0;
587 if (tm->n_vlib_mains > 1)
588 clib_spinlock_init (&srm->ip4_reass_lock);
590 pool_alloc (srm->ip4_reass_pool, srm->ip4_max_reass);
592 nbuckets = nat_reass_get_nbuckets (0);
593 clib_bihash_init_16_8 (&srm->ip4_reass_hash, "nat-ip4-reass", nbuckets,
596 pool_get (srm->ip4_reass_lru_list_pool, head);
597 srm->ip4_reass_head_index = head_index =
598 head - srm->ip4_reass_lru_list_pool;
599 clib_dlist_init (srm->ip4_reass_lru_list_pool, head_index);
602 srm->ip6_timeout = NAT_REASS_TIMEOUT_DEFAULT;
603 srm->ip6_max_reass = NAT_MAX_REASS_DEAFULT;
604 srm->ip6_max_frag = NAT_MAX_FRAG_DEFAULT;
605 srm->ip6_drop_frag = 0;
606 srm->ip6_reass_n = 0;
608 if (tm->n_vlib_mains > 1)
609 clib_spinlock_init (&srm->ip6_reass_lock);
611 pool_alloc (srm->ip6_reass_pool, srm->ip6_max_reass);
613 nbuckets = nat_reass_get_nbuckets (1);
614 clib_bihash_init_48_8 (&srm->ip6_reass_hash, "nat-ip6-reass", nbuckets,
617 pool_get (srm->ip6_reass_lru_list_pool, head);
618 srm->ip6_reass_head_index = head_index =
619 head - srm->ip6_reass_lru_list_pool;
620 clib_dlist_init (srm->ip6_reass_lru_list_pool, head_index);
625 static clib_error_t *
626 nat_reass_command_fn (vlib_main_t * vm, unformat_input_t * input,
627 vlib_cli_command_t * cmd)
629 clib_error_t *error = 0;
630 unformat_input_t _line_input, *line_input = &_line_input;
631 u32 timeout = 0, max_reass = 0, max_frag = 0;
632 u8 drop_frag = (u8) ~ 0, is_ip6 = 0;
635 /* Get a line of input. */
636 if (!unformat_user (input, unformat_line_input, line_input))
639 while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
641 if (unformat (line_input, "max-reassemblies %u", &max_reass))
643 else if (unformat (line_input, "max-fragments %u", &max_frag))
645 else if (unformat (line_input, "timeout %u", &timeout))
647 else if (unformat (line_input, "enable"))
649 else if (unformat (line_input, "disable"))
651 else if (unformat (line_input, "ip4"))
653 else if (unformat (line_input, "ip6"))
657 error = clib_error_return (0, "unknown input '%U'",
658 format_unformat_error, line_input);
664 timeout = nat_reass_get_timeout (is_ip6);
666 max_reass = nat_reass_get_max_reass (is_ip6);
668 max_frag = nat_reass_get_max_frag (is_ip6);
669 if (drop_frag == (u8) ~ 0)
670 drop_frag = nat_reass_is_drop_frag (is_ip6);
673 nat_reass_set (timeout, (u16) max_reass, (u8) max_frag, drop_frag,
677 error = clib_error_return (0, "nat_set_reass return %d", rv);
682 unformat_free (line_input);
688 nat_ip4_reass_walk_cli (nat_reass_ip4_t * reass, void *ctx)
690 vlib_main_t *vm = ctx;
692 vlib_cli_output (vm, " src %U dst %U proto %u id 0x%04x cached %u",
693 format_ip4_address, &reass->key.src,
694 format_ip4_address, &reass->key.dst,
696 clib_net_to_host_u16 (reass->key.frag_id), reass->frag_n);
702 nat_ip6_reass_walk_cli (nat_reass_ip6_t * reass, void *ctx)
704 vlib_main_t *vm = ctx;
706 vlib_cli_output (vm, " src %U dst %U proto %u id 0x%08x cached %u",
707 format_ip6_address, &reass->key.src,
708 format_ip6_address, &reass->key.dst,
710 clib_net_to_host_u32 (reass->key.frag_id), reass->frag_n);
715 static clib_error_t *
716 show_nat_reass_command_fn (vlib_main_t * vm, unformat_input_t * input,
717 vlib_cli_command_t * cmd)
719 vlib_cli_output (vm, "NAT IPv4 virtual fragmentation reassembly is %s",
720 nat_reass_is_drop_frag (0) ? "DISABLED" : "ENABLED");
721 vlib_cli_output (vm, " max-reasssemblies %u", nat_reass_get_max_reass (0));
722 vlib_cli_output (vm, " max-fragments %u", nat_reass_get_max_frag (0));
723 vlib_cli_output (vm, " timeout %usec", nat_reass_get_timeout (0));
724 vlib_cli_output (vm, " reassemblies:");
725 nat_ip4_reass_walk (nat_ip4_reass_walk_cli, vm);
727 vlib_cli_output (vm, "NAT IPv6 virtual fragmentation reassembly is %s",
728 nat_reass_is_drop_frag (1) ? "DISABLED" : "ENABLED");
729 vlib_cli_output (vm, " max-reasssemblies %u", nat_reass_get_max_reass (1));
730 vlib_cli_output (vm, " max-fragments %u", nat_reass_get_max_frag (1));
731 vlib_cli_output (vm, " timeout %usec", nat_reass_get_timeout (1));
732 vlib_cli_output (vm, " reassemblies:");
733 nat_ip6_reass_walk (nat_ip6_reass_walk_cli, vm);
739 VLIB_CLI_COMMAND (nat_reass_command, static) =
741 .path = "nat virtual-reassembly",
742 .short_help = "nat virtual-reassembly ip4|ip6 [max-reassemblies <n>] "
743 "[max-fragments <n>] [timeout <sec>] [enable|disable]",
744 .function = nat_reass_command_fn,
747 VLIB_CLI_COMMAND (show_nat_reass_command, static) =
749 .path = "show nat virtual-reassembly",
750 .short_help = "show nat virtual-reassembly",
751 .function = show_nat_reass_command_fn,
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */