2 * Copyright (c) 2017 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
17 * @brief NAT plugin virtual fragmentation reassembly
20 #include <vnet/vnet.h>
21 #include <nat/nat_reass.h>
23 nat_reass_main_t nat_reass_main;
/*
 * Derive a hash-table bucket count from the configured reassembly limit.
 *
 * The limit (ip6_max_reass or ip4_max_reass) is divided by
 * NAT_REASS_HT_LOAD_FACTOR; the loop then tests successive powers of two
 * against that quotient.
 *
 * NOTE(review): this listing is partial -- the return type, the is_ip6
 * branch, the loop exit and the return statement are not visible here.
 * Presumably is_ip6 selects between the two assignments and the first
 * power of two >= the quotient is returned; confirm against the full file.
 */
26 nat_reass_get_nbuckets (u8 is_ip6)
28 nat_reass_main_t *srm = &nat_reass_main;
33 nbuckets = (u32) (srm->ip6_max_reass / NAT_REASS_HT_LOAD_FACTOR);
35 nbuckets = (u32) (srm->ip4_max_reass / NAT_REASS_HT_LOAD_FACTOR);
/* i < 31 keeps (1 << i) within the range of a signed int. */
37 for (i = 0; i < 31; i++)
38 if ((1 << i) >= nbuckets)
/*
 * Drain the fragment list of one IPv4 reassembly into a caller-supplied
 * vector.
 *
 * Elements are taken from the head of the list rooted at
 * reass->frags_per_reass_list_head_index; for each one its value is
 * appended to *bi and the list element is returned to ip4_frags_list_pool.
 *
 * This helper takes no lock itself; the callers visible in this file invoke
 * it between lock/unlock of ip4_reass_lock.
 *
 * NOTE(review): partial listing -- the loop header and the value the
 * removed index is compared against are not visible here.
 */
45 static_always_inline void
46 nat_ip4_reass_get_frags_inline (nat_reass_ip4_t * reass, u32 ** bi)
48 nat_reass_main_t *srm = &nat_reass_main;
53 clib_dlist_remove_head (srm->ip4_frags_list_pool,
54 reass->frags_per_reass_list_head_index)) !=
57 elt = pool_elt_at_index (srm->ip4_frags_list_pool, elt_index);
58 vec_add1 (*bi, elt->value);
/* The list element is no longer needed once its value has been copied. */
60 pool_put_index (srm->ip4_frags_list_pool, elt_index);
/*
 * Drain the fragment list of one IPv6 reassembly into a caller-supplied
 * vector.
 *
 * Elements are taken from the head of the list rooted at
 * reass->frags_per_reass_list_head_index; for each one its value is
 * appended to *bi and the list element is returned to ip6_frags_list_pool.
 *
 * This helper takes no lock itself; the callers visible in this file invoke
 * it between lock/unlock of ip6_reass_lock.
 *
 * NOTE(review): partial listing -- the loop header and the value the
 * removed index is compared against are not visible here.
 */
64 static_always_inline void
65 nat_ip6_reass_get_frags_inline (nat_reass_ip6_t * reass, u32 ** bi)
67 nat_reass_main_t *srm = &nat_reass_main;
72 clib_dlist_remove_head (srm->ip6_frags_list_pool,
73 reass->frags_per_reass_list_head_index)) !=
76 elt = pool_elt_at_index (srm->ip6_frags_list_pool, elt_index);
77 vec_add1 (*bi, elt->value);
/* The list element is no longer needed once its value has been copied. */
79 pool_put_index (srm->ip6_frags_list_pool, elt_index);
/*
 * Update the virtual-reassembly configuration of one address family.
 *
 * When max_reass differs from the stored limit, the family's reassembly
 * pool and hash table are freed and re-created while holding that family's
 * spinlock (taken only if the lock was initialised). timeout, max_frag and
 * drop_frag are then stored unconditionally for that family.
 *
 * Parameters: timeout, max_reass, max_frag and drop_frag are the new
 * settings; the trailing parameter line is not visible in this listing.
 *
 * NOTE(review): partial listing -- the return type, the last parameter,
 * the branch that selects IPv6 vs IPv4 and the return value are not
 * visible here.
 * NOTE(review): the resize path frees the pool but no visible line resets
 * ipX_reass_n or the LRU list; confirm that stale LRU elements cannot
 * reference entries of the freed pool.
 */
84 nat_reass_set (u32 timeout, u16 max_reass, u8 max_frag, u8 drop_frag,
87 nat_reass_main_t *srm = &nat_reass_main;
92 if (srm->ip6_max_reass != max_reass)
94 clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
96 srm->ip6_max_reass = max_reass;
97 pool_free (srm->ip6_reass_pool);
/*
 * Size the IPv6 pool and hash from the IPv6 limit. The previous code used
 * ip4_max_reass and nat_reass_get_nbuckets (0), i.e. the IPv4 settings.
 */
98 pool_alloc (srm->ip6_reass_pool, srm->ip6_max_reass);
99 nbuckets = nat_reass_get_nbuckets (1);
100 clib_bihash_free_48_8 (&srm->ip6_reass_hash);
101 clib_bihash_init_48_8 (&srm->ip6_reass_hash, "nat-ip6-reass",
102 nbuckets, nbuckets * 1024);
104 clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
106 srm->ip6_timeout = timeout;
107 srm->ip6_max_frag = max_frag;
108 srm->ip6_drop_frag = drop_frag;
112 if (srm->ip4_max_reass != max_reass)
114 clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
116 srm->ip4_max_reass = max_reass;
117 pool_free (srm->ip4_reass_pool);
118 pool_alloc (srm->ip4_reass_pool, srm->ip4_max_reass);
119 nbuckets = nat_reass_get_nbuckets (0);
120 clib_bihash_free_16_8 (&srm->ip4_reass_hash);
121 clib_bihash_init_16_8 (&srm->ip4_reass_hash, "nat-ip4-reass",
122 nbuckets, nbuckets * 1024);
123 clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
125 srm->ip4_timeout = timeout;
126 srm->ip4_max_frag = max_frag;
127 srm->ip4_drop_frag = drop_frag;
/*
 * Return the configured reassembly timeout.
 *
 * Two returns are visible: ip6_timeout and ip4_timeout. Presumably is_ip6
 * selects the first one (the branch line and the return type are not part
 * of this listing).
 */
134 nat_reass_get_timeout (u8 is_ip6)
136 nat_reass_main_t *srm = &nat_reass_main;
139 return srm->ip6_timeout;
141 return srm->ip4_timeout;
/*
 * Return the configured maximum number of concurrent reassemblies.
 *
 * Two returns are visible: ip6_max_reass and ip4_max_reass. Presumably
 * is_ip6 selects the first one (the branch line and the return type are not
 * part of this listing).
 */
145 nat_reass_get_max_reass (u8 is_ip6)
147 nat_reass_main_t *srm = &nat_reass_main;
150 return srm->ip6_max_reass;
152 return srm->ip4_max_reass;
/*
 * Return the configured maximum number of fragments per reassembly.
 *
 * Two returns are visible: ip6_max_frag and ip4_max_frag. Presumably is_ip6
 * selects the first one (the branch line and the return type are not part
 * of this listing).
 */
156 nat_reass_get_max_frag (u8 is_ip6)
158 nat_reass_main_t *srm = &nat_reass_main;
161 return srm->ip6_max_frag;
163 return srm->ip4_max_frag;
/*
 * Return the drop_frag flag of one address family.
 *
 * The show command in this file prints "DISABLED" for virtual reassembly
 * when this value is non-zero and "ENABLED" otherwise.
 *
 * Two returns are visible: ip6_drop_frag and ip4_drop_frag. Presumably
 * is_ip6 selects the first one (the branch line and the return type are not
 * part of this listing).
 */
167 nat_reass_is_drop_frag (u8 is_ip6)
169 nat_reass_main_t *srm = &nat_reass_main;
172 return srm->ip6_drop_frag;
174 return srm->ip4_drop_frag;
/*
 * Search ip4_reass_hash for the reassembly stored under key k.
 *
 * The two 64-bit words of k form the hash key. On a hit the stored value
 * is used as an index into ip4_reass_pool and the entry's age is checked
 * against ip4_timeout using the caller-supplied time now.
 *
 * No lock is taken here; the visible caller holds ip4_reass_lock.
 *
 * NOTE(review): partial listing -- the statements executed on a miss and
 * on either outcome of the timeout test are not visible. Presumably a
 * still-valid entry is returned and 0 otherwise; confirm against the full
 * file.
 */
177 static_always_inline nat_reass_ip4_t *
178 nat_ip4_reass_lookup (nat_reass_ip4_key_t * k, f64 now)
180 nat_reass_main_t *srm = &nat_reass_main;
181 clib_bihash_kv_16_8_t kv, value;
182 nat_reass_ip4_t *reass;
184 kv.key[0] = k->as_u64[0];
185 kv.key[1] = k->as_u64[1];
187 if (clib_bihash_search_16_8 (&srm->ip4_reass_hash, &kv, &value))
190 reass = pool_elt_at_index (srm->ip4_reass_pool, value.value);
/* True while the entry has been heard from within the last ip4_timeout. */
191 if (now < reass->last_heard + (f64) srm->ip4_timeout)
/*
 * Look up the IPv4 reassembly for the given flow/fragment id and create
 * one when the lookup fails. The work between the lookup and the final
 * hash insert runs under ip4_reass_lock (taken only if initialised).
 *
 * Visible flow:
 *  - lookup hit: last_heard is refreshed and the entry's LRU element is
 *    moved to the tail of the LRU list;
 *  - lookup miss with ip4_reass_n >= ip4_max_reass: the head (oldest) LRU
 *    element is removed and examined. If it has not timed out yet it is put
 *    back at the head and a warning is logged; otherwise its slot is
 *    recycled: the LRU element moves to the tail, the old hash entry is
 *    deleted and the fragments still cached on it are appended to
 *    bi_to_drop;
 *  - lookup miss with room left: a pool entry, an LRU element and a
 *    fragment-list head are allocated;
 *  - finally the key is written into the entry and the entry is inserted
 *    into ip4_reass_hash.
 *
 * NOTE(review): partial listing -- the return type, the last parameter
 * line, the frag_id/proto key fields, several branch lines and all error
 * exits are not visible. Presumably reset_timeout gates the last_heard
 * refresh; confirm against the full file.
 */
198 nat_ip4_reass_find_or_create (ip4_address_t src, ip4_address_t dst,
199 u16 frag_id, u8 proto, u8 reset_timeout,
202 nat_reass_main_t *srm = &nat_reass_main;
203 nat_reass_ip4_t *reass = 0;
204 nat_reass_ip4_key_t k;
205 f64 now = vlib_time_now (srm->vlib_main);
206 dlist_elt_t *oldest_elt, *elt;
207 dlist_elt_t *per_reass_list_head_elt;
208 u32 oldest_index, elt_index;
209 clib_bihash_kv_16_8_t kv;
211 k.src.as_u32 = src.as_u32;
212 k.dst.as_u32 = dst.as_u32;
216 clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
218 reass = nat_ip4_reass_lookup (&k, now);
223 reass->last_heard = now;
/* Most recently used entries live at the tail of the LRU list. */
224 clib_dlist_remove (srm->ip4_reass_lru_list_pool,
225 reass->lru_list_index);
226 clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
227 srm->ip4_reass_head_index,
228 reass->lru_list_index);
233 if (srm->ip4_reass_n >= srm->ip4_max_reass)
236 clib_dlist_remove_head (srm->ip4_reass_lru_list_pool,
237 srm->ip4_reass_head_index);
238 ASSERT (oldest_index != ~0);
240 pool_elt_at_index (srm->ip4_reass_lru_list_pool, oldest_index);
241 reass = pool_elt_at_index (srm->ip4_reass_pool, oldest_elt->value);
/* The oldest entry is still within its timeout: nothing can be evicted. */
242 if (now < reass->last_heard + (f64) srm->ip4_timeout)
244 clib_dlist_addhead (srm->ip4_reass_lru_list_pool,
245 srm->ip4_reass_head_index, oldest_index);
246 clib_warning ("no free reassembly slot");
251 clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
252 srm->ip4_reass_head_index, oldest_index);
/*
 * Delete the hash entry of the reassembly being evicted. It was inserted
 * under reass->key, so that key -- not the key currently being looked up --
 * must be used; otherwise a stale hash entry keeps pointing at this slot.
 */
254 kv.key[0] = reass->key.as_u64[0];
255 kv.key[1] = reass->key.as_u64[1];
256 if (clib_bihash_add_del_16_8 (&srm->ip4_reass_hash, &kv, 0))
/* Hand the fragments cached on the evicted entry back to the caller. */
262 nat_ip4_reass_get_frags_inline (reass, bi_to_drop);
266 pool_get (srm->ip4_reass_pool, reass);
267 pool_get (srm->ip4_reass_lru_list_pool, elt);
268 reass->lru_list_index = elt_index = elt - srm->ip4_reass_lru_list_pool;
269 clib_dlist_init (srm->ip4_reass_lru_list_pool, elt_index);
270 elt->value = reass - srm->ip4_reass_pool;
271 clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
272 srm->ip4_reass_head_index, elt_index);
/* Each reassembly owns a list head for its cached fragments. */
273 pool_get (srm->ip4_frags_list_pool, per_reass_list_head_elt);
274 reass->frags_per_reass_list_head_index =
275 per_reass_list_head_elt - srm->ip4_frags_list_pool;
276 clib_dlist_init (srm->ip4_frags_list_pool,
277 reass->frags_per_reass_list_head_index);
/* Record the new key in the entry and publish it in the hash. */
281 reass->key.as_u64[0] = kv.key[0] = k.as_u64[0];
282 reass->key.as_u64[1] = kv.key[1] = k.as_u64[1];
283 kv.value = reass - srm->ip4_reass_pool;
284 reass->sess_index = (u32) ~ 0;
285 reass->last_heard = now;
287 if (clib_bihash_add_del_16_8 (&srm->ip4_reass_hash, &kv, 1))
294 clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
/*
 * Attach one more fragment to an IPv4 reassembly.
 *
 * The fragment count is first checked against ip4_max_frag. Then, under
 * ip4_reass_lock (taken only if initialised), a list element is allocated
 * from ip4_frags_list_pool, initialised and appended to the tail of the
 * reassembly's fragment list.
 *
 * NOTE(review): partial listing -- the return type, the action taken when
 * the limit is reached, and the lines that presumably store bi in the
 * element and bump frag_n are not visible here.
 */
299 nat_ip4_reass_add_fragment (nat_reass_ip4_t * reass, u32 bi)
301 nat_reass_main_t *srm = &nat_reass_main;
305 if (reass->frag_n >= srm->ip4_max_frag)
308 clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
310 pool_get (srm->ip4_frags_list_pool, elt);
311 elt_index = elt - srm->ip4_frags_list_pool;
312 clib_dlist_init (srm->ip4_frags_list_pool, elt_index);
314 clib_dlist_addtail (srm->ip4_frags_list_pool,
315 reass->frags_per_reass_list_head_index, elt_index);
318 clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
/*
 * Locked wrapper around nat_ip4_reass_get_frags_inline: takes
 * ip4_reass_lock (only if initialised), drains the fragments cached on
 * reass into *bi, then releases the lock.
 */
324 nat_ip4_reass_get_frags (nat_reass_ip4_t * reass, u32 ** bi)
326 nat_reass_main_t *srm = &nat_reass_main;
328 clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
330 nat_ip4_reass_get_frags_inline (reass, bi);
332 clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
/*
 * Iterate over every entry of ip4_reass_pool and test whether it is still
 * within ip4_timeout of its last_heard time.
 *
 * NOTE(review): partial listing -- the statement guarded by the test is
 * not visible; presumably fn (reass, ctx) is invoked for entries that have
 * not expired. No lock acquisition is visible in this function.
 */
336 nat_ip4_reass_walk (nat_ip4_reass_walk_fn_t fn, void *ctx)
338 nat_reass_ip4_t *reass;
339 nat_reass_main_t *srm = &nat_reass_main;
340 f64 now = vlib_time_now (srm->vlib_main);
343 pool_foreach (reass, srm->ip4_reass_pool,
345 if (now < reass->last_heard + (f64) srm->ip4_timeout)
/*
 * Search ip6_reass_hash for the reassembly stored under key k.
 *
 * All six 64-bit words of k form the hash key. On a hit the stored value
 * is used as an index into ip6_reass_pool and the entry's age is checked
 * against ip6_timeout using the caller-supplied time now.
 *
 * No lock is taken here; the visible caller holds ip6_reass_lock.
 *
 * NOTE(review): partial listing -- the statements executed on a miss and
 * on either outcome of the timeout test are not visible. Presumably a
 * still-valid entry is returned and 0 otherwise; confirm against the full
 * file.
 */
354 static_always_inline nat_reass_ip6_t *
355 nat_ip6_reass_lookup (nat_reass_ip6_key_t * k, f64 now)
357 nat_reass_main_t *srm = &nat_reass_main;
358 clib_bihash_kv_48_8_t kv, value;
359 nat_reass_ip6_t *reass;
362 kv.key[0] = k->as_u64[0];
363 kv.key[1] = k->as_u64[1];
364 kv.key[2] = k->as_u64[2];
365 kv.key[3] = k->as_u64[3];
366 kv.key[4] = k->as_u64[4];
367 kv.key[5] = k->as_u64[5];
369 if (clib_bihash_search_48_8 (&srm->ip6_reass_hash, &kv, &value))
372 reass = pool_elt_at_index (srm->ip6_reass_pool, value.value);
/* True while the entry has been heard from within the last ip6_timeout. */
373 if (now < reass->last_heard + (f64) srm->ip6_timeout)
380 nat_ip6_reass_find_or_create (ip6_address_t src, ip6_address_t dst,
381 u32 frag_id, u8 proto, u8 reset_timeout,
384 nat_reass_main_t *srm = &nat_reass_main;
385 nat_reass_ip6_t *reass = 0;
386 nat_reass_ip6_key_t k;
387 f64 now = vlib_time_now (srm->vlib_main);
388 dlist_elt_t *oldest_elt, *elt;
389 dlist_elt_t *per_reass_list_head_elt;
390 u32 oldest_index, elt_index;
391 clib_bihash_kv_48_8_t kv;
393 k.src.as_u64[0] = src.as_u64[0];
394 k.src.as_u64[1] = src.as_u64[1];
395 k.dst.as_u64[0] = dst.as_u64[0];
396 k.dst.as_u64[1] = dst.as_u64[1];
401 clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
403 reass = nat_ip6_reass_lookup (&k, now);
408 reass->last_heard = now;
409 clib_dlist_remove (srm->ip6_reass_lru_list_pool,
410 reass->lru_list_index);
411 clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
412 srm->ip6_reass_head_index,
413 reass->lru_list_index);
418 if (srm->ip6_reass_n >= srm->ip6_max_reass)
421 clib_dlist_remove_head (srm->ip6_reass_lru_list_pool,
422 srm->ip6_reass_head_index);
423 ASSERT (oldest_index != ~0);
425 pool_elt_at_index (srm->ip4_reass_lru_list_pool, oldest_index);
426 reass = pool_elt_at_index (srm->ip6_reass_pool, oldest_elt->value);
427 if (now < reass->last_heard + (f64) srm->ip6_timeout)
429 clib_dlist_addhead (srm->ip6_reass_lru_list_pool,
430 srm->ip6_reass_head_index, oldest_index);
431 clib_warning ("no free resassembly slot");
436 clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
437 srm->ip6_reass_head_index, oldest_index);
439 kv.key[0] = k.as_u64[0];
440 kv.key[1] = k.as_u64[1];
441 kv.key[2] = k.as_u64[2];
442 kv.key[3] = k.as_u64[4];
443 kv.key[4] = k.as_u64[5];
444 if (clib_bihash_add_del_48_8 (&srm->ip6_reass_hash, &kv, 0))
450 nat_ip6_reass_get_frags_inline (reass, bi_to_drop);
454 pool_get (srm->ip6_reass_pool, reass);
455 pool_get (srm->ip6_reass_lru_list_pool, elt);
456 reass->lru_list_index = elt_index = elt - srm->ip6_reass_lru_list_pool;
457 clib_dlist_init (srm->ip6_reass_lru_list_pool, elt_index);
458 elt->value = reass - srm->ip6_reass_pool;
459 clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
460 srm->ip6_reass_head_index, elt_index);
461 pool_get (srm->ip6_frags_list_pool, per_reass_list_head_elt);
462 reass->frags_per_reass_list_head_index =
463 per_reass_list_head_elt - srm->ip6_frags_list_pool;
464 clib_dlist_init (srm->ip6_frags_list_pool,
465 reass->frags_per_reass_list_head_index);
469 reass->key.as_u64[0] = kv.key[0] = k.as_u64[0];
470 reass->key.as_u64[1] = kv.key[1] = k.as_u64[1];
471 reass->key.as_u64[2] = kv.key[2] = k.as_u64[2];
472 reass->key.as_u64[3] = kv.key[3] = k.as_u64[3];
473 reass->key.as_u64[4] = kv.key[4] = k.as_u64[4];
474 reass->key.as_u64[5] = kv.key[5] = k.as_u64[5];
475 kv.value = reass - srm->ip6_reass_pool;
476 reass->sess_index = (u32) ~ 0;
477 reass->last_heard = now;
479 if (clib_bihash_add_del_48_8 (&srm->ip6_reass_hash, &kv, 1))
486 clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
/*
 * Attach one more fragment to an IPv6 reassembly.
 *
 * The fragment count is first checked against ip6_max_frag. Then, under
 * ip6_reass_lock (taken only if initialised), a list element is allocated
 * from ip6_frags_list_pool, initialised and appended to the tail of the
 * reassembly's fragment list.
 *
 * NOTE(review): partial listing -- the return type, the action taken when
 * the limit is reached, and the lines that presumably store bi in the
 * element and bump frag_n are not visible here.
 */
491 nat_ip6_reass_add_fragment (nat_reass_ip6_t * reass, u32 bi)
493 nat_reass_main_t *srm = &nat_reass_main;
497 if (reass->frag_n >= srm->ip6_max_frag)
500 clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
502 pool_get (srm->ip6_frags_list_pool, elt);
503 elt_index = elt - srm->ip6_frags_list_pool;
504 clib_dlist_init (srm->ip6_frags_list_pool, elt_index);
506 clib_dlist_addtail (srm->ip6_frags_list_pool,
507 reass->frags_per_reass_list_head_index, elt_index);
510 clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
/*
 * Locked wrapper around nat_ip6_reass_get_frags_inline: takes
 * ip6_reass_lock (only if initialised), drains the fragments cached on
 * reass into *bi, then releases the lock.
 */
516 nat_ip6_reass_get_frags (nat_reass_ip6_t * reass, u32 ** bi)
518 nat_reass_main_t *srm = &nat_reass_main;
520 clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
522 nat_ip6_reass_get_frags_inline (reass, bi);
524 clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
/*
 * Iterate over every entry of ip6_reass_pool and test whether it is still
 * within the IPv6 timeout of its last_heard time.
 *
 * NOTE(review): partial listing -- the statement guarded by the test is
 * not visible; presumably fn (reass, ctx) is invoked for entries that have
 * not expired. No lock acquisition is visible in this function.
 */
528 nat_ip6_reass_walk (nat_ip6_reass_walk_fn_t fn, void *ctx)
530 nat_reass_ip6_t *reass;
531 nat_reass_main_t *srm = &nat_reass_main;
532 f64 now = vlib_time_now (srm->vlib_main);
535 pool_foreach (reass, srm->ip6_reass_pool,
/*
 * IPv6 entries expire per ip6_timeout, as in nat_ip6_reass_lookup; the
 * previous code compared against ip4_timeout.
 */
537 if (now < reass->last_heard + (f64) srm->ip6_timeout)
/*
 * Initialise the reassembly state for both address families.
 *
 * For IPv4 and then IPv6 the visible steps are: load the default timeout,
 * reassembly limit and fragment limit, clear drop_frag and the reassembly
 * counter, create the spinlock only when more than one vlib main exists,
 * pre-allocate the reassembly pool, create the hash table with a bucket
 * count from nat_reass_get_nbuckets, and allocate/initialise the head
 * element of the LRU list.
 *
 * NOTE(review): partial listing -- the return type, some declarations, the
 * final argument of both clib_bihash_init calls and the return statement
 * are not visible here.
 */
547 nat_reass_init (vlib_main_t * vm)
549 nat_reass_main_t *srm = &nat_reass_main;
550 vlib_thread_main_t *tm = vlib_get_thread_main ();
551 clib_error_t *error = 0;
553 u32 nbuckets, head_index;
556 srm->vnet_main = vnet_get_main ();
/* IPv4 defaults. */
559 srm->ip4_timeout = NAT_REASS_TIMEOUT_DEFAULT;
560 srm->ip4_max_reass = NAT_MAX_REASS_DEAFULT;
561 srm->ip4_max_frag = NAT_MAX_FRAG_DEFAULT;
562 srm->ip4_drop_frag = 0;
563 srm->ip4_reass_n = 0;
/* The lock is left uninitialised when there is a single vlib main. */
565 if (tm->n_vlib_mains > 1)
566 clib_spinlock_init (&srm->ip4_reass_lock);
568 pool_alloc (srm->ip4_reass_pool, srm->ip4_max_reass);
570 nbuckets = nat_reass_get_nbuckets (0);
571 clib_bihash_init_16_8 (&srm->ip4_reass_hash, "nat-ip4-reass", nbuckets,
/* Head element anchoring the IPv4 LRU list. */
574 pool_get (srm->ip4_reass_lru_list_pool, head);
575 srm->ip4_reass_head_index = head_index =
576 head - srm->ip4_reass_lru_list_pool;
577 clib_dlist_init (srm->ip4_reass_lru_list_pool, head_index);
/* IPv6 defaults. */
580 srm->ip6_timeout = NAT_REASS_TIMEOUT_DEFAULT;
581 srm->ip6_max_reass = NAT_MAX_REASS_DEAFULT;
582 srm->ip6_max_frag = NAT_MAX_FRAG_DEFAULT;
583 srm->ip6_drop_frag = 0;
584 srm->ip6_reass_n = 0;
/* The lock is left uninitialised when there is a single vlib main. */
586 if (tm->n_vlib_mains > 1)
587 clib_spinlock_init (&srm->ip6_reass_lock);
589 pool_alloc (srm->ip6_reass_pool, srm->ip6_max_reass);
591 nbuckets = nat_reass_get_nbuckets (1);
592 clib_bihash_init_48_8 (&srm->ip6_reass_hash, "nat-ip6-reass", nbuckets,
/* Head element anchoring the IPv6 LRU list. */
595 pool_get (srm->ip6_reass_lru_list_pool, head);
596 srm->ip6_reass_head_index = head_index =
597 head - srm->ip6_reass_lru_list_pool;
598 clib_dlist_init (srm->ip6_reass_lru_list_pool, head_index);
/*
 * CLI handler for "nat virtual-reassembly".
 *
 * Parses one input line for the keywords max-reassemblies, max-fragments,
 * timeout, enable, disable, ip4 and ip6; anything else produces an
 * "unknown input" error. The current configuration is then read back
 * through the nat_reass_get_* accessors and the result is applied with
 * nat_reass_set.
 *
 * drop_frag starts as (u8) ~0, a sentinel meaning "not given on the command
 * line"; in that case the current setting is kept.
 *
 * NOTE(review): partial listing -- the statements executed for each
 * keyword and the conditions guarding the timeout/max_reass/max_frag
 * read-backs are not visible; presumably each is only read back when the
 * user left it at 0. Confirm against the full file.
 * NOTE(review): max_reass and max_frag are parsed as u32 but passed through
 * (u16)/(u8) casts without a visible range check, so larger values would be
 * truncated silently.
 */
603 static clib_error_t *
604 nat_reass_command_fn (vlib_main_t * vm, unformat_input_t * input,
605 vlib_cli_command_t * cmd)
607 clib_error_t *error = 0;
608 unformat_input_t _line_input, *line_input = &_line_input;
609 u32 timeout = 0, max_reass = 0, max_frag = 0;
610 u8 drop_frag = (u8) ~ 0, is_ip6 = 0;
613 /* Get a line of input. */
614 if (!unformat_user (input, unformat_line_input, line_input))
617 while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
619 if (unformat (line_input, "max-reassemblies %u", &max_reass))
621 else if (unformat (line_input, "max-fragments %u", &max_frag))
623 else if (unformat (line_input, "timeout %u", &timeout))
625 else if (unformat (line_input, "enable"))
627 else if (unformat (line_input, "disable"))
629 else if (unformat (line_input, "ip4"))
631 else if (unformat (line_input, "ip6"))
635 error = clib_error_return (0, "unknown input '%U'",
636 format_unformat_error, line_input);
/* Fall back to the currently configured values of the chosen family. */
642 timeout = nat_reass_get_timeout (is_ip6);
644 max_reass = nat_reass_get_max_reass (is_ip6);
646 max_frag = nat_reass_get_max_frag (is_ip6);
647 if (drop_frag == (u8) ~ 0)
648 drop_frag = nat_reass_is_drop_frag (is_ip6);
651 nat_reass_set (timeout, (u16) max_reass, (u8) max_frag, drop_frag,
655 error = clib_error_return (0, "nat_set_reass return %d", rv);
660 unformat_free (line_input);
/*
 * Walk callback used by the show command: prints one line describing an
 * IPv4 reassembly (source, destination, protocol, fragment id converted
 * from network byte order, and the number of cached fragments).
 *
 * ctx is the vlib_main_t the output is written to.
 *
 * NOTE(review): partial listing -- the return type, the protocol argument
 * line and the return statement are not visible here.
 */
666 nat_ip4_reass_walk_cli (nat_reass_ip4_t * reass, void *ctx)
668 vlib_main_t *vm = ctx;
670 vlib_cli_output (vm, " src %U dst %U proto %u id 0x%04x cached %u",
671 format_ip4_address, &reass->key.src,
672 format_ip4_address, &reass->key.dst,
674 clib_net_to_host_u16 (reass->key.frag_id), reass->frag_n);
/*
 * Walk callback used by the show command: prints one line describing an
 * IPv6 reassembly (source, destination, protocol, fragment id converted
 * from network byte order, and the number of cached fragments).
 *
 * ctx is the vlib_main_t the output is written to.
 *
 * NOTE(review): partial listing -- the return type, the protocol argument
 * line and the return statement are not visible here.
 */
680 nat_ip6_reass_walk_cli (nat_reass_ip6_t * reass, void *ctx)
682 vlib_main_t *vm = ctx;
684 vlib_cli_output (vm, " src %U dst %U proto %u id 0x%08x cached %u",
685 format_ip6_address, &reass->key.src,
686 format_ip6_address, &reass->key.dst,
688 clib_net_to_host_u32 (reass->key.frag_id), reass->frag_n);
/*
 * CLI handler for "show nat virtual-reassembly".
 *
 * For IPv4 and then IPv6 it prints whether virtual reassembly is enabled
 * (drop_frag == 0) or disabled, the configured limits and timeout, and one
 * line per reassembly reported by the family's walk function.
 *
 * The limit label is spelled "max-reassemblies" so that it matches the
 * keyword accepted by the configuration command (it was previously printed
 * as "max-reasssemblies").
 *
 * NOTE(review): partial listing -- the return statement is not visible.
 */
693 static clib_error_t *
694 show_nat_reass_command_fn (vlib_main_t * vm, unformat_input_t * input,
695 vlib_cli_command_t * cmd)
697 vlib_cli_output (vm, "NAT IPv4 virtual fragmentation reassembly is %s",
698 nat_reass_is_drop_frag (0) ? "DISABLED" : "ENABLED");
699 vlib_cli_output (vm, " max-reassemblies %u", nat_reass_get_max_reass (0));
700 vlib_cli_output (vm, " max-fragments %u", nat_reass_get_max_frag (0));
701 vlib_cli_output (vm, " timeout %usec", nat_reass_get_timeout (0));
702 vlib_cli_output (vm, " reassemblies:");
703 nat_ip4_reass_walk (nat_ip4_reass_walk_cli, vm);
705 vlib_cli_output (vm, "NAT IPv6 virtual fragmentation reassembly is %s",
706 nat_reass_is_drop_frag (1) ? "DISABLED" : "ENABLED");
707 vlib_cli_output (vm, " max-reassemblies %u", nat_reass_get_max_reass (1));
708 vlib_cli_output (vm, " max-fragments %u", nat_reass_get_max_frag (1));
709 vlib_cli_output (vm, " timeout %usec", nat_reass_get_timeout (1));
710 vlib_cli_output (vm, " reassemblies:");
711 nat_ip6_reass_walk (nat_ip6_reass_walk_cli, vm);
/*
 * Registers the "nat virtual-reassembly" CLI command, handled by
 * nat_reass_command_fn. The short help lists the address-family selector
 * and the optional max-reassemblies, max-fragments, timeout and
 * enable|disable arguments.
 */
717 VLIB_CLI_COMMAND (nat_reass_command, static) =
719 .path = "nat virtual-reassembly",
720 .short_help = "nat virtual-reassembly ip4|ip6 [max-reassemblies <n>] "
721 "[max-fragments <n>] [timeout <sec>] [enable|disable]",
722 .function = nat_reass_command_fn,
/*
 * Registers the "show nat virtual-reassembly" CLI command, handled by
 * show_nat_reass_command_fn; it takes no arguments.
 */
725 VLIB_CLI_COMMAND (show_nat_reass_command, static) =
727 .path = "show nat virtual-reassembly",
728 .short_help = "show nat virtual-reassembly",
729 .function = show_nat_reass_command_fn,
734 * fd.io coding-style-patch-verification: ON
737 * eval: (c-set-style "gnu")