api: clean up use of deprecated flag
[vpp.git] / src / plugins / nat / nat.c
1 /*
2  * snat.c - simple nat plugin
3  *
4  * Copyright (c) 2016 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/vnet.h>
19 #include <vnet/ip/ip.h>
20 #include <vnet/ip/ip4.h>
21 #include <vnet/plugin/plugin.h>
22 #include <nat/nat.h>
23 #include <nat/nat_dpo.h>
24 #include <nat/nat_ipfix_logging.h>
25 #include <nat/nat64.h>
26 #include <nat/nat_inlines.h>
27 #include <nat/nat44/inlines.h>
28 #include <nat/nat_affinity.h>
29 #include <nat/nat_syslog.h>
30 #include <nat/nat_ha.h>
31 #include <vnet/fib/fib_table.h>
32 #include <vnet/fib/ip4_fib.h>
33 #include <vnet/ip/reass/ip4_sv_reass.h>
34 #include <vppinfra/bihash_16_8.h>
35 #include <nat/nat44/ed_inlines.h>
36
37 #include <vpp/app/version.h>
38
/* Plugin-wide NAT state; functions in this file reach it via &snat_main. */
snat_main_t snat_main;

/*
 * FIB sources owned by the NAT plugin.  In this part of the file
 * nat_fib_src_low is the source passed when NAT pool addresses are added
 * to / removed from the FIB and when a FIB table is locked for an address.
 * NOTE(review): nat_fib_src_hi is not referenced in the visible part of
 * the file -- presumably a higher-priority source; confirm where it is
 * registered and used.
 */
fib_source_t nat_fib_src_hi;
fib_source_t nat_fib_src_low;
43
44 /* *INDENT-OFF* */
45 /* Hook up input features */
46 VNET_FEATURE_INIT (nat_pre_in2out, static) = {
47   .arc_name = "ip4-unicast",
48   .node_name = "nat-pre-in2out",
49   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
50                                "ip4-sv-reassembly-feature"),
51 };
52 VNET_FEATURE_INIT (nat_pre_out2in, static) = {
53   .arc_name = "ip4-unicast",
54   .node_name = "nat-pre-out2in",
55   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
56                                "ip4-dhcp-client-detect",
57                                "ip4-sv-reassembly-feature"),
58 };
59 VNET_FEATURE_INIT (snat_in2out_worker_handoff, static) = {
60   .arc_name = "ip4-unicast",
61   .node_name = "nat44-in2out-worker-handoff",
62   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
63 };
64 VNET_FEATURE_INIT (snat_out2in_worker_handoff, static) = {
65   .arc_name = "ip4-unicast",
66   .node_name = "nat44-out2in-worker-handoff",
67   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
68                                "ip4-dhcp-client-detect"),
69 };
70 VNET_FEATURE_INIT (ip4_snat_in2out, static) = {
71   .arc_name = "ip4-unicast",
72   .node_name = "nat44-in2out",
73   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
74 };
75 VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
76   .arc_name = "ip4-unicast",
77   .node_name = "nat44-out2in",
78   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
79                                "ip4-dhcp-client-detect"),
80 };
81 VNET_FEATURE_INIT (ip4_nat_classify, static) = {
82   .arc_name = "ip4-unicast",
83   .node_name = "nat44-classify",
84   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
85 };
86 VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = {
87   .arc_name = "ip4-unicast",
88   .node_name = "nat44-ed-in2out",
89   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
90 };
91 VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = {
92   .arc_name = "ip4-unicast",
93   .node_name = "nat44-ed-out2in",
94   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
95                                "ip4-dhcp-client-detect"),
96 };
97 VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
98   .arc_name = "ip4-unicast",
99   .node_name = "nat44-ed-classify",
100   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
101 };
102 VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
103   .arc_name = "ip4-unicast",
104   .node_name = "nat44-handoff-classify",
105   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
106 };
107 VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
108   .arc_name = "ip4-unicast",
109   .node_name = "nat44-in2out-fast",
110   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
111 };
112 VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = {
113   .arc_name = "ip4-unicast",
114   .node_name = "nat44-out2in-fast",
115   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
116                                "ip4-dhcp-client-detect"),
117 };
118 VNET_FEATURE_INIT (ip4_snat_hairpin_dst, static) = {
119   .arc_name = "ip4-unicast",
120   .node_name = "nat44-hairpin-dst",
121   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
122 };
123 VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_dst, static) = {
124   .arc_name = "ip4-unicast",
125   .node_name = "nat44-ed-hairpin-dst",
126   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
127 };
128
129 /* Hook up output features */
130 VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = {
131   .arc_name = "ip4-output",
132   .node_name = "nat44-in2out-output",
133   .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
134 };
135 VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = {
136   .arc_name = "ip4-output",
137   .node_name = "nat44-in2out-output-worker-handoff",
138   .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
139 };
140 VNET_FEATURE_INIT (ip4_snat_hairpin_src, static) = {
141   .arc_name = "ip4-output",
142   .node_name = "nat44-hairpin-src",
143   .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
144 };
145 VNET_FEATURE_INIT (nat_pre_in2out_output, static) = {
146   .arc_name = "ip4-output",
147   .node_name = "nat-pre-in2out-output",
148   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
149   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
150 };
151 VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = {
152   .arc_name = "ip4-output",
153   .node_name = "nat44-ed-in2out-output",
154   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
155   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
156 };
157 VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_src, static) = {
158   .arc_name = "ip4-output",
159   .node_name = "nat44-ed-hairpin-src",
160   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
161   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
162 };
163
164 /* Hook up ip4-local features */
165 VNET_FEATURE_INIT (ip4_nat_hairpinning, static) =
166 {
167   .arc_name = "ip4-local",
168   .node_name = "nat44-hairpinning",
169   .runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
170 };
171 VNET_FEATURE_INIT (ip4_nat44_ed_hairpinning, static) =
172 {
173   .arc_name = "ip4-local",
174   .node_name = "nat44-ed-hairpinning",
175   .runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
176 };
177
178
179 VLIB_PLUGIN_REGISTER () = {
180     .version = VPP_BUILD_VER,
181     .description = "Network Address Translation (NAT)",
182 };
183 /* *INDENT-ON* */
184
185 void
186 nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index,
187                        u8 is_ha)
188 {
189   clib_bihash_kv_8_8_t kv;
190   u8 proto;
191   u16 r_port, l_port;
192   ip4_address_t *l_addr, *r_addr;
193   u32 fib_index = 0;
194   clib_bihash_kv_16_8_t ed_kv;
195   snat_main_per_thread_data_t *tsm =
196     vec_elt_at_index (sm->per_thread_data, thread_index);
197
198   if (is_ed_session (s))
199     {
200       per_vrf_sessions_unregister_session (s, thread_index);
201     }
202
203   if (is_fwd_bypass_session (s))
204     {
205       if (snat_is_unk_proto_session (s))
206         {
207           init_ed_k (&ed_kv, s->in2out.addr, 0, s->ext_host_addr, 0, 0,
208                      s->in2out.port);
209         }
210       else
211         {
212           l_port = s->in2out.port;
213           r_port = s->ext_host_port;
214           l_addr = &s->in2out.addr;
215           r_addr = &s->ext_host_addr;
216           proto = nat_proto_to_ip_proto (s->nat_proto);
217           init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index,
218                      proto);
219         }
220       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
221         nat_elog_warn ("in2out_ed key del failed");
222       return;
223     }
224
225   /* session lookup tables */
226   if (is_ed_session (s))
227     {
228       if (is_affinity_sessions (s))
229         nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
230                              s->nat_proto, s->out2in.port);
231       l_addr = &s->out2in.addr;
232       r_addr = &s->ext_host_addr;
233       fib_index = s->out2in.fib_index;
234       if (snat_is_unk_proto_session (s))
235         {
236           proto = s->in2out.port;
237           r_port = 0;
238           l_port = 0;
239         }
240       else
241         {
242           proto = nat_proto_to_ip_proto (s->nat_proto);
243           l_port = s->out2in.port;
244           r_port = s->ext_host_port;
245         }
246       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
247       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0))
248         nat_elog_warn ("out2in_ed key del failed");
249       l_addr = &s->in2out.addr;
250       fib_index = s->in2out.fib_index;
251       if (!snat_is_unk_proto_session (s))
252         l_port = s->in2out.port;
253       if (is_twice_nat_session (s))
254         {
255           r_addr = &s->ext_host_nat_addr;
256           r_port = s->ext_host_nat_port;
257         }
258       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
259       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
260         nat_elog_warn ("in2out_ed key del failed");
261
262       if (!is_ha)
263         nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
264                                &s->in2out.addr, s->in2out.port,
265                                &s->ext_host_nat_addr, s->ext_host_nat_port,
266                                &s->out2in.addr, s->out2in.port,
267                                &s->ext_host_addr, s->ext_host_port,
268                                s->nat_proto, is_twice_nat_session (s));
269     }
270   else
271     {
272       init_nat_i2o_k (&kv, s);
273       if (clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 0))
274         nat_elog_warn ("in2out key del failed");
275       init_nat_o2i_k (&kv, s);
276       if (clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 0))
277         nat_elog_warn ("out2in key del failed");
278
279       if (!is_ha)
280         nat_syslog_nat44_apmdel (s->user_index, s->in2out.fib_index,
281                                  &s->in2out.addr, s->in2out.port,
282                                  &s->out2in.addr, s->out2in.port,
283                                  s->nat_proto);
284     }
285
286   if (snat_is_unk_proto_session (s))
287     return;
288
289   if (!is_ha)
290     {
291       /* log NAT event */
292       snat_ipfix_logging_nat44_ses_delete (thread_index,
293                                            s->in2out.addr.as_u32,
294                                            s->out2in.addr.as_u32,
295                                            s->nat_proto,
296                                            s->in2out.port,
297                                            s->out2in.port,
298                                            s->in2out.fib_index);
299
300       nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
301                    s->ext_host_port, s->nat_proto, s->out2in.fib_index,
302                    thread_index);
303     }
304
305   /* Twice NAT address and port for external host */
306   if (is_twice_nat_session (s))
307     {
308       snat_free_outside_address_and_port (sm->twice_nat_addresses,
309                                           thread_index,
310                                           &s->ext_host_nat_addr,
311                                           s->ext_host_nat_port, s->nat_proto);
312     }
313
314   if (snat_is_session_static (s))
315     return;
316
317   snat_free_outside_address_and_port (sm->addresses, thread_index,
318                                       &s->out2in.addr, s->out2in.port,
319                                       s->nat_proto);
320 }
321
322 void
323 nat44_free_session_data (snat_main_t * sm, snat_session_t * s,
324                          u32 thread_index, u8 is_ha)
325 {
326   u8 proto;
327   u16 r_port, l_port;
328   ip4_address_t *l_addr, *r_addr;
329   u32 fib_index;
330   clib_bihash_kv_16_8_t ed_kv;
331   snat_main_per_thread_data_t *tsm =
332     vec_elt_at_index (sm->per_thread_data, thread_index);
333
334   if (is_fwd_bypass_session (s))
335     {
336       if (snat_is_unk_proto_session (s))
337         {
338           proto = s->in2out.port;
339           r_port = 0;
340           l_port = 0;
341         }
342       else
343         {
344           proto = nat_proto_to_ip_proto (s->nat_proto);
345           l_port = s->in2out.port;
346           r_port = s->ext_host_port;
347         }
348
349       l_addr = &s->in2out.addr;
350       r_addr = &s->ext_host_addr;
351       fib_index = 0;
352       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
353
354       if (PREDICT_FALSE
355           (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0)))
356         nat_elog_warn ("in2out_ed key del failed");
357       return;
358     }
359
360   /* session lookup tables */
361   if (is_affinity_sessions (s))
362     nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
363                          s->nat_proto, s->out2in.port);
364   l_addr = &s->out2in.addr;
365   r_addr = &s->ext_host_addr;
366   fib_index = s->out2in.fib_index;
367   if (snat_is_unk_proto_session (s))
368     {
369       proto = s->in2out.port;
370       r_port = 0;
371       l_port = 0;
372     }
373   else
374     {
375       proto = nat_proto_to_ip_proto (s->nat_proto);
376       l_port = s->out2in.port;
377       r_port = s->ext_host_port;
378     }
379   init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
380
381   if (PREDICT_FALSE (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0)))
382     nat_elog_warn ("out2in_ed key del failed");
383
384   l_addr = &s->in2out.addr;
385   fib_index = s->in2out.fib_index;
386
387   if (!snat_is_unk_proto_session (s))
388     l_port = s->in2out.port;
389
390   if (is_twice_nat_session (s))
391     {
392       r_addr = &s->ext_host_nat_addr;
393       r_port = s->ext_host_nat_port;
394     }
395   init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
396
397   if (PREDICT_FALSE (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0)))
398     nat_elog_warn ("in2out_ed key del failed");
399
400   if (!is_ha)
401     {
402       nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
403                              &s->in2out.addr, s->in2out.port,
404                              &s->ext_host_nat_addr, s->ext_host_nat_port,
405                              &s->out2in.addr, s->out2in.port,
406                              &s->ext_host_addr, s->ext_host_port,
407                              s->nat_proto, is_twice_nat_session (s));
408     }
409
410   if (snat_is_unk_proto_session (s))
411     return;
412
413   if (!is_ha)
414     {
415       snat_ipfix_logging_nat44_ses_delete (thread_index,
416                                            s->in2out.addr.as_u32,
417                                            s->out2in.addr.as_u32,
418                                            s->nat_proto,
419                                            s->in2out.port,
420                                            s->out2in.port,
421                                            s->in2out.fib_index);
422       nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
423                    s->ext_host_port, s->nat_proto, s->out2in.fib_index,
424                    thread_index);
425     }
426
427   /* Twice NAT address and port for external host */
428   if (is_twice_nat_session (s))
429     {
430       snat_free_outside_address_and_port (sm->twice_nat_addresses,
431                                           thread_index,
432                                           &s->ext_host_nat_addr,
433                                           s->ext_host_nat_port, s->nat_proto);
434     }
435
436   if (snat_is_session_static (s))
437     return;
438
439   snat_free_outside_address_and_port (sm->addresses, thread_index,
440                                       &s->out2in.addr, s->out2in.port,
441                                       s->nat_proto);
442 }
443
444
445 snat_user_t *
446 nat_user_get_or_create (snat_main_t * sm, ip4_address_t * addr, u32 fib_index,
447                         u32 thread_index)
448 {
449   snat_user_t *u = 0;
450   snat_user_key_t user_key;
451   clib_bihash_kv_8_8_t kv, value;
452   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
453   dlist_elt_t *per_user_list_head_elt;
454
455   user_key.addr.as_u32 = addr->as_u32;
456   user_key.fib_index = fib_index;
457   kv.key = user_key.as_u64;
458
459   /* Ever heard of the "user" = src ip4 address before? */
460   if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
461     {
462       if (pool_elts (tsm->users) >= sm->max_users_per_thread)
463         {
464           vlib_increment_simple_counter (&sm->user_limit_reached,
465                                          thread_index, 0, 1);
466           nat_elog_warn ("maximum user limit reached");
467           return NULL;
468         }
469       /* no, make a new one */
470       pool_get (tsm->users, u);
471       clib_memset (u, 0, sizeof (*u));
472
473       u->addr.as_u32 = addr->as_u32;
474       u->fib_index = fib_index;
475
476       pool_get (tsm->list_pool, per_user_list_head_elt);
477
478       u->sessions_per_user_list_head_index = per_user_list_head_elt -
479         tsm->list_pool;
480
481       clib_dlist_init (tsm->list_pool, u->sessions_per_user_list_head_index);
482
483       kv.value = u - tsm->users;
484
485       /* add user */
486       if (clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1))
487         {
488           nat_elog_warn ("user_hash key add failed");
489           nat44_delete_user_with_no_session (sm, u, thread_index);
490           return NULL;
491         }
492
493       vlib_set_simple_counter (&sm->total_users, thread_index, 0,
494                                pool_elts (tsm->users));
495     }
496   else
497     {
498       u = pool_elt_at_index (tsm->users, value.value);
499     }
500
501   return u;
502 }
503
504 snat_session_t *
505 nat_session_alloc_or_recycle (snat_main_t * sm, snat_user_t * u,
506                               u32 thread_index, f64 now)
507 {
508   snat_session_t *s;
509   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
510   u32 oldest_per_user_translation_list_index, session_index;
511   dlist_elt_t *oldest_per_user_translation_list_elt;
512   dlist_elt_t *per_user_translation_list_elt;
513
514   /* Over quota? Recycle the least recently used translation */
515   if ((u->nsessions + u->nstaticsessions) >= sm->max_translations_per_user)
516     {
517       oldest_per_user_translation_list_index =
518         clib_dlist_remove_head (tsm->list_pool,
519                                 u->sessions_per_user_list_head_index);
520
521       ASSERT (oldest_per_user_translation_list_index != ~0);
522
523       /* Add it back to the end of the LRU list */
524       clib_dlist_addtail (tsm->list_pool,
525                           u->sessions_per_user_list_head_index,
526                           oldest_per_user_translation_list_index);
527       /* Get the list element */
528       oldest_per_user_translation_list_elt =
529         pool_elt_at_index (tsm->list_pool,
530                            oldest_per_user_translation_list_index);
531
532       /* Get the session index from the list element */
533       session_index = oldest_per_user_translation_list_elt->value;
534
535       /* Get the session */
536       s = pool_elt_at_index (tsm->sessions, session_index);
537       nat_free_session_data (sm, s, thread_index, 0);
538       if (snat_is_session_static (s))
539         u->nstaticsessions--;
540       else
541         u->nsessions--;
542       s->flags = 0;
543       s->total_bytes = 0;
544       s->total_pkts = 0;
545       s->state = 0;
546       s->ext_host_addr.as_u32 = 0;
547       s->ext_host_port = 0;
548       s->ext_host_nat_addr.as_u32 = 0;
549       s->ext_host_nat_port = 0;
550     }
551   else
552     {
553       pool_get (tsm->sessions, s);
554       clib_memset (s, 0, sizeof (*s));
555
556       /* Create list elts */
557       pool_get (tsm->list_pool, per_user_translation_list_elt);
558       clib_dlist_init (tsm->list_pool,
559                        per_user_translation_list_elt - tsm->list_pool);
560
561       per_user_translation_list_elt->value = s - tsm->sessions;
562       s->per_user_index = per_user_translation_list_elt - tsm->list_pool;
563       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
564
565       clib_dlist_addtail (tsm->list_pool,
566                           s->per_user_list_head_index,
567                           per_user_translation_list_elt - tsm->list_pool);
568
569       s->user_index = u - tsm->users;
570       vlib_set_simple_counter (&sm->total_sessions, thread_index, 0,
571                                pool_elts (tsm->sessions));
572     }
573
574   s->ha_last_refreshed = now;
575
576   return s;
577 }
578
579 void
580 snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index,
581                           int is_add)
582 {
583   fib_prefix_t prefix = {
584     .fp_len = p_len,
585     .fp_proto = FIB_PROTOCOL_IP4,
586     .fp_addr = {
587                 .ip4.as_u32 = addr->as_u32,
588                 },
589   };
590   u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
591
592   if (is_add)
593     fib_table_entry_update_one_path (fib_index,
594                                      &prefix,
595                                      nat_fib_src_low,
596                                      (FIB_ENTRY_FLAG_CONNECTED |
597                                       FIB_ENTRY_FLAG_LOCAL |
598                                       FIB_ENTRY_FLAG_EXCLUSIVE),
599                                      DPO_PROTO_IP4,
600                                      NULL,
601                                      sw_if_index,
602                                      ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
603   else
604     fib_table_entry_delete (fib_index, &prefix, nat_fib_src_low);
605 }
606
607 int
608 snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id,
609                   u8 twice_nat)
610 {
611   snat_address_t *ap;
612   snat_interface_t *i;
613   vlib_thread_main_t *tm = vlib_get_thread_main ();
614
615   if (twice_nat && !sm->endpoint_dependent)
616     return VNET_API_ERROR_FEATURE_DISABLED;
617
618   /* Check if address already exists */
619   /* *INDENT-OFF* */
620   vec_foreach (ap, twice_nat ? sm->twice_nat_addresses : sm->addresses)
621     {
622       if (ap->addr.as_u32 == addr->as_u32)
623         return VNET_API_ERROR_VALUE_EXIST;
624     }
625   /* *INDENT-ON* */
626
627   if (twice_nat)
628     vec_add2 (sm->twice_nat_addresses, ap, 1);
629   else
630     vec_add2 (sm->addresses, ap, 1);
631
632   ap->addr = *addr;
633   if (vrf_id != ~0)
634     ap->fib_index =
635       fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
636                                          nat_fib_src_low);
637   else
638     ap->fib_index = ~0;
639 #define _(N, i, n, s) \
640   clib_memset(ap->busy_##n##_port_refcounts, 0, sizeof(ap->busy_##n##_port_refcounts));\
641   ap->busy_##n##_ports = 0; \
642   ap->busy_##n##_ports_per_thread = 0;\
643   vec_validate_init_empty (ap->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
644   foreach_nat_protocol
645 #undef _
646     if (twice_nat)
647     return 0;
648
649   /* Add external address to FIB */
650   /* *INDENT-OFF* */
651   pool_foreach (i, sm->interfaces,
652   ({
653     if (nat_interface_is_inside(i) || sm->out2in_dpo)
654       continue;
655
656     snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1);
657     break;
658   }));
659   pool_foreach (i, sm->output_feature_interfaces,
660   ({
661     if (nat_interface_is_inside(i) || sm->out2in_dpo)
662       continue;
663
664     snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1);
665     break;
666   }));
667   /* *INDENT-ON* */
668
669   return 0;
670 }
671
672 static int
673 is_snat_address_used_in_static_mapping (snat_main_t * sm, ip4_address_t addr)
674 {
675   snat_static_mapping_t *m;
676   /* *INDENT-OFF* */
677   pool_foreach (m, sm->static_mappings,
678   ({
679       if (is_addr_only_static_mapping (m) ||
680           is_out2in_only_static_mapping (m) ||
681           is_identity_static_mapping (m))
682         continue;
683       if (m->external_addr.as_u32 == addr.as_u32)
684         return 1;
685   }));
686   /* *INDENT-ON* */
687
688   return 0;
689 }
690
691 static void
692 snat_add_static_mapping_when_resolved (snat_main_t * sm,
693                                        ip4_address_t l_addr,
694                                        u16 l_port,
695                                        u32 sw_if_index,
696                                        u16 e_port,
697                                        u32 vrf_id,
698                                        nat_protocol_t proto,
699                                        int addr_only, int is_add, u8 * tag,
700                                        int twice_nat, int out2in_only,
701                                        int identity_nat,
702                                        ip4_address_t pool_addr, int exact)
703 {
704   snat_static_map_resolve_t *rp;
705
706   vec_add2 (sm->to_resolve, rp, 1);
707   rp->l_addr.as_u32 = l_addr.as_u32;
708   rp->l_port = l_port;
709   rp->sw_if_index = sw_if_index;
710   rp->e_port = e_port;
711   rp->vrf_id = vrf_id;
712   rp->proto = proto;
713   rp->addr_only = addr_only;
714   rp->is_add = is_add;
715   rp->twice_nat = twice_nat;
716   rp->out2in_only = out2in_only;
717   rp->identity_nat = identity_nat;
718   rp->tag = vec_dup (tag);
719   rp->pool_addr = pool_addr;
720   rp->exact = exact;
721 }
722
723 static u32
724 get_thread_idx_by_port (u16 e_port)
725 {
726   snat_main_t *sm = &snat_main;
727   u32 thread_idx = sm->num_workers;
728   if (sm->num_workers > 1)
729     {
730       thread_idx =
731         sm->first_worker_index +
732         sm->workers[(e_port - 1024) / sm->port_per_thread];
733     }
734   return thread_idx;
735 }
736
737 void
738 snat_static_mapping_del_sessions (snat_main_t * sm,
739                                   snat_main_per_thread_data_t * tsm,
740                                   snat_user_key_t u_key, int addr_only,
741                                   ip4_address_t e_addr, u16 e_port)
742 {
743   clib_bihash_kv_8_8_t kv, value;
744   kv.key = u_key.as_u64;
745   u64 user_index;
746   dlist_elt_t *head, *elt;
747   snat_user_t *u;
748   snat_session_t *s;
749   u32 elt_index, head_index, ses_index;
750   if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
751     {
752       user_index = value.value;
753       u = pool_elt_at_index (tsm->users, user_index);
754       if (u->nstaticsessions)
755         {
756           head_index = u->sessions_per_user_list_head_index;
757           head = pool_elt_at_index (tsm->list_pool, head_index);
758           elt_index = head->next;
759           elt = pool_elt_at_index (tsm->list_pool, elt_index);
760           ses_index = elt->value;
761           while (ses_index != ~0)
762             {
763               s = pool_elt_at_index (tsm->sessions, ses_index);
764               elt = pool_elt_at_index (tsm->list_pool, elt->next);
765               ses_index = elt->value;
766
767               if (!addr_only)
768                 {
769                   if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
770                       (s->out2in.port != e_port))
771                     continue;
772                 }
773
774               if (is_lb_session (s))
775                 continue;
776
777               if (!snat_is_session_static (s))
778                 continue;
779
780               nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
781               nat44_delete_session (sm, s, tsm - sm->per_thread_data);
782
783               if (!addr_only)
784                 break;
785             }
786         }
787     }
788 }
789
790 void
791 snat_ed_static_mapping_del_sessions (snat_main_t * sm,
792                                      snat_main_per_thread_data_t * tsm,
793                                      ip4_address_t l_addr,
794                                      u16 l_port,
795                                      u8 protocol,
796                                      u32 fib_index, int addr_only,
797                                      ip4_address_t e_addr, u16 e_port)
798 {
799   snat_session_t *s;
800   u32 *indexes_to_free = NULL;
801   /* *INDENT-OFF* */
802   pool_foreach (s, tsm->sessions, {
803     if (s->in2out.fib_index != fib_index ||
804         s->in2out.addr.as_u32 != l_addr.as_u32)
805       {
806         continue;
807       }
808     if (!addr_only)
809       {
810         if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
811             s->out2in.port != e_port ||
812             s->in2out.port != l_port ||
813             s->nat_proto != protocol)
814           continue;
815       }
816
817     if (is_lb_session (s))
818       continue;
819     if (!snat_is_session_static (s))
820       continue;
821     nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
822     vec_add1 (indexes_to_free, s - tsm->sessions);
823     if (!addr_only)
824       break;
825   });
826   /* *INDENT-ON* */
827   u32 *ses_index;
828   vec_foreach (ses_index, indexes_to_free)
829   {
830     s = pool_elt_at_index (tsm->sessions, *ses_index);
831     nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
832   }
833   vec_free (indexes_to_free);
834 }
835
836 int
837 snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
838                          u16 l_port, u16 e_port, u32 vrf_id, int addr_only,
839                          u32 sw_if_index, nat_protocol_t proto, int is_add,
840                          twice_nat_type_t twice_nat, u8 out2in_only, u8 * tag,
841                          u8 identity_nat, ip4_address_t pool_addr, int exact)
842 {
843   snat_main_t *sm = &snat_main;
844   snat_static_mapping_t *m;
845   clib_bihash_kv_8_8_t kv, value;
846   snat_address_t *a = 0;
847   u32 fib_index = ~0;
848   snat_interface_t *interface;
849   int i;
850   snat_main_per_thread_data_t *tsm;
851   snat_user_key_t u_key;
852   snat_user_t *u;
853   dlist_elt_t *head, *elt;
854   u32 elt_index, head_index;
855   u32 ses_index;
856   u64 user_index;
857   snat_session_t *s;
858   snat_static_map_resolve_t *rp, *rp_match = 0;
859   nat44_lb_addr_port_t *local;
860   u32 find = ~0;
861
862   if (!sm->endpoint_dependent)
863     {
864       if (twice_nat || out2in_only)
865         return VNET_API_ERROR_FEATURE_DISABLED;
866     }
867
868   /* If the external address is a specific interface address */
869   if (sw_if_index != ~0)
870     {
871       ip4_address_t *first_int_addr;
872
873       for (i = 0; i < vec_len (sm->to_resolve); i++)
874         {
875           rp = sm->to_resolve + i;
876           if (rp->sw_if_index != sw_if_index ||
877               rp->l_addr.as_u32 != l_addr.as_u32 ||
878               rp->vrf_id != vrf_id || rp->addr_only != addr_only)
879             continue;
880
881           if (!addr_only)
882             {
883               if ((rp->l_port != l_port && rp->e_port != e_port)
884                   || rp->proto != proto)
885                 continue;
886             }
887
888           rp_match = rp;
889           break;
890         }
891
892       /* Might be already set... */
893       first_int_addr = ip4_interface_first_address
894         (sm->ip4_main, sw_if_index, 0 /* just want the address */ );
895
896       if (is_add)
897         {
898           if (rp_match)
899             return VNET_API_ERROR_VALUE_EXIST;
900
901           snat_add_static_mapping_when_resolved
902             (sm, l_addr, l_port, sw_if_index, e_port, vrf_id, proto,
903              addr_only, is_add, tag, twice_nat, out2in_only,
904              identity_nat, pool_addr, exact);
905
906           /* DHCP resolution required? */
907           if (first_int_addr == 0)
908             {
909               return 0;
910             }
911           else
912             {
913               e_addr.as_u32 = first_int_addr->as_u32;
914               /* Identity mapping? */
915               if (l_addr.as_u32 == 0)
916                 l_addr.as_u32 = e_addr.as_u32;
917             }
918         }
919       else
920         {
921           if (!rp_match)
922             return VNET_API_ERROR_NO_SUCH_ENTRY;
923
924           vec_del1 (sm->to_resolve, i);
925
926           if (first_int_addr)
927             {
928               e_addr.as_u32 = first_int_addr->as_u32;
929               /* Identity mapping? */
930               if (l_addr.as_u32 == 0)
931                 l_addr.as_u32 = e_addr.as_u32;
932             }
933           else
934             return 0;
935         }
936     }
937
938   init_nat_k (&kv, e_addr, addr_only ? 0 : e_port, 0, addr_only ? 0 : proto);
939   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
940     m = 0;
941   else
942     m = pool_elt_at_index (sm->static_mappings, value.value);
943
944   if (is_add)
945     {
946       if (m)
947         {
948           if (is_identity_static_mapping (m))
949             {
950               /* *INDENT-OFF* */
951               pool_foreach (local, m->locals,
952               ({
953                 if (local->vrf_id == vrf_id)
954                   return VNET_API_ERROR_VALUE_EXIST;
955               }));
956               /* *INDENT-ON* */
957               pool_get (m->locals, local);
958               local->vrf_id = vrf_id;
959               local->fib_index =
960                 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
961                                                    nat_fib_src_low);
962               init_nat_kv (&kv, m->local_addr, m->local_port,
963                            local->fib_index, m->proto,
964                            m - sm->static_mappings);
965               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
966               return 0;
967             }
968           else
969             return VNET_API_ERROR_VALUE_EXIST;
970         }
971
972       if (twice_nat && addr_only)
973         return VNET_API_ERROR_UNSUPPORTED;
974
975       /* Convert VRF id to FIB index */
976       if (vrf_id != ~0)
977         fib_index =
978           fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
979                                              nat_fib_src_low);
980       /* If not specified use inside VRF id from SNAT plugin startup config */
981       else
982         {
983           fib_index = sm->inside_fib_index;
984           vrf_id = sm->inside_vrf_id;
985           fib_table_lock (fib_index, FIB_PROTOCOL_IP4, nat_fib_src_low);
986         }
987
988       if (!(out2in_only || identity_nat))
989         {
990           init_nat_k (&kv, l_addr, addr_only ? 0 : l_port, fib_index,
991                       addr_only ? 0 : proto);
992           if (!clib_bihash_search_8_8
993               (&sm->static_mapping_by_local, &kv, &value))
994             return VNET_API_ERROR_VALUE_EXIST;
995         }
996
997       /* Find external address in allocated addresses and reserve port for
998          address and port pair mapping when dynamic translations enabled */
999       if (!(addr_only || sm->static_mapping_only || out2in_only))
1000         {
1001           for (i = 0; i < vec_len (sm->addresses); i++)
1002             {
1003               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1004                 {
1005                   a = sm->addresses + i;
1006                   /* External port must be unused */
1007                   switch (proto)
1008                     {
1009 #define _(N, j, n, s) \
1010                     case NAT_PROTOCOL_##N: \
1011                       if (a->busy_##n##_port_refcounts[e_port]) \
1012                         return VNET_API_ERROR_INVALID_VALUE; \
1013                       ++a->busy_##n##_port_refcounts[e_port]; \
1014                       if (e_port > 1024) \
1015                         { \
1016                           a->busy_##n##_ports++; \
1017                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
1018                         } \
1019                       break;
1020                       foreach_nat_protocol
1021 #undef _
1022                     default:
1023                       nat_elog_info ("unknown protocol");
1024                       return VNET_API_ERROR_INVALID_VALUE_2;
1025                     }
1026                   break;
1027                 }
1028             }
1029           /* External address must be allocated */
1030           if (!a && (l_addr.as_u32 != e_addr.as_u32))
1031             {
1032               if (sw_if_index != ~0)
1033                 {
1034                   for (i = 0; i < vec_len (sm->to_resolve); i++)
1035                     {
1036                       rp = sm->to_resolve + i;
1037                       if (rp->addr_only)
1038                         continue;
1039                       if (rp->sw_if_index != sw_if_index &&
1040                           rp->l_addr.as_u32 != l_addr.as_u32 &&
1041                           rp->vrf_id != vrf_id && rp->l_port != l_port &&
1042                           rp->e_port != e_port && rp->proto != proto)
1043                         continue;
1044
1045                       vec_del1 (sm->to_resolve, i);
1046                       break;
1047                     }
1048                 }
1049               return VNET_API_ERROR_NO_SUCH_ENTRY;
1050             }
1051         }
1052
1053       pool_get (sm->static_mappings, m);
1054       clib_memset (m, 0, sizeof (*m));
1055       m->tag = vec_dup (tag);
1056       m->local_addr = l_addr;
1057       m->external_addr = e_addr;
1058       m->twice_nat = twice_nat;
1059
1060       if (twice_nat == TWICE_NAT && exact)
1061         {
1062           m->flags |= NAT_STATIC_MAPPING_FLAG_EXACT_ADDRESS;
1063           m->pool_addr = pool_addr;
1064         }
1065
1066       if (out2in_only)
1067         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
1068       if (addr_only)
1069         m->flags |= NAT_STATIC_MAPPING_FLAG_ADDR_ONLY;
1070       if (identity_nat)
1071         {
1072           m->flags |= NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT;
1073           pool_get (m->locals, local);
1074           local->vrf_id = vrf_id;
1075           local->fib_index = fib_index;
1076         }
1077       else
1078         {
1079           m->vrf_id = vrf_id;
1080           m->fib_index = fib_index;
1081         }
1082       if (!addr_only)
1083         {
1084           m->local_port = l_port;
1085           m->external_port = e_port;
1086           m->proto = proto;
1087         }
1088
1089       if (sm->num_workers > 1)
1090         {
1091           ip4_header_t ip = {
1092             .src_address = m->local_addr,
1093           };
1094           vec_add1 (m->workers, sm->worker_in2out_cb (&ip, m->fib_index, 0));
1095           tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
1096         }
1097       else
1098         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1099
1100       init_nat_kv (&kv, m->local_addr, m->local_port, fib_index, m->proto,
1101                    m - sm->static_mappings);
1102       if (!out2in_only)
1103         clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
1104
1105       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto,
1106                    m - sm->static_mappings);
1107       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1);
1108
1109       /* Delete dynamic sessions matching local address (+ local port) */
1110       if (!(sm->static_mapping_only))
1111         {
1112           u_key.addr = m->local_addr;
1113           u_key.fib_index = m->fib_index;
1114           kv.key = u_key.as_u64;
1115           if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1116             {
1117               user_index = value.value;
1118               u = pool_elt_at_index (tsm->users, user_index);
1119               if (u->nsessions)
1120                 {
1121                   head_index = u->sessions_per_user_list_head_index;
1122                   head = pool_elt_at_index (tsm->list_pool, head_index);
1123                   elt_index = head->next;
1124                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1125                   ses_index = elt->value;
1126                   while (ses_index != ~0)
1127                     {
1128                       s = pool_elt_at_index (tsm->sessions, ses_index);
1129                       elt = pool_elt_at_index (tsm->list_pool, elt->next);
1130                       ses_index = elt->value;
1131
1132                       if (snat_is_session_static (s))
1133                         continue;
1134
1135                       if (!addr_only && s->in2out.port != m->local_port)
1136                         continue;
1137
1138                       nat_free_session_data (sm, s,
1139                                              tsm - sm->per_thread_data, 0);
1140                       nat44_delete_session (sm, s, tsm - sm->per_thread_data);
1141
1142                       if (!addr_only && !sm->endpoint_dependent)
1143                         break;
1144                     }
1145                 }
1146             }
1147         }
1148     }
1149   else
1150     {
1151       if (!m)
1152         {
1153           if (sw_if_index != ~0)
1154             return 0;
1155           else
1156             return VNET_API_ERROR_NO_SUCH_ENTRY;
1157         }
1158
1159       if (identity_nat)
1160         {
1161           if (vrf_id == ~0)
1162             vrf_id = sm->inside_vrf_id;
1163
1164           /* *INDENT-OFF* */
1165           pool_foreach (local, m->locals,
1166           ({
1167             if (local->vrf_id == vrf_id)
1168               find = local - m->locals;
1169           }));
1170           /* *INDENT-ON* */
1171           if (find == ~0)
1172             return VNET_API_ERROR_NO_SUCH_ENTRY;
1173
1174           local = pool_elt_at_index (m->locals, find);
1175           fib_index = local->fib_index;
1176           pool_put (m->locals, local);
1177         }
1178       else
1179         fib_index = m->fib_index;
1180
1181       /* Free external address port */
1182       if (!(addr_only || sm->static_mapping_only || out2in_only))
1183         {
1184           for (i = 0; i < vec_len (sm->addresses); i++)
1185             {
1186               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1187                 {
1188                   a = sm->addresses + i;
1189                   switch (proto)
1190                     {
1191 #define _(N, j, n, s) \
1192                     case NAT_PROTOCOL_##N: \
1193                       --a->busy_##n##_port_refcounts[e_port]; \
1194                       if (e_port > 1024) \
1195                         { \
1196                           a->busy_##n##_ports--; \
1197                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1198                         } \
1199                       break;
1200                       foreach_nat_protocol
1201 #undef _
1202                     default:
1203                       nat_elog_info ("unknown protocol");
1204                       return VNET_API_ERROR_INVALID_VALUE_2;
1205                     }
1206                   break;
1207                 }
1208             }
1209         }
1210
1211       if (sm->num_workers > 1)
1212         tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
1213       else
1214         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1215
1216       init_nat_k (&kv, m->local_addr, m->local_port, fib_index, m->proto);
1217       if (!out2in_only)
1218         clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0);
1219
1220       /* Delete session(s) for static mapping if exist */
1221       if (!(sm->static_mapping_only) ||
1222           (sm->static_mapping_only && sm->static_mapping_connection_tracking))
1223         {
1224           if (sm->endpoint_dependent)
1225             {
1226               snat_ed_static_mapping_del_sessions (sm, tsm, m->local_addr,
1227                                                    m->local_port, m->proto,
1228                                                    fib_index, addr_only,
1229                                                    e_addr, e_port);
1230             }
1231           else
1232             {
1233               u_key.addr = m->local_addr;
1234               u_key.fib_index = fib_index;
1235               kv.key = u_key.as_u64;
1236               snat_static_mapping_del_sessions (sm, tsm, u_key, addr_only,
1237                                                 e_addr, e_port);
1238             }
1239         }
1240
1241       fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, nat_fib_src_low);
1242       if (pool_elts (m->locals))
1243         return 0;
1244
1245       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
1246       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0);
1247
1248       vec_free (m->tag);
1249       vec_free (m->workers);
1250       /* Delete static mapping from pool */
1251       pool_put (sm->static_mappings, m);
1252     }
1253
1254   if (!addr_only || (l_addr.as_u32 == e_addr.as_u32))
1255     return 0;
1256
1257   /* Add/delete external address to FIB */
1258   /* *INDENT-OFF* */
1259   pool_foreach (interface, sm->interfaces,
1260   ({
1261     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1262       continue;
1263
1264     snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add);
1265     break;
1266   }));
1267   pool_foreach (interface, sm->output_feature_interfaces,
1268   ({
1269     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1270       continue;
1271
1272     snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add);
1273     break;
1274   }));
1275   /* *INDENT-ON* */
1276
1277   return 0;
1278 }
1279
1280 int
1281 nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
1282                                  nat_protocol_t proto,
1283                                  nat44_lb_addr_port_t * locals, u8 is_add,
1284                                  twice_nat_type_t twice_nat, u8 out2in_only,
1285                                  u8 * tag, u32 affinity)
1286 {
1287   snat_main_t *sm = &snat_main;
1288   snat_static_mapping_t *m;
1289   clib_bihash_kv_8_8_t kv, value;
1290   snat_address_t *a = 0;
1291   int i;
1292   nat44_lb_addr_port_t *local;
1293   snat_main_per_thread_data_t *tsm;
1294   snat_session_t *s;
1295   uword *bitmap = 0;
1296
1297   if (!sm->endpoint_dependent)
1298     return VNET_API_ERROR_FEATURE_DISABLED;
1299
1300   init_nat_k (&kv, e_addr, e_port, 0, proto);
1301   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1302     m = 0;
1303   else
1304     m = pool_elt_at_index (sm->static_mappings, value.value);
1305
1306   if (is_add)
1307     {
1308       if (m)
1309         return VNET_API_ERROR_VALUE_EXIST;
1310
1311       if (vec_len (locals) < 2)
1312         return VNET_API_ERROR_INVALID_VALUE;
1313
1314       /* Find external address in allocated addresses and reserve port for
1315          address and port pair mapping when dynamic translations enabled */
1316       if (!(sm->static_mapping_only || out2in_only))
1317         {
1318           for (i = 0; i < vec_len (sm->addresses); i++)
1319             {
1320               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1321                 {
1322                   a = sm->addresses + i;
1323                   /* External port must be unused */
1324                   switch (proto)
1325                     {
1326 #define _(N, j, n, s) \
1327                     case NAT_PROTOCOL_##N: \
1328                       if (a->busy_##n##_port_refcounts[e_port]) \
1329                         return VNET_API_ERROR_INVALID_VALUE; \
1330                       ++a->busy_##n##_port_refcounts[e_port]; \
1331                       if (e_port > 1024) \
1332                         { \
1333                           a->busy_##n##_ports++; \
1334                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
1335                         } \
1336                       break;
1337                       foreach_nat_protocol
1338 #undef _
1339                     default:
1340                       nat_elog_info ("unknown protocol");
1341                       return VNET_API_ERROR_INVALID_VALUE_2;
1342                     }
1343                   break;
1344                 }
1345             }
1346           /* External address must be allocated */
1347           if (!a)
1348             return VNET_API_ERROR_NO_SUCH_ENTRY;
1349         }
1350
1351       pool_get (sm->static_mappings, m);
1352       clib_memset (m, 0, sizeof (*m));
1353       m->tag = vec_dup (tag);
1354       m->external_addr = e_addr;
1355       m->external_port = e_port;
1356       m->proto = proto;
1357       m->twice_nat = twice_nat;
1358       m->flags |= NAT_STATIC_MAPPING_FLAG_LB;
1359       if (out2in_only)
1360         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
1361       m->affinity = affinity;
1362
1363       if (affinity)
1364         m->affinity_per_service_list_head_index =
1365           nat_affinity_get_per_service_list_head_index ();
1366       else
1367         m->affinity_per_service_list_head_index = ~0;
1368
1369       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto,
1370                    m - sm->static_mappings);
1371       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1))
1372         {
1373           nat_elog_err ("static_mapping_by_external key add failed");
1374           return VNET_API_ERROR_UNSPECIFIED;
1375         }
1376
1377       for (i = 0; i < vec_len (locals); i++)
1378         {
1379           locals[i].fib_index =
1380             fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
1381                                                locals[i].vrf_id,
1382                                                nat_fib_src_low);
1383           if (!out2in_only)
1384             {
1385               init_nat_kv (&kv, locals[i].addr, locals[i].port,
1386                            locals[i].fib_index, m->proto,
1387                            m - sm->static_mappings);
1388               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
1389             }
1390           locals[i].prefix = (i == 0) ? locals[i].probability :
1391             (locals[i - 1].prefix + locals[i].probability);
1392           pool_get (m->locals, local);
1393           *local = locals[i];
1394           if (sm->num_workers > 1)
1395             {
1396               ip4_header_t ip = {
1397                 .src_address = locals[i].addr,
1398               };
1399               bitmap =
1400                 clib_bitmap_set (bitmap,
1401                                  sm->worker_in2out_cb (&ip, m->fib_index, 0),
1402                                  1);
1403             }
1404         }
1405
1406       /* Assign workers */
1407       if (sm->num_workers > 1)
1408         {
1409           /* *INDENT-OFF* */
1410           clib_bitmap_foreach (i, bitmap,
1411             ({
1412                vec_add1(m->workers, i);
1413             }));
1414           /* *INDENT-ON* */
1415         }
1416     }
1417   else
1418     {
1419       if (!m)
1420         return VNET_API_ERROR_NO_SUCH_ENTRY;
1421
1422       if (!is_lb_static_mapping (m))
1423         return VNET_API_ERROR_INVALID_VALUE;
1424
1425       /* Free external address port */
1426       if (!(sm->static_mapping_only || out2in_only))
1427         {
1428           for (i = 0; i < vec_len (sm->addresses); i++)
1429             {
1430               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1431                 {
1432                   a = sm->addresses + i;
1433                   switch (proto)
1434                     {
1435 #define _(N, j, n, s) \
1436                     case NAT_PROTOCOL_##N: \
1437                       --a->busy_##n##_port_refcounts[e_port]; \
1438                       if (e_port > 1024) \
1439                         { \
1440                           a->busy_##n##_ports--; \
1441                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1442                         } \
1443                       break;
1444                       foreach_nat_protocol
1445 #undef _
1446                     default:
1447                       nat_elog_info ("unknown protocol");
1448                       return VNET_API_ERROR_INVALID_VALUE_2;
1449                     }
1450                   break;
1451                 }
1452             }
1453         }
1454
1455       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
1456       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0))
1457         {
1458           nat_elog_err ("static_mapping_by_external key del failed");
1459           return VNET_API_ERROR_UNSPECIFIED;
1460         }
1461
1462       /* *INDENT-OFF* */
1463       pool_foreach (local, m->locals,
1464       ({
1465           fib_table_unlock (local->fib_index, FIB_PROTOCOL_IP4,
1466                             nat_fib_src_low);
1467           if (!out2in_only)
1468             {
1469 init_nat_k(&              kv, local->addr, local->port, local->fib_index, m->proto);
1470               if (clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 0))
1471                 {
1472                   nat_elog_err ("static_mapping_by_local key del failed");
1473                   return VNET_API_ERROR_UNSPECIFIED;
1474                 }
1475             }
1476
1477           if (sm->num_workers > 1)
1478             {
1479               ip4_header_t ip = {
1480                 .src_address = local->addr,
1481               };
1482               tsm = vec_elt_at_index (sm->per_thread_data,
1483                                       sm->worker_in2out_cb (&ip, m->fib_index, 0));
1484             }
1485           else
1486             tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1487
1488           /* Delete sessions */
1489           pool_foreach (s, tsm->sessions, {
1490             if (!(is_lb_session (s)))
1491               continue;
1492
1493             if ((s->in2out.addr.as_u32 != local->addr.as_u32) ||
1494                 s->in2out.port != local->port)
1495               continue;
1496
1497             nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1498             nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1499           });
1500       }));
1501       /* *INDENT-ON* */
1502       if (m->affinity)
1503         nat_affinity_flush_service (m->affinity_per_service_list_head_index);
1504       pool_free (m->locals);
1505       vec_free (m->tag);
1506       vec_free (m->workers);
1507
1508       pool_put (sm->static_mappings, m);
1509     }
1510
1511   return 0;
1512 }
1513
1514 int
1515 nat44_lb_static_mapping_add_del_local (ip4_address_t e_addr, u16 e_port,
1516                                        ip4_address_t l_addr, u16 l_port,
1517                                        nat_protocol_t proto, u32 vrf_id,
1518                                        u8 probability, u8 is_add)
1519 {
1520   snat_main_t *sm = &snat_main;
1521   snat_static_mapping_t *m = 0;
1522   clib_bihash_kv_8_8_t kv, value;
1523   nat44_lb_addr_port_t *local, *prev_local, *match_local = 0;
1524   snat_main_per_thread_data_t *tsm;
1525   snat_session_t *s;
1526   u32 *locals = 0;
1527   uword *bitmap = 0;
1528   int i;
1529
1530   if (!sm->endpoint_dependent)
1531     return VNET_API_ERROR_FEATURE_DISABLED;
1532
1533   init_nat_k (&kv, e_addr, e_port, 0, proto);
1534   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1535     m = pool_elt_at_index (sm->static_mappings, value.value);
1536
1537   if (!m)
1538     return VNET_API_ERROR_NO_SUCH_ENTRY;
1539
1540   if (!is_lb_static_mapping (m))
1541     return VNET_API_ERROR_INVALID_VALUE;
1542
1543   /* *INDENT-OFF* */
1544   pool_foreach (local, m->locals,
1545   ({
1546     if ((local->addr.as_u32 == l_addr.as_u32) && (local->port == l_port) &&
1547         (local->vrf_id == vrf_id))
1548       {
1549         match_local = local;
1550         break;
1551       }
1552   }));
1553   /* *INDENT-ON* */
1554
1555   if (is_add)
1556     {
1557       if (match_local)
1558         return VNET_API_ERROR_VALUE_EXIST;
1559
1560       pool_get (m->locals, local);
1561       clib_memset (local, 0, sizeof (*local));
1562       local->addr.as_u32 = l_addr.as_u32;
1563       local->port = l_port;
1564       local->probability = probability;
1565       local->vrf_id = vrf_id;
1566       local->fib_index =
1567         fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1568                                            nat_fib_src_low);
1569
1570       if (!is_out2in_only_static_mapping (m))
1571         {
1572           init_nat_kv (&kv, l_addr, l_port, local->fib_index, proto,
1573                        m - sm->static_mappings);
1574           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1))
1575             nat_elog_err ("static_mapping_by_local key add failed");
1576         }
1577     }
1578   else
1579     {
1580       if (!match_local)
1581         return VNET_API_ERROR_NO_SUCH_ENTRY;
1582
1583       if (pool_elts (m->locals) < 3)
1584         return VNET_API_ERROR_UNSPECIFIED;
1585
1586       fib_table_unlock (match_local->fib_index, FIB_PROTOCOL_IP4,
1587                         nat_fib_src_low);
1588
1589       if (!is_out2in_only_static_mapping (m))
1590         {
1591           init_nat_k (&kv, l_addr, l_port, match_local->fib_index, proto);
1592           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0))
1593             nat_elog_err ("static_mapping_by_local key del failed");
1594         }
1595
1596       if (sm->num_workers > 1)
1597         {
1598           ip4_header_t ip = {
1599             .src_address = local->addr,
1600           };
1601           tsm = vec_elt_at_index (sm->per_thread_data,
1602                                   sm->worker_in2out_cb (&ip, m->fib_index,
1603                                                         0));
1604         }
1605       else
1606         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1607
1608       /* Delete sessions */
1609       /* *INDENT-OFF* */
1610       pool_foreach (s, tsm->sessions, {
1611         if (!(is_lb_session (s)))
1612           continue;
1613
1614         if ((s->in2out.addr.as_u32 != match_local->addr.as_u32) ||
1615             s->in2out.port != match_local->port)
1616           continue;
1617
1618         nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1619         nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1620       });
1621       /* *INDENT-ON* */
1622
1623       pool_put (m->locals, match_local);
1624     }
1625
1626   vec_free (m->workers);
1627
1628   /* *INDENT-OFF* */
1629   pool_foreach (local, m->locals,
1630   ({
1631     vec_add1 (locals, local - m->locals);
1632     if (sm->num_workers > 1)
1633       {
1634         ip4_header_t ip;
1635         ip.src_address.as_u32 = local->addr.as_u32,
1636         bitmap = clib_bitmap_set (bitmap,
1637                                   sm->worker_in2out_cb (&ip, local->fib_index, 0),
1638                                   1);
1639       }
1640   }));
1641   /* *INDENT-ON* */
1642
1643   ASSERT (vec_len (locals) > 1);
1644
1645   local = pool_elt_at_index (m->locals, locals[0]);
1646   local->prefix = local->probability;
1647   for (i = 1; i < vec_len (locals); i++)
1648     {
1649       local = pool_elt_at_index (m->locals, locals[i]);
1650       prev_local = pool_elt_at_index (m->locals, locals[i - 1]);
1651       local->prefix = local->probability + prev_local->prefix;
1652     }
1653
1654   /* Assign workers */
1655   if (sm->num_workers > 1)
1656     {
1657       /* *INDENT-OFF* */
1658       clib_bitmap_foreach (i, bitmap, ({ vec_add1(m->workers, i); }));
1659       /* *INDENT-ON* */
1660     }
1661
1662   return 0;
1663 }
1664
1665 int
1666 snat_del_address (snat_main_t * sm, ip4_address_t addr, u8 delete_sm,
1667                   u8 twice_nat)
1668 {
1669   snat_address_t *a = 0;
1670   snat_session_t *ses;
1671   u32 *ses_to_be_removed = 0, *ses_index;
1672   snat_main_per_thread_data_t *tsm;
1673   snat_static_mapping_t *m;
1674   snat_interface_t *interface;
1675   int i;
1676   snat_address_t *addresses =
1677     twice_nat ? sm->twice_nat_addresses : sm->addresses;
1678
1679   /* Find SNAT address */
1680   for (i = 0; i < vec_len (addresses); i++)
1681     {
1682       if (addresses[i].addr.as_u32 == addr.as_u32)
1683         {
1684           a = addresses + i;
1685           break;
1686         }
1687     }
1688   if (!a)
1689     return VNET_API_ERROR_NO_SUCH_ENTRY;
1690
1691   if (delete_sm)
1692     {
1693       ip4_address_t pool_addr = { 0 };
1694       /* *INDENT-OFF* */
1695       pool_foreach (m, sm->static_mappings,
1696       ({
1697           if (m->external_addr.as_u32 == addr.as_u32)
1698             (void) snat_add_static_mapping (m->local_addr, m->external_addr,
1699                                             m->local_port, m->external_port,
1700                                             m->vrf_id,
1701                                             is_addr_only_static_mapping(m), ~0,
1702                                             m->proto, 0 /* is_add */,
1703                                             m->twice_nat,
1704                                             is_out2in_only_static_mapping(m),
1705                                             m->tag,
1706                                             is_identity_static_mapping(m),
1707                                             pool_addr, 0);
1708       }));
1709       /* *INDENT-ON* */
1710     }
1711   else
1712     {
1713       /* Check if address is used in some static mapping */
1714       if (is_snat_address_used_in_static_mapping (sm, addr))
1715         {
1716           nat_elog_notice ("address used in static mapping");
1717           return VNET_API_ERROR_UNSPECIFIED;
1718         }
1719     }
1720
1721   if (a->fib_index != ~0)
1722     fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, nat_fib_src_low);
1723
1724   /* Delete sessions using address */
1725   if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports)
1726     {
1727       /* *INDENT-OFF* */
1728       vec_foreach (tsm, sm->per_thread_data)
1729         {
1730           pool_foreach (ses, tsm->sessions, ({
1731             if (ses->out2in.addr.as_u32 == addr.as_u32)
1732               {
1733                 nat_free_session_data (sm, ses, tsm - sm->per_thread_data, 0);
1734                 vec_add1 (ses_to_be_removed, ses - tsm->sessions);
1735               }
1736           }));
1737
1738           if (sm->endpoint_dependent){
1739               vec_foreach (ses_index, ses_to_be_removed)
1740                 {
1741                   ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
1742                   nat_ed_session_delete (sm, ses, tsm - sm->per_thread_data, 1);
1743                 }
1744           }else{
1745               vec_foreach (ses_index, ses_to_be_removed)
1746                 {
1747                   ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
1748                   nat44_delete_session (sm, ses, tsm - sm->per_thread_data);
1749                 }
1750           }
1751
1752           vec_free (ses_to_be_removed);
1753         }
1754       /* *INDENT-ON* */
1755     }
1756
1757 #define _(N, i, n, s) \
1758   vec_free (a->busy_##n##_ports_per_thread);
1759   foreach_nat_protocol
1760 #undef _
1761     if (twice_nat)
1762     {
1763       vec_del1 (sm->twice_nat_addresses, i);
1764       return 0;
1765     }
1766   else
1767     vec_del1 (sm->addresses, i);
1768
1769   /* Delete external address from FIB */
1770   /* *INDENT-OFF* */
1771   pool_foreach (interface, sm->interfaces,
1772   ({
1773     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1774       continue;
1775
1776     snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0);
1777     break;
1778   }));
1779   pool_foreach (interface, sm->output_feature_interfaces,
1780   ({
1781     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1782       continue;
1783
1784     snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0);
1785     break;
1786   }));
1787   /* *INDENT-ON* */
1788
1789   return 0;
1790 }
1791
1792 static void
1793 nat_validate_counters (snat_main_t * sm, u32 sw_if_index)
1794 {
1795 #define _(x)                                                                  \
1796   vlib_validate_simple_counter (&sm->counters.fastpath.in2out.x,              \
1797                                 sw_if_index);                                 \
1798   vlib_zero_simple_counter (&sm->counters.fastpath.in2out.x, sw_if_index);    \
1799   vlib_validate_simple_counter (&sm->counters.fastpath.out2in.x,              \
1800                                 sw_if_index);                                 \
1801   vlib_zero_simple_counter (&sm->counters.fastpath.out2in.x, sw_if_index);    \
1802   vlib_validate_simple_counter (&sm->counters.slowpath.in2out.x,              \
1803                                 sw_if_index);                                 \
1804   vlib_zero_simple_counter (&sm->counters.slowpath.in2out.x, sw_if_index);    \
1805   vlib_validate_simple_counter (&sm->counters.slowpath.out2in.x,              \
1806                                 sw_if_index);                                 \
1807   vlib_zero_simple_counter (&sm->counters.slowpath.out2in.x, sw_if_index);    \
1808   vlib_validate_simple_counter (&sm->counters.fastpath.in2out_ed.x,           \
1809                                 sw_if_index);                                 \
1810   vlib_zero_simple_counter (&sm->counters.fastpath.in2out_ed.x, sw_if_index); \
1811   vlib_validate_simple_counter (&sm->counters.fastpath.out2in_ed.x,           \
1812                                 sw_if_index);                                 \
1813   vlib_zero_simple_counter (&sm->counters.fastpath.out2in_ed.x, sw_if_index); \
1814   vlib_validate_simple_counter (&sm->counters.slowpath.in2out_ed.x,           \
1815                                 sw_if_index);                                 \
1816   vlib_zero_simple_counter (&sm->counters.slowpath.in2out_ed.x, sw_if_index); \
1817   vlib_validate_simple_counter (&sm->counters.slowpath.out2in_ed.x,           \
1818                                 sw_if_index);                                 \
1819   vlib_zero_simple_counter (&sm->counters.slowpath.out2in_ed.x, sw_if_index);
1820   foreach_nat_counter;
1821 #undef _
1822   vlib_validate_simple_counter (&sm->counters.hairpinning, sw_if_index);
1823   vlib_zero_simple_counter (&sm->counters.hairpinning, sw_if_index);
1824 }
1825
1826 void
1827 expire_per_vrf_sessions (u32 fib_index)
1828 {
1829   per_vrf_sessions_t *per_vrf_sessions;
1830   snat_main_per_thread_data_t *tsm;
1831   snat_main_t *sm = &snat_main;
1832
1833   /* *INDENT-OFF* */
1834   vec_foreach (tsm, sm->per_thread_data)
1835     {
1836       vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
1837         {
1838           if ((per_vrf_sessions->rx_fib_index == fib_index) ||
1839               (per_vrf_sessions->tx_fib_index == fib_index))
1840             {
1841               per_vrf_sessions->expired = 1;
1842             }
1843         }
1844     }
1845   /* *INDENT-ON* */
1846 }
1847
1848 void
1849 update_per_vrf_sessions_vec (u32 fib_index, int is_del)
1850 {
1851   snat_main_t *sm = &snat_main;
1852   nat_fib_t *fib;
1853
1854   // we don't care if it is outside/inside fib
1855   // we just care about their ref_count
1856   // if it reaches 0 sessions should expire
1857   // because the fib isn't valid for NAT anymore
1858
1859   vec_foreach (fib, sm->fibs)
1860   {
1861     if (fib->fib_index == fib_index)
1862       {
1863         if (is_del)
1864           {
1865             fib->ref_count--;
1866             if (!fib->ref_count)
1867               {
1868                 vec_del1 (sm->fibs, fib - sm->fibs);
1869                 expire_per_vrf_sessions (fib_index);
1870               }
1871             return;
1872           }
1873         else
1874           fib->ref_count++;
1875       }
1876   }
1877   if (!is_del)
1878     {
1879       vec_add2 (sm->fibs, fib, 1);
1880       fib->ref_count = 1;
1881       fib->fib_index = fib_index;
1882     }
1883 }
1884
1885 int
1886 snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
1887 {
1888   snat_main_t *sm = &snat_main;
1889   snat_interface_t *i;
1890   const char *feature_name, *del_feature_name;
1891   snat_address_t *ap;
1892   snat_static_mapping_t *m;
1893   nat_outside_fib_t *outside_fib;
1894   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1895                                                        sw_if_index);
1896
1897   if (sm->out2in_dpo && !is_inside)
1898     return VNET_API_ERROR_UNSUPPORTED;
1899
1900   /* *INDENT-OFF* */
1901   pool_foreach (i, sm->output_feature_interfaces,
1902   ({
1903     if (i->sw_if_index == sw_if_index)
1904       return VNET_API_ERROR_VALUE_EXIST;
1905   }));
1906   /* *INDENT-ON* */
1907
1908   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
1909     feature_name = is_inside ? "nat44-in2out-fast" : "nat44-out2in-fast";
1910   else
1911     {
1912       if (sm->num_workers > 1)
1913         feature_name =
1914           is_inside ? "nat44-in2out-worker-handoff" :
1915           "nat44-out2in-worker-handoff";
1916       else if (sm->endpoint_dependent)
1917         {
1918           feature_name = is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1919         }
1920       else
1921         feature_name = is_inside ? "nat44-in2out" : "nat44-out2in";
1922     }
1923
1924   if (sm->fq_in2out_index == ~0 && sm->num_workers > 1)
1925     sm->fq_in2out_index =
1926       vlib_frame_queue_main_init (sm->in2out_node_index, NAT_FQ_NELTS);
1927
1928   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
1929     sm->fq_out2in_index =
1930       vlib_frame_queue_main_init (sm->out2in_node_index, NAT_FQ_NELTS);
1931
1932   if (sm->endpoint_dependent)
1933     update_per_vrf_sessions_vec (fib_index, is_del);
1934
1935   if (!is_inside)
1936     {
1937       /* *INDENT-OFF* */
1938       vec_foreach (outside_fib, sm->outside_fibs)
1939         {
1940           if (outside_fib->fib_index == fib_index)
1941             {
1942               if (is_del)
1943                 {
1944                   outside_fib->refcount--;
1945                   if (!outside_fib->refcount)
1946                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1947                 }
1948               else
1949                 outside_fib->refcount++;
1950               goto feature_set;
1951             }
1952         }
1953       /* *INDENT-ON* */
1954       if (!is_del)
1955         {
1956           vec_add2 (sm->outside_fibs, outside_fib, 1);
1957           outside_fib->refcount = 1;
1958           outside_fib->fib_index = fib_index;
1959         }
1960     }
1961
1962 feature_set:
1963   /* *INDENT-OFF* */
1964   pool_foreach (i, sm->interfaces,
1965   ({
1966     if (i->sw_if_index == sw_if_index)
1967       {
1968         if (is_del)
1969           {
1970             if (nat_interface_is_inside(i) && nat_interface_is_outside(i))
1971               {
1972                 if (is_inside)
1973                   i->flags &= ~NAT_INTERFACE_FLAG_IS_INSIDE;
1974                 else
1975                   i->flags &= ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
1976
1977                 if (sm->num_workers > 1)
1978                   {
1979                     del_feature_name = "nat44-handoff-classify";
1980                     feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
1981                                                  "nat44-out2in-worker-handoff";
1982                   }
1983                 else if (sm->endpoint_dependent)
1984                   {
1985                     del_feature_name = "nat44-ed-classify";
1986                     feature_name = !is_inside ?  "nat-pre-in2out" :
1987                                                  "nat-pre-out2in";
1988                   }
1989                 else
1990                   {
1991                     del_feature_name = "nat44-classify";
1992                     feature_name = !is_inside ?  "nat44-in2out" : "nat44-out2in";
1993                   }
1994
1995                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1996                 if (rv)
1997                   return rv;
1998                 vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1999                                              sw_if_index, 0, 0, 0);
2000                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
2001                                              sw_if_index, 1, 0, 0);
2002                 if (!is_inside)
2003                   {
2004                     if (sm->endpoint_dependent)
2005                       vnet_feature_enable_disable ("ip4-local",
2006                                                    "nat44-ed-hairpinning",
2007                                                    sw_if_index, 1, 0, 0);
2008                     else
2009                       vnet_feature_enable_disable ("ip4-local",
2010                                                    "nat44-hairpinning",
2011                                                    sw_if_index, 1, 0, 0);
2012                   }
2013               }
2014             else
2015               {
2016                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
2017                 if (rv)
2018                   return rv;
2019                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
2020                                              sw_if_index, 0, 0, 0);
2021                 pool_put (sm->interfaces, i);
2022                 if (is_inside)
2023                   {
2024                     if (sm->endpoint_dependent)
2025                       vnet_feature_enable_disable ("ip4-local",
2026                                                    "nat44-ed-hairpinning",
2027                                                    sw_if_index, 0, 0, 0);
2028                     else
2029                       vnet_feature_enable_disable ("ip4-local",
2030                                                    "nat44-hairpinning",
2031                                                    sw_if_index, 0, 0, 0);
2032                   }
2033               }
2034           }
2035         else
2036           {
2037             if ((nat_interface_is_inside(i) && is_inside) ||
2038                 (nat_interface_is_outside(i) && !is_inside))
2039               return 0;
2040
2041             if (sm->num_workers > 1)
2042               {
2043                 del_feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
2044                                                  "nat44-out2in-worker-handoff";
2045                 feature_name = "nat44-handoff-classify";
2046               }
2047             else if (sm->endpoint_dependent)
2048               {
2049                 del_feature_name = !is_inside ?  "nat-pre-in2out" :
2050                                                  "nat-pre-out2in";
2051
2052                 feature_name = "nat44-ed-classify";
2053               }
2054             else
2055               {
2056                 del_feature_name = !is_inside ?  "nat44-in2out" : "nat44-out2in";
2057                 feature_name = "nat44-classify";
2058               }
2059
2060             int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
2061             if (rv)
2062               return rv;
2063             vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
2064                                          sw_if_index, 0, 0, 0);
2065             vnet_feature_enable_disable ("ip4-unicast", feature_name,
2066                                          sw_if_index, 1, 0, 0);
2067             if (!is_inside)
2068               {
2069                 if (sm->endpoint_dependent)
2070                   vnet_feature_enable_disable ("ip4-local", "nat44-ed-hairpinning",
2071                                                sw_if_index, 0, 0, 0);
2072                 else
2073                   vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning",
2074                                                sw_if_index, 0, 0, 0);
2075               }
2076             goto set_flags;
2077           }
2078
2079         goto fib;
2080       }
2081   }));
2082   /* *INDENT-ON* */
2083
2084   if (is_del)
2085     return VNET_API_ERROR_NO_SUCH_ENTRY;
2086
2087   pool_get (sm->interfaces, i);
2088   i->sw_if_index = sw_if_index;
2089   i->flags = 0;
2090   nat_validate_counters (sm, sw_if_index);
2091
2092   vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1, 0,
2093                                0);
2094
2095   int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
2096   if (rv)
2097     return rv;
2098
2099   if (is_inside && !sm->out2in_dpo)
2100     {
2101       if (sm->endpoint_dependent)
2102         vnet_feature_enable_disable ("ip4-local", "nat44-ed-hairpinning",
2103                                      sw_if_index, 1, 0, 0);
2104       else
2105         vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning",
2106                                      sw_if_index, 1, 0, 0);
2107     }
2108
2109 set_flags:
2110   if (is_inside)
2111     {
2112       i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
2113       return 0;
2114     }
2115   else
2116     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
2117
2118   /* Add/delete external addresses to FIB */
2119 fib:
2120   /* *INDENT-OFF* */
2121   vec_foreach (ap, sm->addresses)
2122     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
2123
2124   pool_foreach (m, sm->static_mappings,
2125   ({
2126     if (!(is_addr_only_static_mapping(m)) || (m->local_addr.as_u32 == m->external_addr.as_u32))
2127       continue;
2128
2129     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
2130   }));
2131   /* *INDENT-ON* */
2132
2133   return 0;
2134 }
2135
2136 int
2137 snat_interface_add_del_output_feature (u32 sw_if_index,
2138                                        u8 is_inside, int is_del)
2139 {
2140   snat_main_t *sm = &snat_main;
2141   snat_interface_t *i;
2142   snat_address_t *ap;
2143   snat_static_mapping_t *m;
2144   nat_outside_fib_t *outside_fib;
2145   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2146                                                        sw_if_index);
2147
2148   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
2149     return VNET_API_ERROR_UNSUPPORTED;
2150
2151   /* *INDENT-OFF* */
2152   pool_foreach (i, sm->interfaces,
2153   ({
2154     if (i->sw_if_index == sw_if_index)
2155       return VNET_API_ERROR_VALUE_EXIST;
2156   }));
2157   /* *INDENT-ON* */
2158
2159   if (sm->endpoint_dependent)
2160     update_per_vrf_sessions_vec (fib_index, is_del);
2161
2162   if (!is_inside)
2163     {
2164       /* *INDENT-OFF* */
2165       vec_foreach (outside_fib, sm->outside_fibs)
2166         {
2167           if (outside_fib->fib_index == fib_index)
2168             {
2169               if (is_del)
2170                 {
2171                   outside_fib->refcount--;
2172                   if (!outside_fib->refcount)
2173                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
2174                 }
2175               else
2176                 outside_fib->refcount++;
2177               goto feature_set;
2178             }
2179         }
2180       /* *INDENT-ON* */
2181       if (!is_del)
2182         {
2183           vec_add2 (sm->outside_fibs, outside_fib, 1);
2184           outside_fib->refcount = 1;
2185           outside_fib->fib_index = fib_index;
2186         }
2187     }
2188
2189 feature_set:
2190   if (is_inside)
2191     {
2192       if (sm->endpoint_dependent)
2193         {
2194           int rv =
2195             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2196           if (rv)
2197             return rv;
2198           rv =
2199             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2200                                                             !is_del);
2201           if (rv)
2202             return rv;
2203           vnet_feature_enable_disable ("ip4-unicast", "nat44-ed-hairpin-dst",
2204                                        sw_if_index, !is_del, 0, 0);
2205           vnet_feature_enable_disable ("ip4-output", "nat44-ed-hairpin-src",
2206                                        sw_if_index, !is_del, 0, 0);
2207         }
2208       else
2209         {
2210           int rv =
2211             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2212           if (rv)
2213             return rv;
2214           rv =
2215             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2216                                                             !is_del);
2217           if (rv)
2218             return rv;
2219           vnet_feature_enable_disable ("ip4-unicast", "nat44-hairpin-dst",
2220                                        sw_if_index, !is_del, 0, 0);
2221           vnet_feature_enable_disable ("ip4-output", "nat44-hairpin-src",
2222                                        sw_if_index, !is_del, 0, 0);
2223         }
2224       goto fq;
2225     }
2226
2227   if (sm->num_workers > 1)
2228     {
2229       int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2230       if (rv)
2231         return rv;
2232       rv =
2233         ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del);
2234       if (rv)
2235         return rv;
2236       vnet_feature_enable_disable ("ip4-unicast",
2237                                    "nat44-out2in-worker-handoff",
2238                                    sw_if_index, !is_del, 0, 0);
2239       vnet_feature_enable_disable ("ip4-output",
2240                                    "nat44-in2out-output-worker-handoff",
2241                                    sw_if_index, !is_del, 0, 0);
2242     }
2243   else
2244     {
2245       if (sm->endpoint_dependent)
2246         {
2247           int rv =
2248             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2249           if (rv)
2250             return rv;
2251           rv =
2252             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2253                                                             !is_del);
2254           if (rv)
2255             return rv;
2256           vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in",
2257                                        sw_if_index, !is_del, 0, 0);
2258           vnet_feature_enable_disable ("ip4-output", "nat-pre-in2out-output",
2259                                        sw_if_index, !is_del, 0, 0);
2260         }
2261       else
2262         {
2263           int rv =
2264             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2265           if (rv)
2266             return rv;
2267           rv =
2268             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2269                                                             !is_del);
2270           if (rv)
2271             return rv;
2272           vnet_feature_enable_disable ("ip4-unicast", "nat44-out2in",
2273                                        sw_if_index, !is_del, 0, 0);
2274           vnet_feature_enable_disable ("ip4-output", "nat44-in2out-output",
2275                                        sw_if_index, !is_del, 0, 0);
2276         }
2277     }
2278
2279 fq:
2280   if (sm->fq_in2out_output_index == ~0 && sm->num_workers > 1)
2281     sm->fq_in2out_output_index =
2282       vlib_frame_queue_main_init (sm->in2out_output_node_index, 0);
2283
2284   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
2285     sm->fq_out2in_index =
2286       vlib_frame_queue_main_init (sm->out2in_node_index, 0);
2287
2288   /* *INDENT-OFF* */
2289   pool_foreach (i, sm->output_feature_interfaces,
2290   ({
2291     if (i->sw_if_index == sw_if_index)
2292       {
2293         if (is_del)
2294           pool_put (sm->output_feature_interfaces, i);
2295         else
2296           return VNET_API_ERROR_VALUE_EXIST;
2297
2298         goto fib;
2299       }
2300   }));
2301   /* *INDENT-ON* */
2302
2303   if (is_del)
2304     return VNET_API_ERROR_NO_SUCH_ENTRY;
2305
2306   pool_get (sm->output_feature_interfaces, i);
2307   i->sw_if_index = sw_if_index;
2308   i->flags = 0;
2309   nat_validate_counters (sm, sw_if_index);
2310   if (is_inside)
2311     i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
2312   else
2313     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
2314
2315   /* Add/delete external addresses to FIB */
2316 fib:
2317   if (is_inside)
2318     return 0;
2319
2320   /* *INDENT-OFF* */
2321   vec_foreach (ap, sm->addresses)
2322     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
2323
2324   pool_foreach (m, sm->static_mappings,
2325   ({
2326     if (!((is_addr_only_static_mapping(m)))  || (m->local_addr.as_u32 == m->external_addr.as_u32))
2327       continue;
2328
2329     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
2330   }));
2331   /* *INDENT-ON* */
2332
2333   return 0;
2334 }
2335
2336 int
2337 snat_set_workers (uword * bitmap)
2338 {
2339   snat_main_t *sm = &snat_main;
2340   int i, j = 0;
2341
2342   if (sm->num_workers < 2)
2343     return VNET_API_ERROR_FEATURE_DISABLED;
2344
2345   if (clib_bitmap_last_set (bitmap) >= sm->num_workers)
2346     return VNET_API_ERROR_INVALID_WORKER;
2347
2348   vec_free (sm->workers);
2349   /* *INDENT-OFF* */
2350   clib_bitmap_foreach (i, bitmap,
2351     ({
2352       vec_add1(sm->workers, i);
2353       sm->per_thread_data[sm->first_worker_index + i].snat_thread_index = j;
2354       sm->per_thread_data[sm->first_worker_index + i].thread_index = i;
2355       j++;
2356     }));
2357   /* *INDENT-ON* */
2358
2359   sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
2360
2361   return 0;
2362 }
2363
2364 static void
2365 snat_update_outside_fib (u32 sw_if_index, u32 new_fib_index,
2366                          u32 old_fib_index)
2367 {
2368   snat_main_t *sm = &snat_main;
2369   nat_outside_fib_t *outside_fib;
2370   snat_interface_t *i;
2371   u8 is_add = 1;
2372   u8 match = 0;
2373
2374   if (new_fib_index == old_fib_index)
2375     return;
2376
2377   if (!vec_len (sm->outside_fibs))
2378     return;
2379
2380   /* *INDENT-OFF* */
2381   pool_foreach (i, sm->interfaces,
2382     ({
2383       if (i->sw_if_index == sw_if_index)
2384         {
2385           if (!(nat_interface_is_outside (i)))
2386             return;
2387           match = 1;
2388         }
2389     }));
2390
2391   pool_foreach (i, sm->output_feature_interfaces,
2392     ({
2393       if (i->sw_if_index == sw_if_index)
2394         {
2395           if (!(nat_interface_is_outside (i)))
2396             return;
2397           match = 1;
2398         }
2399     }));
2400   /* *INDENT-ON* */
2401
2402   if (!match)
2403     return;
2404
2405   vec_foreach (outside_fib, sm->outside_fibs)
2406   {
2407     if (outside_fib->fib_index == old_fib_index)
2408       {
2409         outside_fib->refcount--;
2410         if (!outside_fib->refcount)
2411           vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
2412         break;
2413       }
2414   }
2415
2416   vec_foreach (outside_fib, sm->outside_fibs)
2417   {
2418     if (outside_fib->fib_index == new_fib_index)
2419       {
2420         outside_fib->refcount++;
2421         is_add = 0;
2422         break;
2423       }
2424   }
2425
2426   if (is_add)
2427     {
2428       vec_add2 (sm->outside_fibs, outside_fib, 1);
2429       outside_fib->refcount = 1;
2430       outside_fib->fib_index = new_fib_index;
2431     }
2432 }
2433
2434 static void
2435 snat_ip4_table_bind (ip4_main_t * im,
2436                      uword opaque,
2437                      u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
2438 {
2439   snat_update_outside_fib (sw_if_index, new_fib_index, old_fib_index);
2440 }
2441
2442 static void
2443 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
2444                                        uword opaque,
2445                                        u32 sw_if_index,
2446                                        ip4_address_t * address,
2447                                        u32 address_length,
2448                                        u32 if_address_index, u32 is_delete);
2449
2450 static void
2451 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
2452                                  uword opaque,
2453                                  u32 sw_if_index,
2454                                  ip4_address_t * address,
2455                                  u32 address_length,
2456                                  u32 if_address_index, u32 is_delete);
2457
2458 static int
2459 nat_alloc_addr_and_port_default (snat_address_t * addresses, u32 fib_index,
2460                                  u32 thread_index, nat_protocol_t proto,
2461                                  ip4_address_t * addr, u16 * port,
2462                                  u16 port_per_thread, u32 snat_thread_index);
2463
2464 void
2465 test_key_calc_split ()
2466 {
2467   ip4_address_t l_addr;
2468   l_addr.as_u8[0] = 1;
2469   l_addr.as_u8[1] = 1;
2470   l_addr.as_u8[2] = 1;
2471   l_addr.as_u8[3] = 1;
2472   ip4_address_t r_addr;
2473   r_addr.as_u8[0] = 2;
2474   r_addr.as_u8[1] = 2;
2475   r_addr.as_u8[2] = 2;
2476   r_addr.as_u8[3] = 2;
2477   u16 l_port = 40001;
2478   u16 r_port = 40301;
2479   u8 proto = 9;
2480   u32 fib_index = 9000001;
2481   u32 thread_index = 3000000001;
2482   u32 session_index = 3000000221;
2483   clib_bihash_kv_16_8_t kv;
2484   init_ed_kv (&kv, l_addr, l_port, r_addr, r_port, fib_index, proto,
2485               thread_index, session_index);
2486   ip4_address_t l_addr2;
2487   ip4_address_t r_addr2;
2488   clib_memset (&l_addr2, 0, sizeof (l_addr2));
2489   clib_memset (&r_addr2, 0, sizeof (r_addr2));
2490   u16 l_port2 = 0;
2491   u16 r_port2 = 0;
2492   u8 proto2 = 0;
2493   u32 fib_index2 = 0;
2494   split_ed_kv (&kv, &l_addr2, &r_addr2, &proto2, &fib_index2, &l_port2,
2495                &r_port2);
2496   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
2497   ASSERT (r_addr.as_u32 == r_addr2.as_u32);
2498   ASSERT (l_port == l_port2);
2499   ASSERT (r_port == r_port2);
2500   ASSERT (proto == proto2);
2501   ASSERT (fib_index == fib_index2);
2502   ASSERT (thread_index == ed_value_get_thread_index (&kv));
2503   ASSERT (session_index == ed_value_get_session_index (&kv));
2504
2505   fib_index = 7001;
2506   proto = 5;
2507   nat_protocol_t proto3 = ~0;
2508   u64 key = calc_nat_key (l_addr, l_port, fib_index, proto);
2509   split_nat_key (key, &l_addr2, &l_port2, &fib_index2, &proto3);
2510   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
2511   ASSERT (l_port == l_port2);
2512   ASSERT (proto == proto3);
2513   ASSERT (fib_index == fib_index2);
2514 }
2515
2516 static clib_error_t *
2517 nat_ip_table_add_del (vnet_main_t * vnm, u32 table_id, u32 is_add)
2518 {
2519   snat_main_t *sm = &snat_main;
2520   u32 fib_index;
2521
2522   if (sm->endpoint_dependent)
2523     {
2524       // TODO: consider removing all NAT interfaces
2525
2526       if (!is_add)
2527         {
2528           fib_index = ip4_fib_index_from_table_id (table_id);
2529           if (fib_index != ~0)
2530             expire_per_vrf_sessions (fib_index);
2531         }
2532     }
2533   return 0;
2534 }
2535
2536 VNET_IP_TABLE_ADD_DEL_FUNCTION (nat_ip_table_add_del);
2537
2538
2539 static clib_error_t *
2540 snat_init (vlib_main_t * vm)
2541 {
2542   snat_main_t *sm = &snat_main;
2543   clib_error_t *error = 0;
2544   ip4_main_t *im = &ip4_main;
2545   ip_lookup_main_t *lm = &im->lookup_main;
2546   uword *p;
2547   vlib_thread_registration_t *tr;
2548   vlib_thread_main_t *tm = vlib_get_thread_main ();
2549   uword *bitmap = 0;
2550   u32 i;
2551   ip4_add_del_interface_address_callback_t cb4;
2552   vlib_node_t *node;
2553
2554   sm->vnet_main = vnet_get_main ();
2555   sm->ip4_main = im;
2556   sm->ip4_lookup_main = lm;
2557   sm->api_main = vlibapi_get_main ();
2558   sm->first_worker_index = 0;
2559   sm->num_workers = 0;
2560   sm->workers = 0;
2561   sm->port_per_thread = 0xffff - 1024;
2562   sm->fq_in2out_index = ~0;
2563   sm->fq_in2out_output_index = ~0;
2564   sm->fq_out2in_index = ~0;
2565
2566   sm->alloc_addr_and_port = nat_alloc_addr_and_port_default;
2567   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_DEFAULT;
2568   sm->forwarding_enabled = 0;
2569   sm->log_class = vlib_log_register_class ("nat", 0);
2570   sm->log_level = SNAT_LOG_ERROR;
2571   sm->mss_clamping = 0;
2572
2573   node = vlib_get_node_by_name (vm, (u8 *) "error-drop");
2574   sm->error_node_index = node->index;
2575
2576   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-in2out");
2577   sm->pre_in2out_node_index = node->index;
2578   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-out2in");
2579   sm->pre_out2in_node_index = node->index;
2580
2581   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-in2out");
2582   sm->pre_in2out_node_index = node->index;
2583
2584   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-out2in");
2585   sm->pre_out2in_node_index = node->index;
2586
2587   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out");
2588   sm->in2out_node_index = node->index;
2589   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-output");
2590   sm->in2out_output_node_index = node->index;
2591   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-fast");
2592   sm->in2out_fast_node_index = node->index;
2593   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-slowpath");
2594   sm->in2out_slowpath_node_index = node->index;
2595   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-output-slowpath");
2596   sm->in2out_slowpath_output_node_index = node->index;
2597
2598   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
2599   sm->ed_in2out_node_index = node->index;
2600   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-slowpath");
2601   sm->ed_in2out_slowpath_node_index = node->index;
2602
2603   node = vlib_get_node_by_name (vm, (u8 *) "nat44-out2in");
2604   sm->out2in_node_index = node->index;
2605   node = vlib_get_node_by_name (vm, (u8 *) "nat44-out2in-fast");
2606   sm->out2in_fast_node_index = node->index;
2607
2608   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
2609   sm->ed_out2in_node_index = node->index;
2610   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in-slowpath");
2611   sm->ed_out2in_slowpath_node_index = node->index;
2612
2613   node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpinning");
2614   sm->hairpinning_node_index = node->index;
2615   node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpin-dst");
2616   sm->hairpin_dst_node_index = node->index;
2617   node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpin-src");
2618   sm->hairpin_src_node_index = node->index;
2619   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpinning");
2620   sm->ed_hairpinning_node_index = node->index;
2621   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpin-dst");
2622   sm->ed_hairpin_dst_node_index = node->index;
2623   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpin-src");
2624   sm->ed_hairpin_src_node_index = node->index;
2625
2626   p = hash_get_mem (tm->thread_registrations_by_name, "workers");
2627   if (p)
2628     {
2629       tr = (vlib_thread_registration_t *) p[0];
2630       if (tr)
2631         {
2632           sm->num_workers = tr->count;
2633           sm->first_worker_index = tr->first_index;
2634         }
2635     }
2636
2637   vec_validate (sm->per_thread_data, tm->n_vlib_mains - 1);
2638
2639   /* Use all available workers by default */
2640   if (sm->num_workers > 1)
2641     {
2642       for (i = 0; i < sm->num_workers; i++)
2643         bitmap = clib_bitmap_set (bitmap, i, 1);
2644       snat_set_workers (bitmap);
2645       clib_bitmap_free (bitmap);
2646     }
2647   else
2648     {
2649       sm->per_thread_data[0].snat_thread_index = 0;
2650     }
2651
2652   error = snat_api_init (vm, sm);
2653   if (error)
2654     return error;
2655
2656   /* Set up the interface address add/del callback */
2657   cb4.function = snat_ip4_add_del_interface_address_cb;
2658   cb4.function_opaque = 0;
2659
2660   vec_add1 (im->add_del_interface_address_callbacks, cb4);
2661
2662   cb4.function = nat_ip4_add_del_addr_only_sm_cb;
2663   cb4.function_opaque = 0;
2664
2665   vec_add1 (im->add_del_interface_address_callbacks, cb4);
2666
2667   nat_dpo_module_init ();
2668
2669   /* Init counters */
2670   sm->total_users.name = "total-users";
2671   sm->total_users.stat_segment_name = "/nat44/total-users";
2672   vlib_validate_simple_counter (&sm->total_users, 0);
2673   vlib_zero_simple_counter (&sm->total_users, 0);
2674   sm->total_sessions.name = "total-sessions";
2675   sm->total_sessions.stat_segment_name = "/nat44/total-sessions";
2676   vlib_validate_simple_counter (&sm->total_sessions, 0);
2677   vlib_zero_simple_counter (&sm->total_sessions, 0);
2678   sm->user_limit_reached.name = "user-limit-reached";
2679   sm->user_limit_reached.stat_segment_name = "/nat44/user-limit-reached";
2680   vlib_validate_simple_counter (&sm->user_limit_reached, 0);
2681   vlib_zero_simple_counter (&sm->user_limit_reached, 0);
2682
2683 #define _(x)                                            \
2684   sm->counters.fastpath.in2out.x.name = #x;             \
2685   sm->counters.fastpath.in2out.x.stat_segment_name =    \
2686       "/nat44/in2out/fastpath/" #x;                     \
2687   sm->counters.slowpath.in2out.x.name = #x;             \
2688   sm->counters.slowpath.in2out.x.stat_segment_name =    \
2689       "/nat44/in2out/slowpath/" #x;                     \
2690   sm->counters.fastpath.out2in.x.name = #x;             \
2691   sm->counters.fastpath.out2in.x.stat_segment_name =    \
2692       "/nat44/out2in/fastpath/" #x;                     \
2693   sm->counters.slowpath.out2in.x.name = #x;             \
2694   sm->counters.slowpath.out2in.x.stat_segment_name =    \
2695       "/nat44/out2in/slowpath/" #x;                     \
2696   sm->counters.fastpath.in2out_ed.x.name = #x;          \
2697   sm->counters.fastpath.in2out_ed.x.stat_segment_name = \
2698       "/nat44/ed/in2out/fastpath/" #x;                  \
2699   sm->counters.slowpath.in2out_ed.x.name = #x;          \
2700   sm->counters.slowpath.in2out_ed.x.stat_segment_name = \
2701       "/nat44/ed/in2out/slowpath/" #x;                  \
2702   sm->counters.fastpath.out2in_ed.x.name = #x;          \
2703   sm->counters.fastpath.out2in_ed.x.stat_segment_name = \
2704       "/nat44/ed/out2in/fastpath/" #x;                  \
2705   sm->counters.slowpath.out2in_ed.x.name = #x;          \
2706   sm->counters.slowpath.out2in_ed.x.stat_segment_name = \
2707       "/nat44/ed/out2in/slowpath/" #x;
2708   foreach_nat_counter;
2709 #undef _
2710   sm->counters.hairpinning.name = "hairpinning";
2711   sm->counters.hairpinning.stat_segment_name = "/nat44/hairpinning";
2712
2713   /* Init IPFIX logging */
2714   snat_ipfix_logging_init (vm);
2715
2716   /* Init NAT64 */
2717   error = nat64_init (vm);
2718   if (error)
2719     return error;
2720
2721   ip4_table_bind_callback_t cbt4 = {
2722     .function = snat_ip4_table_bind,
2723   };
2724   vec_add1 (ip4_main.table_bind_callbacks, cbt4);
2725
2726   nat_fib_src_hi = fib_source_allocate ("nat-hi",
2727                                         FIB_SOURCE_PRIORITY_HI,
2728                                         FIB_SOURCE_BH_SIMPLE);
2729   nat_fib_src_low = fib_source_allocate ("nat-low",
2730                                          FIB_SOURCE_PRIORITY_LOW,
2731                                          FIB_SOURCE_BH_SIMPLE);
2732
2733   test_key_calc_split ();
2734   return error;
2735 }
2736
2737 VLIB_INIT_FUNCTION (snat_init);
2738
2739 void
2740 snat_free_outside_address_and_port (snat_address_t * addresses,
2741                                     u32 thread_index,
2742                                     ip4_address_t * addr,
2743                                     u16 port, nat_protocol_t protocol)
2744 {
2745   snat_address_t *a;
2746   u32 address_index;
2747   u16 port_host_byte_order = clib_net_to_host_u16 (port);
2748
2749   for (address_index = 0; address_index < vec_len (addresses);
2750        address_index++)
2751     {
2752       if (addresses[address_index].addr.as_u32 == addr->as_u32)
2753         break;
2754     }
2755
2756   ASSERT (address_index < vec_len (addresses));
2757
2758   a = addresses + address_index;
2759
2760   switch (protocol)
2761     {
2762 #define _(N, i, n, s) \
2763     case NAT_PROTOCOL_##N: \
2764       ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \
2765       --a->busy_##n##_port_refcounts[port_host_byte_order]; \
2766       a->busy_##n##_ports--; \
2767       a->busy_##n##_ports_per_thread[thread_index]--; \
2768       break;
2769       foreach_nat_protocol
2770 #undef _
2771     default:
2772       nat_elog_info ("unknown protocol");
2773       return;
2774     }
2775 }
2776
2777 static int
2778 nat_set_outside_address_and_port (snat_address_t * addresses,
2779                                   u32 thread_index, ip4_address_t addr,
2780                                   u16 port, nat_protocol_t protocol)
2781 {
2782   snat_address_t *a = 0;
2783   u32 address_index;
2784   u16 port_host_byte_order = clib_net_to_host_u16 (port);
2785
2786   for (address_index = 0; address_index < vec_len (addresses);
2787        address_index++)
2788     {
2789       if (addresses[address_index].addr.as_u32 != addr.as_u32)
2790         continue;
2791
2792       a = addresses + address_index;
2793       switch (protocol)
2794         {
2795 #define _(N, j, n, s) \
2796         case NAT_PROTOCOL_##N: \
2797           if (a->busy_##n##_port_refcounts[port_host_byte_order]) \
2798             return VNET_API_ERROR_INSTANCE_IN_USE; \
2799           ++a->busy_##n##_port_refcounts[port_host_byte_order]; \
2800           a->busy_##n##_ports_per_thread[thread_index]++; \
2801           a->busy_##n##_ports++; \
2802           return 0;
2803           foreach_nat_protocol
2804 #undef _
2805         default:
2806           nat_elog_info ("unknown protocol");
2807           return 1;
2808         }
2809     }
2810
2811   return VNET_API_ERROR_NO_SUCH_ENTRY;
2812 }
2813
2814 int
2815 snat_static_mapping_match (snat_main_t * sm,
2816                            ip4_address_t match_addr,
2817                            u16 match_port,
2818                            u32 match_fib_index,
2819                            nat_protocol_t match_protocol,
2820                            ip4_address_t * mapping_addr,
2821                            u16 * mapping_port,
2822                            u32 * mapping_fib_index,
2823                            u8 by_external,
2824                            u8 * is_addr_only,
2825                            twice_nat_type_t * twice_nat,
2826                            lb_nat_type_t * lb, ip4_address_t * ext_host_addr,
2827                            u8 * is_identity_nat, snat_static_mapping_t ** out)
2828 {
2829   clib_bihash_kv_8_8_t kv, value;
2830   clib_bihash_8_8_t *mapping_hash;
2831   snat_static_mapping_t *m;
2832   u32 rand, lo = 0, hi, mid, *tmp = 0, i;
2833   nat44_lb_addr_port_t *local;
2834   u8 backend_index;
2835
2836   if (!by_external)
2837     {
2838       mapping_hash = &sm->static_mapping_by_local;
2839       init_nat_k (&kv, match_addr, match_port, match_fib_index,
2840                   match_protocol);
2841       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2842         {
2843           /* Try address only mapping */
2844           init_nat_k (&kv, match_addr, 0, match_fib_index, 0);
2845           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2846             return 1;
2847         }
2848     }
2849   else
2850     {
2851       mapping_hash = &sm->static_mapping_by_external;
2852       init_nat_k (&kv, match_addr, match_port, 0, match_protocol);
2853       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2854         {
2855           /* Try address only mapping */
2856           init_nat_k (&kv, match_addr, 0, 0, 0);
2857           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2858             return 1;
2859         }
2860     }
2861
2862   m = pool_elt_at_index (sm->static_mappings, value.value);
2863
2864   if (by_external)
2865     {
2866       if (is_lb_static_mapping (m))
2867         {
2868           if (PREDICT_FALSE (lb != 0))
2869             *lb = m->affinity ? AFFINITY_LB_NAT : LB_NAT;
2870           if (m->affinity && !nat_affinity_find_and_lock (ext_host_addr[0],
2871                                                           match_addr,
2872                                                           match_protocol,
2873                                                           match_port,
2874                                                           &backend_index))
2875             {
2876               local = pool_elt_at_index (m->locals, backend_index);
2877               *mapping_addr = local->addr;
2878               *mapping_port = local->port;
2879               *mapping_fib_index = local->fib_index;
2880               goto end;
2881             }
2882           // pick locals matching this worker
2883           if (PREDICT_FALSE (sm->num_workers > 1))
2884             {
2885               u32 thread_index = vlib_get_thread_index ();
2886               /* *INDENT-OFF* */
2887               pool_foreach_index (i, m->locals,
2888               ({
2889                 local = pool_elt_at_index (m->locals, i);
2890
2891                 ip4_header_t ip = {
2892                   .src_address = local->addr,
2893                 };
2894
2895                 if (sm->worker_in2out_cb (&ip, m->fib_index, 0) ==
2896                     thread_index)
2897                   {
2898                     vec_add1 (tmp, i);
2899                   }
2900               }));
2901               /* *INDENT-ON* */
2902               ASSERT (vec_len (tmp) != 0);
2903             }
2904           else
2905             {
2906               /* *INDENT-OFF* */
2907               pool_foreach_index (i, m->locals,
2908               ({
2909                 vec_add1 (tmp, i);
2910               }));
2911               /* *INDENT-ON* */
2912             }
2913           hi = vec_len (tmp) - 1;
2914           local = pool_elt_at_index (m->locals, tmp[hi]);
2915           rand = 1 + (random_u32 (&sm->random_seed) % local->prefix);
2916           while (lo < hi)
2917             {
2918               mid = ((hi - lo) >> 1) + lo;
2919               local = pool_elt_at_index (m->locals, tmp[mid]);
2920               (rand > local->prefix) ? (lo = mid + 1) : (hi = mid);
2921             }
2922           local = pool_elt_at_index (m->locals, tmp[lo]);
2923           if (!(local->prefix >= rand))
2924             return 1;
2925           *mapping_addr = local->addr;
2926           *mapping_port = local->port;
2927           *mapping_fib_index = local->fib_index;
2928           if (m->affinity)
2929             {
2930               if (nat_affinity_create_and_lock (ext_host_addr[0], match_addr,
2931                                                 match_protocol, match_port,
2932                                                 tmp[lo], m->affinity,
2933                                                 m->affinity_per_service_list_head_index))
2934                 nat_elog_info ("create affinity record failed");
2935             }
2936           vec_free (tmp);
2937         }
2938       else
2939         {
2940           if (PREDICT_FALSE (lb != 0))
2941             *lb = NO_LB_NAT;
2942           *mapping_fib_index = m->fib_index;
2943           *mapping_addr = m->local_addr;
2944           /* Address only mapping doesn't change port */
2945           *mapping_port = is_addr_only_static_mapping (m) ? match_port
2946             : m->local_port;
2947         }
2948     }
2949   else
2950     {
2951       *mapping_addr = m->external_addr;
2952       /* Address only mapping doesn't change port */
2953       *mapping_port = is_addr_only_static_mapping (m) ? match_port
2954         : m->external_port;
2955       *mapping_fib_index = sm->outside_fib_index;
2956     }
2957
2958 end:
2959   if (PREDICT_FALSE (is_addr_only != 0))
2960     *is_addr_only = is_addr_only_static_mapping (m);
2961
2962   if (PREDICT_FALSE (twice_nat != 0))
2963     *twice_nat = m->twice_nat;
2964
2965   if (PREDICT_FALSE (is_identity_nat != 0))
2966     *is_identity_nat = is_identity_static_mapping (m);
2967
2968   if (out != 0)
2969     *out = m;
2970
2971   return 0;
2972 }
2973
2974 int
2975 snat_alloc_outside_address_and_port (snat_address_t * addresses,
2976                                      u32 fib_index,
2977                                      u32 thread_index,
2978                                      nat_protocol_t proto,
2979                                      ip4_address_t * addr,
2980                                      u16 * port,
2981                                      u16 port_per_thread,
2982                                      u32 snat_thread_index)
2983 {
2984   snat_main_t *sm = &snat_main;
2985
2986   return sm->alloc_addr_and_port (addresses, fib_index, thread_index, proto,
2987                                   addr, port, port_per_thread,
2988                                   snat_thread_index);
2989 }
2990
2991 static int
2992 nat_alloc_addr_and_port_default (snat_address_t * addresses,
2993                                  u32 fib_index,
2994                                  u32 thread_index,
2995                                  nat_protocol_t proto,
2996                                  ip4_address_t * addr,
2997                                  u16 * port,
2998                                  u16 port_per_thread, u32 snat_thread_index)
2999 {
3000   int i;
3001   snat_address_t *a, *ga = 0;
3002   u32 portnum;
3003
3004   for (i = 0; i < vec_len (addresses); i++)
3005     {
3006       a = addresses + i;
3007       switch (proto)
3008         {
3009 #define _(N, j, n, s) \
3010         case NAT_PROTOCOL_##N: \
3011           if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \
3012             { \
3013               if (a->fib_index == fib_index) \
3014                 { \
3015                   while (1) \
3016                     { \
3017                       portnum = (port_per_thread * \
3018                         snat_thread_index) + \
3019                         snat_random_port(0, port_per_thread - 1) + 1024; \
3020                       if (a->busy_##n##_port_refcounts[portnum]) \
3021                         continue; \
3022                       --a->busy_##n##_port_refcounts[portnum]; \
3023                       a->busy_##n##_ports_per_thread[thread_index]++; \
3024                       a->busy_##n##_ports++; \
3025                       *addr = a->addr; \
3026                       *port = clib_host_to_net_u16(portnum); \
3027                       return 0; \
3028                     } \
3029                 } \
3030               else if (a->fib_index == ~0) \
3031                 { \
3032                   ga = a; \
3033                 } \
3034             } \
3035           break;
3036           foreach_nat_protocol
3037 #undef _
3038         default:
3039           nat_elog_info ("unknown protocol");
3040           return 1;
3041         }
3042
3043     }
3044
3045   if (ga)
3046     {
3047       a = ga;
3048       switch (proto)
3049         {
3050 #define _(N, j, n, s) \
3051         case NAT_PROTOCOL_##N: \
3052           while (1) \
3053             { \
3054               portnum = (port_per_thread * \
3055                 snat_thread_index) + \
3056                 snat_random_port(0, port_per_thread - 1) + 1024; \
3057               if (a->busy_##n##_port_refcounts[portnum]) \
3058                 continue; \
3059               ++a->busy_##n##_port_refcounts[portnum]; \
3060               a->busy_##n##_ports_per_thread[thread_index]++; \
3061               a->busy_##n##_ports++; \
3062               *addr = a->addr; \
3063               *port = clib_host_to_net_u16(portnum); \
3064               return 0; \
3065             }
3066           break;
3067           foreach_nat_protocol
3068 #undef _
3069         default:
3070           nat_elog_info ("unknown protocol");
3071           return 1;
3072         }
3073     }
3074
3075   /* Totally out of translations to use... */
3076   snat_ipfix_logging_addresses_exhausted (thread_index, 0);
3077   return 1;
3078 }
3079
3080 static int
3081 nat_alloc_addr_and_port_mape (snat_address_t * addresses, u32 fib_index,
3082                               u32 thread_index, nat_protocol_t proto,
3083                               ip4_address_t * addr, u16 * port,
3084                               u16 port_per_thread, u32 snat_thread_index)
3085 {
3086   snat_main_t *sm = &snat_main;
3087   snat_address_t *a = addresses;
3088   u16 m, ports, portnum, A, j;
3089   m = 16 - (sm->psid_offset + sm->psid_length);
3090   ports = (1 << (16 - sm->psid_length)) - (1 << m);
3091
3092   if (!vec_len (addresses))
3093     goto exhausted;
3094
3095   switch (proto)
3096     {
3097 #define _(N, i, n, s) \
3098     case NAT_PROTOCOL_##N: \
3099       if (a->busy_##n##_ports < ports) \
3100         { \
3101           while (1) \
3102             { \
3103               A = snat_random_port(1, pow2_mask(sm->psid_offset)); \
3104               j = snat_random_port(0, pow2_mask(m)); \
3105               portnum = A | (sm->psid << sm->psid_offset) | (j << (16 - m)); \
3106               if (a->busy_##n##_port_refcounts[portnum]) \
3107                 continue; \
3108               ++a->busy_##n##_port_refcounts[portnum]; \
3109               a->busy_##n##_ports++; \
3110               *addr = a->addr; \
3111               *port = clib_host_to_net_u16 (portnum); \
3112               return 0; \
3113             } \
3114         } \
3115       break;
3116       foreach_nat_protocol
3117 #undef _
3118     default:
3119       nat_elog_info ("unknown protocol");
3120       return 1;
3121     }
3122
3123 exhausted:
3124   /* Totally out of translations to use... */
3125   snat_ipfix_logging_addresses_exhausted (thread_index, 0);
3126   return 1;
3127 }
3128
3129 static int
3130 nat_alloc_addr_and_port_range (snat_address_t * addresses, u32 fib_index,
3131                                u32 thread_index, nat_protocol_t proto,
3132                                ip4_address_t * addr, u16 * port,
3133                                u16 port_per_thread, u32 snat_thread_index)
3134 {
3135   snat_main_t *sm = &snat_main;
3136   snat_address_t *a = addresses;
3137   u16 portnum, ports;
3138
3139   ports = sm->end_port - sm->start_port + 1;
3140
3141   if (!vec_len (addresses))
3142     goto exhausted;
3143
3144   switch (proto)
3145     {
3146 #define _(N, i, n, s) \
3147     case NAT_PROTOCOL_##N: \
3148       if (a->busy_##n##_ports < ports) \
3149         { \
3150           while (1) \
3151             { \
3152               portnum = snat_random_port(sm->start_port, sm->end_port); \
3153               if (a->busy_##n##_port_refcounts[portnum]) \
3154                 continue; \
3155               ++a->busy_##n##_port_refcounts[portnum]; \
3156               a->busy_##n##_ports++; \
3157               *addr = a->addr; \
3158               *port = clib_host_to_net_u16 (portnum); \
3159               return 0; \
3160             } \
3161         } \
3162       break;
3163       foreach_nat_protocol
3164 #undef _
3165     default:
3166       nat_elog_info ("unknown protocol");
3167       return 1;
3168     }
3169
3170 exhausted:
3171   /* Totally out of translations to use... */
3172   snat_ipfix_logging_addresses_exhausted (thread_index, 0);
3173   return 1;
3174 }
3175
3176 void
3177 nat44_add_del_address_dpo (ip4_address_t addr, u8 is_add)
3178 {
3179   dpo_id_t dpo_v4 = DPO_INVALID;
3180   fib_prefix_t pfx = {
3181     .fp_proto = FIB_PROTOCOL_IP4,
3182     .fp_len = 32,
3183     .fp_addr.ip4.as_u32 = addr.as_u32,
3184   };
3185
3186   if (is_add)
3187     {
3188       nat_dpo_create (DPO_PROTO_IP4, 0, &dpo_v4);
3189       fib_table_entry_special_dpo_add (0, &pfx, nat_fib_src_hi,
3190                                        FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v4);
3191       dpo_reset (&dpo_v4);
3192     }
3193   else
3194     {
3195       fib_table_entry_special_remove (0, &pfx, nat_fib_src_hi);
3196     }
3197 }
3198
3199 u8 *
3200 format_session_kvp (u8 * s, va_list * args)
3201 {
3202   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
3203
3204   s = format (s, "%U session-index %llu", format_snat_key, v->key, v->value);
3205
3206   return s;
3207 }
3208
3209 u8 *
3210 format_static_mapping_kvp (u8 * s, va_list * args)
3211 {
3212   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
3213
3214   s = format (s, "%U static-mapping-index %llu",
3215               format_snat_key, v->key, v->value);
3216
3217   return s;
3218 }
3219
3220 u8 *
3221 format_user_kvp (u8 * s, va_list * args)
3222 {
3223   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
3224   snat_user_key_t k;
3225
3226   k.as_u64 = v->key;
3227
3228   s = format (s, "%U fib %d user-index %llu", format_ip4_address, &k.addr,
3229               k.fib_index, v->value);
3230
3231   return s;
3232 }
3233
3234 u8 *
3235 format_ed_session_kvp (u8 * s, va_list * args)
3236 {
3237   clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
3238
3239   u8 proto;
3240   u16 r_port, l_port;
3241   ip4_address_t l_addr, r_addr;
3242   u32 fib_index;
3243
3244   split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port);
3245   s =
3246     format (s,
3247             "local %U:%d remote %U:%d proto %U fib %d thread-index %u session-index %u",
3248             format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port),
3249             format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port),
3250             format_ip_protocol, proto, fib_index,
3251             ed_value_get_session_index (v), ed_value_get_thread_index (v));
3252
3253   return s;
3254 }
3255
3256 static u32
3257 snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0,
3258                            u8 is_output)
3259 {
3260   snat_main_t *sm = &snat_main;
3261   u32 next_worker_index = 0;
3262   u32 hash;
3263
3264   next_worker_index = sm->first_worker_index;
3265   hash = ip0->src_address.as_u32 + (ip0->src_address.as_u32 >> 8) +
3266     (ip0->src_address.as_u32 >> 16) + (ip0->src_address.as_u32 >> 24);
3267
3268   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
3269     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
3270   else
3271     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
3272
3273   return next_worker_index;
3274 }
3275
3276 static u32
3277 snat_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip0,
3278                            u32 rx_fib_index0, u8 is_output)
3279 {
3280   snat_main_t *sm = &snat_main;
3281   udp_header_t *udp;
3282   u16 port;
3283   clib_bihash_kv_8_8_t kv, value;
3284   snat_static_mapping_t *m;
3285   u32 proto;
3286   u32 next_worker_index = 0;
3287
3288   /* first try static mappings without port */
3289   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3290     {
3291       init_nat_k (&kv, ip0->dst_address, 0, rx_fib_index0, 0);
3292       if (!clib_bihash_search_8_8
3293           (&sm->static_mapping_by_external, &kv, &value))
3294         {
3295           m = pool_elt_at_index (sm->static_mappings, value.value);
3296           return m->workers[0];
3297         }
3298     }
3299
3300   proto = ip_proto_to_nat_proto (ip0->protocol);
3301   udp = ip4_next_header (ip0);
3302   port = udp->dst_port;
3303
3304   /* unknown protocol */
3305   if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
3306     {
3307       /* use current thread */
3308       return vlib_get_thread_index ();
3309     }
3310
3311   if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_ICMP))
3312     {
3313       icmp46_header_t *icmp = (icmp46_header_t *) udp;
3314       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3315       if (!icmp_type_is_error_message
3316           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
3317         port = vnet_buffer (b)->ip.reass.l4_src_port;
3318       else
3319         {
3320           /* if error message, then it's not fragmented and we can access it */
3321           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3322           proto = ip_proto_to_nat_proto (inner_ip->protocol);
3323           void *l4_header = ip4_next_header (inner_ip);
3324           switch (proto)
3325             {
3326             case NAT_PROTOCOL_ICMP:
3327               icmp = (icmp46_header_t *) l4_header;
3328               echo = (icmp_echo_header_t *) (icmp + 1);
3329               port = echo->identifier;
3330               break;
3331             case NAT_PROTOCOL_UDP:
3332             case NAT_PROTOCOL_TCP:
3333               port = ((tcp_udp_header_t *) l4_header)->src_port;
3334               break;
3335             default:
3336               return vlib_get_thread_index ();
3337             }
3338         }
3339     }
3340
3341   /* try static mappings with port */
3342   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3343     {
3344       init_nat_k (&kv, ip0->dst_address, port, rx_fib_index0, proto);
3345       if (!clib_bihash_search_8_8
3346           (&sm->static_mapping_by_external, &kv, &value))
3347         {
3348           m = pool_elt_at_index (sm->static_mappings, value.value);
3349           return m->workers[0];
3350         }
3351     }
3352
3353   /* worker by outside port */
3354   next_worker_index = sm->first_worker_index;
3355   next_worker_index +=
3356     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
3357   return next_worker_index;
3358 }
3359
3360 static u32
3361 nat44_ed_get_worker_in2out_cb (ip4_header_t * ip, u32 rx_fib_index,
3362                                u8 is_output)
3363 {
3364   snat_main_t *sm = &snat_main;
3365   u32 next_worker_index = sm->first_worker_index;
3366   u32 hash;
3367
3368   clib_bihash_kv_16_8_t kv16, value16;
3369   snat_main_per_thread_data_t *tsm;
3370   udp_header_t *udp;
3371
3372   if (PREDICT_FALSE (is_output))
3373     {
3374       u32 fib_index = sm->outside_fib_index;
3375       nat_outside_fib_t *outside_fib;
3376       fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
3377       fib_prefix_t pfx = {
3378         .fp_proto = FIB_PROTOCOL_IP4,
3379         .fp_len = 32,
3380         .fp_addr = {
3381                     .ip4.as_u32 = ip->dst_address.as_u32,
3382                     }
3383         ,
3384       };
3385
3386       udp = ip4_next_header (ip);
3387
3388       switch (vec_len (sm->outside_fibs))
3389         {
3390         case 0:
3391           fib_index = sm->outside_fib_index;
3392           break;
3393         case 1:
3394           fib_index = sm->outside_fibs[0].fib_index;
3395           break;
3396         default:
3397             /* *INDENT-OFF* */
3398             vec_foreach (outside_fib, sm->outside_fibs)
3399               {
3400                 fei = fib_table_lookup (outside_fib->fib_index, &pfx);
3401                 if (FIB_NODE_INDEX_INVALID != fei)
3402                   {
3403                     if (fib_entry_get_resolving_interface (fei) != ~0)
3404                       {
3405                         fib_index = outside_fib->fib_index;
3406                         break;
3407                       }
3408                   }
3409               }
3410             /* *INDENT-ON* */
3411           break;
3412         }
3413
3414       init_ed_k (&kv16, ip->src_address, udp->src_port, ip->dst_address,
3415                  udp->dst_port, fib_index, ip->protocol);
3416
3417       if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed,
3418                                                   &kv16, &value16)))
3419         {
3420           tsm =
3421             vec_elt_at_index (sm->per_thread_data,
3422                               ed_value_get_thread_index (&value16));
3423           next_worker_index += tsm->thread_index;
3424
3425           nat_elog_debug_handoff ("HANDOFF IN2OUT-OUTPUT-FEATURE (session)",
3426                                   next_worker_index, fib_index,
3427                                   clib_net_to_host_u32 (ip->
3428                                                         src_address.as_u32),
3429                                   clib_net_to_host_u32 (ip->
3430                                                         dst_address.as_u32));
3431
3432           return next_worker_index;
3433         }
3434     }
3435
3436   hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
3437     (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
3438
3439   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
3440     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
3441   else
3442     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
3443
3444   if (PREDICT_TRUE (!is_output))
3445     {
3446       nat_elog_debug_handoff ("HANDOFF IN2OUT",
3447                               next_worker_index, rx_fib_index,
3448                               clib_net_to_host_u32 (ip->src_address.as_u32),
3449                               clib_net_to_host_u32 (ip->dst_address.as_u32));
3450     }
3451   else
3452     {
3453       nat_elog_debug_handoff ("HANDOFF IN2OUT-OUTPUT-FEATURE",
3454                               next_worker_index, rx_fib_index,
3455                               clib_net_to_host_u32 (ip->src_address.as_u32),
3456                               clib_net_to_host_u32 (ip->dst_address.as_u32));
3457     }
3458
3459   return next_worker_index;
3460 }
3461
3462 static u32
3463 nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip,
3464                                u32 rx_fib_index, u8 is_output)
3465 {
3466   snat_main_t *sm = &snat_main;
3467   clib_bihash_kv_8_8_t kv, value;
3468   clib_bihash_kv_16_8_t kv16, value16;
3469   snat_main_per_thread_data_t *tsm;
3470
3471   u32 proto, next_worker_index = 0;
3472   udp_header_t *udp;
3473   u16 port;
3474   snat_static_mapping_t *m;
3475   u32 hash;
3476
3477   proto = ip_proto_to_nat_proto (ip->protocol);
3478
3479   if (PREDICT_TRUE (proto == NAT_PROTOCOL_UDP || proto == NAT_PROTOCOL_TCP))
3480     {
3481       udp = ip4_next_header (ip);
3482
3483       init_ed_k (&kv16, ip->dst_address, udp->dst_port, ip->src_address,
3484                  udp->src_port, rx_fib_index, ip->protocol);
3485
3486       if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed,
3487                                                   &kv16, &value16)))
3488         {
3489           tsm =
3490             vec_elt_at_index (sm->per_thread_data,
3491                               ed_value_get_thread_index (&value16));
3492           vnet_buffer2 (b)->nat.ed_out2in_nat_session_index =
3493             ed_value_get_session_index (&value16);
3494           next_worker_index = sm->first_worker_index + tsm->thread_index;
3495           nat_elog_debug_handoff ("HANDOFF OUT2IN (session)",
3496                                   next_worker_index, rx_fib_index,
3497                                   clib_net_to_host_u32 (ip->
3498                                                         src_address.as_u32),
3499                                   clib_net_to_host_u32 (ip->
3500                                                         dst_address.as_u32));
3501           return next_worker_index;
3502         }
3503     }
3504   else if (proto == NAT_PROTOCOL_ICMP)
3505     {
3506       if (!get_icmp_o2i_ed_key (b, ip, rx_fib_index, ~0, ~0, 0, 0, 0, &kv16))
3507         {
3508           if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed,
3509                                                       &kv16, &value16)))
3510             {
3511               tsm =
3512                 vec_elt_at_index (sm->per_thread_data,
3513                                   ed_value_get_thread_index (&value16));
3514               next_worker_index = sm->first_worker_index + tsm->thread_index;
3515               nat_elog_debug_handoff ("HANDOFF OUT2IN (session)",
3516                                       next_worker_index, rx_fib_index,
3517                                       clib_net_to_host_u32 (ip->
3518                                                             src_address.as_u32),
3519                                       clib_net_to_host_u32 (ip->
3520                                                             dst_address.as_u32));
3521               return next_worker_index;
3522             }
3523         }
3524     }
3525
3526   /* first try static mappings without port */
3527   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3528     {
3529       init_nat_k (&kv, ip->dst_address, 0, 0, 0);
3530       if (!clib_bihash_search_8_8
3531           (&sm->static_mapping_by_external, &kv, &value))
3532         {
3533           m = pool_elt_at_index (sm->static_mappings, value.value);
3534           next_worker_index = m->workers[0];
3535           goto done;
3536         }
3537     }
3538
3539   /* unknown protocol */
3540   if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
3541     {
3542       /* use current thread */
3543       next_worker_index = vlib_get_thread_index ();
3544       goto done;
3545     }
3546
3547   udp = ip4_next_header (ip);
3548   port = udp->dst_port;
3549
3550   if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
3551     {
3552       icmp46_header_t *icmp = (icmp46_header_t *) udp;
3553       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3554       if (!icmp_type_is_error_message
3555           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
3556         port = vnet_buffer (b)->ip.reass.l4_src_port;
3557       else
3558         {
3559           /* if error message, then it's not fragmented and we can access it */
3560           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3561           proto = ip_proto_to_nat_proto (inner_ip->protocol);
3562           void *l4_header = ip4_next_header (inner_ip);
3563           switch (proto)
3564             {
3565             case NAT_PROTOCOL_ICMP:
3566               icmp = (icmp46_header_t *) l4_header;
3567               echo = (icmp_echo_header_t *) (icmp + 1);
3568               port = echo->identifier;
3569               break;
3570             case NAT_PROTOCOL_UDP:
3571             case NAT_PROTOCOL_TCP:
3572               port = ((tcp_udp_header_t *) l4_header)->src_port;
3573               break;
3574             default:
3575               next_worker_index = vlib_get_thread_index ();
3576               goto done;
3577             }
3578         }
3579     }
3580
3581   /* try static mappings with port */
3582   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3583     {
3584       init_nat_k (&kv, ip->dst_address, proto, 0, port);
3585       if (!clib_bihash_search_8_8
3586           (&sm->static_mapping_by_external, &kv, &value))
3587         {
3588           m = pool_elt_at_index (sm->static_mappings, value.value);
3589           if (!is_lb_static_mapping (m))
3590             {
3591               next_worker_index = m->workers[0];
3592               goto done;
3593             }
3594
3595           hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
3596             (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
3597
3598           if (PREDICT_TRUE (is_pow2 (_vec_len (m->workers))))
3599             next_worker_index =
3600               m->workers[hash & (_vec_len (m->workers) - 1)];
3601           else
3602             next_worker_index = m->workers[hash % _vec_len (m->workers)];
3603           goto done;
3604         }
3605     }
3606
3607   /* worker by outside port */
3608   next_worker_index = sm->first_worker_index;
3609   next_worker_index +=
3610     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
3611
3612 done:
3613   nat_elog_debug_handoff ("HANDOFF OUT2IN", next_worker_index, rx_fib_index,
3614                           clib_net_to_host_u32 (ip->src_address.as_u32),
3615                           clib_net_to_host_u32 (ip->dst_address.as_u32));
3616   return next_worker_index;
3617 }
3618
3619 void
3620 nat_ha_sadd_cb (ip4_address_t * in_addr, u16 in_port,
3621                 ip4_address_t * out_addr, u16 out_port,
3622                 ip4_address_t * eh_addr, u16 eh_port,
3623                 ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
3624                 u32 fib_index, u16 flags, u32 thread_index)
3625 {
3626   snat_main_t *sm = &snat_main;
3627   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
3628   snat_user_t *u;
3629   snat_session_t *s;
3630   clib_bihash_kv_8_8_t kv;
3631   vlib_main_t *vm = vlib_get_main ();
3632   f64 now = vlib_time_now (vm);
3633   nat_outside_fib_t *outside_fib;
3634   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
3635   fib_prefix_t pfx = {
3636     .fp_proto = FIB_PROTOCOL_IP4,
3637     .fp_len = 32,
3638     .fp_addr = {
3639                 .ip4.as_u32 = eh_addr->as_u32,
3640                 },
3641   };
3642
3643   if (!(flags & SNAT_SESSION_FLAG_STATIC_MAPPING))
3644     {
3645       if (nat_set_outside_address_and_port
3646           (sm->addresses, thread_index, *out_addr, out_port, proto))
3647         return;
3648     }
3649
3650   u = nat_user_get_or_create (sm, in_addr, fib_index, thread_index);
3651   if (!u)
3652     return;
3653
3654   s = nat_session_alloc_or_recycle (sm, u, thread_index, now);
3655   if (!s)
3656     return;
3657
3658   if (sm->endpoint_dependent)
3659     {
3660       nat_ed_lru_insert (tsm, s, now, nat_proto_to_ip_proto (proto));
3661     }
3662
3663   s->out2in.addr.as_u32 = out_addr->as_u32;
3664   s->out2in.port = out_port;
3665   s->nat_proto = proto;
3666   s->last_heard = now;
3667   s->flags = flags;
3668   s->ext_host_addr.as_u32 = eh_addr->as_u32;
3669   s->ext_host_port = eh_port;
3670   user_session_increment (sm, u, snat_is_session_static (s));
3671   switch (vec_len (sm->outside_fibs))
3672     {
3673     case 0:
3674       s->out2in.fib_index = sm->outside_fib_index;
3675       break;
3676     case 1:
3677       s->out2in.fib_index = sm->outside_fibs[0].fib_index;
3678       break;
3679     default:
3680       /* *INDENT-OFF* */
3681       vec_foreach (outside_fib, sm->outside_fibs)
3682         {
3683           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
3684           if (FIB_NODE_INDEX_INVALID != fei)
3685             {
3686               if (fib_entry_get_resolving_interface (fei) != ~0)
3687                 {
3688                   s->out2in.fib_index = outside_fib->fib_index;
3689                   break;
3690                 }
3691             }
3692         }
3693       /* *INDENT-ON* */
3694       break;
3695     }
3696   init_nat_o2i_kv (&kv, s, s - tsm->sessions);
3697   if (clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 1))
3698     nat_elog_warn ("out2in key add failed");
3699
3700   s->in2out.addr.as_u32 = in_addr->as_u32;
3701   s->in2out.port = in_port;
3702   s->in2out.fib_index = fib_index;
3703   init_nat_i2o_kv (&kv, s, s - tsm->sessions);
3704   if (clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 1))
3705     nat_elog_warn ("in2out key add failed");
3706 }
3707
3708 void
3709 nat_ha_sdel_cb (ip4_address_t * out_addr, u16 out_port,
3710                 ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
3711                 u32 ti)
3712 {
3713   snat_main_t *sm = &snat_main;
3714   clib_bihash_kv_8_8_t kv, value;
3715   u32 thread_index;
3716   snat_session_t *s;
3717   snat_main_per_thread_data_t *tsm;
3718
3719   if (sm->num_workers > 1)
3720     thread_index =
3721       sm->first_worker_index +
3722       (sm->workers[(clib_net_to_host_u16 (out_port) -
3723                     1024) / sm->port_per_thread]);
3724   else
3725     thread_index = sm->num_workers;
3726   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
3727
3728   init_nat_k (&kv, *out_addr, out_port, fib_index, proto);
3729   if (clib_bihash_search_8_8 (&tsm->out2in, &kv, &value))
3730     return;
3731
3732   s = pool_elt_at_index (tsm->sessions, value.value);
3733   nat_free_session_data (sm, s, thread_index, 1);
3734   nat44_delete_session (sm, s, thread_index);
3735 }
3736
3737 void
3738 nat_ha_sref_cb (ip4_address_t * out_addr, u16 out_port,
3739                 ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
3740                 u32 total_pkts, u64 total_bytes, u32 thread_index)
3741 {
3742   snat_main_t *sm = &snat_main;
3743   clib_bihash_kv_8_8_t kv, value;
3744   snat_session_t *s;
3745   snat_main_per_thread_data_t *tsm;
3746
3747   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
3748
3749   init_nat_k (&kv, *out_addr, out_port, fib_index, proto);
3750   if (clib_bihash_search_8_8 (&tsm->out2in, &kv, &value))
3751     return;
3752
3753   s = pool_elt_at_index (tsm->sessions, value.value);
3754   s->total_pkts = total_pkts;
3755   s->total_bytes = total_bytes;
3756 }
3757
3758 void
3759 nat_ha_sadd_ed_cb (ip4_address_t * in_addr, u16 in_port,
3760                    ip4_address_t * out_addr, u16 out_port,
3761                    ip4_address_t * eh_addr, u16 eh_port,
3762                    ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
3763                    u32 fib_index, u16 flags, u32 thread_index)
3764 {
3765   snat_main_t *sm = &snat_main;
3766   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
3767   snat_session_t *s;
3768   clib_bihash_kv_16_8_t kv;
3769   vlib_main_t *vm = vlib_get_main ();
3770   f64 now = vlib_time_now (vm);
3771   nat_outside_fib_t *outside_fib;
3772   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
3773   fib_prefix_t pfx = {
3774     .fp_proto = FIB_PROTOCOL_IP4,
3775     .fp_len = 32,
3776     .fp_addr = {
3777                 .ip4.as_u32 = eh_addr->as_u32,
3778                 },
3779   };
3780
3781
3782   if (!(flags & SNAT_SESSION_FLAG_STATIC_MAPPING))
3783     {
3784       if (nat_set_outside_address_and_port
3785           (sm->addresses, thread_index, *out_addr, out_port, proto))
3786         return;
3787     }
3788
3789   if (flags & SNAT_SESSION_FLAG_TWICE_NAT)
3790     {
3791       if (nat_set_outside_address_and_port
3792           (sm->addresses, thread_index, *ehn_addr, ehn_port, proto))
3793         return;
3794     }
3795
3796   s = nat_ed_session_alloc (sm, thread_index, now, proto);
3797   if (!s)
3798     return;
3799
3800   s->last_heard = now;
3801   s->flags = flags;
3802   s->ext_host_nat_addr.as_u32 = s->ext_host_addr.as_u32 = eh_addr->as_u32;
3803   s->ext_host_nat_port = s->ext_host_port = eh_port;
3804   if (is_twice_nat_session (s))
3805     {
3806       s->ext_host_nat_addr.as_u32 = ehn_addr->as_u32;
3807       s->ext_host_nat_port = ehn_port;
3808     }
3809   switch (vec_len (sm->outside_fibs))
3810     {
3811     case 0:
3812       s->out2in.fib_index = sm->outside_fib_index;
3813       break;
3814     case 1:
3815       s->out2in.fib_index = sm->outside_fibs[0].fib_index;
3816       break;
3817     default:
3818       /* *INDENT-OFF* */
3819       vec_foreach (outside_fib, sm->outside_fibs)
3820         {
3821           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
3822           if (FIB_NODE_INDEX_INVALID != fei)
3823             {
3824               if (fib_entry_get_resolving_interface (fei) != ~0)
3825                 {
3826                   s->out2in.fib_index = outside_fib->fib_index;
3827                   break;
3828                 }
3829             }
3830         }
3831       /* *INDENT-ON* */
3832       break;
3833     }
3834   s->nat_proto = proto;
3835   s->out2in.addr.as_u32 = out_addr->as_u32;
3836   s->out2in.port = out_port;
3837
3838   s->in2out.addr.as_u32 = in_addr->as_u32;
3839   s->in2out.port = in_port;
3840   s->in2out.fib_index = fib_index;
3841
3842   init_ed_kv (&kv, *in_addr, in_port, s->ext_host_nat_addr,
3843               s->ext_host_nat_port, fib_index, nat_proto_to_ip_proto (proto),
3844               thread_index, s - tsm->sessions);
3845   if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &kv, 1))
3846     nat_elog_warn ("in2out key add failed");
3847
3848   init_ed_kv (&kv, *out_addr, out_port, *eh_addr, eh_port,
3849               s->out2in.fib_index, nat_proto_to_ip_proto (proto),
3850               thread_index, s - tsm->sessions);
3851   if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &kv, 1))
3852     nat_elog_warn ("out2in key add failed");
3853 }
3854
3855 void
3856 nat_ha_sdel_ed_cb (ip4_address_t * out_addr, u16 out_port,
3857                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
3858                    u32 fib_index, u32 ti)
3859 {
3860   snat_main_t *sm = &snat_main;
3861   clib_bihash_kv_16_8_t kv, value;
3862   u32 thread_index;
3863   snat_session_t *s;
3864   snat_main_per_thread_data_t *tsm;
3865
3866   if (sm->num_workers > 1)
3867     thread_index =
3868       sm->first_worker_index +
3869       (sm->workers[(clib_net_to_host_u16 (out_port) -
3870                     1024) / sm->port_per_thread]);
3871   else
3872     thread_index = sm->num_workers;
3873   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
3874
3875   init_ed_k (&kv, *out_addr, out_port, *eh_addr, eh_port, fib_index, proto);
3876   if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
3877     return;
3878
3879   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
3880   nat_free_session_data (sm, s, thread_index, 1);
3881   nat44_delete_session (sm, s, thread_index);
3882 }
3883
3884 void
3885 nat_ha_sref_ed_cb (ip4_address_t * out_addr, u16 out_port,
3886                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
3887                    u32 fib_index, u32 total_pkts, u64 total_bytes,
3888                    u32 thread_index)
3889 {
3890   snat_main_t *sm = &snat_main;
3891   clib_bihash_kv_16_8_t kv, value;
3892   snat_session_t *s;
3893   snat_main_per_thread_data_t *tsm;
3894
3895   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
3896
3897   init_ed_k (&kv, *out_addr, out_port, *eh_addr, eh_port, fib_index, proto);
3898   if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
3899     return;
3900
3901   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
3902   s->total_pkts = total_pkts;
3903   s->total_bytes = total_bytes;
3904 }
3905
3906 static u32
3907 nat_calc_bihash_buckets (u32 n_elts)
3908 {
3909   return 1 << (max_log2 (n_elts >> 1) + 1);
3910 }
3911
3912 static u32
3913 nat_calc_bihash_memory (u32 n_buckets, uword kv_size)
3914 {
3915   return n_buckets * (8 + kv_size * 4);
3916 }
3917
3918 u32
3919 nat44_get_max_session_limit ()
3920 {
3921   snat_main_t *sm = &snat_main;
3922   u32 max_limit = 0, len = 0;
3923
3924   for (; len < vec_len (sm->max_translations_per_fib); len++)
3925     {
3926       if (max_limit < sm->max_translations_per_fib[len])
3927         max_limit = sm->max_translations_per_fib[len];
3928     }
3929   return max_limit;
3930 }
3931
3932 int
3933 nat44_set_session_limit (u32 session_limit, u32 vrf_id)
3934 {
3935   snat_main_t *sm = &snat_main;
3936   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
3937   u32 len = vec_len (sm->max_translations_per_fib);
3938
3939   if (len <= fib_index)
3940     {
3941       vec_validate (sm->max_translations_per_fib, fib_index + 1);
3942
3943       for (; len < vec_len (sm->max_translations_per_fib); len++)
3944         sm->max_translations_per_fib[len] = sm->max_translations_per_thread;
3945     }
3946
3947   sm->max_translations_per_fib[fib_index] = session_limit;
3948   return 0;
3949 }
3950
3951 int
3952 nat44_update_session_limit (u32 session_limit, u32 vrf_id)
3953 {
3954   snat_main_t *sm = &snat_main;
3955
3956   if (nat44_set_session_limit (session_limit, vrf_id))
3957     return 1;
3958   sm->max_translations_per_thread = nat44_get_max_session_limit ();
3959
3960   sm->translation_buckets =
3961     nat_calc_bihash_buckets (sm->max_translations_per_thread);
3962
3963   if (!sm->translation_memory_size_set)
3964     {
3965       sm->translation_memory_size =
3966         nat_calc_bihash_memory (sm->translation_buckets,
3967                                 sizeof (clib_bihash_16_8_t));
3968     }
3969
3970   nat44_sessions_clear ();
3971   return 0;
3972 }
3973
3974 void
3975 nat44_db_init (snat_main_per_thread_data_t * tsm)
3976 {
3977   snat_main_t *sm = &snat_main;
3978
3979   pool_alloc (tsm->sessions, sm->max_translations_per_thread);
3980   pool_alloc (tsm->lru_pool, sm->max_translations_per_thread);
3981
3982   dlist_elt_t *head;
3983
3984   pool_get (tsm->lru_pool, head);
3985   tsm->tcp_trans_lru_head_index = head - tsm->lru_pool;
3986   clib_dlist_init (tsm->lru_pool, tsm->tcp_trans_lru_head_index);
3987
3988   pool_get (tsm->lru_pool, head);
3989   tsm->tcp_estab_lru_head_index = head - tsm->lru_pool;
3990   clib_dlist_init (tsm->lru_pool, tsm->tcp_estab_lru_head_index);
3991
3992   pool_get (tsm->lru_pool, head);
3993   tsm->udp_lru_head_index = head - tsm->lru_pool;
3994   clib_dlist_init (tsm->lru_pool, tsm->udp_lru_head_index);
3995
3996   pool_get (tsm->lru_pool, head);
3997   tsm->icmp_lru_head_index = head - tsm->lru_pool;
3998   clib_dlist_init (tsm->lru_pool, tsm->icmp_lru_head_index);
3999
4000   pool_get (tsm->lru_pool, head);
4001   tsm->unk_proto_lru_head_index = head - tsm->lru_pool;
4002   clib_dlist_init (tsm->lru_pool, tsm->unk_proto_lru_head_index);
4003
4004   if (sm->endpoint_dependent)
4005     {
4006       clib_bihash_init_16_8 (&tsm->in2out_ed, "in2out-ed",
4007                              sm->translation_buckets,
4008                              sm->translation_memory_size);
4009       clib_bihash_set_kvp_format_fn_16_8 (&tsm->in2out_ed,
4010                                           format_ed_session_kvp);
4011
4012     }
4013   else
4014     {
4015       clib_bihash_init_8_8 (&tsm->in2out, "in2out",
4016                             sm->translation_buckets,
4017                             sm->translation_memory_size);
4018       clib_bihash_set_kvp_format_fn_8_8 (&tsm->in2out, format_session_kvp);
4019       clib_bihash_init_8_8 (&tsm->out2in, "out2in",
4020                             sm->translation_buckets,
4021                             sm->translation_memory_size);
4022       clib_bihash_set_kvp_format_fn_8_8 (&tsm->out2in, format_session_kvp);
4023     }
4024
4025   // TODO: resolve static mappings (put only to !ED)
4026   pool_alloc (tsm->list_pool, sm->max_translations_per_thread);
4027   clib_bihash_init_8_8 (&tsm->user_hash, "users", sm->user_buckets,
4028                         sm->user_memory_size);
4029   clib_bihash_set_kvp_format_fn_8_8 (&tsm->user_hash, format_user_kvp);
4030 }
4031
4032 void
4033 nat44_db_free (snat_main_per_thread_data_t * tsm)
4034 {
4035   snat_main_t *sm = &snat_main;
4036
4037   pool_free (tsm->sessions);
4038   pool_free (tsm->lru_pool);
4039
4040   if (sm->endpoint_dependent)
4041     {
4042       clib_bihash_free_16_8 (&tsm->in2out_ed);
4043       vec_free (tsm->per_vrf_sessions_vec);
4044     }
4045   else
4046     {
4047       clib_bihash_free_8_8 (&tsm->in2out);
4048       clib_bihash_free_8_8 (&tsm->out2in);
4049     }
4050
4051   // TODO: resolve static mappings (put only to !ED)
4052   pool_free (tsm->users);
4053   pool_free (tsm->list_pool);
4054   clib_bihash_free_8_8 (&tsm->user_hash);
4055 }
4056
4057 void
4058 nat44_sessions_clear ()
4059 {
4060   snat_main_t *sm = &snat_main;
4061   snat_main_per_thread_data_t *tsm;
4062
4063   if (sm->endpoint_dependent)
4064     {
4065       clib_bihash_free_16_8 (&sm->out2in_ed);
4066       clib_bihash_init_16_8 (&sm->out2in_ed, "out2in-ed",
4067                              clib_max (1, sm->num_workers) *
4068                              sm->translation_buckets,
4069                              clib_max (1, sm->num_workers) *
4070                              sm->translation_memory_size);
4071       clib_bihash_set_kvp_format_fn_16_8 (&sm->out2in_ed,
4072                                           format_ed_session_kvp);
4073     }
4074
4075   /* *INDENT-OFF* */
4076   vec_foreach (tsm, sm->per_thread_data)
4077     {
4078       u32 ti;
4079
4080       nat44_db_free (tsm);
4081       nat44_db_init (tsm);
4082
4083       ti = tsm->snat_thread_index;
4084       vlib_set_simple_counter (&sm->total_users, ti, 0, 0);
4085       vlib_set_simple_counter (&sm->total_sessions, ti, 0, 0);
4086     }
4087   /* *INDENT-ON* */
4088 }
4089
4090 static clib_error_t *
4091 snat_config (vlib_main_t * vm, unformat_input_t * input)
4092 {
4093   snat_main_t *sm = &snat_main;
4094   snat_main_per_thread_data_t *tsm;
4095
4096   u32 static_mapping_buckets = 1024;
4097   uword static_mapping_memory_size = 64 << 20;
4098
4099   u32 nat64_bib_buckets = 1024;
4100   u32 nat64_bib_memory_size = 128 << 20;
4101
4102   u32 nat64_st_buckets = 2048;
4103   uword nat64_st_memory_size = 256 << 20;
4104
4105   u32 max_users_per_thread = 0;
4106   u32 user_memory_size = 0;
4107   u32 max_translations_per_thread = 0;
4108   u32 translation_memory_size = 0;
4109
4110   u32 max_translations_per_user = ~0;
4111
4112   u32 outside_vrf_id = 0;
4113   u32 outside_ip6_vrf_id = 0;
4114   u32 inside_vrf_id = 0;
4115   u8 static_mapping_only = 0;
4116   u8 static_mapping_connection_tracking = 0;
4117
4118   u32 udp_timeout = SNAT_UDP_TIMEOUT;
4119   u32 icmp_timeout = SNAT_ICMP_TIMEOUT;
4120   u32 tcp_transitory_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
4121   u32 tcp_established_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;
4122
4123   sm->out2in_dpo = 0;
4124   sm->endpoint_dependent = 0;
4125
4126   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
4127     {
4128       if (unformat
4129           (input, "max translations per thread %d",
4130            &max_translations_per_thread))
4131         ;
4132       else if (unformat (input, "udp timeout %d", &udp_timeout))
4133         ;
4134       else if (unformat (input, "icmp timeout %d", &icmp_timeout))
4135         ;
4136       else if (unformat (input, "tcp transitory timeout %d",
4137                          &tcp_transitory_timeout));
4138       else if (unformat (input, "tcp established timeout %d",
4139                          &tcp_established_timeout));
4140       else if (unformat (input, "translation hash memory %d",
4141                          &translation_memory_size));
4142       else
4143         if (unformat
4144             (input, "max users per thread %d", &max_users_per_thread))
4145         ;
4146       else if (unformat (input, "user hash memory %d", &user_memory_size))
4147         ;
4148       else if (unformat (input, "max translations per user %d",
4149                          &max_translations_per_user))
4150         ;
4151       else if (unformat (input, "outside VRF id %d", &outside_vrf_id))
4152         ;
4153       else if (unformat (input, "outside ip6 VRF id %d", &outside_ip6_vrf_id))
4154         ;
4155       else if (unformat (input, "inside VRF id %d", &inside_vrf_id))
4156         ;
4157       else if (unformat (input, "static mapping only"))
4158         {
4159           static_mapping_only = 1;
4160           if (unformat (input, "connection tracking"))
4161             static_mapping_connection_tracking = 1;
4162         }
4163       else if (unformat (input, "nat64 bib hash buckets %d",
4164                          &nat64_bib_buckets))
4165         ;
4166       else if (unformat (input, "nat64 bib hash memory %d",
4167                          &nat64_bib_memory_size))
4168         ;
4169       else
4170         if (unformat (input, "nat64 st hash buckets %d", &nat64_st_buckets))
4171         ;
4172       else if (unformat (input, "nat64 st hash memory %d",
4173                          &nat64_st_memory_size))
4174         ;
4175       else if (unformat (input, "out2in dpo"))
4176         sm->out2in_dpo = 1;
4177       else if (unformat (input, "endpoint-dependent"))
4178         sm->endpoint_dependent = 1;
4179       else
4180         return clib_error_return (0, "unknown input '%U'",
4181                                   format_unformat_error, input);
4182     }
4183
4184   if (static_mapping_only && (sm->endpoint_dependent))
4185     return clib_error_return (0,
4186                               "static mapping only mode available only for simple nat");
4187
4188   if (sm->out2in_dpo && (sm->endpoint_dependent))
4189     return clib_error_return (0,
4190                               "out2in dpo mode available only for simple nat");
4191   if (sm->endpoint_dependent && max_users_per_thread > 0)
4192     {
4193       return clib_error_return (0,
4194                                 "setting 'max users' in endpoint-dependent mode is not supported");
4195     }
4196
4197   if (sm->endpoint_dependent && max_translations_per_user != ~0)
4198     {
4199       return clib_error_return (0,
4200                                 "setting 'max translations per user' in endpoint-dependent mode is not supported");
4201     }
4202
4203   /* optionally configurable timeouts for testing purposes */
4204   sm->udp_timeout = udp_timeout;
4205   sm->tcp_transitory_timeout = tcp_transitory_timeout;
4206   sm->tcp_established_timeout = tcp_established_timeout;
4207   sm->icmp_timeout = icmp_timeout;
4208
4209   if (0 == max_users_per_thread)
4210     {
4211       max_users_per_thread = 1024;
4212     }
4213   sm->max_users_per_thread = max_users_per_thread;
4214   sm->user_buckets = nat_calc_bihash_buckets (sm->max_users_per_thread);
4215
4216   if (0 == max_translations_per_thread)
4217     {
4218       // default value based on legacy setting of load factor 10 * default
4219       // translation buckets 1024
4220       max_translations_per_thread = 10 * 1024;
4221     }
4222   sm->translation_memory_size_set = translation_memory_size != 0;
4223
4224   sm->max_translations_per_thread = max_translations_per_thread;
4225   sm->translation_buckets =
4226     nat_calc_bihash_buckets (sm->max_translations_per_thread);
4227   if (0 == translation_memory_size)
4228     {
4229       translation_memory_size =
4230         nat_calc_bihash_memory (sm->translation_buckets,
4231                                 sizeof (clib_bihash_16_8_t));
4232     }
4233   sm->translation_memory_size = translation_memory_size;
4234   if (0 == user_memory_size)
4235     {
4236       user_memory_size =
4237         nat_calc_bihash_memory (sm->max_users_per_thread,
4238                                 sizeof (clib_bihash_8_8_t));
4239     }
4240   sm->user_memory_size = user_memory_size;
4241   vec_add1 (sm->max_translations_per_fib, sm->max_translations_per_thread);
4242
4243   sm->max_translations_per_user = max_translations_per_user == ~0 ?
4244     sm->max_translations_per_thread : max_translations_per_user;
4245
4246   sm->outside_vrf_id = outside_vrf_id;
4247   sm->outside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
4248                                                              outside_vrf_id,
4249                                                              nat_fib_src_hi);
4250   sm->inside_vrf_id = inside_vrf_id;
4251   sm->inside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
4252                                                             inside_vrf_id,
4253                                                             nat_fib_src_hi);
4254   sm->static_mapping_only = static_mapping_only;
4255   sm->static_mapping_connection_tracking = static_mapping_connection_tracking;
4256
4257   nat64_set_hash (nat64_bib_buckets, nat64_bib_memory_size, nat64_st_buckets,
4258                   nat64_st_memory_size);
4259
4260   if (sm->endpoint_dependent)
4261     {
4262       sm->worker_in2out_cb = nat44_ed_get_worker_in2out_cb;
4263       sm->worker_out2in_cb = nat44_ed_get_worker_out2in_cb;
4264
4265       sm->in2out_node_index = nat44_ed_in2out_node.index;
4266       sm->in2out_output_node_index = nat44_ed_in2out_output_node.index;
4267       sm->out2in_node_index = nat44_ed_out2in_node.index;
4268
4269       sm->icmp_match_in2out_cb = icmp_match_in2out_ed;
4270       sm->icmp_match_out2in_cb = icmp_match_out2in_ed;
4271       nat_affinity_init (vm);
4272       nat_ha_init (vm, nat_ha_sadd_ed_cb, nat_ha_sdel_ed_cb,
4273                    nat_ha_sref_ed_cb);
4274       clib_bihash_init_16_8 (&sm->out2in_ed, "out2in-ed",
4275                              clib_max (1, sm->num_workers) *
4276                              sm->translation_buckets,
4277                              clib_max (1, sm->num_workers) *
4278                              sm->translation_memory_size);
4279       clib_bihash_set_kvp_format_fn_16_8 (&sm->out2in_ed,
4280                                           format_ed_session_kvp);
4281     }
4282   else
4283     {
4284       sm->worker_in2out_cb = snat_get_worker_in2out_cb;
4285       sm->worker_out2in_cb = snat_get_worker_out2in_cb;
4286
4287       sm->in2out_node_index = snat_in2out_node.index;
4288       sm->in2out_output_node_index = snat_in2out_output_node.index;
4289       sm->out2in_node_index = snat_out2in_node.index;
4290
4291       sm->icmp_match_in2out_cb = icmp_match_in2out_slow;
4292       sm->icmp_match_out2in_cb = icmp_match_out2in_slow;
4293       nat_ha_init (vm, nat_ha_sadd_cb, nat_ha_sdel_cb, nat_ha_sref_cb);
4294     }
4295   if (!static_mapping_only ||
4296       (static_mapping_only && static_mapping_connection_tracking))
4297     {
4298           /* *INDENT-OFF* */
4299           vec_foreach (tsm, sm->per_thread_data)
4300             {
4301               nat44_db_init (tsm);
4302             }
4303           /* *INDENT-ON* */
4304     }
4305   else
4306     {
4307       sm->icmp_match_in2out_cb = icmp_match_in2out_fast;
4308       sm->icmp_match_out2in_cb = icmp_match_out2in_fast;
4309     }
4310   clib_bihash_init_8_8 (&sm->static_mapping_by_local,
4311                         "static_mapping_by_local", static_mapping_buckets,
4312                         static_mapping_memory_size);
4313   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_local,
4314                                      format_static_mapping_kvp);
4315
4316   clib_bihash_init_8_8 (&sm->static_mapping_by_external,
4317                         "static_mapping_by_external",
4318                         static_mapping_buckets, static_mapping_memory_size);
4319   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_external,
4320                                      format_static_mapping_kvp);
4321
4322   return 0;
4323 }
4324
4325 VLIB_CONFIG_FUNCTION (snat_config, "nat");
4326
4327 static void
4328 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
4329                                  uword opaque,
4330                                  u32 sw_if_index,
4331                                  ip4_address_t * address,
4332                                  u32 address_length,
4333                                  u32 if_address_index, u32 is_delete)
4334 {
4335   snat_main_t *sm = &snat_main;
4336   snat_static_map_resolve_t *rp;
4337   snat_static_mapping_t *m;
4338   clib_bihash_kv_8_8_t kv, value;
4339   int i, rv;
4340   ip4_address_t l_addr;
4341
4342   for (i = 0; i < vec_len (sm->to_resolve); i++)
4343     {
4344       rp = sm->to_resolve + i;
4345       if (rp->addr_only == 0)
4346         continue;
4347       if (rp->sw_if_index == sw_if_index)
4348         goto match;
4349     }
4350
4351   return;
4352
4353 match:
4354   init_nat_k (&kv, *address, rp->addr_only ? 0 : rp->e_port,
4355               sm->outside_fib_index, rp->addr_only ? 0 : rp->proto);
4356   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
4357     m = 0;
4358   else
4359     m = pool_elt_at_index (sm->static_mappings, value.value);
4360
4361   if (!is_delete)
4362     {
4363       /* Don't trip over lease renewal, static config */
4364       if (m)
4365         return;
4366     }
4367   else
4368     {
4369       if (!m)
4370         return;
4371     }
4372
4373   /* Indetity mapping? */
4374   if (rp->l_addr.as_u32 == 0)
4375     l_addr.as_u32 = address[0].as_u32;
4376   else
4377     l_addr.as_u32 = rp->l_addr.as_u32;
4378   /* Add the static mapping */
4379   rv = snat_add_static_mapping (l_addr,
4380                                 address[0],
4381                                 rp->l_port,
4382                                 rp->e_port,
4383                                 rp->vrf_id,
4384                                 rp->addr_only, ~0 /* sw_if_index */ ,
4385                                 rp->proto, !is_delete, rp->twice_nat,
4386                                 rp->out2in_only, rp->tag, rp->identity_nat,
4387                                 rp->pool_addr, rp->exact);
4388   if (rv)
4389     nat_elog_notice_X1 ("snat_add_static_mapping returned %d", "i4", rv);
4390 }
4391
4392 static void
4393 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
4394                                        uword opaque,
4395                                        u32 sw_if_index,
4396                                        ip4_address_t * address,
4397                                        u32 address_length,
4398                                        u32 if_address_index, u32 is_delete)
4399 {
4400   snat_main_t *sm = &snat_main;
4401   snat_static_map_resolve_t *rp;
4402   ip4_address_t l_addr;
4403   int i, j;
4404   int rv;
4405   u8 twice_nat = 0;
4406   snat_address_t *addresses = sm->addresses;
4407
4408   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices); i++)
4409     {
4410       if (sw_if_index == sm->auto_add_sw_if_indices[i])
4411         goto match;
4412     }
4413
4414   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices_twice_nat); i++)
4415     {
4416       twice_nat = 1;
4417       addresses = sm->twice_nat_addresses;
4418       if (sw_if_index == sm->auto_add_sw_if_indices_twice_nat[i])
4419         goto match;
4420     }
4421
4422   return;
4423
4424 match:
4425   if (!is_delete)
4426     {
4427       /* Don't trip over lease renewal, static config */
4428       for (j = 0; j < vec_len (addresses); j++)
4429         if (addresses[j].addr.as_u32 == address->as_u32)
4430           return;
4431
4432       (void) snat_add_address (sm, address, ~0, twice_nat);
4433       /* Scan static map resolution vector */
4434       for (j = 0; j < vec_len (sm->to_resolve); j++)
4435         {
4436           rp = sm->to_resolve + j;
4437           if (rp->addr_only)
4438             continue;
4439           /* On this interface? */
4440           if (rp->sw_if_index == sw_if_index)
4441             {
4442               /* Indetity mapping? */
4443               if (rp->l_addr.as_u32 == 0)
4444                 l_addr.as_u32 = address[0].as_u32;
4445               else
4446                 l_addr.as_u32 = rp->l_addr.as_u32;
4447               /* Add the static mapping */
4448               rv = snat_add_static_mapping (l_addr,
4449                                             address[0],
4450                                             rp->l_port,
4451                                             rp->e_port,
4452                                             rp->vrf_id,
4453                                             rp->addr_only,
4454                                             ~0 /* sw_if_index */ ,
4455                                             rp->proto,
4456                                             rp->is_add, rp->twice_nat,
4457                                             rp->out2in_only, rp->tag,
4458                                             rp->identity_nat,
4459                                             rp->pool_addr, rp->exact);
4460               if (rv)
4461                 nat_elog_notice_X1 ("snat_add_static_mapping returned %d",
4462                                     "i4", rv);
4463             }
4464         }
4465       return;
4466     }
4467   else
4468     {
4469       (void) snat_del_address (sm, address[0], 1, twice_nat);
4470       return;
4471     }
4472 }
4473
4474
4475 int
4476 snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del,
4477                             u8 twice_nat)
4478 {
4479   ip4_main_t *ip4_main = sm->ip4_main;
4480   ip4_address_t *first_int_addr;
4481   snat_static_map_resolve_t *rp;
4482   u32 *indices_to_delete = 0;
4483   int i, j;
4484   u32 *auto_add_sw_if_indices =
4485     twice_nat ? sm->
4486     auto_add_sw_if_indices_twice_nat : sm->auto_add_sw_if_indices;
4487
4488   first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0        /* just want the address */
4489     );
4490
4491   for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
4492     {
4493       if (auto_add_sw_if_indices[i] == sw_if_index)
4494         {
4495           if (is_del)
4496             {
4497               /* if have address remove it */
4498               if (first_int_addr)
4499                 (void) snat_del_address (sm, first_int_addr[0], 1, twice_nat);
4500               else
4501                 {
4502                   for (j = 0; j < vec_len (sm->to_resolve); j++)
4503                     {
4504                       rp = sm->to_resolve + j;
4505                       if (rp->sw_if_index == sw_if_index)
4506                         vec_add1 (indices_to_delete, j);
4507                     }
4508                   if (vec_len (indices_to_delete))
4509                     {
4510                       for (j = vec_len (indices_to_delete) - 1; j >= 0; j--)
4511                         vec_del1 (sm->to_resolve, j);
4512                       vec_free (indices_to_delete);
4513                     }
4514                 }
4515               if (twice_nat)
4516                 vec_del1 (sm->auto_add_sw_if_indices_twice_nat, i);
4517               else
4518                 vec_del1 (sm->auto_add_sw_if_indices, i);
4519             }
4520           else
4521             return VNET_API_ERROR_VALUE_EXIST;
4522
4523           return 0;
4524         }
4525     }
4526
4527   if (is_del)
4528     return VNET_API_ERROR_NO_SUCH_ENTRY;
4529
4530   /* add to the auto-address list */
4531   if (twice_nat)
4532     vec_add1 (sm->auto_add_sw_if_indices_twice_nat, sw_if_index);
4533   else
4534     vec_add1 (sm->auto_add_sw_if_indices, sw_if_index);
4535
4536   /* If the address is already bound - or static - add it now */
4537   if (first_int_addr)
4538     (void) snat_add_address (sm, first_int_addr, ~0, twice_nat);
4539
4540   return 0;
4541 }
4542
4543 int
4544 nat44_del_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
4545                    nat_protocol_t proto, u32 vrf_id, int is_in)
4546 {
4547   snat_main_per_thread_data_t *tsm;
4548   clib_bihash_kv_8_8_t kv, value;
4549   ip4_header_t ip;
4550   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
4551   snat_session_t *s;
4552   clib_bihash_8_8_t *t;
4553
4554   if (sm->endpoint_dependent)
4555     return VNET_API_ERROR_UNSUPPORTED;
4556
4557   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
4558   if (sm->num_workers > 1)
4559     tsm =
4560       vec_elt_at_index (sm->per_thread_data,
4561                         sm->worker_in2out_cb (&ip, fib_index, 0));
4562   else
4563     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
4564
4565   init_nat_k (&kv, *addr, port, fib_index, proto);
4566   t = is_in ? &tsm->in2out : &tsm->out2in;
4567   if (!clib_bihash_search_8_8 (t, &kv, &value))
4568     {
4569       if (pool_is_free_index (tsm->sessions, value.value))
4570         return VNET_API_ERROR_UNSPECIFIED;
4571
4572       s = pool_elt_at_index (tsm->sessions, value.value);
4573       nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
4574       nat44_delete_session (sm, s, tsm - sm->per_thread_data);
4575       return 0;
4576     }
4577
4578   return VNET_API_ERROR_NO_SUCH_ENTRY;
4579 }
4580
4581 int
4582 nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
4583                       ip4_address_t * eh_addr, u16 eh_port, u8 proto,
4584                       u32 vrf_id, int is_in)
4585 {
4586   ip4_header_t ip;
4587   clib_bihash_16_8_t *t;
4588   clib_bihash_kv_16_8_t kv, value;
4589   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
4590   snat_session_t *s;
4591   snat_main_per_thread_data_t *tsm;
4592
4593   if (!sm->endpoint_dependent)
4594     return VNET_API_ERROR_FEATURE_DISABLED;
4595
4596   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
4597   if (sm->num_workers > 1)
4598     tsm =
4599       vec_elt_at_index (sm->per_thread_data,
4600                         sm->worker_in2out_cb (&ip, fib_index, 0));
4601   else
4602     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
4603
4604   t = is_in ? &tsm->in2out_ed : &sm->out2in_ed;
4605   init_ed_k (&kv, *addr, port, *eh_addr, eh_port, fib_index, proto);
4606   if (clib_bihash_search_16_8 (t, &kv, &value))
4607     {
4608       return VNET_API_ERROR_NO_SUCH_ENTRY;
4609     }
4610
4611   if (pool_is_free_index (tsm->sessions, value.value))
4612     return VNET_API_ERROR_UNSPECIFIED;
4613   s = pool_elt_at_index (tsm->sessions, value.value);
4614   nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
4615   nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
4616   return 0;
4617 }
4618
4619 void
4620 nat_set_alloc_addr_and_port_mape (u16 psid, u16 psid_offset, u16 psid_length)
4621 {
4622   snat_main_t *sm = &snat_main;
4623
4624   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_MAPE;
4625   sm->alloc_addr_and_port = nat_alloc_addr_and_port_mape;
4626   sm->psid = psid;
4627   sm->psid_offset = psid_offset;
4628   sm->psid_length = psid_length;
4629 }
4630
4631 void
4632 nat_set_alloc_addr_and_port_range (u16 start_port, u16 end_port)
4633 {
4634   snat_main_t *sm = &snat_main;
4635
4636   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_RANGE;
4637   sm->alloc_addr_and_port = nat_alloc_addr_and_port_range;
4638   sm->start_port = start_port;
4639   sm->end_port = end_port;
4640 }
4641
4642 void
4643 nat_set_alloc_addr_and_port_default (void)
4644 {
4645   snat_main_t *sm = &snat_main;
4646
4647   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_DEFAULT;
4648   sm->alloc_addr_and_port = nat_alloc_addr_and_port_default;
4649 }
4650
4651 VLIB_NODE_FN (nat_default_node) (vlib_main_t * vm,
4652                                  vlib_node_runtime_t * node,
4653                                  vlib_frame_t * frame)
4654 {
4655   return 0;
4656 }
4657
4658 /* *INDENT-OFF* */
4659 VLIB_REGISTER_NODE (nat_default_node) = {
4660   .name = "nat-default",
4661   .vector_size = sizeof (u32),
4662   .format_trace = 0,
4663   .type = VLIB_NODE_TYPE_INTERNAL,
4664   .n_errors = 0,
4665   .n_next_nodes = NAT_N_NEXT,
4666   .next_nodes = {
4667     [NAT_NEXT_DROP] = "error-drop",
4668     [NAT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
4669     [NAT_NEXT_IN2OUT_ED_FAST_PATH] = "nat44-ed-in2out",
4670     [NAT_NEXT_IN2OUT_ED_SLOW_PATH] = "nat44-ed-in2out-slowpath",
4671     [NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH] = "nat44-ed-in2out-output",
4672     [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
4673     [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in",
4674     [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath",
4675     [NAT_NEXT_IN2OUT_CLASSIFY] = "nat44-in2out-worker-handoff",
4676     [NAT_NEXT_OUT2IN_CLASSIFY] = "nat44-out2in-worker-handoff",
4677   },
4678 };
4679 /* *INDENT-ON* */
4680
4681 /*
4682  * fd.io coding-style-patch-verification: ON
4683  *
4684  * Local Variables:
4685  * eval: (c-set-style "gnu")
4686  * End:
4687  */