fib: IPv6 lookup data structure MP safe when prefixes change
[vpp.git] / src / plugins / nat / nat.c
1 /*
2  * snat.c - simple nat plugin
3  *
4  * Copyright (c) 2016 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/vnet.h>
19 #include <vnet/ip/ip.h>
20 #include <vnet/ip/ip4.h>
21 #include <vnet/plugin/plugin.h>
22 #include <nat/nat.h>
23 #include <nat/nat_dpo.h>
24 #include <nat/nat_ipfix_logging.h>
25 #include <nat/nat64.h>
26 #include <nat/nat_inlines.h>
27 #include <nat/nat44/inlines.h>
28 #include <nat/nat_affinity.h>
29 #include <nat/nat_syslog.h>
30 #include <nat/nat_ha.h>
31 #include <vnet/fib/fib_table.h>
32 #include <vnet/fib/ip4_fib.h>
33 #include <vnet/ip/reass/ip4_sv_reass.h>
34 #include <vppinfra/bihash_16_8.h>
35 #include <nat/nat44/ed_inlines.h>
36
37 #include <vpp/app/version.h>
38
/* Plugin-wide NAT state; functions in this file reach it either through an
 * explicit `sm` argument or directly via &snat_main. */
39 snat_main_t snat_main;
40
/* FIB sources owned by the NAT plugin.  nat_fib_src_low is the source used
 * in this file when NAT addresses are added to / removed from the FIB and
 * when a FIB table is locked for an address.  nat_fib_src_hi is not used in
 * this part of the file; presumably a higher-priority source - TODO confirm. */
41 fib_source_t nat_fib_src_hi;
42 fib_source_t nat_fib_src_low;
43
44 /* *INDENT-OFF* */
45 /* Hook up input features.
 * Every entry below attaches one NAT graph node (.node_name) to the
 * "ip4-unicast" feature arc (.arc_name).  .runs_after names the features the
 * node is ordered behind on that arc.  All entries are ordered behind
 * "acl-plugin-in-ip4-fa"; the out2in variants are additionally ordered behind
 * "ip4-dhcp-client-detect". */
46 VNET_FEATURE_INIT (nat_pre_in2out, static) = {
47   .arc_name = "ip4-unicast",
48   .node_name = "nat-pre-in2out",
49   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
50                                "ip4-sv-reassembly-feature"),
51 };
52 VNET_FEATURE_INIT (nat_pre_out2in, static) = {
53   .arc_name = "ip4-unicast",
54   .node_name = "nat-pre-out2in",
55   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
56                                "ip4-dhcp-client-detect",
57                                "ip4-sv-reassembly-feature"),
58 };
/* The two worker-handoff nodes are the only input entries that do not list
 * "ip4-sv-reassembly-feature" in .runs_after. */
59 VNET_FEATURE_INIT (snat_in2out_worker_handoff, static) = {
60   .arc_name = "ip4-unicast",
61   .node_name = "nat44-in2out-worker-handoff",
62   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
63 };
64 VNET_FEATURE_INIT (snat_out2in_worker_handoff, static) = {
65   .arc_name = "ip4-unicast",
66   .node_name = "nat44-out2in-worker-handoff",
67   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
68                                "ip4-dhcp-client-detect"),
69 };
70 VNET_FEATURE_INIT (ip4_snat_in2out, static) = {
71   .arc_name = "ip4-unicast",
72   .node_name = "nat44-in2out",
73   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
74 };
75 VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
76   .arc_name = "ip4-unicast",
77   .node_name = "nat44-out2in",
78   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
79                                "ip4-dhcp-client-detect"),
80 };
81 VNET_FEATURE_INIT (ip4_nat_classify, static) = {
82   .arc_name = "ip4-unicast",
83   .node_name = "nat44-classify",
84   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
85 };
/* "nat44-ed-*" nodes: presumably the endpoint-dependent counterparts of the
 * nodes above - TODO confirm against the node definitions. */
86 VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = {
87   .arc_name = "ip4-unicast",
88   .node_name = "nat44-ed-in2out",
89   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
90 };
91 VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = {
92   .arc_name = "ip4-unicast",
93   .node_name = "nat44-ed-out2in",
94   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
95                                "ip4-dhcp-client-detect"),
96 };
97 VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
98   .arc_name = "ip4-unicast",
99   .node_name = "nat44-ed-classify",
100   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
101 };
102 VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
103   .arc_name = "ip4-unicast",
104   .node_name = "nat44-handoff-classify",
105   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
106 };
107 VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
108   .arc_name = "ip4-unicast",
109   .node_name = "nat44-in2out-fast",
110   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
111 };
112 VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = {
113   .arc_name = "ip4-unicast",
114   .node_name = "nat44-out2in-fast",
115   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
116                                "ip4-dhcp-client-detect"),
117 };
118 VNET_FEATURE_INIT (ip4_snat_hairpin_dst, static) = {
119   .arc_name = "ip4-unicast",
120   .node_name = "nat44-hairpin-dst",
121   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
122 };
123 VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_dst, static) = {
124   .arc_name = "ip4-unicast",
125   .node_name = "nat44-ed-hairpin-dst",
126   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
127 };
128
129 /* Hook up output features.
 * These entries attach NAT nodes to the "ip4-output" arc.  The first three
 * are ordered behind both "acl-plugin-out-ip4-fa" and
 * "ip4-sv-reassembly-output-feature". */
130 VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = {
131   .arc_name = "ip4-output",
132   .node_name = "nat44-in2out-output",
133   .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
134 };
135 VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = {
136   .arc_name = "ip4-output",
137   .node_name = "nat44-in2out-output-worker-handoff",
138   .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
139 };
140 VNET_FEATURE_INIT (ip4_snat_hairpin_src, static) = {
141   .arc_name = "ip4-output",
142   .node_name = "nat44-hairpin-src",
143   .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
144 };
/* Unlike the entries above, the two "nat44-ed-*" output nodes are placed
 * after reassembly but BEFORE "acl-plugin-out-ip4-fa" (.runs_before). */
145 VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = {
146   .arc_name = "ip4-output",
147   .node_name = "nat44-ed-in2out-output",
148   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
149   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
150 };
151 VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_src, static) = {
152   .arc_name = "ip4-output",
153   .node_name = "nat44-ed-hairpin-src",
154   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
155   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
156 };
157
158 /* Hook up ip4-local features.
 * Both hairpinning nodes sit on the "ip4-local" arc and are only constrained
 * to run before "ip4-local-end-of-arc". */
159 VNET_FEATURE_INIT (ip4_nat_hairpinning, static) =
160 {
161   .arc_name = "ip4-local",
162   .node_name = "nat44-hairpinning",
163   .runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
164 };
165 VNET_FEATURE_INIT (ip4_nat44_ed_hairpinning, static) =
166 {
167   .arc_name = "ip4-local",
168   .node_name = "nat44-ed-hairpinning",
169   .runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
170 };
171
172
/* Plugin registration record: the version string comes from VPP_BUILD_VER
 * (vpp/app/version.h is included above for it) and .description is the
 * plugin's human-readable description string. */
173 VLIB_PLUGIN_REGISTER () = {
174     .version = VPP_BUILD_VER,
175     .description = "Network Address Translation (NAT)",
176 };
177 /* *INDENT-ON* */
178
179 void
180 nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index,
181                        u8 is_ha)
182 {
183   clib_bihash_kv_8_8_t kv;
184   u8 proto;
185   u16 r_port, l_port;
186   ip4_address_t *l_addr, *r_addr;
187   u32 fib_index = 0;
188   clib_bihash_kv_16_8_t ed_kv;
189   snat_main_per_thread_data_t *tsm =
190     vec_elt_at_index (sm->per_thread_data, thread_index);
191
192   if (is_ed_session (s))
193     {
194       per_vrf_sessions_unregister_session (s, thread_index);
195     }
196
197   if (is_fwd_bypass_session (s))
198     {
199       if (snat_is_unk_proto_session (s))
200         {
201           init_ed_k (&ed_kv, s->in2out.addr, 0, s->ext_host_addr, 0, 0,
202                      s->in2out.port);
203         }
204       else
205         {
206           l_port = s->in2out.port;
207           r_port = s->ext_host_port;
208           l_addr = &s->in2out.addr;
209           r_addr = &s->ext_host_addr;
210           proto = nat_proto_to_ip_proto (s->nat_proto);
211           init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index,
212                      proto);
213         }
214       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
215         nat_elog_warn ("in2out_ed key del failed");
216       return;
217     }
218
219   /* session lookup tables */
220   if (is_ed_session (s))
221     {
222       if (is_affinity_sessions (s))
223         nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
224                              s->nat_proto, s->out2in.port);
225       l_addr = &s->out2in.addr;
226       r_addr = &s->ext_host_addr;
227       fib_index = s->out2in.fib_index;
228       if (snat_is_unk_proto_session (s))
229         {
230           proto = s->in2out.port;
231           r_port = 0;
232           l_port = 0;
233         }
234       else
235         {
236           proto = nat_proto_to_ip_proto (s->nat_proto);
237           l_port = s->out2in.port;
238           r_port = s->ext_host_port;
239         }
240       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
241       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0))
242         nat_elog_warn ("out2in_ed key del failed");
243       l_addr = &s->in2out.addr;
244       fib_index = s->in2out.fib_index;
245       if (!snat_is_unk_proto_session (s))
246         l_port = s->in2out.port;
247       if (is_twice_nat_session (s))
248         {
249           r_addr = &s->ext_host_nat_addr;
250           r_port = s->ext_host_nat_port;
251         }
252       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
253       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
254         nat_elog_warn ("in2out_ed key del failed");
255
256       if (!is_ha)
257         nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
258                                &s->in2out.addr, s->in2out.port,
259                                &s->ext_host_nat_addr, s->ext_host_nat_port,
260                                &s->out2in.addr, s->out2in.port,
261                                &s->ext_host_addr, s->ext_host_port,
262                                s->nat_proto, is_twice_nat_session (s));
263     }
264   else
265     {
266       init_nat_i2o_k (&kv, s);
267       if (clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 0))
268         nat_elog_warn ("in2out key del failed");
269       init_nat_o2i_k (&kv, s);
270       if (clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 0))
271         nat_elog_warn ("out2in key del failed");
272
273       if (!is_ha)
274         nat_syslog_nat44_apmdel (s->user_index, s->in2out.fib_index,
275                                  &s->in2out.addr, s->in2out.port,
276                                  &s->out2in.addr, s->out2in.port,
277                                  s->nat_proto);
278     }
279
280   if (snat_is_unk_proto_session (s))
281     return;
282
283   if (!is_ha)
284     {
285       /* log NAT event */
286       snat_ipfix_logging_nat44_ses_delete (thread_index,
287                                            s->in2out.addr.as_u32,
288                                            s->out2in.addr.as_u32,
289                                            s->nat_proto,
290                                            s->in2out.port,
291                                            s->out2in.port,
292                                            s->in2out.fib_index);
293
294       nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
295                    s->ext_host_port, s->nat_proto, s->out2in.fib_index,
296                    thread_index);
297     }
298
299   /* Twice NAT address and port for external host */
300   if (is_twice_nat_session (s))
301     {
302       snat_free_outside_address_and_port (sm->twice_nat_addresses,
303                                           thread_index,
304                                           &s->ext_host_nat_addr,
305                                           s->ext_host_nat_port, s->nat_proto);
306     }
307
308   if (snat_is_session_static (s))
309     return;
310
311   snat_free_outside_address_and_port (sm->addresses, thread_index,
312                                       &s->out2in.addr, s->out2in.port,
313                                       s->nat_proto);
314 }
315
316 void
317 nat44_free_session_data (snat_main_t * sm, snat_session_t * s,
318                          u32 thread_index, u8 is_ha)
319 {
320   u8 proto;
321   u16 r_port, l_port;
322   ip4_address_t *l_addr, *r_addr;
323   u32 fib_index;
324   clib_bihash_kv_16_8_t ed_kv;
325   snat_main_per_thread_data_t *tsm =
326     vec_elt_at_index (sm->per_thread_data, thread_index);
327
328   if (is_fwd_bypass_session (s))
329     {
330       if (snat_is_unk_proto_session (s))
331         {
332           proto = s->in2out.port;
333           r_port = 0;
334           l_port = 0;
335         }
336       else
337         {
338           proto = nat_proto_to_ip_proto (s->nat_proto);
339           l_port = s->in2out.port;
340           r_port = s->ext_host_port;
341         }
342
343       l_addr = &s->in2out.addr;
344       r_addr = &s->ext_host_addr;
345       fib_index = 0;
346       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
347
348       if (PREDICT_FALSE
349           (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0)))
350         nat_elog_warn ("in2out_ed key del failed");
351       return;
352     }
353
354   /* session lookup tables */
355   if (is_affinity_sessions (s))
356     nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
357                          s->nat_proto, s->out2in.port);
358   l_addr = &s->out2in.addr;
359   r_addr = &s->ext_host_addr;
360   fib_index = s->out2in.fib_index;
361   if (snat_is_unk_proto_session (s))
362     {
363       proto = s->in2out.port;
364       r_port = 0;
365       l_port = 0;
366     }
367   else
368     {
369       proto = nat_proto_to_ip_proto (s->nat_proto);
370       l_port = s->out2in.port;
371       r_port = s->ext_host_port;
372     }
373   init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
374
375   if (PREDICT_FALSE (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0)))
376     nat_elog_warn ("out2in_ed key del failed");
377
378   l_addr = &s->in2out.addr;
379   fib_index = s->in2out.fib_index;
380
381   if (!snat_is_unk_proto_session (s))
382     l_port = s->in2out.port;
383
384   if (is_twice_nat_session (s))
385     {
386       r_addr = &s->ext_host_nat_addr;
387       r_port = s->ext_host_nat_port;
388     }
389   init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
390
391   if (PREDICT_FALSE (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0)))
392     nat_elog_warn ("in2out_ed key del failed");
393
394   if (!is_ha)
395     {
396       nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
397                              &s->in2out.addr, s->in2out.port,
398                              &s->ext_host_nat_addr, s->ext_host_nat_port,
399                              &s->out2in.addr, s->out2in.port,
400                              &s->ext_host_addr, s->ext_host_port,
401                              s->nat_proto, is_twice_nat_session (s));
402     }
403
404   if (snat_is_unk_proto_session (s))
405     return;
406
407   if (!is_ha)
408     {
409       snat_ipfix_logging_nat44_ses_delete (thread_index,
410                                            s->in2out.addr.as_u32,
411                                            s->out2in.addr.as_u32,
412                                            s->nat_proto,
413                                            s->in2out.port,
414                                            s->out2in.port,
415                                            s->in2out.fib_index);
416       nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
417                    s->ext_host_port, s->nat_proto, s->out2in.fib_index,
418                    thread_index);
419     }
420
421   /* Twice NAT address and port for external host */
422   if (is_twice_nat_session (s))
423     {
424       snat_free_outside_address_and_port (sm->twice_nat_addresses,
425                                           thread_index,
426                                           &s->ext_host_nat_addr,
427                                           s->ext_host_nat_port, s->nat_proto);
428     }
429
430   if (snat_is_session_static (s))
431     return;
432
433   snat_free_outside_address_and_port (sm->addresses, thread_index,
434                                       &s->out2in.addr, s->out2in.port,
435                                       s->nat_proto);
436 }
437
438
439 snat_user_t *
440 nat_user_get_or_create (snat_main_t * sm, ip4_address_t * addr, u32 fib_index,
441                         u32 thread_index)
442 {
443   snat_user_t *u = 0;
444   snat_user_key_t user_key;
445   clib_bihash_kv_8_8_t kv, value;
446   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
447   dlist_elt_t *per_user_list_head_elt;
448
449   user_key.addr.as_u32 = addr->as_u32;
450   user_key.fib_index = fib_index;
451   kv.key = user_key.as_u64;
452
453   /* Ever heard of the "user" = src ip4 address before? */
454   if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
455     {
456       if (pool_elts (tsm->users) >= sm->max_users_per_thread)
457         {
458           vlib_increment_simple_counter (&sm->user_limit_reached,
459                                          thread_index, 0, 1);
460           nat_elog_warn ("maximum user limit reached");
461           return NULL;
462         }
463       /* no, make a new one */
464       pool_get (tsm->users, u);
465       clib_memset (u, 0, sizeof (*u));
466
467       u->addr.as_u32 = addr->as_u32;
468       u->fib_index = fib_index;
469
470       pool_get (tsm->list_pool, per_user_list_head_elt);
471
472       u->sessions_per_user_list_head_index = per_user_list_head_elt -
473         tsm->list_pool;
474
475       clib_dlist_init (tsm->list_pool, u->sessions_per_user_list_head_index);
476
477       kv.value = u - tsm->users;
478
479       /* add user */
480       if (clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1))
481         {
482           nat_elog_warn ("user_hash key add failed");
483           nat44_delete_user_with_no_session (sm, u, thread_index);
484           return NULL;
485         }
486
487       vlib_set_simple_counter (&sm->total_users, thread_index, 0,
488                                pool_elts (tsm->users));
489     }
490   else
491     {
492       u = pool_elt_at_index (tsm->users, value.value);
493     }
494
495   return u;
496 }
497
498 snat_session_t *
499 nat_session_alloc_or_recycle (snat_main_t * sm, snat_user_t * u,
500                               u32 thread_index, f64 now)
501 {
502   snat_session_t *s;
503   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
504   u32 oldest_per_user_translation_list_index, session_index;
505   dlist_elt_t *oldest_per_user_translation_list_elt;
506   dlist_elt_t *per_user_translation_list_elt;
507
508   /* Over quota? Recycle the least recently used translation */
509   if ((u->nsessions + u->nstaticsessions) >= sm->max_translations_per_user)
510     {
511       oldest_per_user_translation_list_index =
512         clib_dlist_remove_head (tsm->list_pool,
513                                 u->sessions_per_user_list_head_index);
514
515       ASSERT (oldest_per_user_translation_list_index != ~0);
516
517       /* Add it back to the end of the LRU list */
518       clib_dlist_addtail (tsm->list_pool,
519                           u->sessions_per_user_list_head_index,
520                           oldest_per_user_translation_list_index);
521       /* Get the list element */
522       oldest_per_user_translation_list_elt =
523         pool_elt_at_index (tsm->list_pool,
524                            oldest_per_user_translation_list_index);
525
526       /* Get the session index from the list element */
527       session_index = oldest_per_user_translation_list_elt->value;
528
529       /* Get the session */
530       s = pool_elt_at_index (tsm->sessions, session_index);
531       nat_free_session_data (sm, s, thread_index, 0);
532       if (snat_is_session_static (s))
533         u->nstaticsessions--;
534       else
535         u->nsessions--;
536       s->flags = 0;
537       s->total_bytes = 0;
538       s->total_pkts = 0;
539       s->state = 0;
540       s->ext_host_addr.as_u32 = 0;
541       s->ext_host_port = 0;
542       s->ext_host_nat_addr.as_u32 = 0;
543       s->ext_host_nat_port = 0;
544     }
545   else
546     {
547       pool_get (tsm->sessions, s);
548       clib_memset (s, 0, sizeof (*s));
549
550       /* Create list elts */
551       pool_get (tsm->list_pool, per_user_translation_list_elt);
552       clib_dlist_init (tsm->list_pool,
553                        per_user_translation_list_elt - tsm->list_pool);
554
555       per_user_translation_list_elt->value = s - tsm->sessions;
556       s->per_user_index = per_user_translation_list_elt - tsm->list_pool;
557       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
558
559       clib_dlist_addtail (tsm->list_pool,
560                           s->per_user_list_head_index,
561                           per_user_translation_list_elt - tsm->list_pool);
562
563       s->user_index = u - tsm->users;
564       vlib_set_simple_counter (&sm->total_sessions, thread_index, 0,
565                                pool_elts (tsm->sessions));
566     }
567
568   s->ha_last_refreshed = now;
569
570   return s;
571 }
572
573 void
574 snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index,
575                           int is_add)
576 {
577   fib_prefix_t prefix = {
578     .fp_len = p_len,
579     .fp_proto = FIB_PROTOCOL_IP4,
580     .fp_addr = {
581                 .ip4.as_u32 = addr->as_u32,
582                 },
583   };
584   u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
585
586   if (is_add)
587     fib_table_entry_update_one_path (fib_index,
588                                      &prefix,
589                                      nat_fib_src_low,
590                                      (FIB_ENTRY_FLAG_CONNECTED |
591                                       FIB_ENTRY_FLAG_LOCAL |
592                                       FIB_ENTRY_FLAG_EXCLUSIVE),
593                                      DPO_PROTO_IP4,
594                                      NULL,
595                                      sw_if_index,
596                                      ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
597   else
598     fib_table_entry_delete (fib_index, &prefix, nat_fib_src_low);
599 }
600
601 int
602 snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id,
603                   u8 twice_nat)
604 {
605   snat_address_t *ap;
606   snat_interface_t *i;
607   vlib_thread_main_t *tm = vlib_get_thread_main ();
608
609   if (twice_nat && !sm->endpoint_dependent)
610     return VNET_API_ERROR_FEATURE_DISABLED;
611
612   /* Check if address already exists */
613   /* *INDENT-OFF* */
614   vec_foreach (ap, twice_nat ? sm->twice_nat_addresses : sm->addresses)
615     {
616       if (ap->addr.as_u32 == addr->as_u32)
617         return VNET_API_ERROR_VALUE_EXIST;
618     }
619   /* *INDENT-ON* */
620
621   if (twice_nat)
622     vec_add2 (sm->twice_nat_addresses, ap, 1);
623   else
624     vec_add2 (sm->addresses, ap, 1);
625
626   ap->addr = *addr;
627   if (vrf_id != ~0)
628     ap->fib_index =
629       fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
630                                          nat_fib_src_low);
631   else
632     ap->fib_index = ~0;
633 #define _(N, i, n, s) \
634   clib_memset(ap->busy_##n##_port_refcounts, 0, sizeof(ap->busy_##n##_port_refcounts));\
635   ap->busy_##n##_ports = 0; \
636   ap->busy_##n##_ports_per_thread = 0;\
637   vec_validate_init_empty (ap->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
638   foreach_nat_protocol
639 #undef _
640     if (twice_nat)
641     return 0;
642
643   /* Add external address to FIB */
644   /* *INDENT-OFF* */
645   pool_foreach (i, sm->interfaces,
646   ({
647     if (nat_interface_is_inside(i) || sm->out2in_dpo)
648       continue;
649
650     snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1);
651     break;
652   }));
653   pool_foreach (i, sm->output_feature_interfaces,
654   ({
655     if (nat_interface_is_inside(i) || sm->out2in_dpo)
656       continue;
657
658     snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1);
659     break;
660   }));
661   /* *INDENT-ON* */
662
663   return 0;
664 }
665
666 static int
667 is_snat_address_used_in_static_mapping (snat_main_t * sm, ip4_address_t addr)
668 {
669   snat_static_mapping_t *m;
670   /* *INDENT-OFF* */
671   pool_foreach (m, sm->static_mappings,
672   ({
673       if (is_addr_only_static_mapping (m) ||
674           is_out2in_only_static_mapping (m) ||
675           is_identity_static_mapping (m))
676         continue;
677       if (m->external_addr.as_u32 == addr.as_u32)
678         return 1;
679   }));
680   /* *INDENT-ON* */
681
682   return 0;
683 }
684
685 static void
686 snat_add_static_mapping_when_resolved (snat_main_t * sm,
687                                        ip4_address_t l_addr,
688                                        u16 l_port,
689                                        u32 sw_if_index,
690                                        u16 e_port,
691                                        u32 vrf_id,
692                                        nat_protocol_t proto,
693                                        int addr_only, int is_add, u8 * tag,
694                                        int twice_nat, int out2in_only,
695                                        int identity_nat)
696 {
697   snat_static_map_resolve_t *rp;
698
699   vec_add2 (sm->to_resolve, rp, 1);
700   rp->l_addr.as_u32 = l_addr.as_u32;
701   rp->l_port = l_port;
702   rp->sw_if_index = sw_if_index;
703   rp->e_port = e_port;
704   rp->vrf_id = vrf_id;
705   rp->proto = proto;
706   rp->addr_only = addr_only;
707   rp->is_add = is_add;
708   rp->twice_nat = twice_nat;
709   rp->out2in_only = out2in_only;
710   rp->identity_nat = identity_nat;
711   rp->tag = vec_dup (tag);
712 }
713
714 static u32
715 get_thread_idx_by_port (u16 e_port)
716 {
717   snat_main_t *sm = &snat_main;
718   u32 thread_idx = sm->num_workers;
719   if (sm->num_workers > 1)
720     {
721       thread_idx =
722         sm->first_worker_index +
723         sm->workers[(e_port - 1024) / sm->port_per_thread];
724     }
725   return thread_idx;
726 }
727
728 void
729 snat_static_mapping_del_sessions (snat_main_t * sm,
730                                   snat_main_per_thread_data_t * tsm,
731                                   snat_user_key_t u_key, int addr_only,
732                                   ip4_address_t e_addr, u16 e_port)
733 {
734   clib_bihash_kv_8_8_t kv, value;
735   kv.key = u_key.as_u64;
736   u64 user_index;
737   dlist_elt_t *head, *elt;
738   snat_user_t *u;
739   snat_session_t *s;
740   u32 elt_index, head_index, ses_index;
741   if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
742     {
743       user_index = value.value;
744       u = pool_elt_at_index (tsm->users, user_index);
745       if (u->nstaticsessions)
746         {
747           head_index = u->sessions_per_user_list_head_index;
748           head = pool_elt_at_index (tsm->list_pool, head_index);
749           elt_index = head->next;
750           elt = pool_elt_at_index (tsm->list_pool, elt_index);
751           ses_index = elt->value;
752           while (ses_index != ~0)
753             {
754               s = pool_elt_at_index (tsm->sessions, ses_index);
755               elt = pool_elt_at_index (tsm->list_pool, elt->next);
756               ses_index = elt->value;
757
758               if (!addr_only)
759                 {
760                   if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
761                       (s->out2in.port != e_port))
762                     continue;
763                 }
764
765               if (is_lb_session (s))
766                 continue;
767
768               if (!snat_is_session_static (s))
769                 continue;
770
771               nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
772               nat44_delete_session (sm, s, tsm - sm->per_thread_data);
773
774               if (!addr_only)
775                 break;
776             }
777         }
778     }
779 }
780
781 void
782 snat_ed_static_mapping_del_sessions (snat_main_t * sm,
783                                      snat_main_per_thread_data_t * tsm,
784                                      ip4_address_t l_addr,
785                                      u16 l_port,
786                                      u8 protocol,
787                                      u32 fib_index, int addr_only,
788                                      ip4_address_t e_addr, u16 e_port)
789 {
790   snat_session_t *s;
791   u32 *indexes_to_free = NULL;
792   /* *INDENT-OFF* */
793   pool_foreach (s, tsm->sessions, {
794     if (s->in2out.fib_index != fib_index ||
795         s->in2out.addr.as_u32 != l_addr.as_u32)
796       {
797         continue;
798       }
799     if (!addr_only)
800       {
801         if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
802             s->out2in.port != e_port ||
803             s->in2out.port != l_port ||
804             s->nat_proto != protocol)
805           continue;
806       }
807
808     if (is_lb_session (s))
809       continue;
810     if (!snat_is_session_static (s))
811       continue;
812     nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
813     vec_add1 (indexes_to_free, s - tsm->sessions);
814     if (!addr_only)
815       break;
816   });
817   /* *INDENT-ON* */
818   u32 *ses_index;
819   vec_foreach (ses_index, indexes_to_free)
820   {
821     s = pool_elt_at_index (tsm->sessions, *ses_index);
822     nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
823   }
824   vec_free (indexes_to_free);
825 }
826
827 int
828 snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
829                          u16 l_port, u16 e_port, u32 vrf_id, int addr_only,
830                          u32 sw_if_index, nat_protocol_t proto, int is_add,
831                          twice_nat_type_t twice_nat, u8 out2in_only, u8 * tag,
832                          u8 identity_nat)
833 {
834   snat_main_t *sm = &snat_main;
835   snat_static_mapping_t *m;
836   clib_bihash_kv_8_8_t kv, value;
837   snat_address_t *a = 0;
838   u32 fib_index = ~0;
839   snat_interface_t *interface;
840   int i;
841   snat_main_per_thread_data_t *tsm;
842   snat_user_key_t u_key;
843   snat_user_t *u;
844   dlist_elt_t *head, *elt;
845   u32 elt_index, head_index;
846   u32 ses_index;
847   u64 user_index;
848   snat_session_t *s;
849   snat_static_map_resolve_t *rp, *rp_match = 0;
850   nat44_lb_addr_port_t *local;
851   u32 find = ~0;
852
853   if (!sm->endpoint_dependent)
854     {
855       if (twice_nat || out2in_only)
856         return VNET_API_ERROR_FEATURE_DISABLED;
857     }
858
859   /* If the external address is a specific interface address */
860   if (sw_if_index != ~0)
861     {
862       ip4_address_t *first_int_addr;
863
864       for (i = 0; i < vec_len (sm->to_resolve); i++)
865         {
866           rp = sm->to_resolve + i;
867           if (rp->sw_if_index != sw_if_index ||
868               rp->l_addr.as_u32 != l_addr.as_u32 ||
869               rp->vrf_id != vrf_id || rp->addr_only != addr_only)
870             continue;
871
872           if (!addr_only)
873             {
874               if ((rp->l_port != l_port && rp->e_port != e_port)
875                   || rp->proto != proto)
876                 continue;
877             }
878
879           rp_match = rp;
880           break;
881         }
882
883       /* Might be already set... */
884       first_int_addr = ip4_interface_first_address
885         (sm->ip4_main, sw_if_index, 0 /* just want the address */ );
886
887       if (is_add)
888         {
889           if (rp_match)
890             return VNET_API_ERROR_VALUE_EXIST;
891
892           snat_add_static_mapping_when_resolved
893             (sm, l_addr, l_port, sw_if_index, e_port, vrf_id, proto,
894              addr_only, is_add, tag, twice_nat, out2in_only, identity_nat);
895
896           /* DHCP resolution required? */
897           if (first_int_addr == 0)
898             {
899               return 0;
900             }
901           else
902             {
903               e_addr.as_u32 = first_int_addr->as_u32;
904               /* Identity mapping? */
905               if (l_addr.as_u32 == 0)
906                 l_addr.as_u32 = e_addr.as_u32;
907             }
908         }
909       else
910         {
911           if (!rp_match)
912             return VNET_API_ERROR_NO_SUCH_ENTRY;
913
914           vec_del1 (sm->to_resolve, i);
915
916           if (first_int_addr)
917             {
918               e_addr.as_u32 = first_int_addr->as_u32;
919               /* Identity mapping? */
920               if (l_addr.as_u32 == 0)
921                 l_addr.as_u32 = e_addr.as_u32;
922             }
923           else
924             return 0;
925         }
926     }
927
928   init_nat_k (&kv, e_addr, addr_only ? 0 : e_port, 0, addr_only ? 0 : proto);
929   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
930     m = 0;
931   else
932     m = pool_elt_at_index (sm->static_mappings, value.value);
933
934   if (is_add)
935     {
936       if (m)
937         {
938           if (is_identity_static_mapping (m))
939             {
940               /* *INDENT-OFF* */
941               pool_foreach (local, m->locals,
942               ({
943                 if (local->vrf_id == vrf_id)
944                   return VNET_API_ERROR_VALUE_EXIST;
945               }));
946               /* *INDENT-ON* */
947               pool_get (m->locals, local);
948               local->vrf_id = vrf_id;
949               local->fib_index =
950                 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
951                                                    nat_fib_src_low);
952               init_nat_kv (&kv, m->local_addr, m->local_port,
953                            local->fib_index, m->proto,
954                            m - sm->static_mappings);
955               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
956               return 0;
957             }
958           else
959             return VNET_API_ERROR_VALUE_EXIST;
960         }
961
962       if (twice_nat && addr_only)
963         return VNET_API_ERROR_UNSUPPORTED;
964
965       /* Convert VRF id to FIB index */
966       if (vrf_id != ~0)
967         fib_index =
968           fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
969                                              nat_fib_src_low);
970       /* If not specified use inside VRF id from SNAT plugin startup config */
971       else
972         {
973           fib_index = sm->inside_fib_index;
974           vrf_id = sm->inside_vrf_id;
975           fib_table_lock (fib_index, FIB_PROTOCOL_IP4, nat_fib_src_low);
976         }
977
978       if (!(out2in_only || identity_nat))
979         {
980           init_nat_k (&kv, l_addr, addr_only ? 0 : l_port, fib_index,
981                       addr_only ? 0 : proto);
982           if (!clib_bihash_search_8_8
983               (&sm->static_mapping_by_local, &kv, &value))
984             return VNET_API_ERROR_VALUE_EXIST;
985         }
986
987       /* Find external address in allocated addresses and reserve port for
988          address and port pair mapping when dynamic translations enabled */
989       if (!(addr_only || sm->static_mapping_only || out2in_only))
990         {
991           for (i = 0; i < vec_len (sm->addresses); i++)
992             {
993               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
994                 {
995                   a = sm->addresses + i;
996                   /* External port must be unused */
997                   switch (proto)
998                     {
999 #define _(N, j, n, s) \
1000                     case NAT_PROTOCOL_##N: \
1001                       if (a->busy_##n##_port_refcounts[e_port]) \
1002                         return VNET_API_ERROR_INVALID_VALUE; \
1003                       ++a->busy_##n##_port_refcounts[e_port]; \
1004                       if (e_port > 1024) \
1005                         { \
1006                           a->busy_##n##_ports++; \
1007                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
1008                         } \
1009                       break;
1010                       foreach_nat_protocol
1011 #undef _
1012                     default:
1013                       nat_elog_info ("unknown protocol");
1014                       return VNET_API_ERROR_INVALID_VALUE_2;
1015                     }
1016                   break;
1017                 }
1018             }
1019           /* External address must be allocated */
1020           if (!a && (l_addr.as_u32 != e_addr.as_u32))
1021             {
1022               if (sw_if_index != ~0)
1023                 {
1024                   for (i = 0; i < vec_len (sm->to_resolve); i++)
1025                     {
1026                       rp = sm->to_resolve + i;
1027                       if (rp->addr_only)
1028                         continue;
1029                       if (rp->sw_if_index != sw_if_index &&
1030                           rp->l_addr.as_u32 != l_addr.as_u32 &&
1031                           rp->vrf_id != vrf_id && rp->l_port != l_port &&
1032                           rp->e_port != e_port && rp->proto != proto)
1033                         continue;
1034
1035                       vec_del1 (sm->to_resolve, i);
1036                       break;
1037                     }
1038                 }
1039               return VNET_API_ERROR_NO_SUCH_ENTRY;
1040             }
1041         }
1042
1043       pool_get (sm->static_mappings, m);
1044       clib_memset (m, 0, sizeof (*m));
1045       m->tag = vec_dup (tag);
1046       m->local_addr = l_addr;
1047       m->external_addr = e_addr;
1048       m->twice_nat = twice_nat;
1049       if (out2in_only)
1050         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
1051       if (addr_only)
1052         m->flags |= NAT_STATIC_MAPPING_FLAG_ADDR_ONLY;
1053       if (identity_nat)
1054         {
1055           m->flags |= NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT;
1056           pool_get (m->locals, local);
1057           local->vrf_id = vrf_id;
1058           local->fib_index = fib_index;
1059         }
1060       else
1061         {
1062           m->vrf_id = vrf_id;
1063           m->fib_index = fib_index;
1064         }
1065       if (!addr_only)
1066         {
1067           m->local_port = l_port;
1068           m->external_port = e_port;
1069           m->proto = proto;
1070         }
1071
1072       if (sm->num_workers > 1)
1073         {
1074           ip4_header_t ip = {
1075             .src_address = m->local_addr,
1076           };
1077           vec_add1 (m->workers, sm->worker_in2out_cb (&ip, m->fib_index, 0));
1078           tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
1079         }
1080       else
1081         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1082
1083       init_nat_kv (&kv, m->local_addr, m->local_port, fib_index, m->proto,
1084                    m - sm->static_mappings);
1085       if (!out2in_only)
1086         clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
1087
1088       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto,
1089                    m - sm->static_mappings);
1090       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1);
1091
1092       /* Delete dynamic sessions matching local address (+ local port) */
1093       if (!(sm->static_mapping_only))
1094         {
1095           u_key.addr = m->local_addr;
1096           u_key.fib_index = m->fib_index;
1097           kv.key = u_key.as_u64;
1098           if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1099             {
1100               user_index = value.value;
1101               u = pool_elt_at_index (tsm->users, user_index);
1102               if (u->nsessions)
1103                 {
1104                   head_index = u->sessions_per_user_list_head_index;
1105                   head = pool_elt_at_index (tsm->list_pool, head_index);
1106                   elt_index = head->next;
1107                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1108                   ses_index = elt->value;
1109                   while (ses_index != ~0)
1110                     {
1111                       s = pool_elt_at_index (tsm->sessions, ses_index);
1112                       elt = pool_elt_at_index (tsm->list_pool, elt->next);
1113                       ses_index = elt->value;
1114
1115                       if (snat_is_session_static (s))
1116                         continue;
1117
1118                       if (!addr_only && s->in2out.port != m->local_port)
1119                         continue;
1120
1121                       nat_free_session_data (sm, s,
1122                                              tsm - sm->per_thread_data, 0);
1123                       nat44_delete_session (sm, s, tsm - sm->per_thread_data);
1124
1125                       if (!addr_only && !sm->endpoint_dependent)
1126                         break;
1127                     }
1128                 }
1129             }
1130         }
1131     }
1132   else
1133     {
1134       if (!m)
1135         {
1136           if (sw_if_index != ~0)
1137             return 0;
1138           else
1139             return VNET_API_ERROR_NO_SUCH_ENTRY;
1140         }
1141
1142       if (identity_nat)
1143         {
1144           if (vrf_id == ~0)
1145             vrf_id = sm->inside_vrf_id;
1146
1147           /* *INDENT-OFF* */
1148           pool_foreach (local, m->locals,
1149           ({
1150             if (local->vrf_id == vrf_id)
1151               find = local - m->locals;
1152           }));
1153           /* *INDENT-ON* */
1154           if (find == ~0)
1155             return VNET_API_ERROR_NO_SUCH_ENTRY;
1156
1157           local = pool_elt_at_index (m->locals, find);
1158           fib_index = local->fib_index;
1159           pool_put (m->locals, local);
1160         }
1161       else
1162         fib_index = m->fib_index;
1163
1164       /* Free external address port */
1165       if (!(addr_only || sm->static_mapping_only || out2in_only))
1166         {
1167           for (i = 0; i < vec_len (sm->addresses); i++)
1168             {
1169               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1170                 {
1171                   a = sm->addresses + i;
1172                   switch (proto)
1173                     {
1174 #define _(N, j, n, s) \
1175                     case NAT_PROTOCOL_##N: \
1176                       --a->busy_##n##_port_refcounts[e_port]; \
1177                       if (e_port > 1024) \
1178                         { \
1179                           a->busy_##n##_ports--; \
1180                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1181                         } \
1182                       break;
1183                       foreach_nat_protocol
1184 #undef _
1185                     default:
1186                       nat_elog_info ("unknown protocol");
1187                       return VNET_API_ERROR_INVALID_VALUE_2;
1188                     }
1189                   break;
1190                 }
1191             }
1192         }
1193
1194       if (sm->num_workers > 1)
1195         tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
1196       else
1197         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1198
1199       init_nat_k (&kv, m->local_addr, m->local_port, fib_index, m->proto);
1200       if (!out2in_only)
1201         clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0);
1202
1203       /* Delete session(s) for static mapping if exist */
1204       if (!(sm->static_mapping_only) ||
1205           (sm->static_mapping_only && sm->static_mapping_connection_tracking))
1206         {
1207           if (sm->endpoint_dependent)
1208             {
1209               snat_ed_static_mapping_del_sessions (sm, tsm, m->local_addr,
1210                                                    m->local_port, m->proto,
1211                                                    fib_index, addr_only,
1212                                                    e_addr, e_port);
1213             }
1214           else
1215             {
1216               u_key.addr = m->local_addr;
1217               u_key.fib_index = fib_index;
1218               kv.key = u_key.as_u64;
1219               snat_static_mapping_del_sessions (sm, tsm, u_key, addr_only,
1220                                                 e_addr, e_port);
1221             }
1222         }
1223
1224       fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, nat_fib_src_low);
1225       if (pool_elts (m->locals))
1226         return 0;
1227
1228       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
1229       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0);
1230
1231       vec_free (m->tag);
1232       vec_free (m->workers);
1233       /* Delete static mapping from pool */
1234       pool_put (sm->static_mappings, m);
1235     }
1236
1237   if (!addr_only || (l_addr.as_u32 == e_addr.as_u32))
1238     return 0;
1239
1240   /* Add/delete external address to FIB */
1241   /* *INDENT-OFF* */
1242   pool_foreach (interface, sm->interfaces,
1243   ({
1244     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1245       continue;
1246
1247     snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add);
1248     break;
1249   }));
1250   pool_foreach (interface, sm->output_feature_interfaces,
1251   ({
1252     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1253       continue;
1254
1255     snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add);
1256     break;
1257   }));
1258   /* *INDENT-ON* */
1259
1260   return 0;
1261 }
1262
1263 int
1264 nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
1265                                  nat_protocol_t proto,
1266                                  nat44_lb_addr_port_t * locals, u8 is_add,
1267                                  twice_nat_type_t twice_nat, u8 out2in_only,
1268                                  u8 * tag, u32 affinity)
1269 {
1270   snat_main_t *sm = &snat_main;
1271   snat_static_mapping_t *m;
1272   clib_bihash_kv_8_8_t kv, value;
1273   snat_address_t *a = 0;
1274   int i;
1275   nat44_lb_addr_port_t *local;
1276   snat_main_per_thread_data_t *tsm;
1277   snat_session_t *s;
1278   uword *bitmap = 0;
1279
1280   if (!sm->endpoint_dependent)
1281     return VNET_API_ERROR_FEATURE_DISABLED;
1282
1283   init_nat_k (&kv, e_addr, e_port, 0, proto);
1284   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1285     m = 0;
1286   else
1287     m = pool_elt_at_index (sm->static_mappings, value.value);
1288
1289   if (is_add)
1290     {
1291       if (m)
1292         return VNET_API_ERROR_VALUE_EXIST;
1293
1294       if (vec_len (locals) < 2)
1295         return VNET_API_ERROR_INVALID_VALUE;
1296
1297       /* Find external address in allocated addresses and reserve port for
1298          address and port pair mapping when dynamic translations enabled */
1299       if (!(sm->static_mapping_only || out2in_only))
1300         {
1301           for (i = 0; i < vec_len (sm->addresses); i++)
1302             {
1303               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1304                 {
1305                   a = sm->addresses + i;
1306                   /* External port must be unused */
1307                   switch (proto)
1308                     {
1309 #define _(N, j, n, s) \
1310                     case NAT_PROTOCOL_##N: \
1311                       if (a->busy_##n##_port_refcounts[e_port]) \
1312                         return VNET_API_ERROR_INVALID_VALUE; \
1313                       ++a->busy_##n##_port_refcounts[e_port]; \
1314                       if (e_port > 1024) \
1315                         { \
1316                           a->busy_##n##_ports++; \
1317                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
1318                         } \
1319                       break;
1320                       foreach_nat_protocol
1321 #undef _
1322                     default:
1323                       nat_elog_info ("unknown protocol");
1324                       return VNET_API_ERROR_INVALID_VALUE_2;
1325                     }
1326                   break;
1327                 }
1328             }
1329           /* External address must be allocated */
1330           if (!a)
1331             return VNET_API_ERROR_NO_SUCH_ENTRY;
1332         }
1333
1334       pool_get (sm->static_mappings, m);
1335       clib_memset (m, 0, sizeof (*m));
1336       m->tag = vec_dup (tag);
1337       m->external_addr = e_addr;
1338       m->external_port = e_port;
1339       m->proto = proto;
1340       m->twice_nat = twice_nat;
1341       m->flags |= NAT_STATIC_MAPPING_FLAG_LB;
1342       if (out2in_only)
1343         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
1344       m->affinity = affinity;
1345
1346       if (affinity)
1347         m->affinity_per_service_list_head_index =
1348           nat_affinity_get_per_service_list_head_index ();
1349       else
1350         m->affinity_per_service_list_head_index = ~0;
1351
1352       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto,
1353                    m - sm->static_mappings);
1354       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1))
1355         {
1356           nat_elog_err ("static_mapping_by_external key add failed");
1357           return VNET_API_ERROR_UNSPECIFIED;
1358         }
1359
1360       for (i = 0; i < vec_len (locals); i++)
1361         {
1362           locals[i].fib_index =
1363             fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
1364                                                locals[i].vrf_id,
1365                                                nat_fib_src_low);
1366           if (!out2in_only)
1367             {
1368               init_nat_kv (&kv, locals[i].addr, locals[i].port,
1369                            locals[i].fib_index, m->proto,
1370                            m - sm->static_mappings);
1371               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
1372             }
1373           locals[i].prefix = (i == 0) ? locals[i].probability :
1374             (locals[i - 1].prefix + locals[i].probability);
1375           pool_get (m->locals, local);
1376           *local = locals[i];
1377           if (sm->num_workers > 1)
1378             {
1379               ip4_header_t ip = {
1380                 .src_address = locals[i].addr,
1381               };
1382               bitmap =
1383                 clib_bitmap_set (bitmap,
1384                                  sm->worker_in2out_cb (&ip, m->fib_index, 0),
1385                                  1);
1386             }
1387         }
1388
1389       /* Assign workers */
1390       if (sm->num_workers > 1)
1391         {
1392           /* *INDENT-OFF* */
1393           clib_bitmap_foreach (i, bitmap,
1394             ({
1395                vec_add1(m->workers, i);
1396             }));
1397           /* *INDENT-ON* */
1398         }
1399     }
1400   else
1401     {
1402       if (!m)
1403         return VNET_API_ERROR_NO_SUCH_ENTRY;
1404
1405       if (!is_lb_static_mapping (m))
1406         return VNET_API_ERROR_INVALID_VALUE;
1407
1408       /* Free external address port */
1409       if (!(sm->static_mapping_only || out2in_only))
1410         {
1411           for (i = 0; i < vec_len (sm->addresses); i++)
1412             {
1413               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1414                 {
1415                   a = sm->addresses + i;
1416                   switch (proto)
1417                     {
1418 #define _(N, j, n, s) \
1419                     case NAT_PROTOCOL_##N: \
1420                       --a->busy_##n##_port_refcounts[e_port]; \
1421                       if (e_port > 1024) \
1422                         { \
1423                           a->busy_##n##_ports--; \
1424                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1425                         } \
1426                       break;
1427                       foreach_nat_protocol
1428 #undef _
1429                     default:
1430                       nat_elog_info ("unknown protocol");
1431                       return VNET_API_ERROR_INVALID_VALUE_2;
1432                     }
1433                   break;
1434                 }
1435             }
1436         }
1437
1438       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
1439       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0))
1440         {
1441           nat_elog_err ("static_mapping_by_external key del failed");
1442           return VNET_API_ERROR_UNSPECIFIED;
1443         }
1444
1445       /* *INDENT-OFF* */
1446       pool_foreach (local, m->locals,
1447       ({
1448           fib_table_unlock (local->fib_index, FIB_PROTOCOL_IP4,
1449                             nat_fib_src_low);
1450           if (!out2in_only)
1451             {
1452 init_nat_k(&              kv, local->addr, local->port, local->fib_index, m->proto);
1453               if (clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 0))
1454                 {
1455                   nat_elog_err ("static_mapping_by_local key del failed");
1456                   return VNET_API_ERROR_UNSPECIFIED;
1457                 }
1458             }
1459
1460           if (sm->num_workers > 1)
1461             {
1462               ip4_header_t ip = {
1463                 .src_address = local->addr,
1464               };
1465               tsm = vec_elt_at_index (sm->per_thread_data,
1466                                       sm->worker_in2out_cb (&ip, m->fib_index, 0));
1467             }
1468           else
1469             tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1470
1471           /* Delete sessions */
1472           pool_foreach (s, tsm->sessions, {
1473             if (!(is_lb_session (s)))
1474               continue;
1475
1476             if ((s->in2out.addr.as_u32 != local->addr.as_u32) ||
1477                 s->in2out.port != local->port)
1478               continue;
1479
1480             nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1481             nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1482           });
1483       }));
1484       /* *INDENT-ON* */
1485       if (m->affinity)
1486         nat_affinity_flush_service (m->affinity_per_service_list_head_index);
1487       pool_free (m->locals);
1488       vec_free (m->tag);
1489       vec_free (m->workers);
1490
1491       pool_put (sm->static_mappings, m);
1492     }
1493
1494   return 0;
1495 }
1496
1497 int
1498 nat44_lb_static_mapping_add_del_local (ip4_address_t e_addr, u16 e_port,
1499                                        ip4_address_t l_addr, u16 l_port,
1500                                        nat_protocol_t proto, u32 vrf_id,
1501                                        u8 probability, u8 is_add)
1502 {
1503   snat_main_t *sm = &snat_main;
1504   snat_static_mapping_t *m = 0;
1505   clib_bihash_kv_8_8_t kv, value;
1506   nat44_lb_addr_port_t *local, *prev_local, *match_local = 0;
1507   snat_main_per_thread_data_t *tsm;
1508   snat_session_t *s;
1509   u32 *locals = 0;
1510   uword *bitmap = 0;
1511   int i;
1512
1513   if (!sm->endpoint_dependent)
1514     return VNET_API_ERROR_FEATURE_DISABLED;
1515
1516   init_nat_k (&kv, e_addr, e_port, 0, proto);
1517   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1518     m = pool_elt_at_index (sm->static_mappings, value.value);
1519
1520   if (!m)
1521     return VNET_API_ERROR_NO_SUCH_ENTRY;
1522
1523   if (!is_lb_static_mapping (m))
1524     return VNET_API_ERROR_INVALID_VALUE;
1525
1526   /* *INDENT-OFF* */
1527   pool_foreach (local, m->locals,
1528   ({
1529     if ((local->addr.as_u32 == l_addr.as_u32) && (local->port == l_port) &&
1530         (local->vrf_id == vrf_id))
1531       {
1532         match_local = local;
1533         break;
1534       }
1535   }));
1536   /* *INDENT-ON* */
1537
1538   if (is_add)
1539     {
1540       if (match_local)
1541         return VNET_API_ERROR_VALUE_EXIST;
1542
1543       pool_get (m->locals, local);
1544       clib_memset (local, 0, sizeof (*local));
1545       local->addr.as_u32 = l_addr.as_u32;
1546       local->port = l_port;
1547       local->probability = probability;
1548       local->vrf_id = vrf_id;
1549       local->fib_index =
1550         fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1551                                            nat_fib_src_low);
1552
1553       if (!is_out2in_only_static_mapping (m))
1554         {
1555           init_nat_kv (&kv, l_addr, l_port, local->fib_index, proto,
1556                        m - sm->static_mappings);
1557           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1))
1558             nat_elog_err ("static_mapping_by_local key add failed");
1559         }
1560     }
1561   else
1562     {
1563       if (!match_local)
1564         return VNET_API_ERROR_NO_SUCH_ENTRY;
1565
1566       if (pool_elts (m->locals) < 3)
1567         return VNET_API_ERROR_UNSPECIFIED;
1568
1569       fib_table_unlock (match_local->fib_index, FIB_PROTOCOL_IP4,
1570                         nat_fib_src_low);
1571
1572       if (!is_out2in_only_static_mapping (m))
1573         {
1574           init_nat_k (&kv, l_addr, l_port, match_local->fib_index, proto);
1575           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0))
1576             nat_elog_err ("static_mapping_by_local key del failed");
1577         }
1578
1579       if (sm->num_workers > 1)
1580         {
1581           ip4_header_t ip = {
1582             .src_address = local->addr,
1583           };
1584           tsm = vec_elt_at_index (sm->per_thread_data,
1585                                   sm->worker_in2out_cb (&ip, m->fib_index,
1586                                                         0));
1587         }
1588       else
1589         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1590
1591       /* Delete sessions */
1592       /* *INDENT-OFF* */
1593       pool_foreach (s, tsm->sessions, {
1594         if (!(is_lb_session (s)))
1595           continue;
1596
1597         if ((s->in2out.addr.as_u32 != match_local->addr.as_u32) ||
1598             s->in2out.port != match_local->port)
1599           continue;
1600
1601         nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1602         nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1603       });
1604       /* *INDENT-ON* */
1605
1606       pool_put (m->locals, match_local);
1607     }
1608
1609   vec_free (m->workers);
1610
1611   /* *INDENT-OFF* */
1612   pool_foreach (local, m->locals,
1613   ({
1614     vec_add1 (locals, local - m->locals);
1615     if (sm->num_workers > 1)
1616       {
1617         ip4_header_t ip;
1618         ip.src_address.as_u32 = local->addr.as_u32,
1619         bitmap = clib_bitmap_set (bitmap,
1620                                   sm->worker_in2out_cb (&ip, local->fib_index, 0),
1621                                   1);
1622       }
1623   }));
1624   /* *INDENT-ON* */
1625
1626   ASSERT (vec_len (locals) > 1);
1627
1628   local = pool_elt_at_index (m->locals, locals[0]);
1629   local->prefix = local->probability;
1630   for (i = 1; i < vec_len (locals); i++)
1631     {
1632       local = pool_elt_at_index (m->locals, locals[i]);
1633       prev_local = pool_elt_at_index (m->locals, locals[i - 1]);
1634       local->prefix = local->probability + prev_local->prefix;
1635     }
1636
1637   /* Assign workers */
1638   if (sm->num_workers > 1)
1639     {
1640       /* *INDENT-OFF* */
1641       clib_bitmap_foreach (i, bitmap, ({ vec_add1(m->workers, i); }));
1642       /* *INDENT-ON* */
1643     }
1644
1645   return 0;
1646 }
1647
1648 int
1649 snat_del_address (snat_main_t * sm, ip4_address_t addr, u8 delete_sm,
1650                   u8 twice_nat)
1651 {
1652   snat_address_t *a = 0;
1653   snat_session_t *ses;
1654   u32 *ses_to_be_removed = 0, *ses_index;
1655   snat_main_per_thread_data_t *tsm;
1656   snat_static_mapping_t *m;
1657   snat_interface_t *interface;
1658   int i;
1659   snat_address_t *addresses =
1660     twice_nat ? sm->twice_nat_addresses : sm->addresses;
1661
1662   /* Find SNAT address */
1663   for (i = 0; i < vec_len (addresses); i++)
1664     {
1665       if (addresses[i].addr.as_u32 == addr.as_u32)
1666         {
1667           a = addresses + i;
1668           break;
1669         }
1670     }
1671   if (!a)
1672     return VNET_API_ERROR_NO_SUCH_ENTRY;
1673
1674   if (delete_sm)
1675     {
1676       /* *INDENT-OFF* */
1677       pool_foreach (m, sm->static_mappings,
1678       ({
1679           if (m->external_addr.as_u32 == addr.as_u32)
1680             (void) snat_add_static_mapping (m->local_addr, m->external_addr,
1681                                             m->local_port, m->external_port,
1682                                             m->vrf_id, is_addr_only_static_mapping(m), ~0,
1683                                             m->proto, 0, m->twice_nat,
1684                                             is_out2in_only_static_mapping(m), m->tag, is_identity_static_mapping(m));
1685       }));
1686       /* *INDENT-ON* */
1687     }
1688   else
1689     {
1690       /* Check if address is used in some static mapping */
1691       if (is_snat_address_used_in_static_mapping (sm, addr))
1692         {
1693           nat_elog_notice ("address used in static mapping");
1694           return VNET_API_ERROR_UNSPECIFIED;
1695         }
1696     }
1697
1698   if (a->fib_index != ~0)
1699     fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, nat_fib_src_low);
1700
1701   /* Delete sessions using address */
1702   if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports)
1703     {
1704       /* *INDENT-OFF* */
1705       vec_foreach (tsm, sm->per_thread_data)
1706         {
1707           pool_foreach (ses, tsm->sessions, ({
1708             if (ses->out2in.addr.as_u32 == addr.as_u32)
1709               {
1710                 nat_free_session_data (sm, ses, tsm - sm->per_thread_data, 0);
1711                 vec_add1 (ses_to_be_removed, ses - tsm->sessions);
1712               }
1713           }));
1714
1715           if (sm->endpoint_dependent){
1716               vec_foreach (ses_index, ses_to_be_removed)
1717                 {
1718                   ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
1719                   nat_ed_session_delete (sm, ses, tsm - sm->per_thread_data, 1);
1720                 }
1721           }else{
1722               vec_foreach (ses_index, ses_to_be_removed)
1723                 {
1724                   ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
1725                   nat44_delete_session (sm, ses, tsm - sm->per_thread_data);
1726                 }
1727           }
1728
1729           vec_free (ses_to_be_removed);
1730         }
1731       /* *INDENT-ON* */
1732     }
1733
1734 #define _(N, i, n, s) \
1735   vec_free (a->busy_##n##_ports_per_thread);
1736   foreach_nat_protocol
1737 #undef _
1738     if (twice_nat)
1739     {
1740       vec_del1 (sm->twice_nat_addresses, i);
1741       return 0;
1742     }
1743   else
1744     vec_del1 (sm->addresses, i);
1745
1746   /* Delete external address from FIB */
1747   /* *INDENT-OFF* */
1748   pool_foreach (interface, sm->interfaces,
1749   ({
1750     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1751       continue;
1752
1753     snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0);
1754     break;
1755   }));
1756   pool_foreach (interface, sm->output_feature_interfaces,
1757   ({
1758     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1759       continue;
1760
1761     snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0);
1762     break;
1763   }));
1764   /* *INDENT-ON* */
1765
1766   return 0;
1767 }
1768
1769 static void
1770 nat_validate_counters (snat_main_t * sm, u32 sw_if_index)
1771 {
1772 #define _(x)                                                                  \
1773   vlib_validate_simple_counter (&sm->counters.fastpath.in2out.x,              \
1774                                 sw_if_index);                                 \
1775   vlib_zero_simple_counter (&sm->counters.fastpath.in2out.x, sw_if_index);    \
1776   vlib_validate_simple_counter (&sm->counters.fastpath.out2in.x,              \
1777                                 sw_if_index);                                 \
1778   vlib_zero_simple_counter (&sm->counters.fastpath.out2in.x, sw_if_index);    \
1779   vlib_validate_simple_counter (&sm->counters.slowpath.in2out.x,              \
1780                                 sw_if_index);                                 \
1781   vlib_zero_simple_counter (&sm->counters.slowpath.in2out.x, sw_if_index);    \
1782   vlib_validate_simple_counter (&sm->counters.slowpath.out2in.x,              \
1783                                 sw_if_index);                                 \
1784   vlib_zero_simple_counter (&sm->counters.slowpath.out2in.x, sw_if_index);    \
1785   vlib_validate_simple_counter (&sm->counters.fastpath.in2out_ed.x,           \
1786                                 sw_if_index);                                 \
1787   vlib_zero_simple_counter (&sm->counters.fastpath.in2out_ed.x, sw_if_index); \
1788   vlib_validate_simple_counter (&sm->counters.fastpath.out2in_ed.x,           \
1789                                 sw_if_index);                                 \
1790   vlib_zero_simple_counter (&sm->counters.fastpath.out2in_ed.x, sw_if_index); \
1791   vlib_validate_simple_counter (&sm->counters.slowpath.in2out_ed.x,           \
1792                                 sw_if_index);                                 \
1793   vlib_zero_simple_counter (&sm->counters.slowpath.in2out_ed.x, sw_if_index); \
1794   vlib_validate_simple_counter (&sm->counters.slowpath.out2in_ed.x,           \
1795                                 sw_if_index);                                 \
1796   vlib_zero_simple_counter (&sm->counters.slowpath.out2in_ed.x, sw_if_index);
1797   foreach_nat_counter;
1798 #undef _
1799   vlib_validate_simple_counter (&sm->counters.hairpinning, sw_if_index);
1800   vlib_zero_simple_counter (&sm->counters.hairpinning, sw_if_index);
1801 }
1802
1803 void
1804 expire_per_vrf_sessions (u32 fib_index)
1805 {
1806   per_vrf_sessions_t *per_vrf_sessions;
1807   snat_main_per_thread_data_t *tsm;
1808   snat_main_t *sm = &snat_main;
1809
1810   /* *INDENT-OFF* */
1811   vec_foreach (tsm, sm->per_thread_data)
1812     {
1813       vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
1814         {
1815           if ((per_vrf_sessions->rx_fib_index == fib_index) ||
1816               (per_vrf_sessions->tx_fib_index == fib_index))
1817             {
1818               per_vrf_sessions->expired = 1;
1819             }
1820         }
1821     }
1822   /* *INDENT-ON* */
1823 }
1824
1825 void
1826 update_per_vrf_sessions_vec (u32 fib_index, int is_del)
1827 {
1828   snat_main_t *sm = &snat_main;
1829   nat_fib_t *fib;
1830
1831   // we don't care if it is outside/inside fib
1832   // we just care about their ref_count
1833   // if it reaches 0 sessions should expire
1834   // because the fib isn't valid for NAT anymore
1835
1836   vec_foreach (fib, sm->fibs)
1837   {
1838     if (fib->fib_index == fib_index)
1839       {
1840         if (is_del)
1841           {
1842             fib->ref_count--;
1843             if (!fib->ref_count)
1844               {
1845                 vec_del1 (sm->fibs, fib - sm->fibs);
1846                 expire_per_vrf_sessions (fib_index);
1847               }
1848             return;
1849           }
1850         else
1851           fib->ref_count++;
1852       }
1853   }
1854   if (!is_del)
1855     {
1856       vec_add2 (sm->fibs, fib, 1);
1857       fib->ref_count = 1;
1858       fib->fib_index = fib_index;
1859     }
1860 }
1861
/*
 * Add or remove the NAT44 inside/outside role of an interface kept in
 * sm->interfaces, and switch the matching "ip4-unicast" / "ip4-local"
 * features on or off.
 *
 * sw_if_index  interface to configure
 * is_inside    non-zero for the inside role, zero for the outside role
 * is_del       non-zero to remove the role, zero to add it
 *
 * Returns 0 on success, VNET_API_ERROR_UNSUPPORTED for an outside role when
 * sm->out2in_dpo is set, VNET_API_ERROR_VALUE_EXIST when the interface is
 * already present in sm->output_feature_interfaces,
 * VNET_API_ERROR_NO_SUCH_ENTRY when deleting an interface that is not in
 * sm->interfaces, or the non-zero result of
 * ip4_sv_reass_enable_disable_with_refcnt().
 */
1862 int
1863 snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
1864 {
1865   snat_main_t *sm = &snat_main;
1866   snat_interface_t *i;
1867   const char *feature_name, *del_feature_name;
1868   snat_address_t *ap;
1869   snat_static_mapping_t *m;
1870   nat_outside_fib_t *outside_fib;
1871   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1872                                                        sw_if_index);
1873
1874   if (sm->out2in_dpo && !is_inside)
1875     return VNET_API_ERROR_UNSUPPORTED;
1876
       /* An interface cannot be both an output-feature and a regular NAT
        * interface. */
1877   /* *INDENT-OFF* */
1878   pool_foreach (i, sm->output_feature_interfaces,
1879   ({
1880     if (i->sw_if_index == sw_if_index)
1881       return VNET_API_ERROR_VALUE_EXIST;
1882   }));
1883   /* *INDENT-ON* */
1884
       /* Pick the single-role feature node for the configured mode. */
1885   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
1886     feature_name = is_inside ? "nat44-in2out-fast" : "nat44-out2in-fast";
1887   else
1888     {
1889       if (sm->num_workers > 1)
1890         feature_name =
1891           is_inside ? "nat44-in2out-worker-handoff" :
1892           "nat44-out2in-worker-handoff";
1893       else if (sm->endpoint_dependent)
1894         {
1895           feature_name = is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1896         }
1897       else
1898         feature_name = is_inside ? "nat44-in2out" : "nat44-out2in";
1899     }
1900
       /* Create the hand-off frame queues on first use when more than one
        * worker is configured. */
1901   if (sm->fq_in2out_index == ~0 && sm->num_workers > 1)
1902     sm->fq_in2out_index =
1903       vlib_frame_queue_main_init (sm->in2out_node_index, NAT_FQ_NELTS);
1904
1905   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
1906     sm->fq_out2in_index =
1907       vlib_frame_queue_main_init (sm->out2in_node_index, NAT_FQ_NELTS);
1908
       /* NOTE(review): the per-VRF and outside-FIB reference counts below are
        * adjusted before the interface lookup; the later early returns
        * (NO_SUCH_ENTRY, "role already set" return 0, reassembly errors) do
        * not undo them -- confirm this is intended. */
1909   if (sm->endpoint_dependent)
1910     update_per_vrf_sessions_vec (fib_index, is_del);
1911
1912   if (!is_inside)
1913     {
           /* Reference-count the FIB of this outside interface. */
1914       /* *INDENT-OFF* */
1915       vec_foreach (outside_fib, sm->outside_fibs)
1916         {
1917           if (outside_fib->fib_index == fib_index)
1918             {
1919               if (is_del)
1920                 {
1921                   outside_fib->refcount--;
1922                   if (!outside_fib->refcount)
1923                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1924                 }
1925               else
1926                 outside_fib->refcount++;
1927               goto feature_set;
1928             }
1929         }
1930       /* *INDENT-ON* */
1931       if (!is_del)
1932         {
1933           vec_add2 (sm->outside_fibs, outside_fib, 1);
1934           outside_fib->refcount = 1;
1935           outside_fib->fib_index = fib_index;
1936         }
1937     }
1938
1939 feature_set:
       /* Look for an existing entry for this interface. */
1940   /* *INDENT-OFF* */
1941   pool_foreach (i, sm->interfaces,
1942   ({
1943     if (i->sw_if_index == sw_if_index)
1944       {
1945         if (is_del)
1946           {
1947             if (nat_interface_is_inside(i) && nat_interface_is_outside(i))
1948               {
                     /* Interface has both roles: drop the requested one and
                      * replace the classify node by the node of the role
                      * that remains (hence the inverted is_inside tests). */
1949                 if (is_inside)
1950                   i->flags &= ~NAT_INTERFACE_FLAG_IS_INSIDE;
1951                 else
1952                   i->flags &= ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
1953
1954                 if (sm->num_workers > 1)
1955                   {
1956                     del_feature_name = "nat44-handoff-classify";
1957                     feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
1958                                                  "nat44-out2in-worker-handoff";
1959                   }
1960                 else if (sm->endpoint_dependent)
1961                   {
1962                     del_feature_name = "nat44-ed-classify";
1963                     feature_name = !is_inside ?  "nat-pre-in2out" :
1964                                                  "nat-pre-out2in";
1965                   }
1966                 else
1967                   {
1968                     del_feature_name = "nat44-classify";
1969                     feature_name = !is_inside ?  "nat44-in2out" : "nat44-out2in";
1970                   }
1971
1972                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1973                 if (rv)
1974                   return rv;
1975                 vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1976                                              sw_if_index, 0, 0, 0);
1977                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
1978                                              sw_if_index, 1, 0, 0);
                     /* Outside role removed, inside remains: turn the
                      * hairpinning feature on. */
1979                 if (!is_inside)
1980                   {
1981                     if (sm->endpoint_dependent)
1982                       vnet_feature_enable_disable ("ip4-local",
1983                                                    "nat44-ed-hairpinning",
1984                                                    sw_if_index, 1, 0, 0);
1985                     else
1986                       vnet_feature_enable_disable ("ip4-local",
1987                                                    "nat44-hairpinning",
1988                                                    sw_if_index, 1, 0, 0);
1989                   }
1990               }
1991             else
1992               {
                     /* Interface has a single role: disable its feature and
                      * release the pool entry. */
                     /* NOTE(review): the stored flags are not compared with
                      * is_inside here, so the role being deleted is taken
                      * from the caller -- confirm. */
1993                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1994                 if (rv)
1995                   return rv;
1996                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
1997                                              sw_if_index, 0, 0, 0);
1998                 pool_put (sm->interfaces, i);
1999                 if (is_inside)
2000                   {
2001                     if (sm->endpoint_dependent)
2002                       vnet_feature_enable_disable ("ip4-local",
2003                                                    "nat44-ed-hairpinning",
2004                                                    sw_if_index, 0, 0, 0);
2005                     else
2006                       vnet_feature_enable_disable ("ip4-local",
2007                                                    "nat44-hairpinning",
2008                                                    sw_if_index, 0, 0, 0);
2009                   }
2010               }
2011           }
2012         else
2013           {
                 /* Adding a role the interface already has is a no-op. */
2014             if ((nat_interface_is_inside(i) && is_inside) ||
2015                 (nat_interface_is_outside(i) && !is_inside))
2016               return 0;
2017
                 /* Adding the second role: replace the node of the existing
                  * role by the classify node. */
2018             if (sm->num_workers > 1)
2019               {
2020                 del_feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
2021                                                  "nat44-out2in-worker-handoff";
2022                 feature_name = "nat44-handoff-classify";
2023               }
2024             else if (sm->endpoint_dependent)
2025               {
2026                 del_feature_name = !is_inside ?  "nat-pre-in2out" :
2027                                                  "nat-pre-out2in";
2028
2029                 feature_name = "nat44-ed-classify";
2030               }
2031             else
2032               {
2033                 del_feature_name = !is_inside ?  "nat44-in2out" : "nat44-out2in";
2034                 feature_name = "nat44-classify";
2035               }
2036
2037             int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
2038             if (rv)
2039               return rv;
2040             vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
2041                                          sw_if_index, 0, 0, 0);
2042             vnet_feature_enable_disable ("ip4-unicast", feature_name,
2043                                          sw_if_index, 1, 0, 0);
                 /* Outside role added to an inside interface: turn the
                  * hairpinning feature off. */
2044             if (!is_inside)
2045               {
2046                 if (sm->endpoint_dependent)
2047                   vnet_feature_enable_disable ("ip4-local", "nat44-ed-hairpinning",
2048                                                sw_if_index, 0, 0, 0);
2049                 else
2050                   vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning",
2051                                                sw_if_index, 0, 0, 0);
2052               }
2053             goto set_flags;
2054           }
2055
             /* Delete handled: continue with the FIB update. */
2056         goto fib;
2057       }
2058   }));
2059   /* *INDENT-ON* */
2060
       /* No entry for this interface. */
2061   if (is_del)
2062     return VNET_API_ERROR_NO_SUCH_ENTRY;
2063
       /* First role on this interface: create the entry and enable the
        * single-role feature chosen above. */
2064   pool_get (sm->interfaces, i);
2065   i->sw_if_index = sw_if_index;
2066   i->flags = 0;
2067   nat_validate_counters (sm, sw_if_index);
2068
2069   vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1, 0,
2070                                0);
2071
       /* NOTE(review): if this call fails the function returns with the pool
        * entry allocated (flags 0) and the feature already enabled -- confirm
        * whether cleanup is needed. */
2072   int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
2073   if (rv)
2074     return rv;
2075
2076   if (is_inside && !sm->out2in_dpo)
2077     {
2078       if (sm->endpoint_dependent)
2079         vnet_feature_enable_disable ("ip4-local", "nat44-ed-hairpinning",
2080                                      sw_if_index, 1, 0, 0);
2081       else
2082         vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning",
2083                                      sw_if_index, 1, 0, 0);
2084     }
2085
2086 set_flags:
       /* Record the new role; an inside add returns here without touching
        * the FIB, an outside add falls through to the FIB update. */
2087   if (is_inside)
2088     {
2089       i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
2090       return 0;
2091     }
2092   else
2093     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
2094
2095   /* Add/delete external addresses to FIB */
2096 fib:
       /* Every pool address, and the external address of every address-only
        * static mapping whose local and external addresses differ, is added
        * (or removed when is_del) as a /32 for this interface. */
2097   /* *INDENT-OFF* */
2098   vec_foreach (ap, sm->addresses)
2099     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
2100
2101   pool_foreach (m, sm->static_mappings,
2102   ({
2103     if (!(is_addr_only_static_mapping(m)) || (m->local_addr.as_u32 == m->external_addr.as_u32))
2104       continue;
2105
2106     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
2107   }));
2108   /* *INDENT-ON* */
2109
2110   return 0;
2111 }
2112
2113 int
2114 snat_interface_add_del_output_feature (u32 sw_if_index,
2115                                        u8 is_inside, int is_del)
2116 {
2117   snat_main_t *sm = &snat_main;
2118   snat_interface_t *i;
2119   snat_address_t *ap;
2120   snat_static_mapping_t *m;
2121   nat_outside_fib_t *outside_fib;
2122   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2123                                                        sw_if_index);
2124
2125   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
2126     return VNET_API_ERROR_UNSUPPORTED;
2127
2128   /* *INDENT-OFF* */
2129   pool_foreach (i, sm->interfaces,
2130   ({
2131     if (i->sw_if_index == sw_if_index)
2132       return VNET_API_ERROR_VALUE_EXIST;
2133   }));
2134   /* *INDENT-ON* */
2135
2136   if (sm->endpoint_dependent)
2137     update_per_vrf_sessions_vec (fib_index, is_del);
2138
2139   if (!is_inside)
2140     {
2141       /* *INDENT-OFF* */
2142       vec_foreach (outside_fib, sm->outside_fibs)
2143         {
2144           if (outside_fib->fib_index == fib_index)
2145             {
2146               if (is_del)
2147                 {
2148                   outside_fib->refcount--;
2149                   if (!outside_fib->refcount)
2150                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
2151                 }
2152               else
2153                 outside_fib->refcount++;
2154               goto feature_set;
2155             }
2156         }
2157       /* *INDENT-ON* */
2158       if (!is_del)
2159         {
2160           vec_add2 (sm->outside_fibs, outside_fib, 1);
2161           outside_fib->refcount = 1;
2162           outside_fib->fib_index = fib_index;
2163         }
2164     }
2165
2166 feature_set:
2167   if (is_inside)
2168     {
2169       if (sm->endpoint_dependent)
2170         {
2171           int rv =
2172             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2173           if (rv)
2174             return rv;
2175           rv =
2176             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2177                                                             !is_del);
2178           if (rv)
2179             return rv;
2180           vnet_feature_enable_disable ("ip4-unicast", "nat44-ed-hairpin-dst",
2181                                        sw_if_index, !is_del, 0, 0);
2182           vnet_feature_enable_disable ("ip4-output", "nat44-ed-hairpin-src",
2183                                        sw_if_index, !is_del, 0, 0);
2184         }
2185       else
2186         {
2187           int rv =
2188             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2189           if (rv)
2190             return rv;
2191           rv =
2192             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2193                                                             !is_del);
2194           if (rv)
2195             return rv;
2196           vnet_feature_enable_disable ("ip4-unicast", "nat44-hairpin-dst",
2197                                        sw_if_index, !is_del, 0, 0);
2198           vnet_feature_enable_disable ("ip4-output", "nat44-hairpin-src",
2199                                        sw_if_index, !is_del, 0, 0);
2200         }
2201       goto fq;
2202     }
2203
2204   if (sm->num_workers > 1)
2205     {
2206       int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2207       if (rv)
2208         return rv;
2209       rv =
2210         ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del);
2211       if (rv)
2212         return rv;
2213       vnet_feature_enable_disable ("ip4-unicast",
2214                                    "nat44-out2in-worker-handoff",
2215                                    sw_if_index, !is_del, 0, 0);
2216       vnet_feature_enable_disable ("ip4-output",
2217                                    "nat44-in2out-output-worker-handoff",
2218                                    sw_if_index, !is_del, 0, 0);
2219     }
2220   else
2221     {
2222       if (sm->endpoint_dependent)
2223         {
2224           int rv =
2225             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2226           if (rv)
2227             return rv;
2228           rv =
2229             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2230                                                             !is_del);
2231           if (rv)
2232             return rv;
2233           vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in",
2234                                        sw_if_index, !is_del, 0, 0);
2235           vnet_feature_enable_disable ("ip4-output", "nat44-ed-in2out-output",
2236                                        sw_if_index, !is_del, 0, 0);
2237         }
2238       else
2239         {
2240           int rv =
2241             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2242           if (rv)
2243             return rv;
2244           rv =
2245             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2246                                                             !is_del);
2247           if (rv)
2248             return rv;
2249           vnet_feature_enable_disable ("ip4-unicast", "nat44-out2in",
2250                                        sw_if_index, !is_del, 0, 0);
2251           vnet_feature_enable_disable ("ip4-output", "nat44-in2out-output",
2252                                        sw_if_index, !is_del, 0, 0);
2253         }
2254     }
2255
2256 fq:
2257   if (sm->fq_in2out_output_index == ~0 && sm->num_workers > 1)
2258     sm->fq_in2out_output_index =
2259       vlib_frame_queue_main_init (sm->in2out_output_node_index, 0);
2260
2261   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
2262     sm->fq_out2in_index =
2263       vlib_frame_queue_main_init (sm->out2in_node_index, 0);
2264
2265   /* *INDENT-OFF* */
2266   pool_foreach (i, sm->output_feature_interfaces,
2267   ({
2268     if (i->sw_if_index == sw_if_index)
2269       {
2270         if (is_del)
2271           pool_put (sm->output_feature_interfaces, i);
2272         else
2273           return VNET_API_ERROR_VALUE_EXIST;
2274
2275         goto fib;
2276       }
2277   }));
2278   /* *INDENT-ON* */
2279
2280   if (is_del)
2281     return VNET_API_ERROR_NO_SUCH_ENTRY;
2282
2283   pool_get (sm->output_feature_interfaces, i);
2284   i->sw_if_index = sw_if_index;
2285   i->flags = 0;
2286   nat_validate_counters (sm, sw_if_index);
2287   if (is_inside)
2288     i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
2289   else
2290     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
2291
2292   /* Add/delete external addresses to FIB */
2293 fib:
2294   if (is_inside)
2295     return 0;
2296
2297   /* *INDENT-OFF* */
2298   vec_foreach (ap, sm->addresses)
2299     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
2300
2301   pool_foreach (m, sm->static_mappings,
2302   ({
2303     if (!((is_addr_only_static_mapping(m)))  || (m->local_addr.as_u32 == m->external_addr.as_u32))
2304       continue;
2305
2306     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
2307   }));
2308   /* *INDENT-ON* */
2309
2310   return 0;
2311 }
2312
2313 int
2314 snat_set_workers (uword * bitmap)
2315 {
2316   snat_main_t *sm = &snat_main;
2317   int i, j = 0;
2318
2319   if (sm->num_workers < 2)
2320     return VNET_API_ERROR_FEATURE_DISABLED;
2321
2322   if (clib_bitmap_last_set (bitmap) >= sm->num_workers)
2323     return VNET_API_ERROR_INVALID_WORKER;
2324
2325   vec_free (sm->workers);
2326   /* *INDENT-OFF* */
2327   clib_bitmap_foreach (i, bitmap,
2328     ({
2329       vec_add1(sm->workers, i);
2330       sm->per_thread_data[sm->first_worker_index + i].snat_thread_index = j;
2331       sm->per_thread_data[sm->first_worker_index + i].thread_index = i;
2332       j++;
2333     }));
2334   /* *INDENT-ON* */
2335
2336   sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
2337
2338   return 0;
2339 }
2340
2341 static void
2342 snat_update_outside_fib (u32 sw_if_index, u32 new_fib_index,
2343                          u32 old_fib_index)
2344 {
2345   snat_main_t *sm = &snat_main;
2346   nat_outside_fib_t *outside_fib;
2347   snat_interface_t *i;
2348   u8 is_add = 1;
2349   u8 match = 0;
2350
2351   if (new_fib_index == old_fib_index)
2352     return;
2353
2354   if (!vec_len (sm->outside_fibs))
2355     return;
2356
2357   /* *INDENT-OFF* */
2358   pool_foreach (i, sm->interfaces,
2359     ({
2360       if (i->sw_if_index == sw_if_index)
2361         {
2362           if (!(nat_interface_is_outside (i)))
2363             return;
2364           match = 1;
2365         }
2366     }));
2367
2368   pool_foreach (i, sm->output_feature_interfaces,
2369     ({
2370       if (i->sw_if_index == sw_if_index)
2371         {
2372           if (!(nat_interface_is_outside (i)))
2373             return;
2374           match = 1;
2375         }
2376     }));
2377   /* *INDENT-ON* */
2378
2379   if (!match)
2380     return;
2381
2382   vec_foreach (outside_fib, sm->outside_fibs)
2383   {
2384     if (outside_fib->fib_index == old_fib_index)
2385       {
2386         outside_fib->refcount--;
2387         if (!outside_fib->refcount)
2388           vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
2389         break;
2390       }
2391   }
2392
2393   vec_foreach (outside_fib, sm->outside_fibs)
2394   {
2395     if (outside_fib->fib_index == new_fib_index)
2396       {
2397         outside_fib->refcount++;
2398         is_add = 0;
2399         break;
2400       }
2401   }
2402
2403   if (is_add)
2404     {
2405       vec_add2 (sm->outside_fibs, outside_fib, 1);
2406       outside_fib->refcount = 1;
2407       outside_fib->fib_index = new_fib_index;
2408     }
2409 }
2410
/*
 * Forward an interface FIB change to snat_update_outside_fib(); the im and
 * opaque arguments are not used.
 * NOTE(review): presumably installed as an ip4 table-bind callback; the
 * registration is not visible in this part of the file -- confirm.
 */
2411 static void
2412 snat_ip4_table_bind (ip4_main_t * im,
2413                      uword opaque,
2414                      u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
2415 {
2416   snat_update_outside_fib (sw_if_index, new_fib_index, old_fib_index);
2417 }
2418
/*
 * Forward declarations of static functions defined later in this file.
 */

/* Registered by snat_init() in im->add_del_interface_address_callbacks. */
2419 static void
2420 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
2421                                        uword opaque,
2422                                        u32 sw_if_index,
2423                                        ip4_address_t * address,
2424                                        u32 address_length,
2425                                        u32 if_address_index, u32 is_delete);
2426
/* Also registered by snat_init() in im->add_del_interface_address_callbacks. */
2427 static void
2428 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
2429                                  uword opaque,
2430                                  u32 sw_if_index,
2431                                  ip4_address_t * address,
2432                                  u32 address_length,
2433                                  u32 if_address_index, u32 is_delete);
2434
/* Installed by snat_init() as the initial sm->alloc_addr_and_port. */
2435 static int
2436 nat_alloc_addr_and_port_default (snat_address_t * addresses, u32 fib_index,
2437                                  u32 thread_index, nat_protocol_t proto,
2438                                  ip4_address_t * addr, u16 * port,
2439                                  u16 port_per_thread, u32 snat_thread_index);
2440
2441 void
2442 test_key_calc_split ()
2443 {
2444   ip4_address_t l_addr;
2445   l_addr.as_u8[0] = 1;
2446   l_addr.as_u8[1] = 1;
2447   l_addr.as_u8[2] = 1;
2448   l_addr.as_u8[3] = 1;
2449   ip4_address_t r_addr;
2450   r_addr.as_u8[0] = 2;
2451   r_addr.as_u8[1] = 2;
2452   r_addr.as_u8[2] = 2;
2453   r_addr.as_u8[3] = 2;
2454   u16 l_port = 40001;
2455   u16 r_port = 40301;
2456   u8 proto = 9;
2457   u32 fib_index = 9000001;
2458   u32 thread_index = 3000000001;
2459   u32 session_index = 3000000221;
2460   clib_bihash_kv_16_8_t kv;
2461   init_ed_kv (&kv, l_addr, l_port, r_addr, r_port, fib_index, proto,
2462               thread_index, session_index);
2463   ip4_address_t l_addr2;
2464   ip4_address_t r_addr2;
2465   clib_memset (&l_addr2, 0, sizeof (l_addr2));
2466   clib_memset (&r_addr2, 0, sizeof (r_addr2));
2467   u16 l_port2 = 0;
2468   u16 r_port2 = 0;
2469   u8 proto2 = 0;
2470   u32 fib_index2 = 0;
2471   split_ed_kv (&kv, &l_addr2, &r_addr2, &proto2, &fib_index2, &l_port2,
2472                &r_port2);
2473   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
2474   ASSERT (r_addr.as_u32 == r_addr2.as_u32);
2475   ASSERT (l_port == l_port2);
2476   ASSERT (r_port == r_port2);
2477   ASSERT (proto == proto2);
2478   ASSERT (fib_index == fib_index2);
2479   ASSERT (thread_index == ed_value_get_thread_index (&kv));
2480   ASSERT (session_index == ed_value_get_session_index (&kv));
2481
2482   fib_index = 7001;
2483   proto = 5;
2484   nat_protocol_t proto3 = ~0;
2485   u64 key = calc_nat_key (l_addr, l_port, fib_index, proto);
2486   split_nat_key (key, &l_addr2, &l_port2, &fib_index2, &proto3);
2487   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
2488   ASSERT (l_port == l_port2);
2489   ASSERT (proto == proto3);
2490   ASSERT (fib_index == fib_index2);
2491 }
2492
2493 static clib_error_t *
2494 nat_ip_table_add_del (vnet_main_t * vnm, u32 table_id, u32 is_add)
2495 {
2496   snat_main_t *sm = &snat_main;
2497   u32 fib_index;
2498
2499   if (sm->endpoint_dependent)
2500     {
2501       // TODO: consider removing all NAT interfaces
2502
2503       if (!is_add)
2504         {
2505           fib_index = ip4_fib_index_from_table_id (table_id);
2506           if (fib_index != ~0)
2507             expire_per_vrf_sessions (fib_index);
2508         }
2509     }
2510   return 0;
2511 }
2512
2513 VNET_IP_TABLE_ADD_DEL_FUNCTION (nat_ip_table_add_del);
2514
2515
2516 static clib_error_t *
2517 snat_init (vlib_main_t * vm)
2518 {
2519   snat_main_t *sm = &snat_main;
2520   clib_error_t *error = 0;
2521   ip4_main_t *im = &ip4_main;
2522   ip_lookup_main_t *lm = &im->lookup_main;
2523   uword *p;
2524   vlib_thread_registration_t *tr;
2525   vlib_thread_main_t *tm = vlib_get_thread_main ();
2526   uword *bitmap = 0;
2527   u32 i;
2528   ip4_add_del_interface_address_callback_t cb4;
2529   vlib_node_t *node;
2530
2531   sm->vnet_main = vnet_get_main ();
2532   sm->ip4_main = im;
2533   sm->ip4_lookup_main = lm;
2534   sm->api_main = vlibapi_get_main ();
2535   sm->first_worker_index = 0;
2536   sm->num_workers = 0;
2537   sm->workers = 0;
2538   sm->port_per_thread = 0xffff - 1024;
2539   sm->fq_in2out_index = ~0;
2540   sm->fq_in2out_output_index = ~0;
2541   sm->fq_out2in_index = ~0;
2542
2543   sm->alloc_addr_and_port = nat_alloc_addr_and_port_default;
2544   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_DEFAULT;
2545   sm->forwarding_enabled = 0;
2546   sm->log_class = vlib_log_register_class ("nat", 0);
2547   sm->log_level = SNAT_LOG_ERROR;
2548   sm->mss_clamping = 0;
2549
2550   node = vlib_get_node_by_name (vm, (u8 *) "error-drop");
2551   sm->error_node_index = node->index;
2552
2553   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-in2out");
2554   sm->pre_in2out_node_index = node->index;
2555   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-out2in");
2556   sm->pre_out2in_node_index = node->index;
2557
2558   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-in2out");
2559   sm->pre_in2out_node_index = node->index;
2560
2561   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-out2in");
2562   sm->pre_out2in_node_index = node->index;
2563
2564   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out");
2565   sm->in2out_node_index = node->index;
2566   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-output");
2567   sm->in2out_output_node_index = node->index;
2568   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-fast");
2569   sm->in2out_fast_node_index = node->index;
2570   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-slowpath");
2571   sm->in2out_slowpath_node_index = node->index;
2572   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-output-slowpath");
2573   sm->in2out_slowpath_output_node_index = node->index;
2574
2575   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
2576   sm->ed_in2out_node_index = node->index;
2577   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-slowpath");
2578   sm->ed_in2out_slowpath_node_index = node->index;
2579
2580   node = vlib_get_node_by_name (vm, (u8 *) "nat44-out2in");
2581   sm->out2in_node_index = node->index;
2582   node = vlib_get_node_by_name (vm, (u8 *) "nat44-out2in-fast");
2583   sm->out2in_fast_node_index = node->index;
2584
2585   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
2586   sm->ed_out2in_node_index = node->index;
2587   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in-slowpath");
2588   sm->ed_out2in_slowpath_node_index = node->index;
2589
2590   node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpinning");
2591   sm->hairpinning_node_index = node->index;
2592   node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpin-dst");
2593   sm->hairpin_dst_node_index = node->index;
2594   node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpin-src");
2595   sm->hairpin_src_node_index = node->index;
2596   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpinning");
2597   sm->ed_hairpinning_node_index = node->index;
2598   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpin-dst");
2599   sm->ed_hairpin_dst_node_index = node->index;
2600   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpin-src");
2601   sm->ed_hairpin_src_node_index = node->index;
2602
2603   p = hash_get_mem (tm->thread_registrations_by_name, "workers");
2604   if (p)
2605     {
2606       tr = (vlib_thread_registration_t *) p[0];
2607       if (tr)
2608         {
2609           sm->num_workers = tr->count;
2610           sm->first_worker_index = tr->first_index;
2611         }
2612     }
2613
2614   vec_validate (sm->per_thread_data, tm->n_vlib_mains - 1);
2615
2616   /* Use all available workers by default */
2617   if (sm->num_workers > 1)
2618     {
2619       for (i = 0; i < sm->num_workers; i++)
2620         bitmap = clib_bitmap_set (bitmap, i, 1);
2621       snat_set_workers (bitmap);
2622       clib_bitmap_free (bitmap);
2623     }
2624   else
2625     {
2626       sm->per_thread_data[0].snat_thread_index = 0;
2627     }
2628
2629   error = snat_api_init (vm, sm);
2630   if (error)
2631     return error;
2632
2633   /* Set up the interface address add/del callback */
2634   cb4.function = snat_ip4_add_del_interface_address_cb;
2635   cb4.function_opaque = 0;
2636
2637   vec_add1 (im->add_del_interface_address_callbacks, cb4);
2638
2639   cb4.function = nat_ip4_add_del_addr_only_sm_cb;
2640   cb4.function_opaque = 0;
2641
2642   vec_add1 (im->add_del_interface_address_callbacks, cb4);
2643
2644   nat_dpo_module_init ();
2645
2646   /* Init counters */
2647   sm->total_users.name = "total-users";
2648   sm->total_users.stat_segment_name = "/nat44/total-users";
2649   vlib_validate_simple_counter (&sm->total_users, 0);
2650   vlib_zero_simple_counter (&sm->total_users, 0);
2651   sm->total_sessions.name = "total-sessions";
2652   sm->total_sessions.stat_segment_name = "/nat44/total-sessions";
2653   vlib_validate_simple_counter (&sm->total_sessions, 0);
2654   vlib_zero_simple_counter (&sm->total_sessions, 0);
2655   sm->user_limit_reached.name = "user-limit-reached";
2656   sm->user_limit_reached.stat_segment_name = "/nat44/user-limit-reached";
2657   vlib_validate_simple_counter (&sm->user_limit_reached, 0);
2658   vlib_zero_simple_counter (&sm->user_limit_reached, 0);
2659
2660 #define _(x)                                            \
2661   sm->counters.fastpath.in2out.x.name = #x;             \
2662   sm->counters.fastpath.in2out.x.stat_segment_name =    \
2663       "/nat44/in2out/fastpath/" #x;                     \
2664   sm->counters.slowpath.in2out.x.name = #x;             \
2665   sm->counters.slowpath.in2out.x.stat_segment_name =    \
2666       "/nat44/in2out/slowpath/" #x;                     \
2667   sm->counters.fastpath.out2in.x.name = #x;             \
2668   sm->counters.fastpath.out2in.x.stat_segment_name =    \
2669       "/nat44/out2in/fastpath/" #x;                     \
2670   sm->counters.slowpath.out2in.x.name = #x;             \
2671   sm->counters.slowpath.out2in.x.stat_segment_name =    \
2672       "/nat44/out2in/slowpath/" #x;                     \
2673   sm->counters.fastpath.in2out_ed.x.name = #x;          \
2674   sm->counters.fastpath.in2out_ed.x.stat_segment_name = \
2675       "/nat44/ed/in2out/fastpath/" #x;                  \
2676   sm->counters.slowpath.in2out_ed.x.name = #x;          \
2677   sm->counters.slowpath.in2out_ed.x.stat_segment_name = \
2678       "/nat44/ed/in2out/slowpath/" #x;                  \
2679   sm->counters.fastpath.out2in_ed.x.name = #x;          \
2680   sm->counters.fastpath.out2in_ed.x.stat_segment_name = \
2681       "/nat44/ed/out2in/fastpath/" #x;                  \
2682   sm->counters.slowpath.out2in_ed.x.name = #x;          \
2683   sm->counters.slowpath.out2in_ed.x.stat_segment_name = \
2684       "/nat44/ed/out2in/slowpath/" #x;
2685   foreach_nat_counter;
2686 #undef _
2687   sm->counters.hairpinning.name = "hairpinning";
2688   sm->counters.hairpinning.stat_segment_name = "/nat44/hairpinning";
2689
2690   /* Init IPFIX logging */
2691   snat_ipfix_logging_init (vm);
2692
2693   /* Init NAT64 */
2694   error = nat64_init (vm);
2695   if (error)
2696     return error;
2697
2698   ip4_table_bind_callback_t cbt4 = {
2699     .function = snat_ip4_table_bind,
2700   };
2701   vec_add1 (ip4_main.table_bind_callbacks, cbt4);
2702
2703   nat_fib_src_hi = fib_source_allocate ("nat-hi",
2704                                         FIB_SOURCE_PRIORITY_HI,
2705                                         FIB_SOURCE_BH_SIMPLE);
2706   nat_fib_src_low = fib_source_allocate ("nat-low",
2707                                          FIB_SOURCE_PRIORITY_LOW,
2708                                          FIB_SOURCE_BH_SIMPLE);
2709
2710   test_key_calc_split ();
2711   return error;
2712 }
2713
2714 VLIB_INIT_FUNCTION (snat_init);
2715
2716 void
2717 snat_free_outside_address_and_port (snat_address_t * addresses,
2718                                     u32 thread_index,
2719                                     ip4_address_t * addr,
2720                                     u16 port, nat_protocol_t protocol)
2721 {
2722   snat_address_t *a;
2723   u32 address_index;
2724   u16 port_host_byte_order = clib_net_to_host_u16 (port);
2725
2726   for (address_index = 0; address_index < vec_len (addresses);
2727        address_index++)
2728     {
2729       if (addresses[address_index].addr.as_u32 == addr->as_u32)
2730         break;
2731     }
2732
2733   ASSERT (address_index < vec_len (addresses));
2734
2735   a = addresses + address_index;
2736
2737   switch (protocol)
2738     {
2739 #define _(N, i, n, s) \
2740     case NAT_PROTOCOL_##N: \
2741       ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \
2742       --a->busy_##n##_port_refcounts[port_host_byte_order]; \
2743       a->busy_##n##_ports--; \
2744       a->busy_##n##_ports_per_thread[thread_index]--; \
2745       break;
2746       foreach_nat_protocol
2747 #undef _
2748     default:
2749       nat_elog_info ("unknown protocol");
2750       return;
2751     }
2752 }
2753
2754 static int
2755 nat_set_outside_address_and_port (snat_address_t * addresses,
2756                                   u32 thread_index, ip4_address_t addr,
2757                                   u16 port, nat_protocol_t protocol)
2758 {
2759   snat_address_t *a = 0;
2760   u32 address_index;
2761   u16 port_host_byte_order = clib_net_to_host_u16 (port);
2762
2763   for (address_index = 0; address_index < vec_len (addresses);
2764        address_index++)
2765     {
2766       if (addresses[address_index].addr.as_u32 != addr.as_u32)
2767         continue;
2768
2769       a = addresses + address_index;
2770       switch (protocol)
2771         {
2772 #define _(N, j, n, s) \
2773         case NAT_PROTOCOL_##N: \
2774           if (a->busy_##n##_port_refcounts[port_host_byte_order]) \
2775             return VNET_API_ERROR_INSTANCE_IN_USE; \
2776           ++a->busy_##n##_port_refcounts[port_host_byte_order]; \
2777           a->busy_##n##_ports_per_thread[thread_index]++; \
2778           a->busy_##n##_ports++; \
2779           return 0;
2780           foreach_nat_protocol
2781 #undef _
2782         default:
2783           nat_elog_info ("unknown protocol");
2784           return 1;
2785         }
2786     }
2787
2788   return VNET_API_ERROR_NO_SUCH_ENTRY;
2789 }
2790
2791 int
2792 snat_static_mapping_match (snat_main_t * sm,
2793                            ip4_address_t match_addr,
2794                            u16 match_port,
2795                            u32 match_fib_index,
2796                            nat_protocol_t match_protocol,
2797                            ip4_address_t * mapping_addr,
2798                            u16 * mapping_port,
2799                            u32 * mapping_fib_index,
2800                            u8 by_external,
2801                            u8 * is_addr_only,
2802                            twice_nat_type_t * twice_nat,
2803                            lb_nat_type_t * lb, ip4_address_t * ext_host_addr,
2804                            u8 * is_identity_nat)
2805 {
2806   clib_bihash_kv_8_8_t kv, value;
2807   snat_static_mapping_t *m;
2808   clib_bihash_8_8_t *mapping_hash = &sm->static_mapping_by_local;
2809   u32 rand, lo = 0, hi, mid, *tmp = 0, i;
2810   u8 backend_index;
2811   nat44_lb_addr_port_t *local;
2812
2813   if (by_external)
2814     {
2815       mapping_hash = &sm->static_mapping_by_external;
2816       init_nat_k (&kv, match_addr, match_port, 0, match_protocol);
2817       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2818         {
2819           /* Try address only mapping */
2820           init_nat_k (&kv, match_addr, 0, 0, 0);
2821           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2822             return 1;
2823         }
2824
2825     }
2826   else
2827     {
2828       init_nat_k (&kv, match_addr, match_port, match_fib_index,
2829                   match_protocol);
2830       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2831         {
2832           /* Try address only mapping */
2833           init_nat_k (&kv, match_addr, 0, match_fib_index, 0);
2834           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2835             return 1;
2836         }
2837
2838     }
2839
2840   m = pool_elt_at_index (sm->static_mappings, value.value);
2841
2842   if (by_external)
2843     {
2844       if (is_lb_static_mapping (m))
2845         {
2846           if (PREDICT_FALSE (lb != 0))
2847             *lb = m->affinity ? AFFINITY_LB_NAT : LB_NAT;
2848           if (m->affinity && !nat_affinity_find_and_lock (ext_host_addr[0],
2849                                                           match_addr,
2850                                                           match_protocol,
2851                                                           match_port,
2852                                                           &backend_index))
2853             {
2854               local = pool_elt_at_index (m->locals, backend_index);
2855               *mapping_addr = local->addr;
2856               *mapping_port = local->port;
2857               *mapping_fib_index = local->fib_index;
2858               goto end;
2859             }
2860           // pick locals matching this worker
2861           if (PREDICT_FALSE (sm->num_workers > 1))
2862             {
2863               u32 thread_index = vlib_get_thread_index ();
2864               /* *INDENT-OFF* */
2865               pool_foreach_index (i, m->locals,
2866               ({
2867                 local = pool_elt_at_index (m->locals, i);
2868
2869                 ip4_header_t ip = {
2870                   .src_address = local->addr,
2871                 };
2872
2873                 if (sm->worker_in2out_cb (&ip, m->fib_index, 0) ==
2874                     thread_index)
2875                   {
2876                     vec_add1 (tmp, i);
2877                   }
2878               }));
2879               /* *INDENT-ON* */
2880               ASSERT (vec_len (tmp) != 0);
2881             }
2882           else
2883             {
2884               /* *INDENT-OFF* */
2885               pool_foreach_index (i, m->locals,
2886               ({
2887                 vec_add1 (tmp, i);
2888               }));
2889               /* *INDENT-ON* */
2890             }
2891           hi = vec_len (tmp) - 1;
2892           local = pool_elt_at_index (m->locals, tmp[hi]);
2893           rand = 1 + (random_u32 (&sm->random_seed) % local->prefix);
2894           while (lo < hi)
2895             {
2896               mid = ((hi - lo) >> 1) + lo;
2897               local = pool_elt_at_index (m->locals, tmp[mid]);
2898               (rand > local->prefix) ? (lo = mid + 1) : (hi = mid);
2899             }
2900           local = pool_elt_at_index (m->locals, tmp[lo]);
2901           if (!(local->prefix >= rand))
2902             return 1;
2903           *mapping_addr = local->addr;
2904           *mapping_port = local->port;
2905           *mapping_fib_index = local->fib_index;
2906           if (m->affinity)
2907             {
2908               if (nat_affinity_create_and_lock (ext_host_addr[0], match_addr,
2909                                                 match_protocol, match_port,
2910                                                 tmp[lo], m->affinity,
2911                                                 m->affinity_per_service_list_head_index))
2912                 nat_elog_info ("create affinity record failed");
2913             }
2914           vec_free (tmp);
2915         }
2916       else
2917         {
2918           if (PREDICT_FALSE (lb != 0))
2919             *lb = NO_LB_NAT;
2920           *mapping_fib_index = m->fib_index;
2921           *mapping_addr = m->local_addr;
2922           /* Address only mapping doesn't change port */
2923           *mapping_port = is_addr_only_static_mapping (m) ? match_port
2924             : m->local_port;
2925         }
2926     }
2927   else
2928     {
2929       *mapping_addr = m->external_addr;
2930       /* Address only mapping doesn't change port */
2931       *mapping_port = is_addr_only_static_mapping (m) ? match_port
2932         : m->external_port;
2933       *mapping_fib_index = sm->outside_fib_index;
2934     }
2935
2936 end:
2937   if (PREDICT_FALSE (is_addr_only != 0))
2938     *is_addr_only = is_addr_only_static_mapping (m);
2939
2940   if (PREDICT_FALSE (twice_nat != 0))
2941     *twice_nat = m->twice_nat;
2942
2943   if (PREDICT_FALSE (is_identity_nat != 0))
2944     *is_identity_nat = is_identity_static_mapping (m);
2945
2946   return 0;
2947 }
2948
2949 int
2950 snat_alloc_outside_address_and_port (snat_address_t * addresses,
2951                                      u32 fib_index,
2952                                      u32 thread_index,
2953                                      nat_protocol_t proto,
2954                                      ip4_address_t * addr,
2955                                      u16 * port,
2956                                      u16 port_per_thread,
2957                                      u32 snat_thread_index)
2958 {
2959   snat_main_t *sm = &snat_main;
2960
2961   return sm->alloc_addr_and_port (addresses, fib_index, thread_index, proto,
2962                                   addr, port, port_per_thread,
2963                                   snat_thread_index);
2964 }
2965
2966 static int
2967 nat_alloc_addr_and_port_default (snat_address_t * addresses,
2968                                  u32 fib_index,
2969                                  u32 thread_index,
2970                                  nat_protocol_t proto,
2971                                  ip4_address_t * addr,
2972                                  u16 * port,
2973                                  u16 port_per_thread, u32 snat_thread_index)
2974 {
2975   int i;
2976   snat_address_t *a, *ga = 0;
2977   u32 portnum;
2978
2979   for (i = 0; i < vec_len (addresses); i++)
2980     {
2981       a = addresses + i;
2982       switch (proto)
2983         {
2984 #define _(N, j, n, s) \
2985         case NAT_PROTOCOL_##N: \
2986           if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \
2987             { \
2988               if (a->fib_index == fib_index) \
2989                 { \
2990                   while (1) \
2991                     { \
2992                       portnum = (port_per_thread * \
2993                         snat_thread_index) + \
2994                         snat_random_port(0, port_per_thread - 1) + 1024; \
2995                       if (a->busy_##n##_port_refcounts[portnum]) \
2996                         continue; \
2997                       --a->busy_##n##_port_refcounts[portnum]; \
2998                       a->busy_##n##_ports_per_thread[thread_index]++; \
2999                       a->busy_##n##_ports++; \
3000                       *addr = a->addr; \
3001                       *port = clib_host_to_net_u16(portnum); \
3002                       return 0; \
3003                     } \
3004                 } \
3005               else if (a->fib_index == ~0) \
3006                 { \
3007                   ga = a; \
3008                 } \
3009             } \
3010           break;
3011           foreach_nat_protocol
3012 #undef _
3013         default:
3014           nat_elog_info ("unknown protocol");
3015           return 1;
3016         }
3017
3018     }
3019
3020   if (ga)
3021     {
3022       a = ga;
3023       switch (proto)
3024         {
3025 #define _(N, j, n, s) \
3026         case NAT_PROTOCOL_##N: \
3027           while (1) \
3028             { \
3029               portnum = (port_per_thread * \
3030                 snat_thread_index) + \
3031                 snat_random_port(0, port_per_thread - 1) + 1024; \
3032               if (a->busy_##n##_port_refcounts[portnum]) \
3033                 continue; \
3034               ++a->busy_##n##_port_refcounts[portnum]; \
3035               a->busy_##n##_ports_per_thread[thread_index]++; \
3036               a->busy_##n##_ports++; \
3037               *addr = a->addr; \
3038               *port = clib_host_to_net_u16(portnum); \
3039               return 0; \
3040             }
3041           break;
3042           foreach_nat_protocol
3043 #undef _
3044         default:
3045           nat_elog_info ("unknown protocol");
3046           return 1;
3047         }
3048     }
3049
3050   /* Totally out of translations to use... */
3051   snat_ipfix_logging_addresses_exhausted (thread_index, 0);
3052   return 1;
3053 }
3054
3055 static int
3056 nat_alloc_addr_and_port_mape (snat_address_t * addresses, u32 fib_index,
3057                               u32 thread_index, nat_protocol_t proto,
3058                               ip4_address_t * addr, u16 * port,
3059                               u16 port_per_thread, u32 snat_thread_index)
3060 {
3061   snat_main_t *sm = &snat_main;
3062   snat_address_t *a = addresses;
3063   u16 m, ports, portnum, A, j;
3064   m = 16 - (sm->psid_offset + sm->psid_length);
3065   ports = (1 << (16 - sm->psid_length)) - (1 << m);
3066
3067   if (!vec_len (addresses))
3068     goto exhausted;
3069
3070   switch (proto)
3071     {
3072 #define _(N, i, n, s) \
3073     case NAT_PROTOCOL_##N: \
3074       if (a->busy_##n##_ports < ports) \
3075         { \
3076           while (1) \
3077             { \
3078               A = snat_random_port(1, pow2_mask(sm->psid_offset)); \
3079               j = snat_random_port(0, pow2_mask(m)); \
3080               portnum = A | (sm->psid << sm->psid_offset) | (j << (16 - m)); \
3081               if (a->busy_##n##_port_refcounts[portnum]) \
3082                 continue; \
3083               ++a->busy_##n##_port_refcounts[portnum]; \
3084               a->busy_##n##_ports++; \
3085               *addr = a->addr; \
3086               *port = clib_host_to_net_u16 (portnum); \
3087               return 0; \
3088             } \
3089         } \
3090       break;
3091       foreach_nat_protocol
3092 #undef _
3093     default:
3094       nat_elog_info ("unknown protocol");
3095       return 1;
3096     }
3097
3098 exhausted:
3099   /* Totally out of translations to use... */
3100   snat_ipfix_logging_addresses_exhausted (thread_index, 0);
3101   return 1;
3102 }
3103
3104 static int
3105 nat_alloc_addr_and_port_range (snat_address_t * addresses, u32 fib_index,
3106                                u32 thread_index, nat_protocol_t proto,
3107                                ip4_address_t * addr, u16 * port,
3108                                u16 port_per_thread, u32 snat_thread_index)
3109 {
3110   snat_main_t *sm = &snat_main;
3111   snat_address_t *a = addresses;
3112   u16 portnum, ports;
3113
3114   ports = sm->end_port - sm->start_port + 1;
3115
3116   if (!vec_len (addresses))
3117     goto exhausted;
3118
3119   switch (proto)
3120     {
3121 #define _(N, i, n, s) \
3122     case NAT_PROTOCOL_##N: \
3123       if (a->busy_##n##_ports < ports) \
3124         { \
3125           while (1) \
3126             { \
3127               portnum = snat_random_port(sm->start_port, sm->end_port); \
3128               if (a->busy_##n##_port_refcounts[portnum]) \
3129                 continue; \
3130               ++a->busy_##n##_port_refcounts[portnum]; \
3131               a->busy_##n##_ports++; \
3132               *addr = a->addr; \
3133               *port = clib_host_to_net_u16 (portnum); \
3134               return 0; \
3135             } \
3136         } \
3137       break;
3138       foreach_nat_protocol
3139 #undef _
3140     default:
3141       nat_elog_info ("unknown protocol");
3142       return 1;
3143     }
3144
3145 exhausted:
3146   /* Totally out of translations to use... */
3147   snat_ipfix_logging_addresses_exhausted (thread_index, 0);
3148   return 1;
3149 }
3150
3151 void
3152 nat44_add_del_address_dpo (ip4_address_t addr, u8 is_add)
3153 {
3154   dpo_id_t dpo_v4 = DPO_INVALID;
3155   fib_prefix_t pfx = {
3156     .fp_proto = FIB_PROTOCOL_IP4,
3157     .fp_len = 32,
3158     .fp_addr.ip4.as_u32 = addr.as_u32,
3159   };
3160
3161   if (is_add)
3162     {
3163       nat_dpo_create (DPO_PROTO_IP4, 0, &dpo_v4);
3164       fib_table_entry_special_dpo_add (0, &pfx, nat_fib_src_hi,
3165                                        FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v4);
3166       dpo_reset (&dpo_v4);
3167     }
3168   else
3169     {
3170       fib_table_entry_special_remove (0, &pfx, nat_fib_src_hi);
3171     }
3172 }
3173
3174 u8 *
3175 format_session_kvp (u8 * s, va_list * args)
3176 {
3177   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
3178
3179   s = format (s, "%U session-index %llu", format_snat_key, v->key, v->value);
3180
3181   return s;
3182 }
3183
3184 u8 *
3185 format_static_mapping_kvp (u8 * s, va_list * args)
3186 {
3187   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
3188
3189   s = format (s, "%U static-mapping-index %llu",
3190               format_snat_key, v->key, v->value);
3191
3192   return s;
3193 }
3194
3195 u8 *
3196 format_user_kvp (u8 * s, va_list * args)
3197 {
3198   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
3199   snat_user_key_t k;
3200
3201   k.as_u64 = v->key;
3202
3203   s = format (s, "%U fib %d user-index %llu", format_ip4_address, &k.addr,
3204               k.fib_index, v->value);
3205
3206   return s;
3207 }
3208
3209 u8 *
3210 format_ed_session_kvp (u8 * s, va_list * args)
3211 {
3212   clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
3213
3214   u8 proto;
3215   u16 r_port, l_port;
3216   ip4_address_t l_addr, r_addr;
3217   u32 fib_index;
3218
3219   split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port);
3220   s =
3221     format (s,
3222             "local %U:%d remote %U:%d proto %U fib %d thread-index %u session-index %u",
3223             format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port),
3224             format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port),
3225             format_ip_protocol, proto, fib_index,
3226             ed_value_get_session_index (v), ed_value_get_thread_index (v));
3227
3228   return s;
3229 }
3230
3231 static u32
3232 snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0,
3233                            u8 is_output)
3234 {
3235   snat_main_t *sm = &snat_main;
3236   u32 next_worker_index = 0;
3237   u32 hash;
3238
3239   next_worker_index = sm->first_worker_index;
3240   hash = ip0->src_address.as_u32 + (ip0->src_address.as_u32 >> 8) +
3241     (ip0->src_address.as_u32 >> 16) + (ip0->src_address.as_u32 >> 24);
3242
3243   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
3244     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
3245   else
3246     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
3247
3248   return next_worker_index;
3249 }
3250
3251 static u32
3252 snat_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip0,
3253                            u32 rx_fib_index0, u8 is_output)
3254 {
3255   snat_main_t *sm = &snat_main;
3256   udp_header_t *udp;
3257   u16 port;
3258   clib_bihash_kv_8_8_t kv, value;
3259   snat_static_mapping_t *m;
3260   u32 proto;
3261   u32 next_worker_index = 0;
3262
3263   /* first try static mappings without port */
3264   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3265     {
3266       init_nat_k (&kv, ip0->dst_address, 0, rx_fib_index0, 0);
3267       if (!clib_bihash_search_8_8
3268           (&sm->static_mapping_by_external, &kv, &value))
3269         {
3270           m = pool_elt_at_index (sm->static_mappings, value.value);
3271           return m->workers[0];
3272         }
3273     }
3274
3275   proto = ip_proto_to_nat_proto (ip0->protocol);
3276   udp = ip4_next_header (ip0);
3277   port = udp->dst_port;
3278
3279   /* unknown protocol */
3280   if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
3281     {
3282       /* use current thread */
3283       return vlib_get_thread_index ();
3284     }
3285
3286   if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_ICMP))
3287     {
3288       icmp46_header_t *icmp = (icmp46_header_t *) udp;
3289       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3290       if (!icmp_type_is_error_message
3291           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
3292         port = vnet_buffer (b)->ip.reass.l4_src_port;
3293       else
3294         {
3295           /* if error message, then it's not fragmented and we can access it */
3296           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3297           proto = ip_proto_to_nat_proto (inner_ip->protocol);
3298           void *l4_header = ip4_next_header (inner_ip);
3299           switch (proto)
3300             {
3301             case NAT_PROTOCOL_ICMP:
3302               icmp = (icmp46_header_t *) l4_header;
3303               echo = (icmp_echo_header_t *) (icmp + 1);
3304               port = echo->identifier;
3305               break;
3306             case NAT_PROTOCOL_UDP:
3307             case NAT_PROTOCOL_TCP:
3308               port = ((tcp_udp_header_t *) l4_header)->src_port;
3309               break;
3310             default:
3311               return vlib_get_thread_index ();
3312             }
3313         }
3314     }
3315
3316   /* try static mappings with port */
3317   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3318     {
3319       init_nat_k (&kv, ip0->dst_address, port, rx_fib_index0, proto);
3320       if (!clib_bihash_search_8_8
3321           (&sm->static_mapping_by_external, &kv, &value))
3322         {
3323           m = pool_elt_at_index (sm->static_mappings, value.value);
3324           return m->workers[0];
3325         }
3326     }
3327
3328   /* worker by outside port */
3329   next_worker_index = sm->first_worker_index;
3330   next_worker_index +=
3331     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
3332   return next_worker_index;
3333 }
3334
3335 static u32
3336 nat44_ed_get_worker_in2out_cb (ip4_header_t * ip, u32 rx_fib_index,
3337                                u8 is_output)
3338 {
3339   snat_main_t *sm = &snat_main;
3340   u32 next_worker_index = sm->first_worker_index;
3341   u32 hash;
3342
3343   clib_bihash_kv_16_8_t kv16, value16;
3344   snat_main_per_thread_data_t *tsm;
3345   udp_header_t *udp;
3346
3347   if (PREDICT_FALSE (is_output))
3348     {
3349       u32 fib_index = sm->outside_fib_index;
3350       nat_outside_fib_t *outside_fib;
3351       fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
3352       fib_prefix_t pfx = {
3353         .fp_proto = FIB_PROTOCOL_IP4,
3354         .fp_len = 32,
3355         .fp_addr = {
3356                     .ip4.as_u32 = ip->dst_address.as_u32,
3357                     }
3358         ,
3359       };
3360
3361       udp = ip4_next_header (ip);
3362
3363       switch (vec_len (sm->outside_fibs))
3364         {
3365         case 0:
3366           fib_index = sm->outside_fib_index;
3367           break;
3368         case 1:
3369           fib_index = sm->outside_fibs[0].fib_index;
3370           break;
3371         default:
3372             /* *INDENT-OFF* */
3373             vec_foreach (outside_fib, sm->outside_fibs)
3374               {
3375                 fei = fib_table_lookup (outside_fib->fib_index, &pfx);
3376                 if (FIB_NODE_INDEX_INVALID != fei)
3377                   {
3378                     if (fib_entry_get_resolving_interface (fei) != ~0)
3379                       {
3380                         fib_index = outside_fib->fib_index;
3381                         break;
3382                       }
3383                   }
3384               }
3385             /* *INDENT-ON* */
3386           break;
3387         }
3388
3389       init_ed_k (&kv16, ip->src_address, udp->src_port, ip->dst_address,
3390                  udp->dst_port, fib_index, ip->protocol);
3391
3392       if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed,
3393                                                   &kv16, &value16)))
3394         {
3395           tsm =
3396             vec_elt_at_index (sm->per_thread_data,
3397                               ed_value_get_thread_index (&value16));
3398           next_worker_index += tsm->thread_index;
3399
3400           nat_elog_debug_handoff ("HANDOFF IN2OUT-OUTPUT-FEATURE (session)",
3401                                   next_worker_index, fib_index,
3402                                   clib_net_to_host_u32 (ip->
3403                                                         src_address.as_u32),
3404                                   clib_net_to_host_u32 (ip->
3405                                                         dst_address.as_u32));
3406
3407           return next_worker_index;
3408         }
3409     }
3410
3411   hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
3412     (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
3413
3414   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
3415     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
3416   else
3417     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
3418
3419   if (PREDICT_TRUE (!is_output))
3420     {
3421       nat_elog_debug_handoff ("HANDOFF IN2OUT",
3422                               next_worker_index, rx_fib_index,
3423                               clib_net_to_host_u32 (ip->src_address.as_u32),
3424                               clib_net_to_host_u32 (ip->dst_address.as_u32));
3425     }
3426   else
3427     {
3428       nat_elog_debug_handoff ("HANDOFF IN2OUT-OUTPUT-FEATURE",
3429                               next_worker_index, rx_fib_index,
3430                               clib_net_to_host_u32 (ip->src_address.as_u32),
3431                               clib_net_to_host_u32 (ip->dst_address.as_u32));
3432     }
3433
3434   return next_worker_index;
3435 }
3436
3437 static u32
3438 nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip,
3439                                u32 rx_fib_index, u8 is_output)
3440 {
3441   snat_main_t *sm = &snat_main;
3442   clib_bihash_kv_8_8_t kv, value;
3443   clib_bihash_kv_16_8_t kv16, value16;
3444   snat_main_per_thread_data_t *tsm;
3445
3446   u32 proto, next_worker_index = 0;
3447   udp_header_t *udp;
3448   u16 port;
3449   snat_static_mapping_t *m;
3450   u32 hash;
3451
3452   proto = ip_proto_to_nat_proto (ip->protocol);
3453
3454   if (PREDICT_TRUE (proto == NAT_PROTOCOL_UDP || proto == NAT_PROTOCOL_TCP))
3455     {
3456       udp = ip4_next_header (ip);
3457
3458       init_ed_k (&kv16, ip->dst_address, udp->dst_port, ip->src_address,
3459                  udp->src_port, rx_fib_index, ip->protocol);
3460
3461       if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed,
3462                                                   &kv16, &value16)))
3463         {
3464           tsm =
3465             vec_elt_at_index (sm->per_thread_data,
3466                               ed_value_get_thread_index (&value16));
3467           vnet_buffer2 (b)->nat.ed_out2in_nat_session_index =
3468             ed_value_get_session_index (&value16);
3469           next_worker_index = sm->first_worker_index + tsm->thread_index;
3470           nat_elog_debug_handoff ("HANDOFF OUT2IN (session)",
3471                                   next_worker_index, rx_fib_index,
3472                                   clib_net_to_host_u32 (ip->
3473                                                         src_address.as_u32),
3474                                   clib_net_to_host_u32 (ip->
3475                                                         dst_address.as_u32));
3476           return next_worker_index;
3477         }
3478     }
3479   else if (proto == NAT_PROTOCOL_ICMP)
3480     {
3481       if (!get_icmp_o2i_ed_key (b, ip, rx_fib_index, ~0, ~0, 0, 0, 0, &kv16))
3482         {
3483           if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed,
3484                                                       &kv16, &value16)))
3485             {
3486               tsm =
3487                 vec_elt_at_index (sm->per_thread_data,
3488                                   ed_value_get_thread_index (&value16));
3489               next_worker_index = sm->first_worker_index + tsm->thread_index;
3490               nat_elog_debug_handoff ("HANDOFF OUT2IN (session)",
3491                                       next_worker_index, rx_fib_index,
3492                                       clib_net_to_host_u32 (ip->
3493                                                             src_address.as_u32),
3494                                       clib_net_to_host_u32 (ip->
3495                                                             dst_address.as_u32));
3496               return next_worker_index;
3497             }
3498         }
3499     }
3500
3501   /* first try static mappings without port */
3502   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3503     {
3504       init_nat_k (&kv, ip->dst_address, 0, 0, 0);
3505       if (!clib_bihash_search_8_8
3506           (&sm->static_mapping_by_external, &kv, &value))
3507         {
3508           m = pool_elt_at_index (sm->static_mappings, value.value);
3509           next_worker_index = m->workers[0];
3510           goto done;
3511         }
3512     }
3513
3514   /* unknown protocol */
3515   if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
3516     {
3517       /* use current thread */
3518       next_worker_index = vlib_get_thread_index ();
3519       goto done;
3520     }
3521
3522   udp = ip4_next_header (ip);
3523   port = udp->dst_port;
3524
3525   if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
3526     {
3527       icmp46_header_t *icmp = (icmp46_header_t *) udp;
3528       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3529       if (!icmp_type_is_error_message
3530           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
3531         port = vnet_buffer (b)->ip.reass.l4_src_port;
3532       else
3533         {
3534           /* if error message, then it's not fragmented and we can access it */
3535           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3536           proto = ip_proto_to_nat_proto (inner_ip->protocol);
3537           void *l4_header = ip4_next_header (inner_ip);
3538           switch (proto)
3539             {
3540             case NAT_PROTOCOL_ICMP:
3541               icmp = (icmp46_header_t *) l4_header;
3542               echo = (icmp_echo_header_t *) (icmp + 1);
3543               port = echo->identifier;
3544               break;
3545             case NAT_PROTOCOL_UDP:
3546             case NAT_PROTOCOL_TCP:
3547               port = ((tcp_udp_header_t *) l4_header)->src_port;
3548               break;
3549             default:
3550               next_worker_index = vlib_get_thread_index ();
3551               goto done;
3552             }
3553         }
3554     }
3555
3556   /* try static mappings with port */
3557   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3558     {
3559       init_nat_k (&kv, ip->dst_address, proto, 0, port);
3560       if (!clib_bihash_search_8_8
3561           (&sm->static_mapping_by_external, &kv, &value))
3562         {
3563           m = pool_elt_at_index (sm->static_mappings, value.value);
3564           if (!is_lb_static_mapping (m))
3565             {
3566               next_worker_index = m->workers[0];
3567               goto done;
3568             }
3569
3570           hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
3571             (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
3572
3573           if (PREDICT_TRUE (is_pow2 (_vec_len (m->workers))))
3574             next_worker_index =
3575               m->workers[hash & (_vec_len (m->workers) - 1)];
3576           else
3577             next_worker_index = m->workers[hash % _vec_len (m->workers)];
3578           goto done;
3579         }
3580     }
3581
3582   /* worker by outside port */
3583   next_worker_index = sm->first_worker_index;
3584   next_worker_index +=
3585     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
3586
3587 done:
3588   nat_elog_debug_handoff ("HANDOFF OUT2IN", next_worker_index, rx_fib_index,
3589                           clib_net_to_host_u32 (ip->src_address.as_u32),
3590                           clib_net_to_host_u32 (ip->dst_address.as_u32));
3591   return next_worker_index;
3592 }
3593
3594 void
3595 nat_ha_sadd_cb (ip4_address_t * in_addr, u16 in_port,
3596                 ip4_address_t * out_addr, u16 out_port,
3597                 ip4_address_t * eh_addr, u16 eh_port,
3598                 ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
3599                 u32 fib_index, u16 flags, u32 thread_index)
3600 {
3601   snat_main_t *sm = &snat_main;
3602   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
3603   snat_user_t *u;
3604   snat_session_t *s;
3605   clib_bihash_kv_8_8_t kv;
3606   vlib_main_t *vm = vlib_get_main ();
3607   f64 now = vlib_time_now (vm);
3608   nat_outside_fib_t *outside_fib;
3609   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
3610   fib_prefix_t pfx = {
3611     .fp_proto = FIB_PROTOCOL_IP4,
3612     .fp_len = 32,
3613     .fp_addr = {
3614                 .ip4.as_u32 = eh_addr->as_u32,
3615                 },
3616   };
3617
3618   if (!(flags & SNAT_SESSION_FLAG_STATIC_MAPPING))
3619     {
3620       if (nat_set_outside_address_and_port
3621           (sm->addresses, thread_index, *out_addr, out_port, proto))
3622         return;
3623     }
3624
3625   u = nat_user_get_or_create (sm, in_addr, fib_index, thread_index);
3626   if (!u)
3627     return;
3628
3629   s = nat_session_alloc_or_recycle (sm, u, thread_index, now);
3630   if (!s)
3631     return;
3632
3633   if (sm->endpoint_dependent)
3634     {
3635       nat_ed_lru_insert (tsm, s, now, nat_proto_to_ip_proto (proto));
3636     }
3637
3638   s->out2in.addr.as_u32 = out_addr->as_u32;
3639   s->out2in.port = out_port;
3640   s->nat_proto = proto;
3641   s->last_heard = now;
3642   s->flags = flags;
3643   s->ext_host_addr.as_u32 = eh_addr->as_u32;
3644   s->ext_host_port = eh_port;
3645   user_session_increment (sm, u, snat_is_session_static (s));
3646   switch (vec_len (sm->outside_fibs))
3647     {
3648     case 0:
3649       s->out2in.fib_index = sm->outside_fib_index;
3650       break;
3651     case 1:
3652       s->out2in.fib_index = sm->outside_fibs[0].fib_index;
3653       break;
3654     default:
3655       /* *INDENT-OFF* */
3656       vec_foreach (outside_fib, sm->outside_fibs)
3657         {
3658           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
3659           if (FIB_NODE_INDEX_INVALID != fei)
3660             {
3661               if (fib_entry_get_resolving_interface (fei) != ~0)
3662                 {
3663                   s->out2in.fib_index = outside_fib->fib_index;
3664                   break;
3665                 }
3666             }
3667         }
3668       /* *INDENT-ON* */
3669       break;
3670     }
3671   init_nat_o2i_kv (&kv, s, s - tsm->sessions);
3672   if (clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 1))
3673     nat_elog_warn ("out2in key add failed");
3674
3675   s->in2out.addr.as_u32 = in_addr->as_u32;
3676   s->in2out.port = in_port;
3677   s->in2out.fib_index = fib_index;
3678   init_nat_i2o_kv (&kv, s, s - tsm->sessions);
3679   if (clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 1))
3680     nat_elog_warn ("in2out key add failed");
3681 }
3682
3683 void
3684 nat_ha_sdel_cb (ip4_address_t * out_addr, u16 out_port,
3685                 ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
3686                 u32 ti)
3687 {
3688   snat_main_t *sm = &snat_main;
3689   clib_bihash_kv_8_8_t kv, value;
3690   u32 thread_index;
3691   snat_session_t *s;
3692   snat_main_per_thread_data_t *tsm;
3693
3694   if (sm->num_workers > 1)
3695     thread_index =
3696       sm->first_worker_index +
3697       (sm->workers[(clib_net_to_host_u16 (out_port) -
3698                     1024) / sm->port_per_thread]);
3699   else
3700     thread_index = sm->num_workers;
3701   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
3702
3703   init_nat_k (&kv, *out_addr, out_port, fib_index, proto);
3704   if (clib_bihash_search_8_8 (&tsm->out2in, &kv, &value))
3705     return;
3706
3707   s = pool_elt_at_index (tsm->sessions, value.value);
3708   nat_free_session_data (sm, s, thread_index, 1);
3709   nat44_delete_session (sm, s, thread_index);
3710 }
3711
3712 void
3713 nat_ha_sref_cb (ip4_address_t * out_addr, u16 out_port,
3714                 ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
3715                 u32 total_pkts, u64 total_bytes, u32 thread_index)
3716 {
3717   snat_main_t *sm = &snat_main;
3718   clib_bihash_kv_8_8_t kv, value;
3719   snat_session_t *s;
3720   snat_main_per_thread_data_t *tsm;
3721
3722   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
3723
3724   init_nat_k (&kv, *out_addr, out_port, fib_index, proto);
3725   if (clib_bihash_search_8_8 (&tsm->out2in, &kv, &value))
3726     return;
3727
3728   s = pool_elt_at_index (tsm->sessions, value.value);
3729   s->total_pkts = total_pkts;
3730   s->total_bytes = total_bytes;
3731 }
3732
3733 void
3734 nat_ha_sadd_ed_cb (ip4_address_t * in_addr, u16 in_port,
3735                    ip4_address_t * out_addr, u16 out_port,
3736                    ip4_address_t * eh_addr, u16 eh_port,
3737                    ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
3738                    u32 fib_index, u16 flags, u32 thread_index)
3739 {
3740   snat_main_t *sm = &snat_main;
3741   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
3742   snat_session_t *s;
3743   clib_bihash_kv_16_8_t kv;
3744   vlib_main_t *vm = vlib_get_main ();
3745   f64 now = vlib_time_now (vm);
3746   nat_outside_fib_t *outside_fib;
3747   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
3748   fib_prefix_t pfx = {
3749     .fp_proto = FIB_PROTOCOL_IP4,
3750     .fp_len = 32,
3751     .fp_addr = {
3752                 .ip4.as_u32 = eh_addr->as_u32,
3753                 },
3754   };
3755
3756
3757   if (!(flags & SNAT_SESSION_FLAG_STATIC_MAPPING))
3758     {
3759       if (nat_set_outside_address_and_port
3760           (sm->addresses, thread_index, *out_addr, out_port, proto))
3761         return;
3762     }
3763
3764   if (flags & SNAT_SESSION_FLAG_TWICE_NAT)
3765     {
3766       if (nat_set_outside_address_and_port
3767           (sm->addresses, thread_index, *ehn_addr, ehn_port, proto))
3768         return;
3769     }
3770
3771   s = nat_ed_session_alloc (sm, thread_index, now, proto);
3772   if (!s)
3773     return;
3774
3775   s->last_heard = now;
3776   s->flags = flags;
3777   s->ext_host_nat_addr.as_u32 = s->ext_host_addr.as_u32 = eh_addr->as_u32;
3778   s->ext_host_nat_port = s->ext_host_port = eh_port;
3779   if (is_twice_nat_session (s))
3780     {
3781       s->ext_host_nat_addr.as_u32 = ehn_addr->as_u32;
3782       s->ext_host_nat_port = ehn_port;
3783     }
3784   switch (vec_len (sm->outside_fibs))
3785     {
3786     case 0:
3787       s->out2in.fib_index = sm->outside_fib_index;
3788       break;
3789     case 1:
3790       s->out2in.fib_index = sm->outside_fibs[0].fib_index;
3791       break;
3792     default:
3793       /* *INDENT-OFF* */
3794       vec_foreach (outside_fib, sm->outside_fibs)
3795         {
3796           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
3797           if (FIB_NODE_INDEX_INVALID != fei)
3798             {
3799               if (fib_entry_get_resolving_interface (fei) != ~0)
3800                 {
3801                   s->out2in.fib_index = outside_fib->fib_index;
3802                   break;
3803                 }
3804             }
3805         }
3806       /* *INDENT-ON* */
3807       break;
3808     }
3809   s->nat_proto = proto;
3810   s->out2in.addr.as_u32 = out_addr->as_u32;
3811   s->out2in.port = out_port;
3812
3813   s->in2out.addr.as_u32 = in_addr->as_u32;
3814   s->in2out.port = in_port;
3815   s->in2out.fib_index = fib_index;
3816
3817   init_ed_kv (&kv, *in_addr, in_port, s->ext_host_nat_addr,
3818               s->ext_host_nat_port, fib_index, nat_proto_to_ip_proto (proto),
3819               thread_index, s - tsm->sessions);
3820   if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &kv, 1))
3821     nat_elog_warn ("in2out key add failed");
3822
3823   init_ed_kv (&kv, *out_addr, out_port, *eh_addr, eh_port,
3824               s->out2in.fib_index, nat_proto_to_ip_proto (proto),
3825               thread_index, s - tsm->sessions);
3826   if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &kv, 1))
3827     nat_elog_warn ("out2in key add failed");
3828 }
3829
3830 void
3831 nat_ha_sdel_ed_cb (ip4_address_t * out_addr, u16 out_port,
3832                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
3833                    u32 fib_index, u32 ti)
3834 {
3835   snat_main_t *sm = &snat_main;
3836   clib_bihash_kv_16_8_t kv, value;
3837   u32 thread_index;
3838   snat_session_t *s;
3839   snat_main_per_thread_data_t *tsm;
3840
3841   if (sm->num_workers > 1)
3842     thread_index =
3843       sm->first_worker_index +
3844       (sm->workers[(clib_net_to_host_u16 (out_port) -
3845                     1024) / sm->port_per_thread]);
3846   else
3847     thread_index = sm->num_workers;
3848   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
3849
3850   init_ed_k (&kv, *out_addr, out_port, *eh_addr, eh_port, fib_index, proto);
3851   if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
3852     return;
3853
3854   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
3855   nat_free_session_data (sm, s, thread_index, 1);
3856   nat44_delete_session (sm, s, thread_index);
3857 }
3858
3859 void
3860 nat_ha_sref_ed_cb (ip4_address_t * out_addr, u16 out_port,
3861                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
3862                    u32 fib_index, u32 total_pkts, u64 total_bytes,
3863                    u32 thread_index)
3864 {
3865   snat_main_t *sm = &snat_main;
3866   clib_bihash_kv_16_8_t kv, value;
3867   snat_session_t *s;
3868   snat_main_per_thread_data_t *tsm;
3869
3870   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
3871
3872   init_ed_k (&kv, *out_addr, out_port, *eh_addr, eh_port, fib_index, proto);
3873   if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
3874     return;
3875
3876   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
3877   s->total_pkts = total_pkts;
3878   s->total_bytes = total_bytes;
3879 }
3880
3881 static u32
3882 nat_calc_bihash_buckets (u32 n_elts)
3883 {
3884   return 1 << (max_log2 (n_elts >> 1) + 1);
3885 }
3886
3887 static u32
3888 nat_calc_bihash_memory (u32 n_buckets, uword kv_size)
3889 {
3890   return n_buckets * (8 + kv_size * 4);
3891 }
3892
3893 u32
3894 nat44_get_max_session_limit ()
3895 {
3896   snat_main_t *sm = &snat_main;
3897   u32 max_limit = 0, len = 0;
3898
3899   for (; len < vec_len (sm->max_translations_per_fib); len++)
3900     {
3901       if (max_limit < sm->max_translations_per_fib[len])
3902         max_limit = sm->max_translations_per_fib[len];
3903     }
3904   return max_limit;
3905 }
3906
3907 int
3908 nat44_set_session_limit (u32 session_limit, u32 vrf_id)
3909 {
3910   snat_main_t *sm = &snat_main;
3911   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
3912   u32 len = vec_len (sm->max_translations_per_fib);
3913
3914   if (len <= fib_index)
3915     {
3916       vec_validate (sm->max_translations_per_fib, fib_index + 1);
3917
3918       for (; len < vec_len (sm->max_translations_per_fib); len++)
3919         sm->max_translations_per_fib[len] = sm->max_translations_per_thread;
3920     }
3921
3922   sm->max_translations_per_fib[fib_index] = session_limit;
3923   return 0;
3924 }
3925
3926 int
3927 nat44_update_session_limit (u32 session_limit, u32 vrf_id)
3928 {
3929   snat_main_t *sm = &snat_main;
3930
3931   if (nat44_set_session_limit (session_limit, vrf_id))
3932     return 1;
3933   sm->max_translations_per_thread = nat44_get_max_session_limit ();
3934
3935   sm->translation_buckets =
3936     nat_calc_bihash_buckets (sm->max_translations_per_thread);
3937
3938   if (!sm->translation_memory_size_set)
3939     {
3940       sm->translation_memory_size =
3941         nat_calc_bihash_memory (sm->translation_buckets,
3942                                 sizeof (clib_bihash_16_8_t));
3943     }
3944
3945   nat44_sessions_clear ();
3946   return 0;
3947 }
3948
3949 void
3950 nat44_db_init (snat_main_per_thread_data_t * tsm)
3951 {
3952   snat_main_t *sm = &snat_main;
3953
3954   pool_alloc (tsm->sessions, sm->max_translations_per_thread);
3955   pool_alloc (tsm->lru_pool, sm->max_translations_per_thread);
3956
3957   dlist_elt_t *head;
3958
3959   pool_get (tsm->lru_pool, head);
3960   tsm->tcp_trans_lru_head_index = head - tsm->lru_pool;
3961   clib_dlist_init (tsm->lru_pool, tsm->tcp_trans_lru_head_index);
3962
3963   pool_get (tsm->lru_pool, head);
3964   tsm->tcp_estab_lru_head_index = head - tsm->lru_pool;
3965   clib_dlist_init (tsm->lru_pool, tsm->tcp_estab_lru_head_index);
3966
3967   pool_get (tsm->lru_pool, head);
3968   tsm->udp_lru_head_index = head - tsm->lru_pool;
3969   clib_dlist_init (tsm->lru_pool, tsm->udp_lru_head_index);
3970
3971   pool_get (tsm->lru_pool, head);
3972   tsm->icmp_lru_head_index = head - tsm->lru_pool;
3973   clib_dlist_init (tsm->lru_pool, tsm->icmp_lru_head_index);
3974
3975   pool_get (tsm->lru_pool, head);
3976   tsm->unk_proto_lru_head_index = head - tsm->lru_pool;
3977   clib_dlist_init (tsm->lru_pool, tsm->unk_proto_lru_head_index);
3978
3979   if (sm->endpoint_dependent)
3980     {
3981       clib_bihash_init_16_8 (&tsm->in2out_ed, "in2out-ed",
3982                              sm->translation_buckets,
3983                              sm->translation_memory_size);
3984       clib_bihash_set_kvp_format_fn_16_8 (&tsm->in2out_ed,
3985                                           format_ed_session_kvp);
3986
3987     }
3988   else
3989     {
3990       clib_bihash_init_8_8 (&tsm->in2out, "in2out",
3991                             sm->translation_buckets,
3992                             sm->translation_memory_size);
3993       clib_bihash_set_kvp_format_fn_8_8 (&tsm->in2out, format_session_kvp);
3994       clib_bihash_init_8_8 (&tsm->out2in, "out2in",
3995                             sm->translation_buckets,
3996                             sm->translation_memory_size);
3997       clib_bihash_set_kvp_format_fn_8_8 (&tsm->out2in, format_session_kvp);
3998     }
3999
4000   // TODO: resolve static mappings (put only to !ED)
4001   pool_alloc (tsm->list_pool, sm->max_translations_per_thread);
4002   clib_bihash_init_8_8 (&tsm->user_hash, "users", sm->user_buckets,
4003                         sm->user_memory_size);
4004   clib_bihash_set_kvp_format_fn_8_8 (&tsm->user_hash, format_user_kvp);
4005 }
4006
4007 void
4008 nat44_db_free (snat_main_per_thread_data_t * tsm)
4009 {
4010   snat_main_t *sm = &snat_main;
4011
4012   pool_free (tsm->sessions);
4013   pool_free (tsm->lru_pool);
4014
4015   if (sm->endpoint_dependent)
4016     {
4017       clib_bihash_free_16_8 (&tsm->in2out_ed);
4018       vec_free (tsm->per_vrf_sessions_vec);
4019     }
4020   else
4021     {
4022       clib_bihash_free_8_8 (&tsm->in2out);
4023       clib_bihash_free_8_8 (&tsm->out2in);
4024     }
4025
4026   // TODO: resolve static mappings (put only to !ED)
4027   pool_free (tsm->users);
4028   pool_free (tsm->list_pool);
4029   clib_bihash_free_8_8 (&tsm->user_hash);
4030 }
4031
4032 void
4033 nat44_sessions_clear ()
4034 {
4035   snat_main_t *sm = &snat_main;
4036   snat_main_per_thread_data_t *tsm;
4037
4038   if (sm->endpoint_dependent)
4039     {
4040       clib_bihash_free_16_8 (&sm->out2in_ed);
4041       clib_bihash_init_16_8 (&sm->out2in_ed, "out2in-ed",
4042                              clib_max (1, sm->num_workers) *
4043                              sm->translation_buckets,
4044                              clib_max (1, sm->num_workers) *
4045                              sm->translation_memory_size);
4046       clib_bihash_set_kvp_format_fn_16_8 (&sm->out2in_ed,
4047                                           format_ed_session_kvp);
4048     }
4049
4050   /* *INDENT-OFF* */
4051   vec_foreach (tsm, sm->per_thread_data)
4052     {
4053       u32 ti;
4054
4055       nat44_db_free (tsm);
4056       nat44_db_init (tsm);
4057
4058       ti = tsm->snat_thread_index;
4059       vlib_set_simple_counter (&sm->total_users, ti, 0, 0);
4060       vlib_set_simple_counter (&sm->total_sessions, ti, 0, 0);
4061     }
4062   /* *INDENT-ON* */
4063 }
4064
4065 static clib_error_t *
4066 snat_config (vlib_main_t * vm, unformat_input_t * input)
4067 {
4068   snat_main_t *sm = &snat_main;
4069   snat_main_per_thread_data_t *tsm;
4070
4071   u32 static_mapping_buckets = 1024;
4072   uword static_mapping_memory_size = 64 << 20;
4073
4074   u32 nat64_bib_buckets = 1024;
4075   u32 nat64_bib_memory_size = 128 << 20;
4076
4077   u32 nat64_st_buckets = 2048;
4078   uword nat64_st_memory_size = 256 << 20;
4079
4080   u32 max_users_per_thread = 0;
4081   u32 user_memory_size = 0;
4082   u32 max_translations_per_thread = 0;
4083   u32 translation_memory_size = 0;
4084
4085   u32 max_translations_per_user = ~0;
4086
4087   u32 outside_vrf_id = 0;
4088   u32 outside_ip6_vrf_id = 0;
4089   u32 inside_vrf_id = 0;
4090   u8 static_mapping_only = 0;
4091   u8 static_mapping_connection_tracking = 0;
4092
4093   u32 udp_timeout = SNAT_UDP_TIMEOUT;
4094   u32 icmp_timeout = SNAT_ICMP_TIMEOUT;
4095   u32 tcp_transitory_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
4096   u32 tcp_established_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;
4097
4098   sm->out2in_dpo = 0;
4099   sm->endpoint_dependent = 0;
4100
4101   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
4102     {
4103       if (unformat
4104           (input, "max translations per thread %d",
4105            &max_translations_per_thread))
4106         ;
4107       else if (unformat (input, "udp timeout %d", &udp_timeout))
4108         ;
4109       else if (unformat (input, "icmp timeout %d", &icmp_timeout))
4110         ;
4111       else if (unformat (input, "tcp transitory timeout %d",
4112                          &tcp_transitory_timeout));
4113       else if (unformat (input, "tcp established timeout %d",
4114                          &tcp_established_timeout));
4115       else if (unformat (input, "translation hash memory %d",
4116                          &translation_memory_size));
4117       else
4118         if (unformat
4119             (input, "max users per thread %d", &max_users_per_thread))
4120         ;
4121       else if (unformat (input, "user hash memory %d", &user_memory_size))
4122         ;
4123       else if (unformat (input, "max translations per user %d",
4124                          &max_translations_per_user))
4125         ;
4126       else if (unformat (input, "outside VRF id %d", &outside_vrf_id))
4127         ;
4128       else if (unformat (input, "outside ip6 VRF id %d", &outside_ip6_vrf_id))
4129         ;
4130       else if (unformat (input, "inside VRF id %d", &inside_vrf_id))
4131         ;
4132       else if (unformat (input, "static mapping only"))
4133         {
4134           static_mapping_only = 1;
4135           if (unformat (input, "connection tracking"))
4136             static_mapping_connection_tracking = 1;
4137         }
4138       else if (unformat (input, "nat64 bib hash buckets %d",
4139                          &nat64_bib_buckets))
4140         ;
4141       else if (unformat (input, "nat64 bib hash memory %d",
4142                          &nat64_bib_memory_size))
4143         ;
4144       else
4145         if (unformat (input, "nat64 st hash buckets %d", &nat64_st_buckets))
4146         ;
4147       else if (unformat (input, "nat64 st hash memory %d",
4148                          &nat64_st_memory_size))
4149         ;
4150       else if (unformat (input, "out2in dpo"))
4151         sm->out2in_dpo = 1;
4152       else if (unformat (input, "endpoint-dependent"))
4153         sm->endpoint_dependent = 1;
4154       else
4155         return clib_error_return (0, "unknown input '%U'",
4156                                   format_unformat_error, input);
4157     }
4158
4159   if (static_mapping_only && (sm->endpoint_dependent))
4160     return clib_error_return (0,
4161                               "static mapping only mode available only for simple nat");
4162
4163   if (sm->out2in_dpo && (sm->endpoint_dependent))
4164     return clib_error_return (0,
4165                               "out2in dpo mode available only for simple nat");
4166   if (sm->endpoint_dependent && max_users_per_thread > 0)
4167     {
4168       return clib_error_return (0,
4169                                 "setting 'max users' in endpoint-dependent mode is not supported");
4170     }
4171
4172   if (sm->endpoint_dependent && max_translations_per_user != ~0)
4173     {
4174       return clib_error_return (0,
4175                                 "setting 'max translations per user' in endpoint-dependent mode is not supported");
4176     }
4177
4178   /* optionally configurable timeouts for testing purposes */
4179   sm->udp_timeout = udp_timeout;
4180   sm->tcp_transitory_timeout = tcp_transitory_timeout;
4181   sm->tcp_established_timeout = tcp_established_timeout;
4182   sm->icmp_timeout = icmp_timeout;
4183
4184   if (0 == max_users_per_thread)
4185     {
4186       max_users_per_thread = 1024;
4187     }
4188   sm->max_users_per_thread = max_users_per_thread;
4189   sm->user_buckets = nat_calc_bihash_buckets (sm->max_users_per_thread);
4190
4191   if (0 == max_translations_per_thread)
4192     {
4193       // default value based on legacy setting of load factor 10 * default
4194       // translation buckets 1024
4195       max_translations_per_thread = 10 * 1024;
4196     }
4197   sm->translation_memory_size_set = translation_memory_size != 0;
4198
4199   sm->max_translations_per_thread = max_translations_per_thread;
4200   sm->translation_buckets =
4201     nat_calc_bihash_buckets (sm->max_translations_per_thread);
4202   if (0 == translation_memory_size)
4203     {
4204       translation_memory_size =
4205         nat_calc_bihash_memory (sm->translation_buckets,
4206                                 sizeof (clib_bihash_16_8_t));
4207     }
4208   sm->translation_memory_size = translation_memory_size;
4209   if (0 == user_memory_size)
4210     {
4211       user_memory_size =
4212         nat_calc_bihash_memory (sm->max_users_per_thread,
4213                                 sizeof (clib_bihash_8_8_t));
4214     }
4215   sm->user_memory_size = user_memory_size;
4216   vec_add1 (sm->max_translations_per_fib, sm->max_translations_per_thread);
4217
4218   sm->max_translations_per_user = max_translations_per_user == ~0 ?
4219     sm->max_translations_per_thread : max_translations_per_user;
4220
4221   sm->outside_vrf_id = outside_vrf_id;
4222   sm->outside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
4223                                                              outside_vrf_id,
4224                                                              nat_fib_src_hi);
4225   sm->inside_vrf_id = inside_vrf_id;
4226   sm->inside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
4227                                                             inside_vrf_id,
4228                                                             nat_fib_src_hi);
4229   sm->static_mapping_only = static_mapping_only;
4230   sm->static_mapping_connection_tracking = static_mapping_connection_tracking;
4231
4232   nat64_set_hash (nat64_bib_buckets, nat64_bib_memory_size, nat64_st_buckets,
4233                   nat64_st_memory_size);
4234
4235   if (sm->endpoint_dependent)
4236     {
4237       sm->worker_in2out_cb = nat44_ed_get_worker_in2out_cb;
4238       sm->worker_out2in_cb = nat44_ed_get_worker_out2in_cb;
4239
4240       sm->in2out_node_index = nat44_ed_in2out_node.index;
4241       sm->in2out_output_node_index = nat44_ed_in2out_output_node.index;
4242       sm->out2in_node_index = nat44_ed_out2in_node.index;
4243
4244       sm->icmp_match_in2out_cb = icmp_match_in2out_ed;
4245       sm->icmp_match_out2in_cb = icmp_match_out2in_ed;
4246       nat_affinity_init (vm);
4247       nat_ha_init (vm, nat_ha_sadd_ed_cb, nat_ha_sdel_ed_cb,
4248                    nat_ha_sref_ed_cb);
4249       clib_bihash_init_16_8 (&sm->out2in_ed, "out2in-ed",
4250                              clib_max (1, sm->num_workers) *
4251                              sm->translation_buckets,
4252                              clib_max (1, sm->num_workers) *
4253                              sm->translation_memory_size);
4254       clib_bihash_set_kvp_format_fn_16_8 (&sm->out2in_ed,
4255                                           format_ed_session_kvp);
4256     }
4257   else
4258     {
4259       sm->worker_in2out_cb = snat_get_worker_in2out_cb;
4260       sm->worker_out2in_cb = snat_get_worker_out2in_cb;
4261
4262       sm->in2out_node_index = snat_in2out_node.index;
4263       sm->in2out_output_node_index = snat_in2out_output_node.index;
4264       sm->out2in_node_index = snat_out2in_node.index;
4265
4266       sm->icmp_match_in2out_cb = icmp_match_in2out_slow;
4267       sm->icmp_match_out2in_cb = icmp_match_out2in_slow;
4268       nat_ha_init (vm, nat_ha_sadd_cb, nat_ha_sdel_cb, nat_ha_sref_cb);
4269     }
4270   if (!static_mapping_only ||
4271       (static_mapping_only && static_mapping_connection_tracking))
4272     {
4273           /* *INDENT-OFF* */
4274           vec_foreach (tsm, sm->per_thread_data)
4275             {
4276               nat44_db_init (tsm);
4277             }
4278           /* *INDENT-ON* */
4279     }
4280   else
4281     {
4282       sm->icmp_match_in2out_cb = icmp_match_in2out_fast;
4283       sm->icmp_match_out2in_cb = icmp_match_out2in_fast;
4284     }
4285   clib_bihash_init_8_8 (&sm->static_mapping_by_local,
4286                         "static_mapping_by_local", static_mapping_buckets,
4287                         static_mapping_memory_size);
4288   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_local,
4289                                      format_static_mapping_kvp);
4290
4291   clib_bihash_init_8_8 (&sm->static_mapping_by_external,
4292                         "static_mapping_by_external",
4293                         static_mapping_buckets, static_mapping_memory_size);
4294   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_external,
4295                                      format_static_mapping_kvp);
4296
4297   return 0;
4298 }
4299
/* Hook snat_config up under the "nat" name via VLIB_CONFIG_FUNCTION.
 * NOTE(review): presumably "nat" is the startup-configuration section name
 * handled by snat_config - confirm against the vlib config documentation. */
VLIB_CONFIG_FUNCTION (snat_config, "nat");
4301
4302 static void
4303 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
4304                                  uword opaque,
4305                                  u32 sw_if_index,
4306                                  ip4_address_t * address,
4307                                  u32 address_length,
4308                                  u32 if_address_index, u32 is_delete)
4309 {
4310   snat_main_t *sm = &snat_main;
4311   snat_static_map_resolve_t *rp;
4312   snat_static_mapping_t *m;
4313   clib_bihash_kv_8_8_t kv, value;
4314   int i, rv;
4315   ip4_address_t l_addr;
4316
4317   for (i = 0; i < vec_len (sm->to_resolve); i++)
4318     {
4319       rp = sm->to_resolve + i;
4320       if (rp->addr_only == 0)
4321         continue;
4322       if (rp->sw_if_index == sw_if_index)
4323         goto match;
4324     }
4325
4326   return;
4327
4328 match:
4329   init_nat_k (&kv, *address, rp->addr_only ? 0 : rp->e_port,
4330               sm->outside_fib_index, rp->addr_only ? 0 : rp->proto);
4331   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
4332     m = 0;
4333   else
4334     m = pool_elt_at_index (sm->static_mappings, value.value);
4335
4336   if (!is_delete)
4337     {
4338       /* Don't trip over lease renewal, static config */
4339       if (m)
4340         return;
4341     }
4342   else
4343     {
4344       if (!m)
4345         return;
4346     }
4347
4348   /* Indetity mapping? */
4349   if (rp->l_addr.as_u32 == 0)
4350     l_addr.as_u32 = address[0].as_u32;
4351   else
4352     l_addr.as_u32 = rp->l_addr.as_u32;
4353   /* Add the static mapping */
4354   rv = snat_add_static_mapping (l_addr,
4355                                 address[0],
4356                                 rp->l_port,
4357                                 rp->e_port,
4358                                 rp->vrf_id,
4359                                 rp->addr_only, ~0 /* sw_if_index */ ,
4360                                 rp->proto, !is_delete, rp->twice_nat,
4361                                 rp->out2in_only, rp->tag, rp->identity_nat);
4362   if (rv)
4363     nat_elog_notice_X1 ("snat_add_static_mapping returned %d", "i4", rv);
4364 }
4365
4366 static void
4367 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
4368                                        uword opaque,
4369                                        u32 sw_if_index,
4370                                        ip4_address_t * address,
4371                                        u32 address_length,
4372                                        u32 if_address_index, u32 is_delete)
4373 {
4374   snat_main_t *sm = &snat_main;
4375   snat_static_map_resolve_t *rp;
4376   ip4_address_t l_addr;
4377   int i, j;
4378   int rv;
4379   u8 twice_nat = 0;
4380   snat_address_t *addresses = sm->addresses;
4381
4382   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices); i++)
4383     {
4384       if (sw_if_index == sm->auto_add_sw_if_indices[i])
4385         goto match;
4386     }
4387
4388   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices_twice_nat); i++)
4389     {
4390       twice_nat = 1;
4391       addresses = sm->twice_nat_addresses;
4392       if (sw_if_index == sm->auto_add_sw_if_indices_twice_nat[i])
4393         goto match;
4394     }
4395
4396   return;
4397
4398 match:
4399   if (!is_delete)
4400     {
4401       /* Don't trip over lease renewal, static config */
4402       for (j = 0; j < vec_len (addresses); j++)
4403         if (addresses[j].addr.as_u32 == address->as_u32)
4404           return;
4405
4406       (void) snat_add_address (sm, address, ~0, twice_nat);
4407       /* Scan static map resolution vector */
4408       for (j = 0; j < vec_len (sm->to_resolve); j++)
4409         {
4410           rp = sm->to_resolve + j;
4411           if (rp->addr_only)
4412             continue;
4413           /* On this interface? */
4414           if (rp->sw_if_index == sw_if_index)
4415             {
4416               /* Indetity mapping? */
4417               if (rp->l_addr.as_u32 == 0)
4418                 l_addr.as_u32 = address[0].as_u32;
4419               else
4420                 l_addr.as_u32 = rp->l_addr.as_u32;
4421               /* Add the static mapping */
4422               rv = snat_add_static_mapping (l_addr,
4423                                             address[0],
4424                                             rp->l_port,
4425                                             rp->e_port,
4426                                             rp->vrf_id,
4427                                             rp->addr_only,
4428                                             ~0 /* sw_if_index */ ,
4429                                             rp->proto,
4430                                             rp->is_add, rp->twice_nat,
4431                                             rp->out2in_only, rp->tag,
4432                                             rp->identity_nat);
4433               if (rv)
4434                 nat_elog_notice_X1 ("snat_add_static_mapping returned %d",
4435                                     "i4", rv);
4436             }
4437         }
4438       return;
4439     }
4440   else
4441     {
4442       (void) snat_del_address (sm, address[0], 1, twice_nat);
4443       return;
4444     }
4445 }
4446
4447
4448 int
4449 snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del,
4450                             u8 twice_nat)
4451 {
4452   ip4_main_t *ip4_main = sm->ip4_main;
4453   ip4_address_t *first_int_addr;
4454   snat_static_map_resolve_t *rp;
4455   u32 *indices_to_delete = 0;
4456   int i, j;
4457   u32 *auto_add_sw_if_indices =
4458     twice_nat ? sm->
4459     auto_add_sw_if_indices_twice_nat : sm->auto_add_sw_if_indices;
4460
4461   first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0        /* just want the address */
4462     );
4463
4464   for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
4465     {
4466       if (auto_add_sw_if_indices[i] == sw_if_index)
4467         {
4468           if (is_del)
4469             {
4470               /* if have address remove it */
4471               if (first_int_addr)
4472                 (void) snat_del_address (sm, first_int_addr[0], 1, twice_nat);
4473               else
4474                 {
4475                   for (j = 0; j < vec_len (sm->to_resolve); j++)
4476                     {
4477                       rp = sm->to_resolve + j;
4478                       if (rp->sw_if_index == sw_if_index)
4479                         vec_add1 (indices_to_delete, j);
4480                     }
4481                   if (vec_len (indices_to_delete))
4482                     {
4483                       for (j = vec_len (indices_to_delete) - 1; j >= 0; j--)
4484                         vec_del1 (sm->to_resolve, j);
4485                       vec_free (indices_to_delete);
4486                     }
4487                 }
4488               if (twice_nat)
4489                 vec_del1 (sm->auto_add_sw_if_indices_twice_nat, i);
4490               else
4491                 vec_del1 (sm->auto_add_sw_if_indices, i);
4492             }
4493           else
4494             return VNET_API_ERROR_VALUE_EXIST;
4495
4496           return 0;
4497         }
4498     }
4499
4500   if (is_del)
4501     return VNET_API_ERROR_NO_SUCH_ENTRY;
4502
4503   /* add to the auto-address list */
4504   if (twice_nat)
4505     vec_add1 (sm->auto_add_sw_if_indices_twice_nat, sw_if_index);
4506   else
4507     vec_add1 (sm->auto_add_sw_if_indices, sw_if_index);
4508
4509   /* If the address is already bound - or static - add it now */
4510   if (first_int_addr)
4511     (void) snat_add_address (sm, first_int_addr, ~0, twice_nat);
4512
4513   return 0;
4514 }
4515
4516 int
4517 nat44_del_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
4518                    nat_protocol_t proto, u32 vrf_id, int is_in)
4519 {
4520   snat_main_per_thread_data_t *tsm;
4521   clib_bihash_kv_8_8_t kv, value;
4522   ip4_header_t ip;
4523   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
4524   snat_session_t *s;
4525   clib_bihash_8_8_t *t;
4526
4527   if (sm->endpoint_dependent)
4528     return VNET_API_ERROR_UNSUPPORTED;
4529
4530   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
4531   if (sm->num_workers > 1)
4532     tsm =
4533       vec_elt_at_index (sm->per_thread_data,
4534                         sm->worker_in2out_cb (&ip, fib_index, 0));
4535   else
4536     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
4537
4538   init_nat_k (&kv, *addr, port, fib_index, proto);
4539   t = is_in ? &tsm->in2out : &tsm->out2in;
4540   if (!clib_bihash_search_8_8 (t, &kv, &value))
4541     {
4542       if (pool_is_free_index (tsm->sessions, value.value))
4543         return VNET_API_ERROR_UNSPECIFIED;
4544
4545       s = pool_elt_at_index (tsm->sessions, value.value);
4546       nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
4547       nat44_delete_session (sm, s, tsm - sm->per_thread_data);
4548       return 0;
4549     }
4550
4551   return VNET_API_ERROR_NO_SUCH_ENTRY;
4552 }
4553
4554 int
4555 nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
4556                       ip4_address_t * eh_addr, u16 eh_port, u8 proto,
4557                       u32 vrf_id, int is_in)
4558 {
4559   ip4_header_t ip;
4560   clib_bihash_16_8_t *t;
4561   clib_bihash_kv_16_8_t kv, value;
4562   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
4563   snat_session_t *s;
4564   snat_main_per_thread_data_t *tsm;
4565
4566   if (!sm->endpoint_dependent)
4567     return VNET_API_ERROR_FEATURE_DISABLED;
4568
4569   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
4570   if (sm->num_workers > 1)
4571     tsm =
4572       vec_elt_at_index (sm->per_thread_data,
4573                         sm->worker_in2out_cb (&ip, fib_index, 0));
4574   else
4575     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
4576
4577   t = is_in ? &tsm->in2out_ed : &sm->out2in_ed;
4578   init_ed_k (&kv, *addr, port, *eh_addr, eh_port, fib_index, proto);
4579   if (clib_bihash_search_16_8 (t, &kv, &value))
4580     {
4581       return VNET_API_ERROR_NO_SUCH_ENTRY;
4582     }
4583
4584   if (pool_is_free_index (tsm->sessions, value.value))
4585     return VNET_API_ERROR_UNSPECIFIED;
4586   s = pool_elt_at_index (tsm->sessions, value.value);
4587   nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
4588   nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
4589   return 0;
4590 }
4591
4592 void
4593 nat_set_alloc_addr_and_port_mape (u16 psid, u16 psid_offset, u16 psid_length)
4594 {
4595   snat_main_t *sm = &snat_main;
4596
4597   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_MAPE;
4598   sm->alloc_addr_and_port = nat_alloc_addr_and_port_mape;
4599   sm->psid = psid;
4600   sm->psid_offset = psid_offset;
4601   sm->psid_length = psid_length;
4602 }
4603
4604 void
4605 nat_set_alloc_addr_and_port_range (u16 start_port, u16 end_port)
4606 {
4607   snat_main_t *sm = &snat_main;
4608
4609   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_RANGE;
4610   sm->alloc_addr_and_port = nat_alloc_addr_and_port_range;
4611   sm->start_port = start_port;
4612   sm->end_port = end_port;
4613 }
4614
4615 void
4616 nat_set_alloc_addr_and_port_default (void)
4617 {
4618   snat_main_t *sm = &snat_main;
4619
4620   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_DEFAULT;
4621   sm->alloc_addr_and_port = nat_alloc_addr_and_port_default;
4622 }
4623
/*
 * Node function of nat_default_node.
 *
 * Does not look at vm, node or frame and always returns 0.
 */
VLIB_NODE_FN (nat_default_node) (vlib_main_t * vm,
                                 vlib_node_runtime_t * node,
                                 vlib_frame_t * frame)
{
  return 0;
}
4630
4631 /* *INDENT-OFF* */
/*
 * Registration of the "nat-default" internal node.
 *
 * It has no trace formatter and no error counters; its next-node table maps
 * every NAT_NEXT_* index (NAT_N_NEXT entries) to a graph node name.
 * NOTE(review): since the node function returns 0 without touching the
 * frame, this node presumably exists only to register that NAT_NEXT_* ->
 * node-name mapping - confirm with the users of NAT_NEXT_*.
 */
VLIB_REGISTER_NODE (nat_default_node) = {
  .name = "nat-default",
  .vector_size = sizeof (u32),
  .format_trace = 0,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = 0,
  .n_next_nodes = NAT_N_NEXT,
  .next_nodes = {
    /* drop / ICMP error paths */
    [NAT_NEXT_DROP] = "error-drop",
    [NAT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    /* endpoint-dependent in2out paths */
    [NAT_NEXT_IN2OUT_ED_FAST_PATH] = "nat44-ed-in2out",
    [NAT_NEXT_IN2OUT_ED_SLOW_PATH] = "nat44-ed-in2out-slowpath",
    [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
    /* endpoint-dependent out2in paths */
    [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in",
    [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath",
    /* worker hand-off nodes */
    [NAT_NEXT_IN2OUT_CLASSIFY] = "nat44-in2out-worker-handoff",
    [NAT_NEXT_OUT2IN_CLASSIFY] = "nat44-out2in-worker-handoff",
  },
};
4651 /* *INDENT-ON* */
4652
4653 /*
4654  * fd.io coding-style-patch-verification: ON
4655  *
4656  * Local Variables:
4657  * eval: (c-set-style "gnu")
4658  * End:
4659  */