66a5243af1c35bb46114d8066c92e645bea9088c
[vpp.git] / src / plugins / nat / nat.c
1 /*
2  * snat.c - simple nat plugin
3  *
4  * Copyright (c) 2016 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/vnet.h>
19 #include <vnet/ip/ip.h>
20 #include <vnet/ip/ip4.h>
21 #include <vnet/plugin/plugin.h>
22 #include <nat/nat.h>
23 #include <nat/nat_dpo.h>
24 #include <nat/nat_ipfix_logging.h>
25 #include <nat/nat64.h>
26 #include <nat/nat_inlines.h>
27 #include <nat/nat44/inlines.h>
28 #include <nat/nat_affinity.h>
29 #include <nat/nat_syslog.h>
30 #include <nat/nat_ha.h>
31 #include <vnet/fib/fib_table.h>
32 #include <vnet/fib/ip4_fib.h>
33 #include <vnet/ip/reass/ip4_sv_reass.h>
34 #include <vppinfra/bihash_16_8.h>
35 #include <nat/nat44/ed_inlines.h>
36
37 #include <vpp/app/version.h>
38
/* Global state of the NAT plugin; functions in this file reach it either
 * through an explicit 'sm' argument or directly via &snat_main. */
snat_main_t snat_main;

/* FIB sources used by the NAT plugin.  nat_fib_src_low is the source passed
 * to the fib_table_* calls in this file; nat_fib_src_hi is presumably the
 * higher-priority counterpart (NOTE(review): confirm where it is
 * allocated and used). */
fib_source_t nat_fib_src_hi;
fib_source_t nat_fib_src_low;
43
44 /* *INDENT-OFF* */
45 /* Hook up input features */
46 VNET_FEATURE_INIT (nat_pre_in2out, static) = {
47   .arc_name = "ip4-unicast",
48   .node_name = "nat-pre-in2out",
49   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
50                                "ip4-sv-reassembly-feature"),
51 };
52 VNET_FEATURE_INIT (nat_pre_out2in, static) = {
53   .arc_name = "ip4-unicast",
54   .node_name = "nat-pre-out2in",
55   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
56                                "ip4-dhcp-client-detect",
57                                "ip4-sv-reassembly-feature"),
58 };
59 VNET_FEATURE_INIT (snat_in2out_worker_handoff, static) = {
60   .arc_name = "ip4-unicast",
61   .node_name = "nat44-in2out-worker-handoff",
62   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
63 };
64 VNET_FEATURE_INIT (snat_out2in_worker_handoff, static) = {
65   .arc_name = "ip4-unicast",
66   .node_name = "nat44-out2in-worker-handoff",
67   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
68                                "ip4-dhcp-client-detect"),
69 };
70 VNET_FEATURE_INIT (ip4_snat_in2out, static) = {
71   .arc_name = "ip4-unicast",
72   .node_name = "nat44-in2out",
73   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
74 };
75 VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
76   .arc_name = "ip4-unicast",
77   .node_name = "nat44-out2in",
78   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
79                                "ip4-dhcp-client-detect"),
80 };
81 VNET_FEATURE_INIT (ip4_nat_classify, static) = {
82   .arc_name = "ip4-unicast",
83   .node_name = "nat44-classify",
84   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
85 };
86 VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = {
87   .arc_name = "ip4-unicast",
88   .node_name = "nat44-ed-in2out",
89   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
90 };
91 VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = {
92   .arc_name = "ip4-unicast",
93   .node_name = "nat44-ed-out2in",
94   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
95                                "ip4-dhcp-client-detect"),
96 };
97 VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
98   .arc_name = "ip4-unicast",
99   .node_name = "nat44-ed-classify",
100   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
101 };
102 VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
103   .arc_name = "ip4-unicast",
104   .node_name = "nat44-handoff-classify",
105   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
106 };
107 VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
108   .arc_name = "ip4-unicast",
109   .node_name = "nat44-in2out-fast",
110   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
111 };
112 VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = {
113   .arc_name = "ip4-unicast",
114   .node_name = "nat44-out2in-fast",
115   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
116                                "ip4-dhcp-client-detect"),
117 };
118 VNET_FEATURE_INIT (ip4_snat_hairpin_dst, static) = {
119   .arc_name = "ip4-unicast",
120   .node_name = "nat44-hairpin-dst",
121   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
122 };
123 VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_dst, static) = {
124   .arc_name = "ip4-unicast",
125   .node_name = "nat44-ed-hairpin-dst",
126   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
127 };
128
129 /* Hook up output features */
130 VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = {
131   .arc_name = "ip4-output",
132   .node_name = "nat44-in2out-output",
133   .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
134 };
135 VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = {
136   .arc_name = "ip4-output",
137   .node_name = "nat44-in2out-output-worker-handoff",
138   .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
139 };
140 VNET_FEATURE_INIT (ip4_snat_hairpin_src, static) = {
141   .arc_name = "ip4-output",
142   .node_name = "nat44-hairpin-src",
143   .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
144 };
145 VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = {
146   .arc_name = "ip4-output",
147   .node_name = "nat44-ed-in2out-output",
148   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
149   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
150 };
151 VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_src, static) = {
152   .arc_name = "ip4-output",
153   .node_name = "nat44-ed-hairpin-src",
154   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
155   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
156 };
157
158 /* Hook up ip4-local features */
159 VNET_FEATURE_INIT (ip4_nat_hairpinning, static) =
160 {
161   .arc_name = "ip4-local",
162   .node_name = "nat44-hairpinning",
163   .runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
164 };
165 VNET_FEATURE_INIT (ip4_nat44_ed_hairpinning, static) =
166 {
167   .arc_name = "ip4-local",
168   .node_name = "nat44-ed-hairpinning",
169   .runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
170 };
171
172
173 VLIB_PLUGIN_REGISTER () = {
174     .version = VPP_BUILD_VER,
175     .description = "Network Address Translation (NAT)",
176 };
177 /* *INDENT-ON* */
178
179 void
180 nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index,
181                        u8 is_ha)
182 {
183   clib_bihash_kv_8_8_t kv;
184   u8 proto;
185   u16 r_port, l_port;
186   ip4_address_t *l_addr, *r_addr;
187   u32 fib_index = 0;
188   clib_bihash_kv_16_8_t ed_kv;
189   snat_main_per_thread_data_t *tsm =
190     vec_elt_at_index (sm->per_thread_data, thread_index);
191
192   if (is_ed_session (s))
193     {
194       per_vrf_sessions_unregister_session (s, thread_index);
195     }
196
197   if (is_fwd_bypass_session (s))
198     {
199       if (snat_is_unk_proto_session (s))
200         {
201           init_ed_k (&ed_kv, s->in2out.addr, 0, s->ext_host_addr, 0, 0,
202                      s->in2out.port);
203         }
204       else
205         {
206           l_port = s->in2out.port;
207           r_port = s->ext_host_port;
208           l_addr = &s->in2out.addr;
209           r_addr = &s->ext_host_addr;
210           proto = nat_proto_to_ip_proto (s->nat_proto);
211           init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index,
212                      proto);
213         }
214       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
215         nat_elog_warn ("in2out_ed key del failed");
216       return;
217     }
218
219   /* session lookup tables */
220   if (is_ed_session (s))
221     {
222       if (is_affinity_sessions (s))
223         nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
224                              s->nat_proto, s->out2in.port);
225       l_addr = &s->out2in.addr;
226       r_addr = &s->ext_host_addr;
227       fib_index = s->out2in.fib_index;
228       if (snat_is_unk_proto_session (s))
229         {
230           proto = s->in2out.port;
231           r_port = 0;
232           l_port = 0;
233         }
234       else
235         {
236           proto = nat_proto_to_ip_proto (s->nat_proto);
237           l_port = s->out2in.port;
238           r_port = s->ext_host_port;
239         }
240       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
241       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0))
242         nat_elog_warn ("out2in_ed key del failed");
243       l_addr = &s->in2out.addr;
244       fib_index = s->in2out.fib_index;
245       if (!snat_is_unk_proto_session (s))
246         l_port = s->in2out.port;
247       if (is_twice_nat_session (s))
248         {
249           r_addr = &s->ext_host_nat_addr;
250           r_port = s->ext_host_nat_port;
251         }
252       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
253       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
254         nat_elog_warn ("in2out_ed key del failed");
255
256       if (!is_ha)
257         nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
258                                &s->in2out.addr, s->in2out.port,
259                                &s->ext_host_nat_addr, s->ext_host_nat_port,
260                                &s->out2in.addr, s->out2in.port,
261                                &s->ext_host_addr, s->ext_host_port,
262                                s->nat_proto, is_twice_nat_session (s));
263     }
264   else
265     {
266       init_nat_i2o_k (&kv, s);
267       if (clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 0))
268         nat_elog_warn ("in2out key del failed");
269       init_nat_o2i_k (&kv, s);
270       if (clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 0))
271         nat_elog_warn ("out2in key del failed");
272
273       if (!is_ha)
274         nat_syslog_nat44_apmdel (s->user_index, s->in2out.fib_index,
275                                  &s->in2out.addr, s->in2out.port,
276                                  &s->out2in.addr, s->out2in.port,
277                                  s->nat_proto);
278     }
279
280   if (snat_is_unk_proto_session (s))
281     return;
282
283   if (!is_ha)
284     {
285       /* log NAT event */
286       snat_ipfix_logging_nat44_ses_delete (thread_index,
287                                            s->in2out.addr.as_u32,
288                                            s->out2in.addr.as_u32,
289                                            s->nat_proto,
290                                            s->in2out.port,
291                                            s->out2in.port,
292                                            s->in2out.fib_index);
293
294       nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
295                    s->ext_host_port, s->nat_proto, s->out2in.fib_index,
296                    thread_index);
297     }
298
299   /* Twice NAT address and port for external host */
300   if (is_twice_nat_session (s))
301     {
302       snat_free_outside_address_and_port (sm->twice_nat_addresses,
303                                           thread_index,
304                                           &s->ext_host_nat_addr,
305                                           s->ext_host_nat_port, s->nat_proto);
306     }
307
308   if (snat_is_session_static (s))
309     return;
310
311   snat_free_outside_address_and_port (sm->addresses, thread_index,
312                                       &s->out2in.addr, s->out2in.port,
313                                       s->nat_proto);
314 }
315
316 int
317 nat44_set_session_limit (u32 session_limit, u32 vrf_id)
318 {
319   snat_main_t *sm = &snat_main;
320   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
321   u32 len = vec_len (sm->max_translations_per_fib);
322
323   if (len <= fib_index)
324     {
325       vec_validate (sm->max_translations_per_fib, fib_index + 1);
326
327       for (; len < vec_len (sm->max_translations_per_fib); len++)
328         sm->max_translations_per_fib[len] = sm->max_translations_per_thread;
329     }
330
331   sm->max_translations_per_fib[fib_index] = session_limit;
332   return 0;
333 }
334
335 void
336 nat44_free_session_data (snat_main_t * sm, snat_session_t * s,
337                          u32 thread_index, u8 is_ha)
338 {
339   u8 proto;
340   u16 r_port, l_port;
341   ip4_address_t *l_addr, *r_addr;
342   u32 fib_index;
343   clib_bihash_kv_16_8_t ed_kv;
344   snat_main_per_thread_data_t *tsm =
345     vec_elt_at_index (sm->per_thread_data, thread_index);
346
347   if (is_fwd_bypass_session (s))
348     {
349       if (snat_is_unk_proto_session (s))
350         {
351           proto = s->in2out.port;
352           r_port = 0;
353           l_port = 0;
354         }
355       else
356         {
357           proto = nat_proto_to_ip_proto (s->nat_proto);
358           l_port = s->in2out.port;
359           r_port = s->ext_host_port;
360         }
361
362       l_addr = &s->in2out.addr;
363       r_addr = &s->ext_host_addr;
364       fib_index = 0;
365       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
366
367       if (PREDICT_FALSE
368           (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0)))
369         nat_elog_warn ("in2out_ed key del failed");
370       return;
371     }
372
373   /* session lookup tables */
374   if (is_affinity_sessions (s))
375     nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
376                          s->nat_proto, s->out2in.port);
377   l_addr = &s->out2in.addr;
378   r_addr = &s->ext_host_addr;
379   fib_index = s->out2in.fib_index;
380   if (snat_is_unk_proto_session (s))
381     {
382       proto = s->in2out.port;
383       r_port = 0;
384       l_port = 0;
385     }
386   else
387     {
388       proto = nat_proto_to_ip_proto (s->nat_proto);
389       l_port = s->out2in.port;
390       r_port = s->ext_host_port;
391     }
392   init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
393
394   if (PREDICT_FALSE (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0)))
395     nat_elog_warn ("out2in_ed key del failed");
396
397   l_addr = &s->in2out.addr;
398   fib_index = s->in2out.fib_index;
399
400   if (!snat_is_unk_proto_session (s))
401     l_port = s->in2out.port;
402
403   if (is_twice_nat_session (s))
404     {
405       r_addr = &s->ext_host_nat_addr;
406       r_port = s->ext_host_nat_port;
407     }
408   init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
409
410   if (PREDICT_FALSE (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0)))
411     nat_elog_warn ("in2out_ed key del failed");
412
413   if (!is_ha)
414     {
415       nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
416                              &s->in2out.addr, s->in2out.port,
417                              &s->ext_host_nat_addr, s->ext_host_nat_port,
418                              &s->out2in.addr, s->out2in.port,
419                              &s->ext_host_addr, s->ext_host_port,
420                              s->nat_proto, is_twice_nat_session (s));
421     }
422
423   if (snat_is_unk_proto_session (s))
424     return;
425
426   if (!is_ha)
427     {
428       snat_ipfix_logging_nat44_ses_delete (thread_index,
429                                            s->in2out.addr.as_u32,
430                                            s->out2in.addr.as_u32,
431                                            s->nat_proto,
432                                            s->in2out.port,
433                                            s->out2in.port,
434                                            s->in2out.fib_index);
435       nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
436                    s->ext_host_port, s->nat_proto, s->out2in.fib_index,
437                    thread_index);
438     }
439
440   /* Twice NAT address and port for external host */
441   if (is_twice_nat_session (s))
442     {
443       snat_free_outside_address_and_port (sm->twice_nat_addresses,
444                                           thread_index,
445                                           &s->ext_host_nat_addr,
446                                           s->ext_host_nat_port, s->nat_proto);
447     }
448
449   if (snat_is_session_static (s))
450     return;
451
452   snat_free_outside_address_and_port (sm->addresses, thread_index,
453                                       &s->out2in.addr, s->out2in.port,
454                                       s->nat_proto);
455 }
456
457
458 snat_user_t *
459 nat_user_get_or_create (snat_main_t * sm, ip4_address_t * addr, u32 fib_index,
460                         u32 thread_index)
461 {
462   snat_user_t *u = 0;
463   snat_user_key_t user_key;
464   clib_bihash_kv_8_8_t kv, value;
465   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
466   dlist_elt_t *per_user_list_head_elt;
467
468   user_key.addr.as_u32 = addr->as_u32;
469   user_key.fib_index = fib_index;
470   kv.key = user_key.as_u64;
471
472   /* Ever heard of the "user" = src ip4 address before? */
473   if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
474     {
475       if (pool_elts (tsm->users) >= sm->max_users_per_thread)
476         {
477           vlib_increment_simple_counter (&sm->user_limit_reached,
478                                          thread_index, 0, 1);
479           nat_elog_warn ("maximum user limit reached");
480           return NULL;
481         }
482       /* no, make a new one */
483       pool_get (tsm->users, u);
484       clib_memset (u, 0, sizeof (*u));
485
486       u->addr.as_u32 = addr->as_u32;
487       u->fib_index = fib_index;
488
489       pool_get (tsm->list_pool, per_user_list_head_elt);
490
491       u->sessions_per_user_list_head_index = per_user_list_head_elt -
492         tsm->list_pool;
493
494       clib_dlist_init (tsm->list_pool, u->sessions_per_user_list_head_index);
495
496       kv.value = u - tsm->users;
497
498       /* add user */
499       if (clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1))
500         {
501           nat_elog_warn ("user_hash key add failed");
502           nat44_delete_user_with_no_session (sm, u, thread_index);
503           return NULL;
504         }
505
506       vlib_set_simple_counter (&sm->total_users, thread_index, 0,
507                                pool_elts (tsm->users));
508     }
509   else
510     {
511       u = pool_elt_at_index (tsm->users, value.value);
512     }
513
514   return u;
515 }
516
517 snat_session_t *
518 nat_session_alloc_or_recycle (snat_main_t * sm, snat_user_t * u,
519                               u32 thread_index, f64 now)
520 {
521   snat_session_t *s;
522   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
523   u32 oldest_per_user_translation_list_index, session_index;
524   dlist_elt_t *oldest_per_user_translation_list_elt;
525   dlist_elt_t *per_user_translation_list_elt;
526
527   /* Over quota? Recycle the least recently used translation */
528   if ((u->nsessions + u->nstaticsessions) >= sm->max_translations_per_user)
529     {
530       oldest_per_user_translation_list_index =
531         clib_dlist_remove_head (tsm->list_pool,
532                                 u->sessions_per_user_list_head_index);
533
534       ASSERT (oldest_per_user_translation_list_index != ~0);
535
536       /* Add it back to the end of the LRU list */
537       clib_dlist_addtail (tsm->list_pool,
538                           u->sessions_per_user_list_head_index,
539                           oldest_per_user_translation_list_index);
540       /* Get the list element */
541       oldest_per_user_translation_list_elt =
542         pool_elt_at_index (tsm->list_pool,
543                            oldest_per_user_translation_list_index);
544
545       /* Get the session index from the list element */
546       session_index = oldest_per_user_translation_list_elt->value;
547
548       /* Get the session */
549       s = pool_elt_at_index (tsm->sessions, session_index);
550       nat_free_session_data (sm, s, thread_index, 0);
551       if (snat_is_session_static (s))
552         u->nstaticsessions--;
553       else
554         u->nsessions--;
555       s->flags = 0;
556       s->total_bytes = 0;
557       s->total_pkts = 0;
558       s->state = 0;
559       s->ext_host_addr.as_u32 = 0;
560       s->ext_host_port = 0;
561       s->ext_host_nat_addr.as_u32 = 0;
562       s->ext_host_nat_port = 0;
563     }
564   else
565     {
566       pool_get (tsm->sessions, s);
567       clib_memset (s, 0, sizeof (*s));
568
569       /* Create list elts */
570       pool_get (tsm->list_pool, per_user_translation_list_elt);
571       clib_dlist_init (tsm->list_pool,
572                        per_user_translation_list_elt - tsm->list_pool);
573
574       per_user_translation_list_elt->value = s - tsm->sessions;
575       s->per_user_index = per_user_translation_list_elt - tsm->list_pool;
576       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
577
578       clib_dlist_addtail (tsm->list_pool,
579                           s->per_user_list_head_index,
580                           per_user_translation_list_elt - tsm->list_pool);
581
582       s->user_index = u - tsm->users;
583       vlib_set_simple_counter (&sm->total_sessions, thread_index, 0,
584                                pool_elts (tsm->sessions));
585     }
586
587   s->ha_last_refreshed = now;
588
589   return s;
590 }
591
592 void
593 snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index,
594                           int is_add)
595 {
596   fib_prefix_t prefix = {
597     .fp_len = p_len,
598     .fp_proto = FIB_PROTOCOL_IP4,
599     .fp_addr = {
600                 .ip4.as_u32 = addr->as_u32,
601                 },
602   };
603   u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
604
605   if (is_add)
606     fib_table_entry_update_one_path (fib_index,
607                                      &prefix,
608                                      nat_fib_src_low,
609                                      (FIB_ENTRY_FLAG_CONNECTED |
610                                       FIB_ENTRY_FLAG_LOCAL |
611                                       FIB_ENTRY_FLAG_EXCLUSIVE),
612                                      DPO_PROTO_IP4,
613                                      NULL,
614                                      sw_if_index,
615                                      ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
616   else
617     fib_table_entry_delete (fib_index, &prefix, nat_fib_src_low);
618 }
619
620 int
621 snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id,
622                   u8 twice_nat)
623 {
624   snat_address_t *ap;
625   snat_interface_t *i;
626   vlib_thread_main_t *tm = vlib_get_thread_main ();
627
628   if (twice_nat && !sm->endpoint_dependent)
629     return VNET_API_ERROR_FEATURE_DISABLED;
630
631   /* Check if address already exists */
632   /* *INDENT-OFF* */
633   vec_foreach (ap, twice_nat ? sm->twice_nat_addresses : sm->addresses)
634     {
635       if (ap->addr.as_u32 == addr->as_u32)
636         return VNET_API_ERROR_VALUE_EXIST;
637     }
638   /* *INDENT-ON* */
639
640   if (twice_nat)
641     vec_add2 (sm->twice_nat_addresses, ap, 1);
642   else
643     vec_add2 (sm->addresses, ap, 1);
644
645   ap->addr = *addr;
646   if (vrf_id != ~0)
647     ap->fib_index =
648       fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
649                                          nat_fib_src_low);
650   else
651     ap->fib_index = ~0;
652 #define _(N, i, n, s) \
653   clib_memset(ap->busy_##n##_port_refcounts, 0, sizeof(ap->busy_##n##_port_refcounts));\
654   ap->busy_##n##_ports = 0; \
655   ap->busy_##n##_ports_per_thread = 0;\
656   vec_validate_init_empty (ap->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
657   foreach_nat_protocol
658 #undef _
659     if (twice_nat)
660     return 0;
661
662   /* Add external address to FIB */
663   /* *INDENT-OFF* */
664   pool_foreach (i, sm->interfaces,
665   ({
666     if (nat_interface_is_inside(i) || sm->out2in_dpo)
667       continue;
668
669     snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1);
670     break;
671   }));
672   pool_foreach (i, sm->output_feature_interfaces,
673   ({
674     if (nat_interface_is_inside(i) || sm->out2in_dpo)
675       continue;
676
677     snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1);
678     break;
679   }));
680   /* *INDENT-ON* */
681
682   return 0;
683 }
684
685 static int
686 is_snat_address_used_in_static_mapping (snat_main_t * sm, ip4_address_t addr)
687 {
688   snat_static_mapping_t *m;
689   /* *INDENT-OFF* */
690   pool_foreach (m, sm->static_mappings,
691   ({
692       if (is_addr_only_static_mapping (m) ||
693           is_out2in_only_static_mapping (m) ||
694           is_identity_static_mapping (m))
695         continue;
696       if (m->external_addr.as_u32 == addr.as_u32)
697         return 1;
698   }));
699   /* *INDENT-ON* */
700
701   return 0;
702 }
703
704 static void
705 snat_add_static_mapping_when_resolved (snat_main_t * sm,
706                                        ip4_address_t l_addr,
707                                        u16 l_port,
708                                        u32 sw_if_index,
709                                        u16 e_port,
710                                        u32 vrf_id,
711                                        nat_protocol_t proto,
712                                        int addr_only, int is_add, u8 * tag,
713                                        int twice_nat, int out2in_only,
714                                        int identity_nat)
715 {
716   snat_static_map_resolve_t *rp;
717
718   vec_add2 (sm->to_resolve, rp, 1);
719   rp->l_addr.as_u32 = l_addr.as_u32;
720   rp->l_port = l_port;
721   rp->sw_if_index = sw_if_index;
722   rp->e_port = e_port;
723   rp->vrf_id = vrf_id;
724   rp->proto = proto;
725   rp->addr_only = addr_only;
726   rp->is_add = is_add;
727   rp->twice_nat = twice_nat;
728   rp->out2in_only = out2in_only;
729   rp->identity_nat = identity_nat;
730   rp->tag = vec_dup (tag);
731 }
732
733 static u32
734 get_thread_idx_by_port (u16 e_port)
735 {
736   snat_main_t *sm = &snat_main;
737   u32 thread_idx = sm->num_workers;
738   if (sm->num_workers > 1)
739     {
740       thread_idx =
741         sm->first_worker_index +
742         sm->workers[(e_port - 1024) / sm->port_per_thread];
743     }
744   return thread_idx;
745 }
746
747 void
748 snat_static_mapping_del_sessions (snat_main_t * sm,
749                                   snat_main_per_thread_data_t * tsm,
750                                   snat_user_key_t u_key, int addr_only,
751                                   ip4_address_t e_addr, u16 e_port)
752 {
753   clib_bihash_kv_8_8_t kv, value;
754   kv.key = u_key.as_u64;
755   u64 user_index;
756   dlist_elt_t *head, *elt;
757   snat_user_t *u;
758   snat_session_t *s;
759   u32 elt_index, head_index, ses_index;
760   if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
761     {
762       user_index = value.value;
763       u = pool_elt_at_index (tsm->users, user_index);
764       if (u->nstaticsessions)
765         {
766           head_index = u->sessions_per_user_list_head_index;
767           head = pool_elt_at_index (tsm->list_pool, head_index);
768           elt_index = head->next;
769           elt = pool_elt_at_index (tsm->list_pool, elt_index);
770           ses_index = elt->value;
771           while (ses_index != ~0)
772             {
773               s = pool_elt_at_index (tsm->sessions, ses_index);
774               elt = pool_elt_at_index (tsm->list_pool, elt->next);
775               ses_index = elt->value;
776
777               if (!addr_only)
778                 {
779                   if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
780                       (s->out2in.port != e_port))
781                     continue;
782                 }
783
784               if (is_lb_session (s))
785                 continue;
786
787               if (!snat_is_session_static (s))
788                 continue;
789
790               nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
791               nat44_delete_session (sm, s, tsm - sm->per_thread_data);
792
793               if (!addr_only)
794                 break;
795             }
796         }
797     }
798 }
799
800 void
801 snat_ed_static_mapping_del_sessions (snat_main_t * sm,
802                                      snat_main_per_thread_data_t * tsm,
803                                      ip4_address_t l_addr,
804                                      u16 l_port,
805                                      u8 protocol,
806                                      u32 fib_index, int addr_only,
807                                      ip4_address_t e_addr, u16 e_port)
808 {
809   snat_session_t *s;
810   u32 *indexes_to_free = NULL;
811   /* *INDENT-OFF* */
812   pool_foreach (s, tsm->sessions, {
813     if (s->in2out.fib_index != fib_index ||
814         s->in2out.addr.as_u32 != l_addr.as_u32)
815       {
816         continue;
817       }
818     if (!addr_only)
819       {
820         if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
821             s->out2in.port != e_port ||
822             s->in2out.port != l_port ||
823             s->nat_proto != protocol)
824           continue;
825       }
826
827     if (is_lb_session (s))
828       continue;
829     if (!snat_is_session_static (s))
830       continue;
831     nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
832     vec_add1 (indexes_to_free, s - tsm->sessions);
833     if (!addr_only)
834       break;
835   });
836   /* *INDENT-ON* */
837   u32 *ses_index;
838   vec_foreach (ses_index, indexes_to_free)
839   {
840     s = pool_elt_at_index (tsm->sessions, *ses_index);
841     nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
842   }
843   vec_free (indexes_to_free);
844 }
845
846 int
847 snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
848                          u16 l_port, u16 e_port, u32 vrf_id, int addr_only,
849                          u32 sw_if_index, nat_protocol_t proto, int is_add,
850                          twice_nat_type_t twice_nat, u8 out2in_only, u8 * tag,
851                          u8 identity_nat)
852 {
853   snat_main_t *sm = &snat_main;
854   snat_static_mapping_t *m;
855   clib_bihash_kv_8_8_t kv, value;
856   snat_address_t *a = 0;
857   u32 fib_index = ~0;
858   snat_interface_t *interface;
859   int i;
860   snat_main_per_thread_data_t *tsm;
861   snat_user_key_t u_key;
862   snat_user_t *u;
863   dlist_elt_t *head, *elt;
864   u32 elt_index, head_index;
865   u32 ses_index;
866   u64 user_index;
867   snat_session_t *s;
868   snat_static_map_resolve_t *rp, *rp_match = 0;
869   nat44_lb_addr_port_t *local;
870   u32 find = ~0;
871
872   if (!sm->endpoint_dependent)
873     {
874       if (twice_nat || out2in_only)
875         return VNET_API_ERROR_FEATURE_DISABLED;
876     }
877
878   /* If the external address is a specific interface address */
879   if (sw_if_index != ~0)
880     {
881       ip4_address_t *first_int_addr;
882
883       for (i = 0; i < vec_len (sm->to_resolve); i++)
884         {
885           rp = sm->to_resolve + i;
886           if (rp->sw_if_index != sw_if_index ||
887               rp->l_addr.as_u32 != l_addr.as_u32 ||
888               rp->vrf_id != vrf_id || rp->addr_only != addr_only)
889             continue;
890
891           if (!addr_only)
892             {
893               if ((rp->l_port != l_port && rp->e_port != e_port)
894                   || rp->proto != proto)
895                 continue;
896             }
897
898           rp_match = rp;
899           break;
900         }
901
902       /* Might be already set... */
903       first_int_addr = ip4_interface_first_address
904         (sm->ip4_main, sw_if_index, 0 /* just want the address */ );
905
906       if (is_add)
907         {
908           if (rp_match)
909             return VNET_API_ERROR_VALUE_EXIST;
910
911           snat_add_static_mapping_when_resolved
912             (sm, l_addr, l_port, sw_if_index, e_port, vrf_id, proto,
913              addr_only, is_add, tag, twice_nat, out2in_only, identity_nat);
914
915           /* DHCP resolution required? */
916           if (first_int_addr == 0)
917             {
918               return 0;
919             }
920           else
921             {
922               e_addr.as_u32 = first_int_addr->as_u32;
923               /* Identity mapping? */
924               if (l_addr.as_u32 == 0)
925                 l_addr.as_u32 = e_addr.as_u32;
926             }
927         }
928       else
929         {
930           if (!rp_match)
931             return VNET_API_ERROR_NO_SUCH_ENTRY;
932
933           vec_del1 (sm->to_resolve, i);
934
935           if (first_int_addr)
936             {
937               e_addr.as_u32 = first_int_addr->as_u32;
938               /* Identity mapping? */
939               if (l_addr.as_u32 == 0)
940                 l_addr.as_u32 = e_addr.as_u32;
941             }
942           else
943             return 0;
944         }
945     }
946
947   init_nat_k (&kv, e_addr, addr_only ? 0 : e_port, 0, addr_only ? 0 : proto);
948   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
949     m = 0;
950   else
951     m = pool_elt_at_index (sm->static_mappings, value.value);
952
953   if (is_add)
954     {
955       if (m)
956         {
957           if (is_identity_static_mapping (m))
958             {
959               /* *INDENT-OFF* */
960               pool_foreach (local, m->locals,
961               ({
962                 if (local->vrf_id == vrf_id)
963                   return VNET_API_ERROR_VALUE_EXIST;
964               }));
965               /* *INDENT-ON* */
966               pool_get (m->locals, local);
967               local->vrf_id = vrf_id;
968               local->fib_index =
969                 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
970                                                    nat_fib_src_low);
971               init_nat_kv (&kv, m->local_addr, m->local_port,
972                            local->fib_index, m->proto,
973                            m - sm->static_mappings);
974               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
975               return 0;
976             }
977           else
978             return VNET_API_ERROR_VALUE_EXIST;
979         }
980
981       if (twice_nat && addr_only)
982         return VNET_API_ERROR_UNSUPPORTED;
983
984       /* Convert VRF id to FIB index */
985       if (vrf_id != ~0)
986         fib_index =
987           fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
988                                              nat_fib_src_low);
989       /* If not specified use inside VRF id from SNAT plugin startup config */
990       else
991         {
992           fib_index = sm->inside_fib_index;
993           vrf_id = sm->inside_vrf_id;
994           fib_table_lock (fib_index, FIB_PROTOCOL_IP4, nat_fib_src_low);
995         }
996
997       if (!(out2in_only || identity_nat))
998         {
999           init_nat_k (&kv, l_addr, addr_only ? 0 : l_port, fib_index,
1000                       addr_only ? 0 : proto);
1001           if (!clib_bihash_search_8_8
1002               (&sm->static_mapping_by_local, &kv, &value))
1003             return VNET_API_ERROR_VALUE_EXIST;
1004         }
1005
1006       /* Find external address in allocated addresses and reserve port for
1007          address and port pair mapping when dynamic translations enabled */
1008       if (!(addr_only || sm->static_mapping_only || out2in_only))
1009         {
1010           for (i = 0; i < vec_len (sm->addresses); i++)
1011             {
1012               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1013                 {
1014                   a = sm->addresses + i;
1015                   /* External port must be unused */
1016                   switch (proto)
1017                     {
1018 #define _(N, j, n, s) \
1019                     case NAT_PROTOCOL_##N: \
1020                       if (a->busy_##n##_port_refcounts[e_port]) \
1021                         return VNET_API_ERROR_INVALID_VALUE; \
1022                       ++a->busy_##n##_port_refcounts[e_port]; \
1023                       if (e_port > 1024) \
1024                         { \
1025                           a->busy_##n##_ports++; \
1026                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
1027                         } \
1028                       break;
1029                       foreach_nat_protocol
1030 #undef _
1031                     default:
1032                       nat_elog_info ("unknown protocol");
1033                       return VNET_API_ERROR_INVALID_VALUE_2;
1034                     }
1035                   break;
1036                 }
1037             }
1038           /* External address must be allocated */
1039           if (!a && (l_addr.as_u32 != e_addr.as_u32))
1040             {
1041               if (sw_if_index != ~0)
1042                 {
1043                   for (i = 0; i < vec_len (sm->to_resolve); i++)
1044                     {
1045                       rp = sm->to_resolve + i;
1046                       if (rp->addr_only)
1047                         continue;
1048                       if (rp->sw_if_index != sw_if_index &&
1049                           rp->l_addr.as_u32 != l_addr.as_u32 &&
1050                           rp->vrf_id != vrf_id && rp->l_port != l_port &&
1051                           rp->e_port != e_port && rp->proto != proto)
1052                         continue;
1053
1054                       vec_del1 (sm->to_resolve, i);
1055                       break;
1056                     }
1057                 }
1058               return VNET_API_ERROR_NO_SUCH_ENTRY;
1059             }
1060         }
1061
1062       pool_get (sm->static_mappings, m);
1063       clib_memset (m, 0, sizeof (*m));
1064       m->tag = vec_dup (tag);
1065       m->local_addr = l_addr;
1066       m->external_addr = e_addr;
1067       m->twice_nat = twice_nat;
1068       if (out2in_only)
1069         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
1070       if (addr_only)
1071         m->flags |= NAT_STATIC_MAPPING_FLAG_ADDR_ONLY;
1072       if (identity_nat)
1073         {
1074           m->flags |= NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT;
1075           pool_get (m->locals, local);
1076           local->vrf_id = vrf_id;
1077           local->fib_index = fib_index;
1078         }
1079       else
1080         {
1081           m->vrf_id = vrf_id;
1082           m->fib_index = fib_index;
1083         }
1084       if (!addr_only)
1085         {
1086           m->local_port = l_port;
1087           m->external_port = e_port;
1088           m->proto = proto;
1089         }
1090
1091       if (sm->num_workers > 1)
1092         {
1093           ip4_header_t ip = {
1094             .src_address = m->local_addr,
1095           };
1096           vec_add1 (m->workers, sm->worker_in2out_cb (&ip, m->fib_index, 0));
1097           tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
1098         }
1099       else
1100         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1101
1102       init_nat_kv (&kv, m->local_addr, m->local_port, fib_index, m->proto,
1103                    m - sm->static_mappings);
1104       if (!out2in_only)
1105         clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
1106
1107       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto,
1108                    m - sm->static_mappings);
1109       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1);
1110
1111       /* Delete dynamic sessions matching local address (+ local port) */
1112       if (!(sm->static_mapping_only))
1113         {
1114           u_key.addr = m->local_addr;
1115           u_key.fib_index = m->fib_index;
1116           kv.key = u_key.as_u64;
1117           if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1118             {
1119               user_index = value.value;
1120               u = pool_elt_at_index (tsm->users, user_index);
1121               if (u->nsessions)
1122                 {
1123                   head_index = u->sessions_per_user_list_head_index;
1124                   head = pool_elt_at_index (tsm->list_pool, head_index);
1125                   elt_index = head->next;
1126                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1127                   ses_index = elt->value;
1128                   while (ses_index != ~0)
1129                     {
1130                       s = pool_elt_at_index (tsm->sessions, ses_index);
1131                       elt = pool_elt_at_index (tsm->list_pool, elt->next);
1132                       ses_index = elt->value;
1133
1134                       if (snat_is_session_static (s))
1135                         continue;
1136
1137                       if (!addr_only && s->in2out.port != m->local_port)
1138                         continue;
1139
1140                       nat_free_session_data (sm, s,
1141                                              tsm - sm->per_thread_data, 0);
1142                       nat44_delete_session (sm, s, tsm - sm->per_thread_data);
1143
1144                       if (!addr_only && !sm->endpoint_dependent)
1145                         break;
1146                     }
1147                 }
1148             }
1149         }
1150     }
1151   else
1152     {
1153       if (!m)
1154         {
1155           if (sw_if_index != ~0)
1156             return 0;
1157           else
1158             return VNET_API_ERROR_NO_SUCH_ENTRY;
1159         }
1160
1161       if (identity_nat)
1162         {
1163           if (vrf_id == ~0)
1164             vrf_id = sm->inside_vrf_id;
1165
1166           /* *INDENT-OFF* */
1167           pool_foreach (local, m->locals,
1168           ({
1169             if (local->vrf_id == vrf_id)
1170               find = local - m->locals;
1171           }));
1172           /* *INDENT-ON* */
1173           if (find == ~0)
1174             return VNET_API_ERROR_NO_SUCH_ENTRY;
1175
1176           local = pool_elt_at_index (m->locals, find);
1177           fib_index = local->fib_index;
1178           pool_put (m->locals, local);
1179         }
1180       else
1181         fib_index = m->fib_index;
1182
1183       /* Free external address port */
1184       if (!(addr_only || sm->static_mapping_only || out2in_only))
1185         {
1186           for (i = 0; i < vec_len (sm->addresses); i++)
1187             {
1188               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1189                 {
1190                   a = sm->addresses + i;
1191                   switch (proto)
1192                     {
1193 #define _(N, j, n, s) \
1194                     case NAT_PROTOCOL_##N: \
1195                       --a->busy_##n##_port_refcounts[e_port]; \
1196                       if (e_port > 1024) \
1197                         { \
1198                           a->busy_##n##_ports--; \
1199                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1200                         } \
1201                       break;
1202                       foreach_nat_protocol
1203 #undef _
1204                     default:
1205                       nat_elog_info ("unknown protocol");
1206                       return VNET_API_ERROR_INVALID_VALUE_2;
1207                     }
1208                   break;
1209                 }
1210             }
1211         }
1212
1213       if (sm->num_workers > 1)
1214         tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
1215       else
1216         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1217
1218       init_nat_k (&kv, m->local_addr, m->local_port, fib_index, m->proto);
1219       if (!out2in_only)
1220         clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0);
1221
1222       /* Delete session(s) for static mapping if exist */
1223       if (!(sm->static_mapping_only) ||
1224           (sm->static_mapping_only && sm->static_mapping_connection_tracking))
1225         {
1226           if (sm->endpoint_dependent)
1227             {
1228               snat_ed_static_mapping_del_sessions (sm, tsm, m->local_addr,
1229                                                    m->local_port, m->proto,
1230                                                    fib_index, addr_only,
1231                                                    e_addr, e_port);
1232             }
1233           else
1234             {
1235               u_key.addr = m->local_addr;
1236               u_key.fib_index = fib_index;
1237               kv.key = u_key.as_u64;
1238               snat_static_mapping_del_sessions (sm, tsm, u_key, addr_only,
1239                                                 e_addr, e_port);
1240             }
1241         }
1242
1243       fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, nat_fib_src_low);
1244       if (pool_elts (m->locals))
1245         return 0;
1246
1247       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
1248       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0);
1249
1250       vec_free (m->tag);
1251       vec_free (m->workers);
1252       /* Delete static mapping from pool */
1253       pool_put (sm->static_mappings, m);
1254     }
1255
1256   if (!addr_only || (l_addr.as_u32 == e_addr.as_u32))
1257     return 0;
1258
1259   /* Add/delete external address to FIB */
1260   /* *INDENT-OFF* */
1261   pool_foreach (interface, sm->interfaces,
1262   ({
1263     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1264       continue;
1265
1266     snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add);
1267     break;
1268   }));
1269   pool_foreach (interface, sm->output_feature_interfaces,
1270   ({
1271     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1272       continue;
1273
1274     snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add);
1275     break;
1276   }));
1277   /* *INDENT-ON* */
1278
1279   return 0;
1280 }
1281
1282 int
1283 nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
1284                                  nat_protocol_t proto,
1285                                  nat44_lb_addr_port_t * locals, u8 is_add,
1286                                  twice_nat_type_t twice_nat, u8 out2in_only,
1287                                  u8 * tag, u32 affinity)
1288 {
1289   snat_main_t *sm = &snat_main;
1290   snat_static_mapping_t *m;
1291   clib_bihash_kv_8_8_t kv, value;
1292   snat_address_t *a = 0;
1293   int i;
1294   nat44_lb_addr_port_t *local;
1295   snat_main_per_thread_data_t *tsm;
1296   snat_session_t *s;
1297   uword *bitmap = 0;
1298
1299   if (!sm->endpoint_dependent)
1300     return VNET_API_ERROR_FEATURE_DISABLED;
1301
1302   init_nat_k (&kv, e_addr, e_port, 0, proto);
1303   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1304     m = 0;
1305   else
1306     m = pool_elt_at_index (sm->static_mappings, value.value);
1307
1308   if (is_add)
1309     {
1310       if (m)
1311         return VNET_API_ERROR_VALUE_EXIST;
1312
1313       if (vec_len (locals) < 2)
1314         return VNET_API_ERROR_INVALID_VALUE;
1315
1316       /* Find external address in allocated addresses and reserve port for
1317          address and port pair mapping when dynamic translations enabled */
1318       if (!(sm->static_mapping_only || out2in_only))
1319         {
1320           for (i = 0; i < vec_len (sm->addresses); i++)
1321             {
1322               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1323                 {
1324                   a = sm->addresses + i;
1325                   /* External port must be unused */
1326                   switch (proto)
1327                     {
1328 #define _(N, j, n, s) \
1329                     case NAT_PROTOCOL_##N: \
1330                       if (a->busy_##n##_port_refcounts[e_port]) \
1331                         return VNET_API_ERROR_INVALID_VALUE; \
1332                       ++a->busy_##n##_port_refcounts[e_port]; \
1333                       if (e_port > 1024) \
1334                         { \
1335                           a->busy_##n##_ports++; \
1336                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
1337                         } \
1338                       break;
1339                       foreach_nat_protocol
1340 #undef _
1341                     default:
1342                       nat_elog_info ("unknown protocol");
1343                       return VNET_API_ERROR_INVALID_VALUE_2;
1344                     }
1345                   break;
1346                 }
1347             }
1348           /* External address must be allocated */
1349           if (!a)
1350             return VNET_API_ERROR_NO_SUCH_ENTRY;
1351         }
1352
1353       pool_get (sm->static_mappings, m);
1354       clib_memset (m, 0, sizeof (*m));
1355       m->tag = vec_dup (tag);
1356       m->external_addr = e_addr;
1357       m->external_port = e_port;
1358       m->proto = proto;
1359       m->twice_nat = twice_nat;
1360       m->flags |= NAT_STATIC_MAPPING_FLAG_LB;
1361       if (out2in_only)
1362         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
1363       m->affinity = affinity;
1364
1365       if (affinity)
1366         m->affinity_per_service_list_head_index =
1367           nat_affinity_get_per_service_list_head_index ();
1368       else
1369         m->affinity_per_service_list_head_index = ~0;
1370
1371       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto,
1372                    m - sm->static_mappings);
1373       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1))
1374         {
1375           nat_elog_err ("static_mapping_by_external key add failed");
1376           return VNET_API_ERROR_UNSPECIFIED;
1377         }
1378
1379       for (i = 0; i < vec_len (locals); i++)
1380         {
1381           locals[i].fib_index =
1382             fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
1383                                                locals[i].vrf_id,
1384                                                nat_fib_src_low);
1385           if (!out2in_only)
1386             {
1387               init_nat_kv (&kv, locals[i].addr, locals[i].port,
1388                            locals[i].fib_index, m->proto,
1389                            m - sm->static_mappings);
1390               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
1391             }
1392           locals[i].prefix = (i == 0) ? locals[i].probability :
1393             (locals[i - 1].prefix + locals[i].probability);
1394           pool_get (m->locals, local);
1395           *local = locals[i];
1396           if (sm->num_workers > 1)
1397             {
1398               ip4_header_t ip = {
1399                 .src_address = locals[i].addr,
1400               };
1401               bitmap =
1402                 clib_bitmap_set (bitmap,
1403                                  sm->worker_in2out_cb (&ip, m->fib_index, 0),
1404                                  1);
1405             }
1406         }
1407
1408       /* Assign workers */
1409       if (sm->num_workers > 1)
1410         {
1411           /* *INDENT-OFF* */
1412           clib_bitmap_foreach (i, bitmap,
1413             ({
1414                vec_add1(m->workers, i);
1415             }));
1416           /* *INDENT-ON* */
1417         }
1418     }
1419   else
1420     {
1421       if (!m)
1422         return VNET_API_ERROR_NO_SUCH_ENTRY;
1423
1424       if (!is_lb_static_mapping (m))
1425         return VNET_API_ERROR_INVALID_VALUE;
1426
1427       /* Free external address port */
1428       if (!(sm->static_mapping_only || out2in_only))
1429         {
1430           for (i = 0; i < vec_len (sm->addresses); i++)
1431             {
1432               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1433                 {
1434                   a = sm->addresses + i;
1435                   switch (proto)
1436                     {
1437 #define _(N, j, n, s) \
1438                     case NAT_PROTOCOL_##N: \
1439                       --a->busy_##n##_port_refcounts[e_port]; \
1440                       if (e_port > 1024) \
1441                         { \
1442                           a->busy_##n##_ports--; \
1443                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1444                         } \
1445                       break;
1446                       foreach_nat_protocol
1447 #undef _
1448                     default:
1449                       nat_elog_info ("unknown protocol");
1450                       return VNET_API_ERROR_INVALID_VALUE_2;
1451                     }
1452                   break;
1453                 }
1454             }
1455         }
1456
1457       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
1458       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0))
1459         {
1460           nat_elog_err ("static_mapping_by_external key del failed");
1461           return VNET_API_ERROR_UNSPECIFIED;
1462         }
1463
1464       /* *INDENT-OFF* */
1465       pool_foreach (local, m->locals,
1466       ({
1467           fib_table_unlock (local->fib_index, FIB_PROTOCOL_IP4,
1468                             nat_fib_src_low);
1469           if (!out2in_only)
1470             {
1471 init_nat_k(&              kv, local->addr, local->port, local->fib_index, m->proto);
1472               if (clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 0))
1473                 {
1474                   nat_elog_err ("static_mapping_by_local key del failed");
1475                   return VNET_API_ERROR_UNSPECIFIED;
1476                 }
1477             }
1478
1479           if (sm->num_workers > 1)
1480             {
1481               ip4_header_t ip = {
1482                 .src_address = local->addr,
1483               };
1484               tsm = vec_elt_at_index (sm->per_thread_data,
1485                                       sm->worker_in2out_cb (&ip, m->fib_index, 0));
1486             }
1487           else
1488             tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1489
1490           /* Delete sessions */
1491           pool_foreach (s, tsm->sessions, {
1492             if (!(is_lb_session (s)))
1493               continue;
1494
1495             if ((s->in2out.addr.as_u32 != local->addr.as_u32) ||
1496                 s->in2out.port != local->port)
1497               continue;
1498
1499             nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1500             nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1501           });
1502       }));
1503       /* *INDENT-ON* */
1504       if (m->affinity)
1505         nat_affinity_flush_service (m->affinity_per_service_list_head_index);
1506       pool_free (m->locals);
1507       vec_free (m->tag);
1508       vec_free (m->workers);
1509
1510       pool_put (sm->static_mappings, m);
1511     }
1512
1513   return 0;
1514 }
1515
1516 int
1517 nat44_lb_static_mapping_add_del_local (ip4_address_t e_addr, u16 e_port,
1518                                        ip4_address_t l_addr, u16 l_port,
1519                                        nat_protocol_t proto, u32 vrf_id,
1520                                        u8 probability, u8 is_add)
1521 {
1522   snat_main_t *sm = &snat_main;
1523   snat_static_mapping_t *m = 0;
1524   clib_bihash_kv_8_8_t kv, value;
1525   nat44_lb_addr_port_t *local, *prev_local, *match_local = 0;
1526   snat_main_per_thread_data_t *tsm;
1527   snat_session_t *s;
1528   u32 *locals = 0;
1529   uword *bitmap = 0;
1530   int i;
1531
1532   if (!sm->endpoint_dependent)
1533     return VNET_API_ERROR_FEATURE_DISABLED;
1534
1535   init_nat_k (&kv, e_addr, e_port, 0, proto);
1536   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1537     m = pool_elt_at_index (sm->static_mappings, value.value);
1538
1539   if (!m)
1540     return VNET_API_ERROR_NO_SUCH_ENTRY;
1541
1542   if (!is_lb_static_mapping (m))
1543     return VNET_API_ERROR_INVALID_VALUE;
1544
1545   /* *INDENT-OFF* */
1546   pool_foreach (local, m->locals,
1547   ({
1548     if ((local->addr.as_u32 == l_addr.as_u32) && (local->port == l_port) &&
1549         (local->vrf_id == vrf_id))
1550       {
1551         match_local = local;
1552         break;
1553       }
1554   }));
1555   /* *INDENT-ON* */
1556
1557   if (is_add)
1558     {
1559       if (match_local)
1560         return VNET_API_ERROR_VALUE_EXIST;
1561
1562       pool_get (m->locals, local);
1563       clib_memset (local, 0, sizeof (*local));
1564       local->addr.as_u32 = l_addr.as_u32;
1565       local->port = l_port;
1566       local->probability = probability;
1567       local->vrf_id = vrf_id;
1568       local->fib_index =
1569         fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1570                                            nat_fib_src_low);
1571
1572       if (!is_out2in_only_static_mapping (m))
1573         {
1574           init_nat_kv (&kv, l_addr, l_port, local->fib_index, proto,
1575                        m - sm->static_mappings);
1576           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1))
1577             nat_elog_err ("static_mapping_by_local key add failed");
1578         }
1579     }
1580   else
1581     {
1582       if (!match_local)
1583         return VNET_API_ERROR_NO_SUCH_ENTRY;
1584
1585       if (pool_elts (m->locals) < 3)
1586         return VNET_API_ERROR_UNSPECIFIED;
1587
1588       fib_table_unlock (match_local->fib_index, FIB_PROTOCOL_IP4,
1589                         nat_fib_src_low);
1590
1591       if (!is_out2in_only_static_mapping (m))
1592         {
1593           init_nat_k (&kv, l_addr, l_port, match_local->fib_index, proto);
1594           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0))
1595             nat_elog_err ("static_mapping_by_local key del failed");
1596         }
1597
1598       if (sm->num_workers > 1)
1599         {
1600           ip4_header_t ip = {
1601             .src_address = local->addr,
1602           };
1603           tsm = vec_elt_at_index (sm->per_thread_data,
1604                                   sm->worker_in2out_cb (&ip, m->fib_index,
1605                                                         0));
1606         }
1607       else
1608         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1609
1610       /* Delete sessions */
1611       /* *INDENT-OFF* */
1612       pool_foreach (s, tsm->sessions, {
1613         if (!(is_lb_session (s)))
1614           continue;
1615
1616         if ((s->in2out.addr.as_u32 != match_local->addr.as_u32) ||
1617             s->in2out.port != match_local->port)
1618           continue;
1619
1620         nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1621         nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1622       });
1623       /* *INDENT-ON* */
1624
1625       pool_put (m->locals, match_local);
1626     }
1627
1628   vec_free (m->workers);
1629
1630   /* *INDENT-OFF* */
1631   pool_foreach (local, m->locals,
1632   ({
1633     vec_add1 (locals, local - m->locals);
1634     if (sm->num_workers > 1)
1635       {
1636         ip4_header_t ip;
1637         ip.src_address.as_u32 = local->addr.as_u32,
1638         bitmap = clib_bitmap_set (bitmap,
1639                                   sm->worker_in2out_cb (&ip, local->fib_index, 0),
1640                                   1);
1641       }
1642   }));
1643   /* *INDENT-ON* */
1644
1645   ASSERT (vec_len (locals) > 1);
1646
1647   local = pool_elt_at_index (m->locals, locals[0]);
1648   local->prefix = local->probability;
1649   for (i = 1; i < vec_len (locals); i++)
1650     {
1651       local = pool_elt_at_index (m->locals, locals[i]);
1652       prev_local = pool_elt_at_index (m->locals, locals[i - 1]);
1653       local->prefix = local->probability + prev_local->prefix;
1654     }
1655
1656   /* Assign workers */
1657   if (sm->num_workers > 1)
1658     {
1659       /* *INDENT-OFF* */
1660       clib_bitmap_foreach (i, bitmap, ({ vec_add1(m->workers, i); }));
1661       /* *INDENT-ON* */
1662     }
1663
1664   return 0;
1665 }
1666
1667 int
1668 snat_del_address (snat_main_t * sm, ip4_address_t addr, u8 delete_sm,
1669                   u8 twice_nat)
1670 {
1671   snat_address_t *a = 0;
1672   snat_session_t *ses;
1673   u32 *ses_to_be_removed = 0, *ses_index;
1674   snat_main_per_thread_data_t *tsm;
1675   snat_static_mapping_t *m;
1676   snat_interface_t *interface;
1677   int i;
1678   snat_address_t *addresses =
1679     twice_nat ? sm->twice_nat_addresses : sm->addresses;
1680
1681   /* Find SNAT address */
1682   for (i = 0; i < vec_len (addresses); i++)
1683     {
1684       if (addresses[i].addr.as_u32 == addr.as_u32)
1685         {
1686           a = addresses + i;
1687           break;
1688         }
1689     }
1690   if (!a)
1691     return VNET_API_ERROR_NO_SUCH_ENTRY;
1692
1693   if (delete_sm)
1694     {
1695       /* *INDENT-OFF* */
1696       pool_foreach (m, sm->static_mappings,
1697       ({
1698           if (m->external_addr.as_u32 == addr.as_u32)
1699             (void) snat_add_static_mapping (m->local_addr, m->external_addr,
1700                                             m->local_port, m->external_port,
1701                                             m->vrf_id, is_addr_only_static_mapping(m), ~0,
1702                                             m->proto, 0, m->twice_nat,
1703                                             is_out2in_only_static_mapping(m), m->tag, is_identity_static_mapping(m));
1704       }));
1705       /* *INDENT-ON* */
1706     }
1707   else
1708     {
1709       /* Check if address is used in some static mapping */
1710       if (is_snat_address_used_in_static_mapping (sm, addr))
1711         {
1712           nat_elog_notice ("address used in static mapping");
1713           return VNET_API_ERROR_UNSPECIFIED;
1714         }
1715     }
1716
1717   if (a->fib_index != ~0)
1718     fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, nat_fib_src_low);
1719
1720   /* Delete sessions using address */
1721   if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports)
1722     {
1723       /* *INDENT-OFF* */
1724       vec_foreach (tsm, sm->per_thread_data)
1725         {
1726           pool_foreach (ses, tsm->sessions, ({
1727             if (ses->out2in.addr.as_u32 == addr.as_u32)
1728               {
1729                 nat_free_session_data (sm, ses, tsm - sm->per_thread_data, 0);
1730                 vec_add1 (ses_to_be_removed, ses - tsm->sessions);
1731               }
1732           }));
1733
1734           if (sm->endpoint_dependent){
1735               vec_foreach (ses_index, ses_to_be_removed)
1736                 {
1737                   ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
1738                   nat_ed_session_delete (sm, ses, tsm - sm->per_thread_data, 1);
1739                 }
1740           }else{
1741               vec_foreach (ses_index, ses_to_be_removed)
1742                 {
1743                   ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
1744                   nat44_delete_session (sm, ses, tsm - sm->per_thread_data);
1745                 }
1746           }
1747
1748           vec_free (ses_to_be_removed);
1749         }
1750       /* *INDENT-ON* */
1751     }
1752
1753 #define _(N, i, n, s) \
1754   vec_free (a->busy_##n##_ports_per_thread);
1755   foreach_nat_protocol
1756 #undef _
1757     if (twice_nat)
1758     {
1759       vec_del1 (sm->twice_nat_addresses, i);
1760       return 0;
1761     }
1762   else
1763     vec_del1 (sm->addresses, i);
1764
1765   /* Delete external address from FIB */
1766   /* *INDENT-OFF* */
1767   pool_foreach (interface, sm->interfaces,
1768   ({
1769     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1770       continue;
1771
1772     snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0);
1773     break;
1774   }));
1775   pool_foreach (interface, sm->output_feature_interfaces,
1776   ({
1777     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1778       continue;
1779
1780     snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0);
1781     break;
1782   }));
1783   /* *INDENT-ON* */
1784
1785   return 0;
1786 }
1787
1788 static void
1789 nat_validate_counters (snat_main_t * sm, u32 sw_if_index)
1790 {
1791 #define _(x)                                                                  \
1792   vlib_validate_simple_counter (&sm->counters.fastpath.in2out.x,              \
1793                                 sw_if_index);                                 \
1794   vlib_zero_simple_counter (&sm->counters.fastpath.in2out.x, sw_if_index);    \
1795   vlib_validate_simple_counter (&sm->counters.fastpath.out2in.x,              \
1796                                 sw_if_index);                                 \
1797   vlib_zero_simple_counter (&sm->counters.fastpath.out2in.x, sw_if_index);    \
1798   vlib_validate_simple_counter (&sm->counters.slowpath.in2out.x,              \
1799                                 sw_if_index);                                 \
1800   vlib_zero_simple_counter (&sm->counters.slowpath.in2out.x, sw_if_index);    \
1801   vlib_validate_simple_counter (&sm->counters.slowpath.out2in.x,              \
1802                                 sw_if_index);                                 \
1803   vlib_zero_simple_counter (&sm->counters.slowpath.out2in.x, sw_if_index);    \
1804   vlib_validate_simple_counter (&sm->counters.fastpath.in2out_ed.x,           \
1805                                 sw_if_index);                                 \
1806   vlib_zero_simple_counter (&sm->counters.fastpath.in2out_ed.x, sw_if_index); \
1807   vlib_validate_simple_counter (&sm->counters.fastpath.out2in_ed.x,           \
1808                                 sw_if_index);                                 \
1809   vlib_zero_simple_counter (&sm->counters.fastpath.out2in_ed.x, sw_if_index); \
1810   vlib_validate_simple_counter (&sm->counters.slowpath.in2out_ed.x,           \
1811                                 sw_if_index);                                 \
1812   vlib_zero_simple_counter (&sm->counters.slowpath.in2out_ed.x, sw_if_index); \
1813   vlib_validate_simple_counter (&sm->counters.slowpath.out2in_ed.x,           \
1814                                 sw_if_index);                                 \
1815   vlib_zero_simple_counter (&sm->counters.slowpath.out2in_ed.x, sw_if_index);
1816   foreach_nat_counter;
1817 #undef _
1818   vlib_validate_simple_counter (&sm->counters.hairpinning, sw_if_index);
1819   vlib_zero_simple_counter (&sm->counters.hairpinning, sw_if_index);
1820 }
1821
1822 void
1823 expire_per_vrf_sessions (u32 fib_index)
1824 {
1825   per_vrf_sessions_t *per_vrf_sessions;
1826   snat_main_per_thread_data_t *tsm;
1827   snat_main_t *sm = &snat_main;
1828
1829   /* *INDENT-OFF* */
1830   vec_foreach (tsm, sm->per_thread_data)
1831     {
1832       vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
1833         {
1834           if ((per_vrf_sessions->rx_fib_index == fib_index) ||
1835               (per_vrf_sessions->tx_fib_index == fib_index))
1836             {
1837               per_vrf_sessions->expired = 1;
1838             }
1839         }
1840     }
1841   /* *INDENT-ON* */
1842 }
1843
1844 void
1845 update_per_vrf_sessions_vec (u32 fib_index, int is_del)
1846 {
1847   snat_main_t *sm = &snat_main;
1848   nat_fib_t *fib;
1849
1850   // we don't care if it is outside/inside fib
1851   // we just care about their ref_count
1852   // if it reaches 0 sessions should expire
1853   // because the fib isn't valid for NAT anymore
1854
1855   vec_foreach (fib, sm->fibs)
1856   {
1857     if (fib->fib_index == fib_index)
1858       {
1859         if (is_del)
1860           {
1861             fib->ref_count--;
1862             if (!fib->ref_count)
1863               {
1864                 vec_del1 (sm->fibs, fib - sm->fibs);
1865                 expire_per_vrf_sessions (fib_index);
1866               }
1867             return;
1868           }
1869         else
1870           fib->ref_count++;
1871       }
1872   }
1873   if (!is_del)
1874     {
1875       vec_add2 (sm->fibs, fib, 1);
1876       fib->ref_count = 1;
1877       fib->fib_index = fib_index;
1878     }
1879 }
1880
/*
 * Add or remove sw_if_index as a NAT44 inside (is_inside != 0) or outside
 * (is_inside == 0) interface on the "ip4-unicast" feature arc.
 *
 * Returns:
 *   VNET_API_ERROR_UNSUPPORTED    sm->out2in_dpo is set and is_inside is 0
 *   VNET_API_ERROR_VALUE_EXIST    sw_if_index is already present in
 *                                 sm->output_feature_interfaces
 *   VNET_API_ERROR_NO_SUCH_ENTRY  is_del is set and sw_if_index is not in
 *                                 sm->interfaces
 *   the non-zero value returned by ip4_sv_reass_enable_disable_with_refcnt ()
 *   0 otherwise (including an add of a role the interface already has)
 *
 * NOTE(review): the per-VRF and outside-FIB reference counts are adjusted
 * before sm->interfaces is searched, so they have already been changed when
 * one of the later error returns is taken - confirm this is intended.
 */
1881 int
1882 snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
1883 {
1884   snat_main_t *sm = &snat_main;
1885   snat_interface_t *i;
1886   const char *feature_name, *del_feature_name;
1887   snat_address_t *ap;
1888   snat_static_mapping_t *m;
1889   nat_outside_fib_t *outside_fib;
1890   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1891                                                        sw_if_index);
1892
1893   if (sm->out2in_dpo && !is_inside)
1894     return VNET_API_ERROR_UNSUPPORTED;
1895
       /* Refuse interfaces already listed in sm->output_feature_interfaces */
1896   /* *INDENT-OFF* */
1897   pool_foreach (i, sm->output_feature_interfaces,
1898   ({
1899     if (i->sw_if_index == sw_if_index)
1900       return VNET_API_ERROR_VALUE_EXIST;
1901   }));
1902   /* *INDENT-ON* */
1903
       /* Select the ip4-unicast node used when the interface has only the
        * requested role */
1904   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
1905     feature_name = is_inside ? "nat44-in2out-fast" : "nat44-out2in-fast";
1906   else
1907     {
1908       if (sm->num_workers > 1)
1909         feature_name =
1910           is_inside ? "nat44-in2out-worker-handoff" :
1911           "nat44-out2in-worker-handoff";
1912       else if (sm->endpoint_dependent)
1913         {
1914           feature_name = is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
1915         }
1916       else
1917         feature_name = is_inside ? "nat44-in2out" : "nat44-out2in";
1918     }
1919
       /* Initialise the frame queues once (index still ~0) when more than
        * one worker is configured */
1920   if (sm->fq_in2out_index == ~0 && sm->num_workers > 1)
1921     sm->fq_in2out_index =
1922       vlib_frame_queue_main_init (sm->in2out_node_index, NAT_FQ_NELTS);
1923
1924   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
1925     sm->fq_out2in_index =
1926       vlib_frame_queue_main_init (sm->out2in_node_index, NAT_FQ_NELTS);
1927
1928   if (sm->endpoint_dependent)
1929     update_per_vrf_sessions_vec (fib_index, is_del);
1930
       /* Reference-count the FIB of outside interfaces in sm->outside_fibs */
1931   if (!is_inside)
1932     {
1933       /* *INDENT-OFF* */
1934       vec_foreach (outside_fib, sm->outside_fibs)
1935         {
1936           if (outside_fib->fib_index == fib_index)
1937             {
1938               if (is_del)
1939                 {
1940                   outside_fib->refcount--;
1941                   if (!outside_fib->refcount)
1942                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
1943                 }
1944               else
1945                 outside_fib->refcount++;
1946               goto feature_set;
1947             }
1948         }
1949       /* *INDENT-ON* */
1950       if (!is_del)
1951         {
1952           vec_add2 (sm->outside_fibs, outside_fib, 1);
1953           outside_fib->refcount = 1;
1954           outside_fib->fib_index = fib_index;
1955         }
1956     }
1957
1958 feature_set:
       /* Handle an interface that already has an entry in sm->interfaces */
1959   /* *INDENT-OFF* */
1960   pool_foreach (i, sm->interfaces,
1961   ({
1962     if (i->sw_if_index == sw_if_index)
1963       {
1964         if (is_del)
1965           {
                 /* Interface has both roles: clear the requested one and swap
                  * the classify node for the node of the remaining role */
1966             if (nat_interface_is_inside(i) && nat_interface_is_outside(i))
1967               {
1968                 if (is_inside)
1969                   i->flags &= ~NAT_INTERFACE_FLAG_IS_INSIDE;
1970                 else
1971                   i->flags &= ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
1972
1973                 if (sm->num_workers > 1)
1974                   {
1975                     del_feature_name = "nat44-handoff-classify";
1976                     feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
1977                                                  "nat44-out2in-worker-handoff";
1978                   }
1979                 else if (sm->endpoint_dependent)
1980                   {
1981                     del_feature_name = "nat44-ed-classify";
1982                     feature_name = !is_inside ?  "nat-pre-in2out" :
1983                                                  "nat-pre-out2in";
1984                   }
1985                 else
1986                   {
1987                     del_feature_name = "nat44-classify";
1988                     feature_name = !is_inside ?  "nat44-in2out" : "nat44-out2in";
1989                   }
1990
1991                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
1992                 if (rv)
1993                   return rv;
1994                 vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
1995                                              sw_if_index, 0, 0, 0);
1996                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
1997                                              sw_if_index, 1, 0, 0);
1998                 if (!is_inside)
1999                   {
2000                     if (sm->endpoint_dependent)
2001                       vnet_feature_enable_disable ("ip4-local",
2002                                                    "nat44-ed-hairpinning",
2003                                                    sw_if_index, 1, 0, 0);
2004                     else
2005                       vnet_feature_enable_disable ("ip4-local",
2006                                                    "nat44-hairpinning",
2007                                                    sw_if_index, 1, 0, 0);
2008                   }
2009               }
                 /* Interface has a single role: disable the feature and
                  * release the pool entry.
                  * NOTE(review): the role held by i is not compared with
                  * is_inside on this path - confirm. */
2010             else
2011               {
2012                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
2013                 if (rv)
2014                   return rv;
2015                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
2016                                              sw_if_index, 0, 0, 0);
2017                 pool_put (sm->interfaces, i);
2018                 if (is_inside)
2019                   {
2020                     if (sm->endpoint_dependent)
2021                       vnet_feature_enable_disable ("ip4-local",
2022                                                    "nat44-ed-hairpinning",
2023                                                    sw_if_index, 0, 0, 0);
2024                     else
2025                       vnet_feature_enable_disable ("ip4-local",
2026                                                    "nat44-hairpinning",
2027                                                    sw_if_index, 0, 0, 0);
2028                   }
2029               }
2030           }
2031         else
2032           {
                 /* Requested role is already set: nothing to do */
2033             if ((nat_interface_is_inside(i) && is_inside) ||
2034                 (nat_interface_is_outside(i) && !is_inside))
2035               return 0;
2036
                 /* Interface gains its second role: replace the single-role
                  * node with the classify node */
2037             if (sm->num_workers > 1)
2038               {
2039                 del_feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
2040                                                  "nat44-out2in-worker-handoff";
2041                 feature_name = "nat44-handoff-classify";
2042               }
2043             else if (sm->endpoint_dependent)
2044               {
2045                 del_feature_name = !is_inside ?  "nat-pre-in2out" :
2046                                                  "nat-pre-out2in";
2047
2048                 feature_name = "nat44-ed-classify";
2049               }
2050             else
2051               {
2052                 del_feature_name = !is_inside ?  "nat44-in2out" : "nat44-out2in";
2053                 feature_name = "nat44-classify";
2054               }
2055
2056             int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
2057             if (rv)
2058               return rv;
2059             vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
2060                                          sw_if_index, 0, 0, 0);
2061             vnet_feature_enable_disable ("ip4-unicast", feature_name,
2062                                          sw_if_index, 1, 0, 0);
2063             if (!is_inside)
2064               {
2065                 if (sm->endpoint_dependent)
2066                   vnet_feature_enable_disable ("ip4-local", "nat44-ed-hairpinning",
2067                                                sw_if_index, 0, 0, 0);
2068                 else
2069                   vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning",
2070                                                sw_if_index, 0, 0, 0);
2071               }
2072             goto set_flags;
2073           }
2074
2075         goto fib;
2076       }
2077   }));
2078   /* *INDENT-ON* */
2079
       /* No entry in sm->interfaces for sw_if_index */
2080   if (is_del)
2081     return VNET_API_ERROR_NO_SUCH_ENTRY;
2082
2083   pool_get (sm->interfaces, i);
2084   i->sw_if_index = sw_if_index;
2085   i->flags = 0;
2086   nat_validate_counters (sm, sw_if_index);
2087
2088   vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1, 0,
2089                                0);
2090
       /* NOTE(review): on failure the pool entry allocated above and the
        * feature enabled above are left in place - confirm. */
2091   int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
2092   if (rv)
2093     return rv;
2094
2095   if (is_inside && !sm->out2in_dpo)
2096     {
2097       if (sm->endpoint_dependent)
2098         vnet_feature_enable_disable ("ip4-local", "nat44-ed-hairpinning",
2099                                      sw_if_index, 1, 0, 0);
2100       else
2101         vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning",
2102                                      sw_if_index, 1, 0, 0);
2103     }
2104
2105 set_flags:
       /* Adding the inside role returns here without touching the FIB */
2106   if (is_inside)
2107     {
2108       i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
2109       return 0;
2110     }
2111   else
2112     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
2113
2114   /* Add/delete external addresses to FIB */
2115 fib:
       /* Every address in sm->addresses, plus the external address of each
        * address-only static mapping whose local and external addresses
        * differ, is added (is_del == 0) or deleted (is_del != 0) */
2116   /* *INDENT-OFF* */
2117   vec_foreach (ap, sm->addresses)
2118     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
2119
2120   pool_foreach (m, sm->static_mappings,
2121   ({
2122     if (!(is_addr_only_static_mapping(m)) || (m->local_addr.as_u32 == m->external_addr.as_u32))
2123       continue;
2124
2125     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
2126   }));
2127   /* *INDENT-ON* */
2128
2129   return 0;
2130 }
2131
/*
 * Add or remove sw_if_index as a NAT44 output-feature interface, toggling
 * features on both the "ip4-unicast" and "ip4-output" arcs.
 *
 * Returns:
 *   VNET_API_ERROR_UNSUPPORTED    sm->static_mapping_only is set without
 *                                 sm->static_mapping_connection_tracking
 *   VNET_API_ERROR_VALUE_EXIST    sw_if_index is already in sm->interfaces,
 *                                 or (on add) already in
 *                                 sm->output_feature_interfaces
 *   VNET_API_ERROR_NO_SUCH_ENTRY  is_del is set and sw_if_index is not in
 *                                 sm->output_feature_interfaces
 *   the non-zero value returned by ip4_sv_reass_enable_disable_with_refcnt ()
 *   or ip4_sv_reass_output_enable_disable_with_refcnt ()
 *   0 otherwise
 *
 * NOTE(review): reference counts are adjusted and features are enabled or
 * disabled before sm->output_feature_interfaces is searched, so the
 * VALUE_EXIST / NO_SUCH_ENTRY returns near the end are taken after those
 * changes were made - confirm this is intended.
 */
2132 int
2133 snat_interface_add_del_output_feature (u32 sw_if_index,
2134                                        u8 is_inside, int is_del)
2135 {
2136   snat_main_t *sm = &snat_main;
2137   snat_interface_t *i;
2138   snat_address_t *ap;
2139   snat_static_mapping_t *m;
2140   nat_outside_fib_t *outside_fib;
2141   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2142                                                        sw_if_index);
2143
2144   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
2145     return VNET_API_ERROR_UNSUPPORTED;
2146
       /* Refuse interfaces already listed in sm->interfaces */
2147   /* *INDENT-OFF* */
2148   pool_foreach (i, sm->interfaces,
2149   ({
2150     if (i->sw_if_index == sw_if_index)
2151       return VNET_API_ERROR_VALUE_EXIST;
2152   }));
2153   /* *INDENT-ON* */
2154
2155   if (sm->endpoint_dependent)
2156     update_per_vrf_sessions_vec (fib_index, is_del);
2157
       /* Reference-count the FIB of outside interfaces in sm->outside_fibs */
2158   if (!is_inside)
2159     {
2160       /* *INDENT-OFF* */
2161       vec_foreach (outside_fib, sm->outside_fibs)
2162         {
2163           if (outside_fib->fib_index == fib_index)
2164             {
2165               if (is_del)
2166                 {
2167                   outside_fib->refcount--;
2168                   if (!outside_fib->refcount)
2169                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
2170                 }
2171               else
2172                 outside_fib->refcount++;
2173               goto feature_set;
2174             }
2175         }
2176       /* *INDENT-ON* */
2177       if (!is_del)
2178         {
2179           vec_add2 (sm->outside_fibs, outside_fib, 1);
2180           outside_fib->refcount = 1;
2181           outside_fib->fib_index = fib_index;
2182         }
2183     }
2184
2185 feature_set:
       /* Inside interface: only the hairpin-dst / hairpin-src nodes are
        * toggled (ED variants when sm->endpoint_dependent), then jump to fq */
2186   if (is_inside)
2187     {
2188       if (sm->endpoint_dependent)
2189         {
2190           int rv =
2191             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2192           if (rv)
2193             return rv;
2194           rv =
2195             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2196                                                             !is_del);
2197           if (rv)
2198             return rv;
2199           vnet_feature_enable_disable ("ip4-unicast", "nat44-ed-hairpin-dst",
2200                                        sw_if_index, !is_del, 0, 0);
2201           vnet_feature_enable_disable ("ip4-output", "nat44-ed-hairpin-src",
2202                                        sw_if_index, !is_del, 0, 0);
2203         }
2204       else
2205         {
2206           int rv =
2207             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2208           if (rv)
2209             return rv;
2210           rv =
2211             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2212                                                             !is_del);
2213           if (rv)
2214             return rv;
2215           vnet_feature_enable_disable ("ip4-unicast", "nat44-hairpin-dst",
2216                                        sw_if_index, !is_del, 0, 0);
2217           vnet_feature_enable_disable ("ip4-output", "nat44-hairpin-src",
2218                                        sw_if_index, !is_del, 0, 0);
2219         }
2220       goto fq;
2221     }
2222
       /* Outside interface: worker-handoff nodes when more than one worker
        * is configured, otherwise the ED or non-ED out2in / in2out-output
        * nodes */
2223   if (sm->num_workers > 1)
2224     {
2225       int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2226       if (rv)
2227         return rv;
2228       rv =
2229         ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del);
2230       if (rv)
2231         return rv;
2232       vnet_feature_enable_disable ("ip4-unicast",
2233                                    "nat44-out2in-worker-handoff",
2234                                    sw_if_index, !is_del, 0, 0);
2235       vnet_feature_enable_disable ("ip4-output",
2236                                    "nat44-in2out-output-worker-handoff",
2237                                    sw_if_index, !is_del, 0, 0);
2238     }
2239   else
2240     {
2241       if (sm->endpoint_dependent)
2242         {
2243           int rv =
2244             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2245           if (rv)
2246             return rv;
2247           rv =
2248             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2249                                                             !is_del);
2250           if (rv)
2251             return rv;
2252           vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in",
2253                                        sw_if_index, !is_del, 0, 0);
2254           vnet_feature_enable_disable ("ip4-output", "nat44-ed-in2out-output",
2255                                        sw_if_index, !is_del, 0, 0);
2256         }
2257       else
2258         {
2259           int rv =
2260             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2261           if (rv)
2262             return rv;
2263           rv =
2264             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2265                                                             !is_del);
2266           if (rv)
2267             return rv;
2268           vnet_feature_enable_disable ("ip4-unicast", "nat44-out2in",
2269                                        sw_if_index, !is_del, 0, 0);
2270           vnet_feature_enable_disable ("ip4-output", "nat44-in2out-output",
2271                                        sw_if_index, !is_del, 0, 0);
2272         }
2273     }
2274
2275 fq:
       /* Initialise the frame queues once (index still ~0) when more than
        * one worker is configured.
        * NOTE(review): the second argument is 0 here, whereas
        * snat_interface_add_del () passes NAT_FQ_NELTS - confirm. */
2276   if (sm->fq_in2out_output_index == ~0 && sm->num_workers > 1)
2277     sm->fq_in2out_output_index =
2278       vlib_frame_queue_main_init (sm->in2out_output_node_index, 0);
2279
2280   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
2281     sm->fq_out2in_index =
2282       vlib_frame_queue_main_init (sm->out2in_node_index, 0);
2283
       /* Existing entry: release it on delete, reject a duplicate add */
2284   /* *INDENT-OFF* */
2285   pool_foreach (i, sm->output_feature_interfaces,
2286   ({
2287     if (i->sw_if_index == sw_if_index)
2288       {
2289         if (is_del)
2290           pool_put (sm->output_feature_interfaces, i);
2291         else
2292           return VNET_API_ERROR_VALUE_EXIST;
2293
2294         goto fib;
2295       }
2296   }));
2297   /* *INDENT-ON* */
2298
2299   if (is_del)
2300     return VNET_API_ERROR_NO_SUCH_ENTRY;
2301
2302   pool_get (sm->output_feature_interfaces, i);
2303   i->sw_if_index = sw_if_index;
2304   i->flags = 0;
2305   nat_validate_counters (sm, sw_if_index);
2306   if (is_inside)
2307     i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
2308   else
2309     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
2310
2311   /* Add/delete external addresses to FIB */
2312 fib:
       /* Inside interfaces do not touch the FIB */
2313   if (is_inside)
2314     return 0;
2315
       /* Every address in sm->addresses, plus the external address of each
        * address-only static mapping whose local and external addresses
        * differ, is added (is_del == 0) or deleted (is_del != 0) */
2316   /* *INDENT-OFF* */
2317   vec_foreach (ap, sm->addresses)
2318     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
2319
2320   pool_foreach (m, sm->static_mappings,
2321   ({
2322     if (!((is_addr_only_static_mapping(m)))  || (m->local_addr.as_u32 == m->external_addr.as_u32))
2323       continue;
2324
2325     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
2326   }));
2327   /* *INDENT-ON* */
2328
2329   return 0;
2330 }
2331
2332 int
2333 snat_set_workers (uword * bitmap)
2334 {
2335   snat_main_t *sm = &snat_main;
2336   int i, j = 0;
2337
2338   if (sm->num_workers < 2)
2339     return VNET_API_ERROR_FEATURE_DISABLED;
2340
2341   if (clib_bitmap_last_set (bitmap) >= sm->num_workers)
2342     return VNET_API_ERROR_INVALID_WORKER;
2343
2344   vec_free (sm->workers);
2345   /* *INDENT-OFF* */
2346   clib_bitmap_foreach (i, bitmap,
2347     ({
2348       vec_add1(sm->workers, i);
2349       sm->per_thread_data[sm->first_worker_index + i].snat_thread_index = j;
2350       sm->per_thread_data[sm->first_worker_index + i].thread_index = i;
2351       j++;
2352     }));
2353   /* *INDENT-ON* */
2354
2355   sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
2356
2357   return 0;
2358 }
2359
2360 static void
2361 snat_update_outside_fib (u32 sw_if_index, u32 new_fib_index,
2362                          u32 old_fib_index)
2363 {
2364   snat_main_t *sm = &snat_main;
2365   nat_outside_fib_t *outside_fib;
2366   snat_interface_t *i;
2367   u8 is_add = 1;
2368   u8 match = 0;
2369
2370   if (new_fib_index == old_fib_index)
2371     return;
2372
2373   if (!vec_len (sm->outside_fibs))
2374     return;
2375
2376   /* *INDENT-OFF* */
2377   pool_foreach (i, sm->interfaces,
2378     ({
2379       if (i->sw_if_index == sw_if_index)
2380         {
2381           if (!(nat_interface_is_outside (i)))
2382             return;
2383           match = 1;
2384         }
2385     }));
2386
2387   pool_foreach (i, sm->output_feature_interfaces,
2388     ({
2389       if (i->sw_if_index == sw_if_index)
2390         {
2391           if (!(nat_interface_is_outside (i)))
2392             return;
2393           match = 1;
2394         }
2395     }));
2396   /* *INDENT-ON* */
2397
2398   if (!match)
2399     return;
2400
2401   vec_foreach (outside_fib, sm->outside_fibs)
2402   {
2403     if (outside_fib->fib_index == old_fib_index)
2404       {
2405         outside_fib->refcount--;
2406         if (!outside_fib->refcount)
2407           vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
2408         break;
2409       }
2410   }
2411
2412   vec_foreach (outside_fib, sm->outside_fibs)
2413   {
2414     if (outside_fib->fib_index == new_fib_index)
2415       {
2416         outside_fib->refcount++;
2417         is_add = 0;
2418         break;
2419       }
2420   }
2421
2422   if (is_add)
2423     {
2424       vec_add2 (sm->outside_fibs, outside_fib, 1);
2425       outside_fib->refcount = 1;
2426       outside_fib->fib_index = new_fib_index;
2427     }
2428 }
2429
2430 static void
2431 snat_ip4_table_bind (ip4_main_t * im,
2432                      uword opaque,
2433                      u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
2434 {
2435   snat_update_outside_fib (sw_if_index, new_fib_index, old_fib_index);
2436 }
2437
/*
 * Forward declarations of file-local (static) functions.
 * nat_alloc_addr_and_port_default is installed as sm->alloc_addr_and_port
 * in snat_init () below.
 */
2438 static void
2439 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
2440                                        uword opaque,
2441                                        u32 sw_if_index,
2442                                        ip4_address_t * address,
2443                                        u32 address_length,
2444                                        u32 if_address_index, u32 is_delete);
2445
2446 static void
2447 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
2448                                  uword opaque,
2449                                  u32 sw_if_index,
2450                                  ip4_address_t * address,
2451                                  u32 address_length,
2452                                  u32 if_address_index, u32 is_delete);
2453
2454 static int
2455 nat_alloc_addr_and_port_default (snat_address_t * addresses, u32 fib_index,
2456                                  u32 thread_index, nat_protocol_t proto,
2457                                  ip4_address_t * addr, u16 * port,
2458                                  u16 port_per_thread, u32 snat_thread_index);
2459
2460 void
2461 test_key_calc_split ()
2462 {
2463   ip4_address_t l_addr;
2464   l_addr.as_u8[0] = 1;
2465   l_addr.as_u8[1] = 1;
2466   l_addr.as_u8[2] = 1;
2467   l_addr.as_u8[3] = 1;
2468   ip4_address_t r_addr;
2469   r_addr.as_u8[0] = 2;
2470   r_addr.as_u8[1] = 2;
2471   r_addr.as_u8[2] = 2;
2472   r_addr.as_u8[3] = 2;
2473   u16 l_port = 40001;
2474   u16 r_port = 40301;
2475   u8 proto = 9;
2476   u32 fib_index = 9000001;
2477   u32 thread_index = 3000000001;
2478   u32 session_index = 3000000221;
2479   clib_bihash_kv_16_8_t kv;
2480   init_ed_kv (&kv, l_addr, l_port, r_addr, r_port, fib_index, proto,
2481               thread_index, session_index);
2482   ip4_address_t l_addr2;
2483   ip4_address_t r_addr2;
2484   clib_memset (&l_addr2, 0, sizeof (l_addr2));
2485   clib_memset (&r_addr2, 0, sizeof (r_addr2));
2486   u16 l_port2 = 0;
2487   u16 r_port2 = 0;
2488   u8 proto2 = 0;
2489   u32 fib_index2 = 0;
2490   split_ed_kv (&kv, &l_addr2, &r_addr2, &proto2, &fib_index2, &l_port2,
2491                &r_port2);
2492   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
2493   ASSERT (r_addr.as_u32 == r_addr2.as_u32);
2494   ASSERT (l_port == l_port2);
2495   ASSERT (r_port == r_port2);
2496   ASSERT (proto == proto2);
2497   ASSERT (fib_index == fib_index2);
2498   ASSERT (thread_index == ed_value_get_thread_index (&kv));
2499   ASSERT (session_index == ed_value_get_session_index (&kv));
2500
2501   fib_index = 7001;
2502   proto = 5;
2503   nat_protocol_t proto3 = ~0;
2504   u64 key = calc_nat_key (l_addr, l_port, fib_index, proto);
2505   split_nat_key (key, &l_addr2, &l_port2, &fib_index2, &proto3);
2506   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
2507   ASSERT (l_port == l_port2);
2508   ASSERT (proto == proto3);
2509   ASSERT (fib_index == fib_index2);
2510 }
2511
2512 static clib_error_t *
2513 nat_ip_table_add_del (vnet_main_t * vnm, u32 table_id, u32 is_add)
2514 {
2515   snat_main_t *sm = &snat_main;
2516   u32 fib_index;
2517
2518   if (sm->endpoint_dependent)
2519     {
2520       // TODO: consider removing all NAT interfaces
2521
2522       if (!is_add)
2523         {
2524           fib_index = ip4_fib_index_from_table_id (table_id);
2525           if (fib_index != ~0)
2526             expire_per_vrf_sessions (fib_index);
2527         }
2528     }
2529   return 0;
2530 }
2531
2532 VNET_IP_TABLE_ADD_DEL_FUNCTION (nat_ip_table_add_del);
2533
2534
2535 static clib_error_t *
2536 snat_init (vlib_main_t * vm)
2537 {
2538   snat_main_t *sm = &snat_main;
2539   clib_error_t *error = 0;
2540   ip4_main_t *im = &ip4_main;
2541   ip_lookup_main_t *lm = &im->lookup_main;
2542   uword *p;
2543   vlib_thread_registration_t *tr;
2544   vlib_thread_main_t *tm = vlib_get_thread_main ();
2545   uword *bitmap = 0;
2546   u32 i;
2547   ip4_add_del_interface_address_callback_t cb4;
2548   vlib_node_t *node;
2549
2550   sm->vnet_main = vnet_get_main ();
2551   sm->ip4_main = im;
2552   sm->ip4_lookup_main = lm;
2553   sm->api_main = vlibapi_get_main ();
2554   sm->first_worker_index = 0;
2555   sm->num_workers = 0;
2556   sm->workers = 0;
2557   sm->port_per_thread = 0xffff - 1024;
2558   sm->fq_in2out_index = ~0;
2559   sm->fq_in2out_output_index = ~0;
2560   sm->fq_out2in_index = ~0;
2561
2562   sm->alloc_addr_and_port = nat_alloc_addr_and_port_default;
2563   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_DEFAULT;
2564   sm->forwarding_enabled = 0;
2565   sm->log_class = vlib_log_register_class ("nat", 0);
2566   sm->log_level = SNAT_LOG_ERROR;
2567   sm->mss_clamping = 0;
2568
2569   node = vlib_get_node_by_name (vm, (u8 *) "error-drop");
2570   sm->error_node_index = node->index;
2571
2572   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-in2out");
2573   sm->pre_in2out_node_index = node->index;
2574   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-out2in");
2575   sm->pre_out2in_node_index = node->index;
2576
2577   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-in2out");
2578   sm->pre_in2out_node_index = node->index;
2579
2580   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-out2in");
2581   sm->pre_out2in_node_index = node->index;
2582
2583   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out");
2584   sm->in2out_node_index = node->index;
2585   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-output");
2586   sm->in2out_output_node_index = node->index;
2587   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-fast");
2588   sm->in2out_fast_node_index = node->index;
2589   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-slowpath");
2590   sm->in2out_slowpath_node_index = node->index;
2591   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-output-slowpath");
2592   sm->in2out_slowpath_output_node_index = node->index;
2593
2594   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
2595   sm->ed_in2out_node_index = node->index;
2596   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-slowpath");
2597   sm->ed_in2out_slowpath_node_index = node->index;
2598
2599   node = vlib_get_node_by_name (vm, (u8 *) "nat44-out2in");
2600   sm->out2in_node_index = node->index;
2601   node = vlib_get_node_by_name (vm, (u8 *) "nat44-out2in-fast");
2602   sm->out2in_fast_node_index = node->index;
2603
2604   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
2605   sm->ed_out2in_node_index = node->index;
2606   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in-slowpath");
2607   sm->ed_out2in_slowpath_node_index = node->index;
2608
2609   node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpinning");
2610   sm->hairpinning_node_index = node->index;
2611   node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpin-dst");
2612   sm->hairpin_dst_node_index = node->index;
2613   node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpin-src");
2614   sm->hairpin_src_node_index = node->index;
2615   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpinning");
2616   sm->ed_hairpinning_node_index = node->index;
2617   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpin-dst");
2618   sm->ed_hairpin_dst_node_index = node->index;
2619   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpin-src");
2620   sm->ed_hairpin_src_node_index = node->index;
2621
2622   p = hash_get_mem (tm->thread_registrations_by_name, "workers");
2623   if (p)
2624     {
2625       tr = (vlib_thread_registration_t *) p[0];
2626       if (tr)
2627         {
2628           sm->num_workers = tr->count;
2629           sm->first_worker_index = tr->first_index;
2630         }
2631     }
2632
2633   vec_validate (sm->per_thread_data, tm->n_vlib_mains - 1);
2634
2635   /* Use all available workers by default */
2636   if (sm->num_workers > 1)
2637     {
2638       for (i = 0; i < sm->num_workers; i++)
2639         bitmap = clib_bitmap_set (bitmap, i, 1);
2640       snat_set_workers (bitmap);
2641       clib_bitmap_free (bitmap);
2642     }
2643   else
2644     {
2645       sm->per_thread_data[0].snat_thread_index = 0;
2646     }
2647
2648   error = snat_api_init (vm, sm);
2649   if (error)
2650     return error;
2651
2652   /* Set up the interface address add/del callback */
2653   cb4.function = snat_ip4_add_del_interface_address_cb;
2654   cb4.function_opaque = 0;
2655
2656   vec_add1 (im->add_del_interface_address_callbacks, cb4);
2657
2658   cb4.function = nat_ip4_add_del_addr_only_sm_cb;
2659   cb4.function_opaque = 0;
2660
2661   vec_add1 (im->add_del_interface_address_callbacks, cb4);
2662
2663   nat_dpo_module_init ();
2664
2665   /* Init counters */
2666   sm->total_users.name = "total-users";
2667   sm->total_users.stat_segment_name = "/nat44/total-users";
2668   vlib_validate_simple_counter (&sm->total_users, 0);
2669   vlib_zero_simple_counter (&sm->total_users, 0);
2670   sm->total_sessions.name = "total-sessions";
2671   sm->total_sessions.stat_segment_name = "/nat44/total-sessions";
2672   vlib_validate_simple_counter (&sm->total_sessions, 0);
2673   vlib_zero_simple_counter (&sm->total_sessions, 0);
2674   sm->user_limit_reached.name = "user-limit-reached";
2675   sm->user_limit_reached.stat_segment_name = "/nat44/user-limit-reached";
2676   vlib_validate_simple_counter (&sm->user_limit_reached, 0);
2677   vlib_zero_simple_counter (&sm->user_limit_reached, 0);
2678
2679 #define _(x)                                            \
2680   sm->counters.fastpath.in2out.x.name = #x;             \
2681   sm->counters.fastpath.in2out.x.stat_segment_name =    \
2682       "/nat44/in2out/fastpath/" #x;                     \
2683   sm->counters.slowpath.in2out.x.name = #x;             \
2684   sm->counters.slowpath.in2out.x.stat_segment_name =    \
2685       "/nat44/in2out/slowpath/" #x;                     \
2686   sm->counters.fastpath.out2in.x.name = #x;             \
2687   sm->counters.fastpath.out2in.x.stat_segment_name =    \
2688       "/nat44/out2in/fastpath/" #x;                     \
2689   sm->counters.slowpath.out2in.x.name = #x;             \
2690   sm->counters.slowpath.out2in.x.stat_segment_name =    \
2691       "/nat44/out2in/slowpath/" #x;                     \
2692   sm->counters.fastpath.in2out_ed.x.name = #x;          \
2693   sm->counters.fastpath.in2out_ed.x.stat_segment_name = \
2694       "/nat44/ed/in2out/fastpath/" #x;                  \
2695   sm->counters.slowpath.in2out_ed.x.name = #x;          \
2696   sm->counters.slowpath.in2out_ed.x.stat_segment_name = \
2697       "/nat44/ed/in2out/slowpath/" #x;                  \
2698   sm->counters.fastpath.out2in_ed.x.name = #x;          \
2699   sm->counters.fastpath.out2in_ed.x.stat_segment_name = \
2700       "/nat44/ed/out2in/fastpath/" #x;                  \
2701   sm->counters.slowpath.out2in_ed.x.name = #x;          \
2702   sm->counters.slowpath.out2in_ed.x.stat_segment_name = \
2703       "/nat44/ed/out2in/slowpath/" #x;
2704   foreach_nat_counter;
2705 #undef _
2706   sm->counters.hairpinning.name = "hairpinning";
2707   sm->counters.hairpinning.stat_segment_name = "/nat44/hairpinning";
2708
2709   /* Init IPFIX logging */
2710   snat_ipfix_logging_init (vm);
2711
2712   /* Init NAT64 */
2713   error = nat64_init (vm);
2714   if (error)
2715     return error;
2716
2717   ip4_table_bind_callback_t cbt4 = {
2718     .function = snat_ip4_table_bind,
2719   };
2720   vec_add1 (ip4_main.table_bind_callbacks, cbt4);
2721
2722   nat_fib_src_hi = fib_source_allocate ("nat-hi",
2723                                         FIB_SOURCE_PRIORITY_HI,
2724                                         FIB_SOURCE_BH_SIMPLE);
2725   nat_fib_src_low = fib_source_allocate ("nat-low",
2726                                          FIB_SOURCE_PRIORITY_LOW,
2727                                          FIB_SOURCE_BH_SIMPLE);
2728
2729   test_key_calc_split ();
2730   return error;
2731 }
2732
2733 VLIB_INIT_FUNCTION (snat_init);
2734
2735 void
2736 snat_free_outside_address_and_port (snat_address_t * addresses,
2737                                     u32 thread_index,
2738                                     ip4_address_t * addr,
2739                                     u16 port, nat_protocol_t protocol)
2740 {
2741   snat_address_t *a;
2742   u32 address_index;
2743   u16 port_host_byte_order = clib_net_to_host_u16 (port);
2744
2745   for (address_index = 0; address_index < vec_len (addresses);
2746        address_index++)
2747     {
2748       if (addresses[address_index].addr.as_u32 == addr->as_u32)
2749         break;
2750     }
2751
2752   ASSERT (address_index < vec_len (addresses));
2753
2754   a = addresses + address_index;
2755
2756   switch (protocol)
2757     {
2758 #define _(N, i, n, s) \
2759     case NAT_PROTOCOL_##N: \
2760       ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \
2761       --a->busy_##n##_port_refcounts[port_host_byte_order]; \
2762       a->busy_##n##_ports--; \
2763       a->busy_##n##_ports_per_thread[thread_index]--; \
2764       break;
2765       foreach_nat_protocol
2766 #undef _
2767     default:
2768       nat_elog_info ("unknown protocol");
2769       return;
2770     }
2771 }
2772
2773 static int
2774 nat_set_outside_address_and_port (snat_address_t * addresses,
2775                                   u32 thread_index, ip4_address_t addr,
2776                                   u16 port, nat_protocol_t protocol)
2777 {
2778   snat_address_t *a = 0;
2779   u32 address_index;
2780   u16 port_host_byte_order = clib_net_to_host_u16 (port);
2781
2782   for (address_index = 0; address_index < vec_len (addresses);
2783        address_index++)
2784     {
2785       if (addresses[address_index].addr.as_u32 != addr.as_u32)
2786         continue;
2787
2788       a = addresses + address_index;
2789       switch (protocol)
2790         {
2791 #define _(N, j, n, s) \
2792         case NAT_PROTOCOL_##N: \
2793           if (a->busy_##n##_port_refcounts[port_host_byte_order]) \
2794             return VNET_API_ERROR_INSTANCE_IN_USE; \
2795           ++a->busy_##n##_port_refcounts[port_host_byte_order]; \
2796           a->busy_##n##_ports_per_thread[thread_index]++; \
2797           a->busy_##n##_ports++; \
2798           return 0;
2799           foreach_nat_protocol
2800 #undef _
2801         default:
2802           nat_elog_info ("unknown protocol");
2803           return 1;
2804         }
2805     }
2806
2807   return VNET_API_ERROR_NO_SUCH_ENTRY;
2808 }
2809
2810 int
2811 snat_static_mapping_match (snat_main_t * sm,
2812                            ip4_address_t match_addr,
2813                            u16 match_port,
2814                            u32 match_fib_index,
2815                            nat_protocol_t match_protocol,
2816                            ip4_address_t * mapping_addr,
2817                            u16 * mapping_port,
2818                            u32 * mapping_fib_index,
2819                            u8 by_external,
2820                            u8 * is_addr_only,
2821                            twice_nat_type_t * twice_nat,
2822                            lb_nat_type_t * lb, ip4_address_t * ext_host_addr,
2823                            u8 * is_identity_nat)
2824 {
2825   clib_bihash_kv_8_8_t kv, value;
2826   snat_static_mapping_t *m;
2827   clib_bihash_8_8_t *mapping_hash = &sm->static_mapping_by_local;
2828   u32 rand, lo = 0, hi, mid, *tmp = 0, i;
2829   u8 backend_index;
2830   nat44_lb_addr_port_t *local;
2831
2832   if (by_external)
2833     {
2834       mapping_hash = &sm->static_mapping_by_external;
2835       init_nat_k (&kv, match_addr, match_port, 0, match_protocol);
2836       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2837         {
2838           /* Try address only mapping */
2839           init_nat_k (&kv, match_addr, 0, 0, 0);
2840           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2841             return 1;
2842         }
2843
2844     }
2845   else
2846     {
2847       init_nat_k (&kv, match_addr, match_port, match_fib_index,
2848                   match_protocol);
2849       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2850         {
2851           /* Try address only mapping */
2852           init_nat_k (&kv, match_addr, 0, match_fib_index, 0);
2853           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
2854             return 1;
2855         }
2856
2857     }
2858
2859   m = pool_elt_at_index (sm->static_mappings, value.value);
2860
2861   if (by_external)
2862     {
2863       if (is_lb_static_mapping (m))
2864         {
2865           if (PREDICT_FALSE (lb != 0))
2866             *lb = m->affinity ? AFFINITY_LB_NAT : LB_NAT;
2867           if (m->affinity && !nat_affinity_find_and_lock (ext_host_addr[0],
2868                                                           match_addr,
2869                                                           match_protocol,
2870                                                           match_port,
2871                                                           &backend_index))
2872             {
2873               local = pool_elt_at_index (m->locals, backend_index);
2874               *mapping_addr = local->addr;
2875               *mapping_port = local->port;
2876               *mapping_fib_index = local->fib_index;
2877               goto end;
2878             }
2879           // pick locals matching this worker
2880           if (PREDICT_FALSE (sm->num_workers > 1))
2881             {
2882               u32 thread_index = vlib_get_thread_index ();
2883               /* *INDENT-OFF* */
2884               pool_foreach_index (i, m->locals,
2885               ({
2886                 local = pool_elt_at_index (m->locals, i);
2887
2888                 ip4_header_t ip = {
2889                   .src_address = local->addr,
2890                 };
2891
2892                 if (sm->worker_in2out_cb (&ip, m->fib_index, 0) ==
2893                     thread_index)
2894                   {
2895                     vec_add1 (tmp, i);
2896                   }
2897               }));
2898               /* *INDENT-ON* */
2899               ASSERT (vec_len (tmp) != 0);
2900             }
2901           else
2902             {
2903               /* *INDENT-OFF* */
2904               pool_foreach_index (i, m->locals,
2905               ({
2906                 vec_add1 (tmp, i);
2907               }));
2908               /* *INDENT-ON* */
2909             }
2910           hi = vec_len (tmp) - 1;
2911           local = pool_elt_at_index (m->locals, tmp[hi]);
2912           rand = 1 + (random_u32 (&sm->random_seed) % local->prefix);
2913           while (lo < hi)
2914             {
2915               mid = ((hi - lo) >> 1) + lo;
2916               local = pool_elt_at_index (m->locals, tmp[mid]);
2917               (rand > local->prefix) ? (lo = mid + 1) : (hi = mid);
2918             }
2919           local = pool_elt_at_index (m->locals, tmp[lo]);
2920           if (!(local->prefix >= rand))
2921             return 1;
2922           *mapping_addr = local->addr;
2923           *mapping_port = local->port;
2924           *mapping_fib_index = local->fib_index;
2925           if (m->affinity)
2926             {
2927               if (nat_affinity_create_and_lock (ext_host_addr[0], match_addr,
2928                                                 match_protocol, match_port,
2929                                                 tmp[lo], m->affinity,
2930                                                 m->affinity_per_service_list_head_index))
2931                 nat_elog_info ("create affinity record failed");
2932             }
2933           vec_free (tmp);
2934         }
2935       else
2936         {
2937           if (PREDICT_FALSE (lb != 0))
2938             *lb = NO_LB_NAT;
2939           *mapping_fib_index = m->fib_index;
2940           *mapping_addr = m->local_addr;
2941           /* Address only mapping doesn't change port */
2942           *mapping_port = is_addr_only_static_mapping (m) ? match_port
2943             : m->local_port;
2944         }
2945     }
2946   else
2947     {
2948       *mapping_addr = m->external_addr;
2949       /* Address only mapping doesn't change port */
2950       *mapping_port = is_addr_only_static_mapping (m) ? match_port
2951         : m->external_port;
2952       *mapping_fib_index = sm->outside_fib_index;
2953     }
2954
2955 end:
2956   if (PREDICT_FALSE (is_addr_only != 0))
2957     *is_addr_only = is_addr_only_static_mapping (m);
2958
2959   if (PREDICT_FALSE (twice_nat != 0))
2960     *twice_nat = m->twice_nat;
2961
2962   if (PREDICT_FALSE (is_identity_nat != 0))
2963     *is_identity_nat = is_identity_static_mapping (m);
2964
2965   return 0;
2966 }
2967
2968 int
2969 snat_alloc_outside_address_and_port (snat_address_t * addresses,
2970                                      u32 fib_index,
2971                                      u32 thread_index,
2972                                      nat_protocol_t proto,
2973                                      ip4_address_t * addr,
2974                                      u16 * port,
2975                                      u16 port_per_thread,
2976                                      u32 snat_thread_index)
2977 {
2978   snat_main_t *sm = &snat_main;
2979
2980   return sm->alloc_addr_and_port (addresses, fib_index, thread_index, proto,
2981                                   addr, port, port_per_thread,
2982                                   snat_thread_index);
2983 }
2984
2985 static int
2986 nat_alloc_addr_and_port_default (snat_address_t * addresses,
2987                                  u32 fib_index,
2988                                  u32 thread_index,
2989                                  nat_protocol_t proto,
2990                                  ip4_address_t * addr,
2991                                  u16 * port,
2992                                  u16 port_per_thread, u32 snat_thread_index)
2993 {
2994   int i;
2995   snat_address_t *a, *ga = 0;
2996   u32 portnum;
2997
2998   for (i = 0; i < vec_len (addresses); i++)
2999     {
3000       a = addresses + i;
3001       switch (proto)
3002         {
3003 #define _(N, j, n, s) \
3004         case NAT_PROTOCOL_##N: \
3005           if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \
3006             { \
3007               if (a->fib_index == fib_index) \
3008                 { \
3009                   while (1) \
3010                     { \
3011                       portnum = (port_per_thread * \
3012                         snat_thread_index) + \
3013                         snat_random_port(0, port_per_thread - 1) + 1024; \
3014                       if (a->busy_##n##_port_refcounts[portnum]) \
3015                         continue; \
3016                       --a->busy_##n##_port_refcounts[portnum]; \
3017                       a->busy_##n##_ports_per_thread[thread_index]++; \
3018                       a->busy_##n##_ports++; \
3019                       *addr = a->addr; \
3020                       *port = clib_host_to_net_u16(portnum); \
3021                       return 0; \
3022                     } \
3023                 } \
3024               else if (a->fib_index == ~0) \
3025                 { \
3026                   ga = a; \
3027                 } \
3028             } \
3029           break;
3030           foreach_nat_protocol
3031 #undef _
3032         default:
3033           nat_elog_info ("unknown protocol");
3034           return 1;
3035         }
3036
3037     }
3038
3039   if (ga)
3040     {
3041       a = ga;
3042       switch (proto)
3043         {
3044 #define _(N, j, n, s) \
3045         case NAT_PROTOCOL_##N: \
3046           while (1) \
3047             { \
3048               portnum = (port_per_thread * \
3049                 snat_thread_index) + \
3050                 snat_random_port(0, port_per_thread - 1) + 1024; \
3051               if (a->busy_##n##_port_refcounts[portnum]) \
3052                 continue; \
3053               ++a->busy_##n##_port_refcounts[portnum]; \
3054               a->busy_##n##_ports_per_thread[thread_index]++; \
3055               a->busy_##n##_ports++; \
3056               *addr = a->addr; \
3057               *port = clib_host_to_net_u16(portnum); \
3058               return 0; \
3059             }
3060           break;
3061           foreach_nat_protocol
3062 #undef _
3063         default:
3064           nat_elog_info ("unknown protocol");
3065           return 1;
3066         }
3067     }
3068
3069   /* Totally out of translations to use... */
3070   snat_ipfix_logging_addresses_exhausted (thread_index, 0);
3071   return 1;
3072 }
3073
3074 static int
3075 nat_alloc_addr_and_port_mape (snat_address_t * addresses, u32 fib_index,
3076                               u32 thread_index, nat_protocol_t proto,
3077                               ip4_address_t * addr, u16 * port,
3078                               u16 port_per_thread, u32 snat_thread_index)
3079 {
3080   snat_main_t *sm = &snat_main;
3081   snat_address_t *a = addresses;
3082   u16 m, ports, portnum, A, j;
3083   m = 16 - (sm->psid_offset + sm->psid_length);
3084   ports = (1 << (16 - sm->psid_length)) - (1 << m);
3085
3086   if (!vec_len (addresses))
3087     goto exhausted;
3088
3089   switch (proto)
3090     {
3091 #define _(N, i, n, s) \
3092     case NAT_PROTOCOL_##N: \
3093       if (a->busy_##n##_ports < ports) \
3094         { \
3095           while (1) \
3096             { \
3097               A = snat_random_port(1, pow2_mask(sm->psid_offset)); \
3098               j = snat_random_port(0, pow2_mask(m)); \
3099               portnum = A | (sm->psid << sm->psid_offset) | (j << (16 - m)); \
3100               if (a->busy_##n##_port_refcounts[portnum]) \
3101                 continue; \
3102               ++a->busy_##n##_port_refcounts[portnum]; \
3103               a->busy_##n##_ports++; \
3104               *addr = a->addr; \
3105               *port = clib_host_to_net_u16 (portnum); \
3106               return 0; \
3107             } \
3108         } \
3109       break;
3110       foreach_nat_protocol
3111 #undef _
3112     default:
3113       nat_elog_info ("unknown protocol");
3114       return 1;
3115     }
3116
3117 exhausted:
3118   /* Totally out of translations to use... */
3119   snat_ipfix_logging_addresses_exhausted (thread_index, 0);
3120   return 1;
3121 }
3122
3123 static int
3124 nat_alloc_addr_and_port_range (snat_address_t * addresses, u32 fib_index,
3125                                u32 thread_index, nat_protocol_t proto,
3126                                ip4_address_t * addr, u16 * port,
3127                                u16 port_per_thread, u32 snat_thread_index)
3128 {
3129   snat_main_t *sm = &snat_main;
3130   snat_address_t *a = addresses;
3131   u16 portnum, ports;
3132
3133   ports = sm->end_port - sm->start_port + 1;
3134
3135   if (!vec_len (addresses))
3136     goto exhausted;
3137
3138   switch (proto)
3139     {
3140 #define _(N, i, n, s) \
3141     case NAT_PROTOCOL_##N: \
3142       if (a->busy_##n##_ports < ports) \
3143         { \
3144           while (1) \
3145             { \
3146               portnum = snat_random_port(sm->start_port, sm->end_port); \
3147               if (a->busy_##n##_port_refcounts[portnum]) \
3148                 continue; \
3149               ++a->busy_##n##_port_refcounts[portnum]; \
3150               a->busy_##n##_ports++; \
3151               *addr = a->addr; \
3152               *port = clib_host_to_net_u16 (portnum); \
3153               return 0; \
3154             } \
3155         } \
3156       break;
3157       foreach_nat_protocol
3158 #undef _
3159     default:
3160       nat_elog_info ("unknown protocol");
3161       return 1;
3162     }
3163
3164 exhausted:
3165   /* Totally out of translations to use... */
3166   snat_ipfix_logging_addresses_exhausted (thread_index, 0);
3167   return 1;
3168 }
3169
3170 void
3171 nat44_add_del_address_dpo (ip4_address_t addr, u8 is_add)
3172 {
3173   dpo_id_t dpo_v4 = DPO_INVALID;
3174   fib_prefix_t pfx = {
3175     .fp_proto = FIB_PROTOCOL_IP4,
3176     .fp_len = 32,
3177     .fp_addr.ip4.as_u32 = addr.as_u32,
3178   };
3179
3180   if (is_add)
3181     {
3182       nat_dpo_create (DPO_PROTO_IP4, 0, &dpo_v4);
3183       fib_table_entry_special_dpo_add (0, &pfx, nat_fib_src_hi,
3184                                        FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v4);
3185       dpo_reset (&dpo_v4);
3186     }
3187   else
3188     {
3189       fib_table_entry_special_remove (0, &pfx, nat_fib_src_hi);
3190     }
3191 }
3192
3193 u8 *
3194 format_session_kvp (u8 * s, va_list * args)
3195 {
3196   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
3197
3198   s = format (s, "%U session-index %llu", format_snat_key, v->key, v->value);
3199
3200   return s;
3201 }
3202
3203 u8 *
3204 format_static_mapping_kvp (u8 * s, va_list * args)
3205 {
3206   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
3207
3208   s = format (s, "%U static-mapping-index %llu",
3209               format_snat_key, v->key, v->value);
3210
3211   return s;
3212 }
3213
3214 u8 *
3215 format_user_kvp (u8 * s, va_list * args)
3216 {
3217   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
3218   snat_user_key_t k;
3219
3220   k.as_u64 = v->key;
3221
3222   s = format (s, "%U fib %d user-index %llu", format_ip4_address, &k.addr,
3223               k.fib_index, v->value);
3224
3225   return s;
3226 }
3227
3228 u8 *
3229 format_ed_session_kvp (u8 * s, va_list * args)
3230 {
3231   clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
3232
3233   u8 proto;
3234   u16 r_port, l_port;
3235   ip4_address_t l_addr, r_addr;
3236   u32 fib_index;
3237
3238   split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port);
3239   s =
3240     format (s,
3241             "local %U:%d remote %U:%d proto %U fib %d thread-index %u session-index %u",
3242             format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port),
3243             format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port),
3244             format_ip_protocol, proto, fib_index,
3245             ed_value_get_session_index (v), ed_value_get_thread_index (v));
3246
3247   return s;
3248 }
3249
3250 static u32
3251 snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0,
3252                            u8 is_output)
3253 {
3254   snat_main_t *sm = &snat_main;
3255   u32 next_worker_index = 0;
3256   u32 hash;
3257
3258   next_worker_index = sm->first_worker_index;
3259   hash = ip0->src_address.as_u32 + (ip0->src_address.as_u32 >> 8) +
3260     (ip0->src_address.as_u32 >> 16) + (ip0->src_address.as_u32 >> 24);
3261
3262   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
3263     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
3264   else
3265     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
3266
3267   return next_worker_index;
3268 }
3269
3270 static u32
3271 snat_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip0,
3272                            u32 rx_fib_index0, u8 is_output)
3273 {
3274   snat_main_t *sm = &snat_main;
3275   udp_header_t *udp;
3276   u16 port;
3277   clib_bihash_kv_8_8_t kv, value;
3278   snat_static_mapping_t *m;
3279   u32 proto;
3280   u32 next_worker_index = 0;
3281
3282   /* first try static mappings without port */
3283   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3284     {
3285       init_nat_k (&kv, ip0->dst_address, 0, rx_fib_index0, 0);
3286       if (!clib_bihash_search_8_8
3287           (&sm->static_mapping_by_external, &kv, &value))
3288         {
3289           m = pool_elt_at_index (sm->static_mappings, value.value);
3290           return m->workers[0];
3291         }
3292     }
3293
3294   proto = ip_proto_to_nat_proto (ip0->protocol);
3295   udp = ip4_next_header (ip0);
3296   port = udp->dst_port;
3297
3298   /* unknown protocol */
3299   if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
3300     {
3301       /* use current thread */
3302       return vlib_get_thread_index ();
3303     }
3304
3305   if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_ICMP))
3306     {
3307       icmp46_header_t *icmp = (icmp46_header_t *) udp;
3308       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3309       if (!icmp_type_is_error_message
3310           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
3311         port = vnet_buffer (b)->ip.reass.l4_src_port;
3312       else
3313         {
3314           /* if error message, then it's not fragmented and we can access it */
3315           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3316           proto = ip_proto_to_nat_proto (inner_ip->protocol);
3317           void *l4_header = ip4_next_header (inner_ip);
3318           switch (proto)
3319             {
3320             case NAT_PROTOCOL_ICMP:
3321               icmp = (icmp46_header_t *) l4_header;
3322               echo = (icmp_echo_header_t *) (icmp + 1);
3323               port = echo->identifier;
3324               break;
3325             case NAT_PROTOCOL_UDP:
3326             case NAT_PROTOCOL_TCP:
3327               port = ((tcp_udp_header_t *) l4_header)->src_port;
3328               break;
3329             default:
3330               return vlib_get_thread_index ();
3331             }
3332         }
3333     }
3334
3335   /* try static mappings with port */
3336   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3337     {
3338       init_nat_k (&kv, ip0->dst_address, port, rx_fib_index0, proto);
3339       if (!clib_bihash_search_8_8
3340           (&sm->static_mapping_by_external, &kv, &value))
3341         {
3342           m = pool_elt_at_index (sm->static_mappings, value.value);
3343           return m->workers[0];
3344         }
3345     }
3346
3347   /* worker by outside port */
3348   next_worker_index = sm->first_worker_index;
3349   next_worker_index +=
3350     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
3351   return next_worker_index;
3352 }
3353
3354 static u32
3355 nat44_ed_get_worker_in2out_cb (ip4_header_t * ip, u32 rx_fib_index,
3356                                u8 is_output)
3357 {
3358   snat_main_t *sm = &snat_main;
3359   u32 next_worker_index = sm->first_worker_index;
3360   u32 hash;
3361
3362   clib_bihash_kv_16_8_t kv16, value16;
3363   snat_main_per_thread_data_t *tsm;
3364   udp_header_t *udp;
3365
3366   if (PREDICT_FALSE (is_output))
3367     {
3368       u32 fib_index = sm->outside_fib_index;
3369       nat_outside_fib_t *outside_fib;
3370       fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
3371       fib_prefix_t pfx = {
3372         .fp_proto = FIB_PROTOCOL_IP4,
3373         .fp_len = 32,
3374         .fp_addr = {
3375                     .ip4.as_u32 = ip->dst_address.as_u32,
3376                     }
3377         ,
3378       };
3379
3380       udp = ip4_next_header (ip);
3381
3382       switch (vec_len (sm->outside_fibs))
3383         {
3384         case 0:
3385           fib_index = sm->outside_fib_index;
3386           break;
3387         case 1:
3388           fib_index = sm->outside_fibs[0].fib_index;
3389           break;
3390         default:
3391             /* *INDENT-OFF* */
3392             vec_foreach (outside_fib, sm->outside_fibs)
3393               {
3394                 fei = fib_table_lookup (outside_fib->fib_index, &pfx);
3395                 if (FIB_NODE_INDEX_INVALID != fei)
3396                   {
3397                     if (fib_entry_get_resolving_interface (fei) != ~0)
3398                       {
3399                         fib_index = outside_fib->fib_index;
3400                         break;
3401                       }
3402                   }
3403               }
3404             /* *INDENT-ON* */
3405           break;
3406         }
3407
3408       init_ed_k (&kv16, ip->src_address, udp->src_port, ip->dst_address,
3409                  udp->dst_port, fib_index, ip->protocol);
3410
3411       if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed,
3412                                                   &kv16, &value16)))
3413         {
3414           tsm =
3415             vec_elt_at_index (sm->per_thread_data,
3416                               ed_value_get_thread_index (&value16));
3417           next_worker_index += tsm->thread_index;
3418
3419           nat_elog_debug_handoff ("HANDOFF IN2OUT-OUTPUT-FEATURE (session)",
3420                                   next_worker_index, fib_index,
3421                                   clib_net_to_host_u32 (ip->
3422                                                         src_address.as_u32),
3423                                   clib_net_to_host_u32 (ip->
3424                                                         dst_address.as_u32));
3425
3426           return next_worker_index;
3427         }
3428     }
3429
3430   hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
3431     (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
3432
3433   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
3434     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
3435   else
3436     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
3437
3438   if (PREDICT_TRUE (!is_output))
3439     {
3440       nat_elog_debug_handoff ("HANDOFF IN2OUT",
3441                               next_worker_index, rx_fib_index,
3442                               clib_net_to_host_u32 (ip->src_address.as_u32),
3443                               clib_net_to_host_u32 (ip->dst_address.as_u32));
3444     }
3445   else
3446     {
3447       nat_elog_debug_handoff ("HANDOFF IN2OUT-OUTPUT-FEATURE",
3448                               next_worker_index, rx_fib_index,
3449                               clib_net_to_host_u32 (ip->src_address.as_u32),
3450                               clib_net_to_host_u32 (ip->dst_address.as_u32));
3451     }
3452
3453   return next_worker_index;
3454 }
3455
3456 static u32
3457 nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip,
3458                                u32 rx_fib_index, u8 is_output)
3459 {
3460   snat_main_t *sm = &snat_main;
3461   clib_bihash_kv_8_8_t kv, value;
3462   clib_bihash_kv_16_8_t kv16, value16;
3463   snat_main_per_thread_data_t *tsm;
3464
3465   u32 proto, next_worker_index = 0;
3466   udp_header_t *udp;
3467   u16 port;
3468   snat_static_mapping_t *m;
3469   u32 hash;
3470
3471   proto = ip_proto_to_nat_proto (ip->protocol);
3472
3473   if (PREDICT_TRUE (proto == NAT_PROTOCOL_UDP || proto == NAT_PROTOCOL_TCP))
3474     {
3475       udp = ip4_next_header (ip);
3476
3477       init_ed_k (&kv16, ip->dst_address, udp->dst_port, ip->src_address,
3478                  udp->src_port, rx_fib_index, ip->protocol);
3479
3480       if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed,
3481                                                   &kv16, &value16)))
3482         {
3483           tsm =
3484             vec_elt_at_index (sm->per_thread_data,
3485                               ed_value_get_thread_index (&value16));
3486           vnet_buffer2 (b)->nat.ed_out2in_nat_session_index =
3487             ed_value_get_session_index (&value16);
3488           next_worker_index = sm->first_worker_index + tsm->thread_index;
3489           nat_elog_debug_handoff ("HANDOFF OUT2IN (session)",
3490                                   next_worker_index, rx_fib_index,
3491                                   clib_net_to_host_u32 (ip->
3492                                                         src_address.as_u32),
3493                                   clib_net_to_host_u32 (ip->
3494                                                         dst_address.as_u32));
3495           return next_worker_index;
3496         }
3497     }
3498   else if (proto == NAT_PROTOCOL_ICMP)
3499     {
3500       if (!get_icmp_o2i_ed_key (b, ip, rx_fib_index, ~0, ~0, 0, 0, 0, &kv16))
3501         {
3502           if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed,
3503                                                       &kv16, &value16)))
3504             {
3505               tsm =
3506                 vec_elt_at_index (sm->per_thread_data,
3507                                   ed_value_get_thread_index (&value16));
3508               next_worker_index = sm->first_worker_index + tsm->thread_index;
3509               nat_elog_debug_handoff ("HANDOFF OUT2IN (session)",
3510                                       next_worker_index, rx_fib_index,
3511                                       clib_net_to_host_u32 (ip->
3512                                                             src_address.as_u32),
3513                                       clib_net_to_host_u32 (ip->
3514                                                             dst_address.as_u32));
3515               return next_worker_index;
3516             }
3517         }
3518     }
3519
3520   /* first try static mappings without port */
3521   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3522     {
3523       init_nat_k (&kv, ip->dst_address, 0, 0, 0);
3524       if (!clib_bihash_search_8_8
3525           (&sm->static_mapping_by_external, &kv, &value))
3526         {
3527           m = pool_elt_at_index (sm->static_mappings, value.value);
3528           next_worker_index = m->workers[0];
3529           goto done;
3530         }
3531     }
3532
3533   /* unknown protocol */
3534   if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
3535     {
3536       /* use current thread */
3537       next_worker_index = vlib_get_thread_index ();
3538       goto done;
3539     }
3540
3541   udp = ip4_next_header (ip);
3542   port = udp->dst_port;
3543
3544   if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
3545     {
3546       icmp46_header_t *icmp = (icmp46_header_t *) udp;
3547       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3548       if (!icmp_type_is_error_message
3549           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
3550         port = vnet_buffer (b)->ip.reass.l4_src_port;
3551       else
3552         {
3553           /* if error message, then it's not fragmented and we can access it */
3554           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3555           proto = ip_proto_to_nat_proto (inner_ip->protocol);
3556           void *l4_header = ip4_next_header (inner_ip);
3557           switch (proto)
3558             {
3559             case NAT_PROTOCOL_ICMP:
3560               icmp = (icmp46_header_t *) l4_header;
3561               echo = (icmp_echo_header_t *) (icmp + 1);
3562               port = echo->identifier;
3563               break;
3564             case NAT_PROTOCOL_UDP:
3565             case NAT_PROTOCOL_TCP:
3566               port = ((tcp_udp_header_t *) l4_header)->src_port;
3567               break;
3568             default:
3569               next_worker_index = vlib_get_thread_index ();
3570               goto done;
3571             }
3572         }
3573     }
3574
3575   /* try static mappings with port */
3576   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3577     {
3578       init_nat_k (&kv, ip->dst_address, proto, 0, port);
3579       if (!clib_bihash_search_8_8
3580           (&sm->static_mapping_by_external, &kv, &value))
3581         {
3582           m = pool_elt_at_index (sm->static_mappings, value.value);
3583           if (!is_lb_static_mapping (m))
3584             {
3585               next_worker_index = m->workers[0];
3586               goto done;
3587             }
3588
3589           hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
3590             (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
3591
3592           if (PREDICT_TRUE (is_pow2 (_vec_len (m->workers))))
3593             next_worker_index =
3594               m->workers[hash & (_vec_len (m->workers) - 1)];
3595           else
3596             next_worker_index = m->workers[hash % _vec_len (m->workers)];
3597           goto done;
3598         }
3599     }
3600
3601   /* worker by outside port */
3602   next_worker_index = sm->first_worker_index;
3603   next_worker_index +=
3604     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
3605
3606 done:
3607   nat_elog_debug_handoff ("HANDOFF OUT2IN", next_worker_index, rx_fib_index,
3608                           clib_net_to_host_u32 (ip->src_address.as_u32),
3609                           clib_net_to_host_u32 (ip->dst_address.as_u32));
3610   return next_worker_index;
3611 }
3612
3613 void
3614 nat_ha_sadd_cb (ip4_address_t * in_addr, u16 in_port,
3615                 ip4_address_t * out_addr, u16 out_port,
3616                 ip4_address_t * eh_addr, u16 eh_port,
3617                 ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
3618                 u32 fib_index, u16 flags, u32 thread_index)
3619 {
3620   snat_main_t *sm = &snat_main;
3621   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
3622   snat_user_t *u;
3623   snat_session_t *s;
3624   clib_bihash_kv_8_8_t kv;
3625   vlib_main_t *vm = vlib_get_main ();
3626   f64 now = vlib_time_now (vm);
3627   nat_outside_fib_t *outside_fib;
3628   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
3629   fib_prefix_t pfx = {
3630     .fp_proto = FIB_PROTOCOL_IP4,
3631     .fp_len = 32,
3632     .fp_addr = {
3633                 .ip4.as_u32 = eh_addr->as_u32,
3634                 },
3635   };
3636
3637   if (!(flags & SNAT_SESSION_FLAG_STATIC_MAPPING))
3638     {
3639       if (nat_set_outside_address_and_port
3640           (sm->addresses, thread_index, *out_addr, out_port, proto))
3641         return;
3642     }
3643
3644   u = nat_user_get_or_create (sm, in_addr, fib_index, thread_index);
3645   if (!u)
3646     return;
3647
3648   s = nat_session_alloc_or_recycle (sm, u, thread_index, now);
3649   if (!s)
3650     return;
3651
3652   if (sm->endpoint_dependent)
3653     {
3654       nat_ed_lru_insert (tsm, s, now, nat_proto_to_ip_proto (proto));
3655     }
3656
3657   s->out2in.addr.as_u32 = out_addr->as_u32;
3658   s->out2in.port = out_port;
3659   s->nat_proto = proto;
3660   s->last_heard = now;
3661   s->flags = flags;
3662   s->ext_host_addr.as_u32 = eh_addr->as_u32;
3663   s->ext_host_port = eh_port;
3664   user_session_increment (sm, u, snat_is_session_static (s));
3665   switch (vec_len (sm->outside_fibs))
3666     {
3667     case 0:
3668       s->out2in.fib_index = sm->outside_fib_index;
3669       break;
3670     case 1:
3671       s->out2in.fib_index = sm->outside_fibs[0].fib_index;
3672       break;
3673     default:
3674       /* *INDENT-OFF* */
3675       vec_foreach (outside_fib, sm->outside_fibs)
3676         {
3677           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
3678           if (FIB_NODE_INDEX_INVALID != fei)
3679             {
3680               if (fib_entry_get_resolving_interface (fei) != ~0)
3681                 {
3682                   s->out2in.fib_index = outside_fib->fib_index;
3683                   break;
3684                 }
3685             }
3686         }
3687       /* *INDENT-ON* */
3688       break;
3689     }
3690   init_nat_o2i_kv (&kv, s, s - tsm->sessions);
3691   if (clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 1))
3692     nat_elog_warn ("out2in key add failed");
3693
3694   s->in2out.addr.as_u32 = in_addr->as_u32;
3695   s->in2out.port = in_port;
3696   s->in2out.fib_index = fib_index;
3697   init_nat_i2o_kv (&kv, s, s - tsm->sessions);
3698   if (clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 1))
3699     nat_elog_warn ("in2out key add failed");
3700 }
3701
3702 void
3703 nat_ha_sdel_cb (ip4_address_t * out_addr, u16 out_port,
3704                 ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
3705                 u32 ti)
3706 {
3707   snat_main_t *sm = &snat_main;
3708   clib_bihash_kv_8_8_t kv, value;
3709   u32 thread_index;
3710   snat_session_t *s;
3711   snat_main_per_thread_data_t *tsm;
3712
3713   if (sm->num_workers > 1)
3714     thread_index =
3715       sm->first_worker_index +
3716       (sm->workers[(clib_net_to_host_u16 (out_port) -
3717                     1024) / sm->port_per_thread]);
3718   else
3719     thread_index = sm->num_workers;
3720   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
3721
3722   init_nat_k (&kv, *out_addr, out_port, fib_index, proto);
3723   if (clib_bihash_search_8_8 (&tsm->out2in, &kv, &value))
3724     return;
3725
3726   s = pool_elt_at_index (tsm->sessions, value.value);
3727   nat_free_session_data (sm, s, thread_index, 1);
3728   nat44_delete_session (sm, s, thread_index);
3729 }
3730
3731 void
3732 nat_ha_sref_cb (ip4_address_t * out_addr, u16 out_port,
3733                 ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
3734                 u32 total_pkts, u64 total_bytes, u32 thread_index)
3735 {
3736   snat_main_t *sm = &snat_main;
3737   clib_bihash_kv_8_8_t kv, value;
3738   snat_session_t *s;
3739   snat_main_per_thread_data_t *tsm;
3740
3741   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
3742
3743   init_nat_k (&kv, *out_addr, out_port, fib_index, proto);
3744   if (clib_bihash_search_8_8 (&tsm->out2in, &kv, &value))
3745     return;
3746
3747   s = pool_elt_at_index (tsm->sessions, value.value);
3748   s->total_pkts = total_pkts;
3749   s->total_bytes = total_bytes;
3750 }
3751
3752 void
3753 nat_ha_sadd_ed_cb (ip4_address_t * in_addr, u16 in_port,
3754                    ip4_address_t * out_addr, u16 out_port,
3755                    ip4_address_t * eh_addr, u16 eh_port,
3756                    ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
3757                    u32 fib_index, u16 flags, u32 thread_index)
3758 {
3759   snat_main_t *sm = &snat_main;
3760   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
3761   snat_session_t *s;
3762   clib_bihash_kv_16_8_t kv;
3763   vlib_main_t *vm = vlib_get_main ();
3764   f64 now = vlib_time_now (vm);
3765   nat_outside_fib_t *outside_fib;
3766   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
3767   fib_prefix_t pfx = {
3768     .fp_proto = FIB_PROTOCOL_IP4,
3769     .fp_len = 32,
3770     .fp_addr = {
3771                 .ip4.as_u32 = eh_addr->as_u32,
3772                 },
3773   };
3774
3775
3776   if (!(flags & SNAT_SESSION_FLAG_STATIC_MAPPING))
3777     {
3778       if (nat_set_outside_address_and_port
3779           (sm->addresses, thread_index, *out_addr, out_port, proto))
3780         return;
3781     }
3782
3783   if (flags & SNAT_SESSION_FLAG_TWICE_NAT)
3784     {
3785       if (nat_set_outside_address_and_port
3786           (sm->addresses, thread_index, *ehn_addr, ehn_port, proto))
3787         return;
3788     }
3789
3790   s = nat_ed_session_alloc (sm, thread_index, now, proto);
3791   if (!s)
3792     return;
3793
3794   s->last_heard = now;
3795   s->flags = flags;
3796   s->ext_host_nat_addr.as_u32 = s->ext_host_addr.as_u32 = eh_addr->as_u32;
3797   s->ext_host_nat_port = s->ext_host_port = eh_port;
3798   if (is_twice_nat_session (s))
3799     {
3800       s->ext_host_nat_addr.as_u32 = ehn_addr->as_u32;
3801       s->ext_host_nat_port = ehn_port;
3802     }
3803   switch (vec_len (sm->outside_fibs))
3804     {
3805     case 0:
3806       s->out2in.fib_index = sm->outside_fib_index;
3807       break;
3808     case 1:
3809       s->out2in.fib_index = sm->outside_fibs[0].fib_index;
3810       break;
3811     default:
3812       /* *INDENT-OFF* */
3813       vec_foreach (outside_fib, sm->outside_fibs)
3814         {
3815           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
3816           if (FIB_NODE_INDEX_INVALID != fei)
3817             {
3818               if (fib_entry_get_resolving_interface (fei) != ~0)
3819                 {
3820                   s->out2in.fib_index = outside_fib->fib_index;
3821                   break;
3822                 }
3823             }
3824         }
3825       /* *INDENT-ON* */
3826       break;
3827     }
3828   s->nat_proto = proto;
3829   s->out2in.addr.as_u32 = out_addr->as_u32;
3830   s->out2in.port = out_port;
3831
3832   s->in2out.addr.as_u32 = in_addr->as_u32;
3833   s->in2out.port = in_port;
3834   s->in2out.fib_index = fib_index;
3835
3836   init_ed_kv (&kv, *in_addr, in_port, s->ext_host_nat_addr,
3837               s->ext_host_nat_port, fib_index, nat_proto_to_ip_proto (proto),
3838               thread_index, s - tsm->sessions);
3839   if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &kv, 1))
3840     nat_elog_warn ("in2out key add failed");
3841
3842   init_ed_kv (&kv, *out_addr, out_port, *eh_addr, eh_port,
3843               s->out2in.fib_index, nat_proto_to_ip_proto (proto),
3844               thread_index, s - tsm->sessions);
3845   if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &kv, 1))
3846     nat_elog_warn ("out2in key add failed");
3847 }
3848
3849 void
3850 nat_ha_sdel_ed_cb (ip4_address_t * out_addr, u16 out_port,
3851                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
3852                    u32 fib_index, u32 ti)
3853 {
3854   snat_main_t *sm = &snat_main;
3855   clib_bihash_kv_16_8_t kv, value;
3856   u32 thread_index;
3857   snat_session_t *s;
3858   snat_main_per_thread_data_t *tsm;
3859
3860   if (sm->num_workers > 1)
3861     thread_index =
3862       sm->first_worker_index +
3863       (sm->workers[(clib_net_to_host_u16 (out_port) -
3864                     1024) / sm->port_per_thread]);
3865   else
3866     thread_index = sm->num_workers;
3867   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
3868
3869   init_ed_k (&kv, *out_addr, out_port, *eh_addr, eh_port, fib_index, proto);
3870   if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
3871     return;
3872
3873   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
3874   nat_free_session_data (sm, s, thread_index, 1);
3875   nat44_delete_session (sm, s, thread_index);
3876 }
3877
3878 void
3879 nat_ha_sref_ed_cb (ip4_address_t * out_addr, u16 out_port,
3880                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
3881                    u32 fib_index, u32 total_pkts, u64 total_bytes,
3882                    u32 thread_index)
3883 {
3884   snat_main_t *sm = &snat_main;
3885   clib_bihash_kv_16_8_t kv, value;
3886   snat_session_t *s;
3887   snat_main_per_thread_data_t *tsm;
3888
3889   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
3890
3891   init_ed_k (&kv, *out_addr, out_port, *eh_addr, eh_port, fib_index, proto);
3892   if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
3893     return;
3894
3895   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
3896   s->total_pkts = total_pkts;
3897   s->total_bytes = total_bytes;
3898 }
3899
3900 static u32
3901 nat_calc_bihash_buckets (u32 n_elts)
3902 {
3903   return 1 << (max_log2 (n_elts >> 1) + 1);
3904 }
3905
3906 static u32
3907 nat_calc_bihash_memory (u32 n_buckets, uword kv_size)
3908 {
3909   return n_buckets * (8 + kv_size * 4);
3910 }
3911
3912 void
3913 nat44_db_init (snat_main_per_thread_data_t * tsm)
3914 {
3915   snat_main_t *sm = &snat_main;
3916
3917   pool_alloc (tsm->sessions, sm->max_translations_per_thread);
3918   pool_alloc (tsm->lru_pool, sm->max_translations_per_thread);
3919
3920   dlist_elt_t *head;
3921
3922   pool_get (tsm->lru_pool, head);
3923   tsm->tcp_trans_lru_head_index = head - tsm->lru_pool;
3924   clib_dlist_init (tsm->lru_pool, tsm->tcp_trans_lru_head_index);
3925
3926   pool_get (tsm->lru_pool, head);
3927   tsm->tcp_estab_lru_head_index = head - tsm->lru_pool;
3928   clib_dlist_init (tsm->lru_pool, tsm->tcp_estab_lru_head_index);
3929
3930   pool_get (tsm->lru_pool, head);
3931   tsm->udp_lru_head_index = head - tsm->lru_pool;
3932   clib_dlist_init (tsm->lru_pool, tsm->udp_lru_head_index);
3933
3934   pool_get (tsm->lru_pool, head);
3935   tsm->icmp_lru_head_index = head - tsm->lru_pool;
3936   clib_dlist_init (tsm->lru_pool, tsm->icmp_lru_head_index);
3937
3938   pool_get (tsm->lru_pool, head);
3939   tsm->unk_proto_lru_head_index = head - tsm->lru_pool;
3940   clib_dlist_init (tsm->lru_pool, tsm->unk_proto_lru_head_index);
3941
3942   if (sm->endpoint_dependent)
3943     {
3944       clib_bihash_init_16_8 (&tsm->in2out_ed, "in2out-ed",
3945                              sm->translation_buckets,
3946                              sm->translation_memory_size);
3947       clib_bihash_set_kvp_format_fn_16_8 (&tsm->in2out_ed,
3948                                           format_ed_session_kvp);
3949
3950     }
3951   else
3952     {
3953       clib_bihash_init_8_8 (&tsm->in2out, "in2out",
3954                             sm->translation_buckets,
3955                             sm->translation_memory_size);
3956       clib_bihash_set_kvp_format_fn_8_8 (&tsm->in2out, format_session_kvp);
3957       clib_bihash_init_8_8 (&tsm->out2in, "out2in",
3958                             sm->translation_buckets,
3959                             sm->translation_memory_size);
3960       clib_bihash_set_kvp_format_fn_8_8 (&tsm->out2in, format_session_kvp);
3961     }
3962
3963   // TODO: resolve static mappings (put only to !ED)
3964   pool_alloc (tsm->list_pool, sm->max_translations_per_thread);
3965   clib_bihash_init_8_8 (&tsm->user_hash, "users", sm->user_buckets,
3966                         sm->user_memory_size);
3967   clib_bihash_set_kvp_format_fn_8_8 (&tsm->user_hash, format_user_kvp);
3968 }
3969
3970 void
3971 nat44_db_free (snat_main_per_thread_data_t * tsm)
3972 {
3973   snat_main_t *sm = &snat_main;
3974
3975   pool_free (tsm->sessions);
3976   pool_free (tsm->lru_pool);
3977
3978   if (sm->endpoint_dependent)
3979     {
3980       clib_bihash_free_16_8 (&tsm->in2out_ed);
3981       vec_free (tsm->per_vrf_sessions_vec);
3982     }
3983   else
3984     {
3985       clib_bihash_free_8_8 (&tsm->in2out);
3986       clib_bihash_free_8_8 (&tsm->out2in);
3987     }
3988
3989   // TODO: resolve static mappings (put only to !ED)
3990   pool_free (tsm->users);
3991   pool_free (tsm->list_pool);
3992   clib_bihash_free_8_8 (&tsm->user_hash);
3993 }
3994
3995 void
3996 nat44_sessions_clear ()
3997 {
3998   snat_main_t *sm = &snat_main;
3999   snat_main_per_thread_data_t *tsm;
4000
4001   if (sm->endpoint_dependent)
4002     {
4003       clib_bihash_free_16_8 (&sm->out2in_ed);
4004       clib_bihash_init_16_8 (&sm->out2in_ed, "out2in-ed",
4005                              clib_max (1, sm->num_workers) *
4006                              sm->translation_buckets,
4007                              clib_max (1, sm->num_workers) *
4008                              sm->translation_memory_size);
4009       clib_bihash_set_kvp_format_fn_16_8 (&sm->out2in_ed,
4010                                           format_ed_session_kvp);
4011     }
4012
4013   /* *INDENT-OFF* */
4014   vec_foreach (tsm, sm->per_thread_data)
4015     {
4016       u32 ti;
4017
4018       nat44_db_free (tsm);
4019       nat44_db_init (tsm);
4020
4021       ti = tsm->snat_thread_index;
4022       vlib_set_simple_counter (&sm->total_users, ti, 0, 0);
4023       vlib_set_simple_counter (&sm->total_sessions, ti, 0, 0);
4024     }
4025   /* *INDENT-ON* */
4026 }
4027
4028 static clib_error_t *
4029 snat_config (vlib_main_t * vm, unformat_input_t * input)
4030 {
4031   snat_main_t *sm = &snat_main;
4032   snat_main_per_thread_data_t *tsm;
4033
4034   u32 static_mapping_buckets = 1024;
4035   uword static_mapping_memory_size = 64 << 20;
4036
4037   u32 nat64_bib_buckets = 1024;
4038   u32 nat64_bib_memory_size = 128 << 20;
4039
4040   u32 nat64_st_buckets = 2048;
4041   uword nat64_st_memory_size = 256 << 20;
4042
4043   u32 max_users_per_thread = 0;
4044   u32 user_memory_size = 0;
4045   u32 max_translations_per_thread = 0;
4046   u32 translation_memory_size = 0;
4047
4048   u32 max_translations_per_user = ~0;
4049
4050   u32 outside_vrf_id = 0;
4051   u32 outside_ip6_vrf_id = 0;
4052   u32 inside_vrf_id = 0;
4053   u8 static_mapping_only = 0;
4054   u8 static_mapping_connection_tracking = 0;
4055
4056   u32 udp_timeout = SNAT_UDP_TIMEOUT;
4057   u32 icmp_timeout = SNAT_ICMP_TIMEOUT;
4058   u32 tcp_transitory_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
4059   u32 tcp_established_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;
4060
4061   sm->out2in_dpo = 0;
4062   sm->endpoint_dependent = 0;
4063
4064   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
4065     {
4066       if (unformat
4067           (input, "max translations per thread %d",
4068            &max_translations_per_thread))
4069         ;
4070       else if (unformat (input, "udp timeout %d", &udp_timeout))
4071         ;
4072       else if (unformat (input, "icmp timeout %d", &icmp_timeout))
4073         ;
4074       else if (unformat (input, "tcp transitory timeout %d",
4075                          &tcp_transitory_timeout));
4076       else if (unformat (input, "tcp established timeout %d",
4077                          &tcp_established_timeout));
4078       else if (unformat (input, "translation hash memory %d",
4079                          &translation_memory_size));
4080       else
4081         if (unformat
4082             (input, "max users per thread %d", &max_users_per_thread))
4083         ;
4084       else if (unformat (input, "user hash memory %d", &user_memory_size))
4085         ;
4086       else if (unformat (input, "max translations per user %d",
4087                          &max_translations_per_user))
4088         ;
4089       else if (unformat (input, "outside VRF id %d", &outside_vrf_id))
4090         ;
4091       else if (unformat (input, "outside ip6 VRF id %d", &outside_ip6_vrf_id))
4092         ;
4093       else if (unformat (input, "inside VRF id %d", &inside_vrf_id))
4094         ;
4095       else if (unformat (input, "static mapping only"))
4096         {
4097           static_mapping_only = 1;
4098           if (unformat (input, "connection tracking"))
4099             static_mapping_connection_tracking = 1;
4100         }
4101       else if (unformat (input, "nat64 bib hash buckets %d",
4102                          &nat64_bib_buckets))
4103         ;
4104       else if (unformat (input, "nat64 bib hash memory %d",
4105                          &nat64_bib_memory_size))
4106         ;
4107       else
4108         if (unformat (input, "nat64 st hash buckets %d", &nat64_st_buckets))
4109         ;
4110       else if (unformat (input, "nat64 st hash memory %d",
4111                          &nat64_st_memory_size))
4112         ;
4113       else if (unformat (input, "out2in dpo"))
4114         sm->out2in_dpo = 1;
4115       else if (unformat (input, "endpoint-dependent"))
4116         sm->endpoint_dependent = 1;
4117       else
4118         return clib_error_return (0, "unknown input '%U'",
4119                                   format_unformat_error, input);
4120     }
4121
4122   if (static_mapping_only && (sm->endpoint_dependent))
4123     return clib_error_return (0,
4124                               "static mapping only mode available only for simple nat");
4125
4126   if (sm->out2in_dpo && (sm->endpoint_dependent))
4127     return clib_error_return (0,
4128                               "out2in dpo mode available only for simple nat");
4129   if (sm->endpoint_dependent && max_users_per_thread > 0)
4130     {
4131       return clib_error_return (0,
4132                                 "setting 'max users' in endpoint-dependent mode is not supported");
4133     }
4134
4135   if (sm->endpoint_dependent && max_translations_per_user != ~0)
4136     {
4137       return clib_error_return (0,
4138                                 "setting 'max translations per user' in endpoint-dependent mode is not supported");
4139     }
4140
4141   /* optionally configurable timeouts for testing purposes */
4142   sm->udp_timeout = udp_timeout;
4143   sm->tcp_transitory_timeout = tcp_transitory_timeout;
4144   sm->tcp_established_timeout = tcp_established_timeout;
4145   sm->icmp_timeout = icmp_timeout;
4146
4147   if (0 == max_users_per_thread)
4148     {
4149       max_users_per_thread = 1024;
4150     }
4151   sm->max_users_per_thread = max_users_per_thread;
4152   sm->user_buckets = nat_calc_bihash_buckets (sm->max_users_per_thread);
4153
4154   if (0 == max_translations_per_thread)
4155     {
4156       // default value based on legacy setting of load factor 10 * default
4157       // translation buckets 1024
4158       max_translations_per_thread = 10 * 1024;
4159     }
4160   sm->max_translations_per_thread = max_translations_per_thread;
4161   sm->translation_buckets =
4162     nat_calc_bihash_buckets (sm->max_translations_per_thread);
4163   if (0 == translation_memory_size)
4164     {
4165       translation_memory_size =
4166         nat_calc_bihash_memory (sm->translation_buckets,
4167                                 sizeof (clib_bihash_16_8_t));
4168     }
4169   sm->translation_memory_size = translation_memory_size;
4170   if (0 == user_memory_size)
4171     {
4172       user_memory_size =
4173         nat_calc_bihash_memory (sm->max_users_per_thread,
4174                                 sizeof (clib_bihash_8_8_t));
4175     }
4176   sm->user_memory_size = user_memory_size;
4177   vec_add1 (sm->max_translations_per_fib, sm->max_translations_per_thread);
4178
4179   sm->max_translations_per_user = max_translations_per_user == ~0 ?
4180     sm->max_translations_per_thread : max_translations_per_user;
4181
4182   sm->outside_vrf_id = outside_vrf_id;
4183   sm->outside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
4184                                                              outside_vrf_id,
4185                                                              nat_fib_src_hi);
4186   sm->inside_vrf_id = inside_vrf_id;
4187   sm->inside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
4188                                                             inside_vrf_id,
4189                                                             nat_fib_src_hi);
4190   sm->static_mapping_only = static_mapping_only;
4191   sm->static_mapping_connection_tracking = static_mapping_connection_tracking;
4192
4193   nat64_set_hash (nat64_bib_buckets, nat64_bib_memory_size, nat64_st_buckets,
4194                   nat64_st_memory_size);
4195
4196   if (sm->endpoint_dependent)
4197     {
4198       sm->worker_in2out_cb = nat44_ed_get_worker_in2out_cb;
4199       sm->worker_out2in_cb = nat44_ed_get_worker_out2in_cb;
4200
4201       sm->in2out_node_index = nat44_ed_in2out_node.index;
4202       sm->in2out_output_node_index = nat44_ed_in2out_output_node.index;
4203       sm->out2in_node_index = nat44_ed_out2in_node.index;
4204
4205       sm->icmp_match_in2out_cb = icmp_match_in2out_ed;
4206       sm->icmp_match_out2in_cb = icmp_match_out2in_ed;
4207       nat_affinity_init (vm);
4208       nat_ha_init (vm, nat_ha_sadd_ed_cb, nat_ha_sdel_ed_cb,
4209                    nat_ha_sref_ed_cb);
4210       clib_bihash_init_16_8 (&sm->out2in_ed, "out2in-ed",
4211                              clib_max (1, sm->num_workers) *
4212                              sm->translation_buckets,
4213                              clib_max (1, sm->num_workers) *
4214                              sm->translation_memory_size);
4215       clib_bihash_set_kvp_format_fn_16_8 (&sm->out2in_ed,
4216                                           format_ed_session_kvp);
4217     }
4218   else
4219     {
4220       sm->worker_in2out_cb = snat_get_worker_in2out_cb;
4221       sm->worker_out2in_cb = snat_get_worker_out2in_cb;
4222
4223       sm->in2out_node_index = snat_in2out_node.index;
4224       sm->in2out_output_node_index = snat_in2out_output_node.index;
4225       sm->out2in_node_index = snat_out2in_node.index;
4226
4227       sm->icmp_match_in2out_cb = icmp_match_in2out_slow;
4228       sm->icmp_match_out2in_cb = icmp_match_out2in_slow;
4229       nat_ha_init (vm, nat_ha_sadd_cb, nat_ha_sdel_cb, nat_ha_sref_cb);
4230     }
4231   if (!static_mapping_only ||
4232       (static_mapping_only && static_mapping_connection_tracking))
4233     {
4234           /* *INDENT-OFF* */
4235           vec_foreach (tsm, sm->per_thread_data)
4236             {
4237               nat44_db_init (tsm);
4238             }
4239           /* *INDENT-ON* */
4240     }
4241   else
4242     {
4243       sm->icmp_match_in2out_cb = icmp_match_in2out_fast;
4244       sm->icmp_match_out2in_cb = icmp_match_out2in_fast;
4245     }
4246   clib_bihash_init_8_8 (&sm->static_mapping_by_local,
4247                         "static_mapping_by_local", static_mapping_buckets,
4248                         static_mapping_memory_size);
4249   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_local,
4250                                      format_static_mapping_kvp);
4251
4252   clib_bihash_init_8_8 (&sm->static_mapping_by_external,
4253                         "static_mapping_by_external",
4254                         static_mapping_buckets, static_mapping_memory_size);
4255   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_external,
4256                                      format_static_mapping_kvp);
4257
4258   return 0;
4259 }
4260
4261 VLIB_CONFIG_FUNCTION (snat_config, "nat");
4262
4263 static void
4264 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
4265                                  uword opaque,
4266                                  u32 sw_if_index,
4267                                  ip4_address_t * address,
4268                                  u32 address_length,
4269                                  u32 if_address_index, u32 is_delete)
4270 {
4271   snat_main_t *sm = &snat_main;
4272   snat_static_map_resolve_t *rp;
4273   snat_static_mapping_t *m;
4274   clib_bihash_kv_8_8_t kv, value;
4275   int i, rv;
4276   ip4_address_t l_addr;
4277
4278   for (i = 0; i < vec_len (sm->to_resolve); i++)
4279     {
4280       rp = sm->to_resolve + i;
4281       if (rp->addr_only == 0)
4282         continue;
4283       if (rp->sw_if_index == sw_if_index)
4284         goto match;
4285     }
4286
4287   return;
4288
4289 match:
4290   init_nat_k (&kv, *address, rp->addr_only ? 0 : rp->e_port,
4291               sm->outside_fib_index, rp->addr_only ? 0 : rp->proto);
4292   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
4293     m = 0;
4294   else
4295     m = pool_elt_at_index (sm->static_mappings, value.value);
4296
4297   if (!is_delete)
4298     {
4299       /* Don't trip over lease renewal, static config */
4300       if (m)
4301         return;
4302     }
4303   else
4304     {
4305       if (!m)
4306         return;
4307     }
4308
4309   /* Indetity mapping? */
4310   if (rp->l_addr.as_u32 == 0)
4311     l_addr.as_u32 = address[0].as_u32;
4312   else
4313     l_addr.as_u32 = rp->l_addr.as_u32;
4314   /* Add the static mapping */
4315   rv = snat_add_static_mapping (l_addr,
4316                                 address[0],
4317                                 rp->l_port,
4318                                 rp->e_port,
4319                                 rp->vrf_id,
4320                                 rp->addr_only, ~0 /* sw_if_index */ ,
4321                                 rp->proto, !is_delete, rp->twice_nat,
4322                                 rp->out2in_only, rp->tag, rp->identity_nat);
4323   if (rv)
4324     nat_elog_notice_X1 ("snat_add_static_mapping returned %d", "i4", rv);
4325 }
4326
4327 static void
4328 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
4329                                        uword opaque,
4330                                        u32 sw_if_index,
4331                                        ip4_address_t * address,
4332                                        u32 address_length,
4333                                        u32 if_address_index, u32 is_delete)
4334 {
4335   snat_main_t *sm = &snat_main;
4336   snat_static_map_resolve_t *rp;
4337   ip4_address_t l_addr;
4338   int i, j;
4339   int rv;
4340   u8 twice_nat = 0;
4341   snat_address_t *addresses = sm->addresses;
4342
4343   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices); i++)
4344     {
4345       if (sw_if_index == sm->auto_add_sw_if_indices[i])
4346         goto match;
4347     }
4348
4349   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices_twice_nat); i++)
4350     {
4351       twice_nat = 1;
4352       addresses = sm->twice_nat_addresses;
4353       if (sw_if_index == sm->auto_add_sw_if_indices_twice_nat[i])
4354         goto match;
4355     }
4356
4357   return;
4358
4359 match:
4360   if (!is_delete)
4361     {
4362       /* Don't trip over lease renewal, static config */
4363       for (j = 0; j < vec_len (addresses); j++)
4364         if (addresses[j].addr.as_u32 == address->as_u32)
4365           return;
4366
4367       (void) snat_add_address (sm, address, ~0, twice_nat);
4368       /* Scan static map resolution vector */
4369       for (j = 0; j < vec_len (sm->to_resolve); j++)
4370         {
4371           rp = sm->to_resolve + j;
4372           if (rp->addr_only)
4373             continue;
4374           /* On this interface? */
4375           if (rp->sw_if_index == sw_if_index)
4376             {
4377               /* Indetity mapping? */
4378               if (rp->l_addr.as_u32 == 0)
4379                 l_addr.as_u32 = address[0].as_u32;
4380               else
4381                 l_addr.as_u32 = rp->l_addr.as_u32;
4382               /* Add the static mapping */
4383               rv = snat_add_static_mapping (l_addr,
4384                                             address[0],
4385                                             rp->l_port,
4386                                             rp->e_port,
4387                                             rp->vrf_id,
4388                                             rp->addr_only,
4389                                             ~0 /* sw_if_index */ ,
4390                                             rp->proto,
4391                                             rp->is_add, rp->twice_nat,
4392                                             rp->out2in_only, rp->tag,
4393                                             rp->identity_nat);
4394               if (rv)
4395                 nat_elog_notice_X1 ("snat_add_static_mapping returned %d",
4396                                     "i4", rv);
4397             }
4398         }
4399       return;
4400     }
4401   else
4402     {
4403       (void) snat_del_address (sm, address[0], 1, twice_nat);
4404       return;
4405     }
4406 }
4407
4408
4409 int
4410 snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del,
4411                             u8 twice_nat)
4412 {
4413   ip4_main_t *ip4_main = sm->ip4_main;
4414   ip4_address_t *first_int_addr;
4415   snat_static_map_resolve_t *rp;
4416   u32 *indices_to_delete = 0;
4417   int i, j;
4418   u32 *auto_add_sw_if_indices =
4419     twice_nat ? sm->
4420     auto_add_sw_if_indices_twice_nat : sm->auto_add_sw_if_indices;
4421
4422   first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0        /* just want the address */
4423     );
4424
4425   for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
4426     {
4427       if (auto_add_sw_if_indices[i] == sw_if_index)
4428         {
4429           if (is_del)
4430             {
4431               /* if have address remove it */
4432               if (first_int_addr)
4433                 (void) snat_del_address (sm, first_int_addr[0], 1, twice_nat);
4434               else
4435                 {
4436                   for (j = 0; j < vec_len (sm->to_resolve); j++)
4437                     {
4438                       rp = sm->to_resolve + j;
4439                       if (rp->sw_if_index == sw_if_index)
4440                         vec_add1 (indices_to_delete, j);
4441                     }
4442                   if (vec_len (indices_to_delete))
4443                     {
4444                       for (j = vec_len (indices_to_delete) - 1; j >= 0; j--)
4445                         vec_del1 (sm->to_resolve, j);
4446                       vec_free (indices_to_delete);
4447                     }
4448                 }
4449               if (twice_nat)
4450                 vec_del1 (sm->auto_add_sw_if_indices_twice_nat, i);
4451               else
4452                 vec_del1 (sm->auto_add_sw_if_indices, i);
4453             }
4454           else
4455             return VNET_API_ERROR_VALUE_EXIST;
4456
4457           return 0;
4458         }
4459     }
4460
4461   if (is_del)
4462     return VNET_API_ERROR_NO_SUCH_ENTRY;
4463
4464   /* add to the auto-address list */
4465   if (twice_nat)
4466     vec_add1 (sm->auto_add_sw_if_indices_twice_nat, sw_if_index);
4467   else
4468     vec_add1 (sm->auto_add_sw_if_indices, sw_if_index);
4469
4470   /* If the address is already bound - or static - add it now */
4471   if (first_int_addr)
4472     (void) snat_add_address (sm, first_int_addr, ~0, twice_nat);
4473
4474   return 0;
4475 }
4476
4477 int
4478 nat44_del_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
4479                    nat_protocol_t proto, u32 vrf_id, int is_in)
4480 {
4481   snat_main_per_thread_data_t *tsm;
4482   clib_bihash_kv_8_8_t kv, value;
4483   ip4_header_t ip;
4484   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
4485   snat_session_t *s;
4486   clib_bihash_8_8_t *t;
4487
4488   if (sm->endpoint_dependent)
4489     return VNET_API_ERROR_UNSUPPORTED;
4490
4491   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
4492   if (sm->num_workers > 1)
4493     tsm =
4494       vec_elt_at_index (sm->per_thread_data,
4495                         sm->worker_in2out_cb (&ip, fib_index, 0));
4496   else
4497     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
4498
4499   init_nat_k (&kv, *addr, port, fib_index, proto);
4500   t = is_in ? &tsm->in2out : &tsm->out2in;
4501   if (!clib_bihash_search_8_8 (t, &kv, &value))
4502     {
4503       if (pool_is_free_index (tsm->sessions, value.value))
4504         return VNET_API_ERROR_UNSPECIFIED;
4505
4506       s = pool_elt_at_index (tsm->sessions, value.value);
4507       nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
4508       nat44_delete_session (sm, s, tsm - sm->per_thread_data);
4509       return 0;
4510     }
4511
4512   return VNET_API_ERROR_NO_SUCH_ENTRY;
4513 }
4514
4515 int
4516 nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
4517                       ip4_address_t * eh_addr, u16 eh_port, u8 proto,
4518                       u32 vrf_id, int is_in)
4519 {
4520   ip4_header_t ip;
4521   clib_bihash_16_8_t *t;
4522   clib_bihash_kv_16_8_t kv, value;
4523   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
4524   snat_session_t *s;
4525   snat_main_per_thread_data_t *tsm;
4526
4527   if (!sm->endpoint_dependent)
4528     return VNET_API_ERROR_FEATURE_DISABLED;
4529
4530   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
4531   if (sm->num_workers > 1)
4532     tsm =
4533       vec_elt_at_index (sm->per_thread_data,
4534                         sm->worker_in2out_cb (&ip, fib_index, 0));
4535   else
4536     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
4537
4538   t = is_in ? &tsm->in2out_ed : &sm->out2in_ed;
4539   init_ed_k (&kv, *addr, port, *eh_addr, eh_port, fib_index, proto);
4540   if (clib_bihash_search_16_8 (t, &kv, &value))
4541     {
4542       return VNET_API_ERROR_NO_SUCH_ENTRY;
4543     }
4544
4545   if (pool_is_free_index (tsm->sessions, value.value))
4546     return VNET_API_ERROR_UNSPECIFIED;
4547   s = pool_elt_at_index (tsm->sessions, value.value);
4548   nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
4549   nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
4550   return 0;
4551 }
4552
4553 void
4554 nat_set_alloc_addr_and_port_mape (u16 psid, u16 psid_offset, u16 psid_length)
4555 {
4556   snat_main_t *sm = &snat_main;
4557
4558   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_MAPE;
4559   sm->alloc_addr_and_port = nat_alloc_addr_and_port_mape;
4560   sm->psid = psid;
4561   sm->psid_offset = psid_offset;
4562   sm->psid_length = psid_length;
4563 }
4564
4565 void
4566 nat_set_alloc_addr_and_port_range (u16 start_port, u16 end_port)
4567 {
4568   snat_main_t *sm = &snat_main;
4569
4570   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_RANGE;
4571   sm->alloc_addr_and_port = nat_alloc_addr_and_port_range;
4572   sm->start_port = start_port;
4573   sm->end_port = end_port;
4574 }
4575
4576 void
4577 nat_set_alloc_addr_and_port_default (void)
4578 {
4579   snat_main_t *sm = &snat_main;
4580
4581   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_DEFAULT;
4582   sm->alloc_addr_and_port = nat_alloc_addr_and_port_default;
4583 }
4584
4585 VLIB_NODE_FN (nat_default_node) (vlib_main_t * vm,
4586                                  vlib_node_runtime_t * node,
4587                                  vlib_frame_t * frame)
4588 {
4589   return 0;
4590 }
4591
4592 /* *INDENT-OFF* */
4593 VLIB_REGISTER_NODE (nat_default_node) = {
4594   .name = "nat-default",
4595   .vector_size = sizeof (u32),
4596   .format_trace = 0,
4597   .type = VLIB_NODE_TYPE_INTERNAL,
4598   .n_errors = 0,
4599   .n_next_nodes = NAT_N_NEXT,
4600   .next_nodes = {
4601     [NAT_NEXT_DROP] = "error-drop",
4602     [NAT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
4603     [NAT_NEXT_IN2OUT_ED_FAST_PATH] = "nat44-ed-in2out",
4604     [NAT_NEXT_IN2OUT_ED_SLOW_PATH] = "nat44-ed-in2out-slowpath",
4605     [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
4606     [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in",
4607     [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath",
4608     [NAT_NEXT_IN2OUT_CLASSIFY] = "nat44-in2out-worker-handoff",
4609     [NAT_NEXT_OUT2IN_CLASSIFY] = "nat44-out2in-worker-handoff",
4610   },
4611 };
4612 /* *INDENT-ON* */
4613
4614 /*
4615  * fd.io coding-style-patch-verification: ON
4616  *
4617  * Local Variables:
4618  * eval: (c-set-style "gnu")
4619  * End:
4620  */