nat: nat44 enable/disable dynamic config
[vpp.git] / src / plugins / nat / nat.c
1 /*
2  * snat.c - simple nat plugin
3  *
4  * Copyright (c) 2016 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/vnet.h>
19 #include <vnet/ip/ip.h>
20 #include <vnet/ip/ip4.h>
21 #include <vnet/plugin/plugin.h>
22 #include <nat/nat.h>
23 #include <nat/nat_dpo.h>
24 #include <nat/lib/ipfix_logging.h>
25 #include <nat/nat_inlines.h>
26 #include <nat/nat44/inlines.h>
27 #include <nat/nat_affinity.h>
28 #include <nat/nat_syslog.h>
29 #include <nat/nat_ha.h>
30 #include <vnet/fib/fib_table.h>
31 #include <vnet/fib/ip4_fib.h>
32 #include <vnet/ip/reass/ip4_sv_reass.h>
33 #include <vppinfra/bihash_16_8.h>
34 #include <nat/nat44/ed_inlines.h>
35
36 #include <vpp/app/version.h>
37
/* Global NAT plugin state; code in this file reaches it through &snat_main
 * or through an explicit snat_main_t pointer argument. */
snat_main_t snat_main;

/* FIB sources used by the NAT plugin.  In this file nat_fib_src_low is the
 * source passed when pool addresses are added to / removed from the FIB and
 * when a FIB table is looked up or created for an address.
 * NOTE(review): both are presumably allocated during plugin init with
 * different priorities -- confirm against the init code. */
fib_source_t nat_fib_src_hi;
fib_source_t nat_fib_src_low;
42
/* *INDENT-OFF* */
/*
 * Feature-arc registrations.
 *
 * Each VNET_FEATURE_INIT entry names a graph node (.node_name), the feature
 * arc it is attached to (.arc_name) and its ordering constraints relative to
 * other features on the same arc (.runs_after / .runs_before).
 */
/* Hook up input features */
VNET_FEATURE_INIT (nat_pre_in2out, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat-pre-in2out",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (nat_pre_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat-pre-out2in",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-dhcp-client-detect",
                               "ip4-sv-reassembly-feature"),
};
/* Worker hand-off nodes: ordered after the input ACL only (no reassembly
 * constraint is listed for them). */
VNET_FEATURE_INIT (snat_in2out_worker_handoff, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-in2out-worker-handoff",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
};
VNET_FEATURE_INIT (snat_out2in_worker_handoff, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-out2in-worker-handoff",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-dhcp-client-detect"),
};
/* NAT44 nodes (non "-ed" variants). */
VNET_FEATURE_INIT (ip4_snat_in2out, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-in2out",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-out2in",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
                               "ip4-dhcp-client-detect"),
};
VNET_FEATURE_INIT (ip4_nat_classify, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-classify",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
/* NAT44 "-ed" node variants. */
VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-in2out",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-out2in",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
                               "ip4-dhcp-client-detect"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-classify",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-handoff-classify",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
/* "-fast" node variants. */
VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-in2out-fast",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-out2in-fast",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
                               "ip4-dhcp-client-detect"),
};
/* Hairpinning on the input arc. */
VNET_FEATURE_INIT (ip4_snat_hairpin_dst, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-hairpin-dst",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_dst, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-hairpin-dst",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};

/* Hook up output features */
/* The first three entries run after the output ACL; the last three are
 * constrained to run before it. */
VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-in2out-output",
  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
};
VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-in2out-output-worker-handoff",
  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
};
VNET_FEATURE_INIT (ip4_snat_hairpin_src, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-hairpin-src",
  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
};
VNET_FEATURE_INIT (nat_pre_in2out_output, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat-pre-in2out-output",
  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
  .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-ed-in2out-output",
  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
  .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_src, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-ed-hairpin-src",
  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
  .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};

/* Hook up ip4-local features */
VNET_FEATURE_INIT (ip4_nat_hairpinning, static) =
{
  .arc_name = "ip4-local",
  .node_name = "nat44-hairpinning",
  .runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_hairpinning, static) =
{
  .arc_name = "ip4-local",
  .node_name = "nat44-ed-hairpinning",
  .runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
};


/* Plugin registration: version string and human-readable description. */
VLIB_PLUGIN_REGISTER () = {
    .version = VPP_BUILD_VER,
    .description = "Network Address Translation (NAT)",
};
/* *INDENT-ON* */
183
184 static u32
185 nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip,
186                                u32 rx_fib_index, u8 is_output);
187
188 static u32
189 nat44_ed_get_worker_in2out_cb (ip4_header_t * ip, u32 rx_fib_index,
190                                u8 is_output);
191
192 static u32
193 snat_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip0,
194                            u32 rx_fib_index0, u8 is_output);
195
196 static u32
197 snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0,
198                            u8 is_output);
199
200 static u32 nat_calc_bihash_buckets (u32 n_elts);
201
202 static u32 nat_calc_bihash_memory (u32 n_buckets, uword kv_size);
203
204 u8 *format_static_mapping_kvp (u8 * s, va_list * args);
205
206 u8 *format_ed_session_kvp (u8 * s, va_list * args);
207
208 void
209 nat_ha_sadd_cb (ip4_address_t * in_addr, u16 in_port,
210                 ip4_address_t * out_addr, u16 out_port,
211                 ip4_address_t * eh_addr, u16 eh_port,
212                 ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
213                 u32 fib_index, u16 flags, u32 thread_index);
214
215 void
216 nat_ha_sdel_cb (ip4_address_t * out_addr, u16 out_port,
217                 ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
218                 u32 ti);
219
220 void
221 nat_ha_sref_cb (ip4_address_t * out_addr, u16 out_port,
222                 ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
223                 u32 total_pkts, u64 total_bytes, u32 thread_index);
224
225 void
226 nat_ha_sadd_ed_cb (ip4_address_t * in_addr, u16 in_port,
227                    ip4_address_t * out_addr, u16 out_port,
228                    ip4_address_t * eh_addr, u16 eh_port,
229                    ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
230                    u32 fib_index, u16 flags, u32 thread_index);
231
232 void
233 nat_ha_sdel_ed_cb (ip4_address_t * out_addr, u16 out_port,
234                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
235                    u32 fib_index, u32 ti);
236
237 void
238 nat_ha_sdel_ed_cb (ip4_address_t * out_addr, u16 out_port,
239                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
240                    u32 fib_index, u32 ti);
241
242 void
243 nat_ha_sref_ed_cb (ip4_address_t * out_addr, u16 out_port,
244                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
245                    u32 fib_index, u32 total_pkts, u64 total_bytes,
246                    u32 thread_index);
247
/*
 * Release everything a session holds outside of its pool slot: lookup hash
 * entries, affinity lock, logging/HA notifications and the outside (and
 * twice-NAT) address/port allocations.  The session structure itself is
 * not freed here.
 *
 * @param sm           NAT main structure.
 * @param s            session being torn down.
 * @param thread_index index into sm->per_thread_data owning the session.
 * @param is_ha        when non-zero, syslog, IPFIX and nat_ha_sdel
 *                     notifications are skipped.
 */
void
nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index,
                       u8 is_ha)
{
  clib_bihash_kv_8_8_t kv;
  u8 proto;
  u16 r_port, l_port;
  ip4_address_t *l_addr, *r_addr;
  u32 fib_index = 0;
  clib_bihash_kv_16_8_t ed_kv;
  snat_main_per_thread_data_t *tsm =
    vec_elt_at_index (sm->per_thread_data, thread_index);

  if (is_ed_session (s))
    {
      per_vrf_sessions_unregister_session (s, thread_index);
    }

  /* Forwarding-bypass sessions only own a single in2out_ed entry; remove it
   * and stop, nothing else is released for them. */
  if (is_fwd_bypass_session (s))
    {
      if (snat_is_unk_proto_session (s))
        {
          /* For unknown-protocol sessions in2out.port is passed in the
           * protocol position of the key; ports and fib index are 0. */
          init_ed_k (&ed_kv, s->in2out.addr, 0, s->ext_host_addr, 0, 0,
                     s->in2out.port);
        }
      else
        {
          l_port = s->in2out.port;
          r_port = s->ext_host_port;
          l_addr = &s->in2out.addr;
          r_addr = &s->ext_host_addr;
          proto = nat_proto_to_ip_proto (s->nat_proto);
          /* NOTE(review): nat44_free_session_data builds the same key with
           * fib_index 0 -- confirm which value matches the key used when
           * the bypass entry was created. */
          fib_index = s->in2out.fib_index;
          init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index,
                     proto);
        }
      if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
        nat_elog_warn ("in2out_ed key del failed");
      return;
    }

  /* session lookup tables */
  if (is_ed_session (s))
    {
      if (is_affinity_sessions (s))
        nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
                             s->nat_proto, s->out2in.port);
      /* out2in key: outside address/port vs. external host */
      l_addr = &s->out2in.addr;
      r_addr = &s->ext_host_addr;
      fib_index = s->out2in.fib_index;
      if (snat_is_unk_proto_session (s))
        {
          proto = s->in2out.port;
          r_port = 0;
          l_port = 0;
        }
      else
        {
          proto = nat_proto_to_ip_proto (s->nat_proto);
          l_port = s->out2in.port;
          r_port = s->ext_host_port;
        }
      init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
      if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0))
        nat_elog_warn ("out2in_ed key del failed");
      /* in2out key: reuse proto/r_* from above, switch the local side to
       * the inside address; twice-NAT sessions use the translated external
       * host address/port as the remote side. */
      l_addr = &s->in2out.addr;
      fib_index = s->in2out.fib_index;
      if (!snat_is_unk_proto_session (s))
        l_port = s->in2out.port;
      if (is_twice_nat_session (s))
        {
          r_addr = &s->ext_host_nat_addr;
          r_port = s->ext_host_nat_port;
        }
      init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
      if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
        nat_elog_warn ("in2out_ed key del failed");

      if (!is_ha)
        nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
                               &s->in2out.addr, s->in2out.port,
                               &s->ext_host_nat_addr, s->ext_host_nat_port,
                               &s->out2in.addr, s->out2in.port,
                               &s->ext_host_addr, s->ext_host_port,
                               s->nat_proto, is_twice_nat_session (s));
    }
  else
    {
      /* Non-ED sessions: both directions live in per-thread 8_8 tables. */
      init_nat_i2o_k (&kv, s);
      if (clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 0))
        nat_elog_warn ("in2out key del failed");
      init_nat_o2i_k (&kv, s);
      if (clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 0))
        nat_elog_warn ("out2in key del failed");

      if (!is_ha)
        nat_syslog_nat44_apmdel (s->user_index, s->in2out.fib_index,
                                 &s->in2out.addr, s->in2out.port,
                                 &s->out2in.addr, s->out2in.port,
                                 s->nat_proto);
    }

  /* Unknown-protocol sessions skip IPFIX/HA notification and the
   * address/port release below. */
  if (snat_is_unk_proto_session (s))
    return;

  if (!is_ha)
    {
      /* log NAT event */
      nat_ipfix_logging_nat44_ses_delete (thread_index,
                                          s->in2out.addr.as_u32,
                                          s->out2in.addr.as_u32,
                                          s->nat_proto,
                                          s->in2out.port,
                                          s->out2in.port,
                                          s->in2out.fib_index);

      nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
                   s->ext_host_port, s->nat_proto, s->out2in.fib_index,
                   thread_index);
    }

  /* Twice NAT address and port for external host */
  if (is_twice_nat_session (s))
    {
      snat_free_outside_address_and_port (sm->twice_nat_addresses,
                                          thread_index,
                                          &s->ext_host_nat_addr,
                                          s->ext_host_nat_port, s->nat_proto);
    }

  /* Static sessions do not release an outside address/port from
   * sm->addresses. */
  if (snat_is_session_static (s))
    return;

  snat_free_outside_address_and_port (sm->addresses, thread_index,
                                      &s->out2in.addr, s->out2in.port,
                                      s->nat_proto);
}
385
/*
 * Variant of nat_free_session_data that always treats the session as using
 * the 16_8 "ed" lookup tables: it removes the out2in_ed / in2out_ed
 * entries, emits the delete notifications and releases the outside (and
 * twice-NAT) address/port.  Unlike nat_free_session_data it does not call
 * per_vrf_sessions_unregister_session and has no 8_8-table path.
 *
 * @param sm           NAT main structure.
 * @param s            session being torn down.
 * @param thread_index index into sm->per_thread_data owning the session.
 * @param is_ha        when non-zero, syslog, IPFIX and nat_ha_sdel
 *                     notifications are skipped.
 */
void
nat44_free_session_data (snat_main_t * sm, snat_session_t * s,
                         u32 thread_index, u8 is_ha)
{
  u8 proto;
  u16 r_port, l_port;
  ip4_address_t *l_addr, *r_addr;
  u32 fib_index;
  clib_bihash_kv_16_8_t ed_kv;
  snat_main_per_thread_data_t *tsm =
    vec_elt_at_index (sm->per_thread_data, thread_index);

  /* Forwarding-bypass sessions only own one in2out_ed entry, keyed with
   * fib index 0; delete it and return. */
  if (is_fwd_bypass_session (s))
    {
      if (snat_is_unk_proto_session (s))
        {
          /* unknown protocol: in2out.port is used as the protocol value */
          proto = s->in2out.port;
          r_port = 0;
          l_port = 0;
        }
      else
        {
          proto = nat_proto_to_ip_proto (s->nat_proto);
          l_port = s->in2out.port;
          r_port = s->ext_host_port;
        }

      l_addr = &s->in2out.addr;
      r_addr = &s->ext_host_addr;
      fib_index = 0;
      init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);

      if (PREDICT_FALSE
          (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0)))
        nat_elog_warn ("in2out_ed key del failed");
      return;
    }

  /* session lookup tables */
  if (is_affinity_sessions (s))
    nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
                         s->nat_proto, s->out2in.port);
  /* out2in key: outside address/port vs. external host */
  l_addr = &s->out2in.addr;
  r_addr = &s->ext_host_addr;
  fib_index = s->out2in.fib_index;
  if (snat_is_unk_proto_session (s))
    {
      proto = s->in2out.port;
      r_port = 0;
      l_port = 0;
    }
  else
    {
      proto = nat_proto_to_ip_proto (s->nat_proto);
      l_port = s->out2in.port;
      r_port = s->ext_host_port;
    }
  init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);

  if (PREDICT_FALSE (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0)))
    nat_elog_warn ("out2in_ed key del failed");

  /* in2out key: proto and the remote side carry over from above; the local
   * side becomes the inside address. */
  l_addr = &s->in2out.addr;
  fib_index = s->in2out.fib_index;

  if (!snat_is_unk_proto_session (s))
    l_port = s->in2out.port;

  /* twice-NAT: remote side is the translated external host address/port */
  if (is_twice_nat_session (s))
    {
      r_addr = &s->ext_host_nat_addr;
      r_port = s->ext_host_nat_port;
    }
  init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);

  if (PREDICT_FALSE (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0)))
    nat_elog_warn ("in2out_ed key del failed");

  if (!is_ha)
    {
      nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
                             &s->in2out.addr, s->in2out.port,
                             &s->ext_host_nat_addr, s->ext_host_nat_port,
                             &s->out2in.addr, s->out2in.port,
                             &s->ext_host_addr, s->ext_host_port,
                             s->nat_proto, is_twice_nat_session (s));
    }

  /* Unknown-protocol sessions skip IPFIX/HA notification and the
   * address/port release below. */
  if (snat_is_unk_proto_session (s))
    return;

  if (!is_ha)
    {
      nat_ipfix_logging_nat44_ses_delete (thread_index,
                                          s->in2out.addr.as_u32,
                                          s->out2in.addr.as_u32,
                                          s->nat_proto,
                                          s->in2out.port,
                                          s->out2in.port,
                                          s->in2out.fib_index);
      nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
                   s->ext_host_port, s->nat_proto, s->out2in.fib_index,
                   thread_index);
    }

  /* Twice NAT address and port for external host */
  if (is_twice_nat_session (s))
    {
      snat_free_outside_address_and_port (sm->twice_nat_addresses,
                                          thread_index,
                                          &s->ext_host_nat_addr,
                                          s->ext_host_nat_port, s->nat_proto);
    }

  /* Static sessions do not release an outside address/port from
   * sm->addresses. */
  if (snat_is_session_static (s))
    return;

  snat_free_outside_address_and_port (sm->addresses, thread_index,
                                      &s->out2in.addr, s->out2in.port,
                                      s->nat_proto);
}
507
508
509 snat_user_t *
510 nat_user_get_or_create (snat_main_t * sm, ip4_address_t * addr, u32 fib_index,
511                         u32 thread_index)
512 {
513   snat_user_t *u = 0;
514   snat_user_key_t user_key;
515   clib_bihash_kv_8_8_t kv, value;
516   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
517   dlist_elt_t *per_user_list_head_elt;
518
519   user_key.addr.as_u32 = addr->as_u32;
520   user_key.fib_index = fib_index;
521   kv.key = user_key.as_u64;
522
523   /* Ever heard of the "user" = src ip4 address before? */
524   if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
525     {
526       if (pool_elts (tsm->users) >= sm->max_users_per_thread)
527         {
528           vlib_increment_simple_counter (&sm->user_limit_reached,
529                                          thread_index, 0, 1);
530           nat_elog_warn ("maximum user limit reached");
531           return NULL;
532         }
533       /* no, make a new one */
534       pool_get (tsm->users, u);
535       clib_memset (u, 0, sizeof (*u));
536
537       u->addr.as_u32 = addr->as_u32;
538       u->fib_index = fib_index;
539
540       pool_get (tsm->list_pool, per_user_list_head_elt);
541
542       u->sessions_per_user_list_head_index = per_user_list_head_elt -
543         tsm->list_pool;
544
545       clib_dlist_init (tsm->list_pool, u->sessions_per_user_list_head_index);
546
547       kv.value = u - tsm->users;
548
549       /* add user */
550       if (clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1))
551         {
552           nat_elog_warn ("user_hash key add failed");
553           nat44_delete_user_with_no_session (sm, u, thread_index);
554           return NULL;
555         }
556
557       vlib_set_simple_counter (&sm->total_users, thread_index, 0,
558                                pool_elts (tsm->users));
559     }
560   else
561     {
562       u = pool_elt_at_index (tsm->users, value.value);
563     }
564
565   return u;
566 }
567
568 snat_session_t *
569 nat_session_alloc_or_recycle (snat_main_t * sm, snat_user_t * u,
570                               u32 thread_index, f64 now)
571 {
572   snat_session_t *s;
573   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
574   u32 oldest_per_user_translation_list_index, session_index;
575   dlist_elt_t *oldest_per_user_translation_list_elt;
576   dlist_elt_t *per_user_translation_list_elt;
577
578   /* Over quota? Recycle the least recently used translation */
579   if ((u->nsessions + u->nstaticsessions) >= sm->max_translations_per_user)
580     {
581       oldest_per_user_translation_list_index =
582         clib_dlist_remove_head (tsm->list_pool,
583                                 u->sessions_per_user_list_head_index);
584
585       ASSERT (oldest_per_user_translation_list_index != ~0);
586
587       /* Add it back to the end of the LRU list */
588       clib_dlist_addtail (tsm->list_pool,
589                           u->sessions_per_user_list_head_index,
590                           oldest_per_user_translation_list_index);
591       /* Get the list element */
592       oldest_per_user_translation_list_elt =
593         pool_elt_at_index (tsm->list_pool,
594                            oldest_per_user_translation_list_index);
595
596       /* Get the session index from the list element */
597       session_index = oldest_per_user_translation_list_elt->value;
598
599       /* Get the session */
600       s = pool_elt_at_index (tsm->sessions, session_index);
601       nat_free_session_data (sm, s, thread_index, 0);
602       if (snat_is_session_static (s))
603         u->nstaticsessions--;
604       else
605         u->nsessions--;
606       s->flags = 0;
607       s->total_bytes = 0;
608       s->total_pkts = 0;
609       s->state = 0;
610       s->ext_host_addr.as_u32 = 0;
611       s->ext_host_port = 0;
612       s->ext_host_nat_addr.as_u32 = 0;
613       s->ext_host_nat_port = 0;
614     }
615   else
616     {
617       pool_get (tsm->sessions, s);
618       clib_memset (s, 0, sizeof (*s));
619
620       /* Create list elts */
621       pool_get (tsm->list_pool, per_user_translation_list_elt);
622       clib_dlist_init (tsm->list_pool,
623                        per_user_translation_list_elt - tsm->list_pool);
624
625       per_user_translation_list_elt->value = s - tsm->sessions;
626       s->per_user_index = per_user_translation_list_elt - tsm->list_pool;
627       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
628
629       clib_dlist_addtail (tsm->list_pool,
630                           s->per_user_list_head_index,
631                           per_user_translation_list_elt - tsm->list_pool);
632
633       s->user_index = u - tsm->users;
634       vlib_set_simple_counter (&sm->total_sessions, thread_index, 0,
635                                pool_elts (tsm->sessions));
636     }
637
638   s->ha_last_refreshed = now;
639
640   return s;
641 }
642
643 void
644 snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index,
645                           int is_add)
646 {
647   fib_prefix_t prefix = {
648     .fp_len = p_len,
649     .fp_proto = FIB_PROTOCOL_IP4,
650     .fp_addr = {
651                 .ip4.as_u32 = addr->as_u32,
652                 },
653   };
654   u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
655
656   if (is_add)
657     fib_table_entry_update_one_path (fib_index,
658                                      &prefix,
659                                      nat_fib_src_low,
660                                      (FIB_ENTRY_FLAG_CONNECTED |
661                                       FIB_ENTRY_FLAG_LOCAL |
662                                       FIB_ENTRY_FLAG_EXCLUSIVE),
663                                      DPO_PROTO_IP4,
664                                      NULL,
665                                      sw_if_index,
666                                      ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
667   else
668     fib_table_entry_delete (fib_index, &prefix, nat_fib_src_low);
669 }
670
/*
 * Add an address to the NAT pool (sm->addresses) or, when twice_nat is set,
 * to the twice-NAT pool (sm->twice_nat_addresses).
 *
 * @param sm         NAT main structure.
 * @param addr       address to add.
 * @param vrf_id     VRF the address is tied to; ~0 leaves fib_index as ~0.
 * @param twice_nat  non-zero selects the twice-NAT pool.
 *
 * @return 0 on success, VNET_API_ERROR_UNSUPPORTED when twice_nat is
 *         requested without endpoint-dependent mode,
 *         VNET_API_ERROR_VALUE_EXIST when the address is already present.
 */
int
snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id,
                  u8 twice_nat)
{
  snat_address_t *ap;
  snat_interface_t *i;
  vlib_thread_main_t *tm = vlib_get_thread_main ();

  if (twice_nat && !sm->endpoint_dependent)
    {
      nat_log_err ("unsupported");
      return VNET_API_ERROR_UNSUPPORTED;
    }

  /* Check if address already exists */
  /* *INDENT-OFF* */
  vec_foreach (ap, twice_nat ? sm->twice_nat_addresses : sm->addresses)
    {
      if (ap->addr.as_u32 == addr->as_u32)
        {
          nat_log_err ("address exist");
          return VNET_API_ERROR_VALUE_EXIST;
        }
    }
  /* *INDENT-ON* */

  /* Append a new element to the selected pool; ap points at it. */
  if (twice_nat)
    vec_add2 (sm->twice_nat_addresses, ap, 1);
  else
    vec_add2 (sm->addresses, ap, 1);

  ap->addr = *addr;
  if (vrf_id != ~0)
    ap->fib_index =
      fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
                                         nat_fib_src_low);
  else
    ap->fib_index = ~0;

  /* For every protocol listed by foreach_nat_protocol: clear the port
   * refcounts and busy-port counter, then size the per-thread busy-port
   * vector to one zeroed slot per vlib main. */
  /* *INDENT-OFF* */
  #define _(N, i, n, s) \
    clib_memset(ap->busy_##n##_port_refcounts, 0, sizeof(ap->busy_##n##_port_refcounts));\
    ap->busy_##n##_ports = 0; \
    ap->busy_##n##_ports_per_thread = 0;\
    vec_validate_init_empty (ap->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
    foreach_nat_protocol
  #undef _
  /* *INDENT-ON* */

  /* Twice-NAT addresses are not installed in the FIB. */
  if (twice_nat)
    return 0;

  /* Add external address to FIB */
  /* Inside interfaces are skipped, and nothing is installed when
   * sm->out2in_dpo is set.
   * NOTE(review): whether the `break` below stops the whole pool walk or
   * only the current element depends on how pool_foreach expands --
   * confirm against the vppinfra pool.h in use. */
  /* *INDENT-OFF* */
  pool_foreach (i, sm->interfaces,
  ({
    if (nat_interface_is_inside(i) || sm->out2in_dpo)
      continue;

    snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1);
    break;
  }));
  pool_foreach (i, sm->output_feature_interfaces,
  ({
    if (nat_interface_is_inside(i) || sm->out2in_dpo)
      continue;

    snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1);
    break;
  }));
  /* *INDENT-ON* */

  return 0;
}
745
746 static int
747 is_snat_address_used_in_static_mapping (snat_main_t * sm, ip4_address_t addr)
748 {
749   snat_static_mapping_t *m;
750   /* *INDENT-OFF* */
751   pool_foreach (m, sm->static_mappings,
752   ({
753       if (is_addr_only_static_mapping (m) ||
754           is_out2in_only_static_mapping (m) ||
755           is_identity_static_mapping (m))
756         continue;
757       if (m->external_addr.as_u32 == addr.as_u32)
758         return 1;
759   }));
760   /* *INDENT-ON* */
761
762   return 0;
763 }
764
765 static void
766 snat_add_static_mapping_when_resolved (snat_main_t * sm,
767                                        ip4_address_t l_addr,
768                                        u16 l_port,
769                                        u32 sw_if_index,
770                                        u16 e_port,
771                                        u32 vrf_id,
772                                        nat_protocol_t proto,
773                                        int addr_only, int is_add, u8 * tag,
774                                        int twice_nat, int out2in_only,
775                                        int identity_nat,
776                                        ip4_address_t pool_addr, int exact)
777 {
778   snat_static_map_resolve_t *rp;
779
780   vec_add2 (sm->to_resolve, rp, 1);
781   rp->l_addr.as_u32 = l_addr.as_u32;
782   rp->l_port = l_port;
783   rp->sw_if_index = sw_if_index;
784   rp->e_port = e_port;
785   rp->vrf_id = vrf_id;
786   rp->proto = proto;
787   rp->addr_only = addr_only;
788   rp->is_add = is_add;
789   rp->twice_nat = twice_nat;
790   rp->out2in_only = out2in_only;
791   rp->identity_nat = identity_nat;
792   rp->tag = vec_dup (tag);
793   rp->pool_addr = pool_addr;
794   rp->exact = exact;
795 }
796
797 static u32
798 get_thread_idx_by_port (u16 e_port)
799 {
800   snat_main_t *sm = &snat_main;
801   u32 thread_idx = sm->num_workers;
802   if (sm->num_workers > 1)
803     {
804       thread_idx =
805         sm->first_worker_index +
806         sm->workers[(e_port - 1024) / sm->port_per_thread];
807     }
808   return thread_idx;
809 }
810
811 void
812 snat_static_mapping_del_sessions (snat_main_t * sm,
813                                   snat_main_per_thread_data_t * tsm,
814                                   snat_user_key_t u_key, int addr_only,
815                                   ip4_address_t e_addr, u16 e_port)
816 {
817   clib_bihash_kv_8_8_t kv, value;
818   kv.key = u_key.as_u64;
819   u64 user_index;
820   dlist_elt_t *head, *elt;
821   snat_user_t *u;
822   snat_session_t *s;
823   u32 elt_index, head_index, ses_index;
824   if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
825     {
826       user_index = value.value;
827       u = pool_elt_at_index (tsm->users, user_index);
828       if (u->nstaticsessions)
829         {
830           head_index = u->sessions_per_user_list_head_index;
831           head = pool_elt_at_index (tsm->list_pool, head_index);
832           elt_index = head->next;
833           elt = pool_elt_at_index (tsm->list_pool, elt_index);
834           ses_index = elt->value;
835           while (ses_index != ~0)
836             {
837               s = pool_elt_at_index (tsm->sessions, ses_index);
838               elt = pool_elt_at_index (tsm->list_pool, elt->next);
839               ses_index = elt->value;
840
841               if (!addr_only)
842                 {
843                   if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
844                       (s->out2in.port != e_port))
845                     continue;
846                 }
847
848               if (is_lb_session (s))
849                 continue;
850
851               if (!snat_is_session_static (s))
852                 continue;
853
854               nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
855               nat44_delete_session (sm, s, tsm - sm->per_thread_data);
856
857               if (!addr_only)
858                 break;
859             }
860         }
861     }
862 }
863
/*
 * Delete static sessions on thread tsm whose inside side matches
 * (fib_index, l_addr).  When addr_only is zero the session must also match
 * e_addr/e_port, l_port and protocol.  Load-balanced and non-static
 * sessions are skipped.
 *
 * Works in two passes: the pool walk releases each matching session's data
 * and records its index; the sessions themselves are deleted afterwards,
 * outside the pool walk.
 */
void
snat_ed_static_mapping_del_sessions (snat_main_t * sm,
                                     snat_main_per_thread_data_t * tsm,
                                     ip4_address_t l_addr,
                                     u16 l_port,
                                     u8 protocol,
                                     u32 fib_index, int addr_only,
                                     ip4_address_t e_addr, u16 e_port)
{
  snat_session_t *s;
  u32 *indexes_to_free = NULL;
  /* NOTE(review): whether the `break` below ends the whole pool walk or
   * only the current element depends on how pool_foreach expands --
   * confirm against the vppinfra pool.h in use. */
  /* *INDENT-OFF* */
  pool_foreach (s, tsm->sessions, {
    if (s->in2out.fib_index != fib_index ||
        s->in2out.addr.as_u32 != l_addr.as_u32)
      {
        continue;
      }
    if (!addr_only)
      {
        if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
            s->out2in.port != e_port ||
            s->in2out.port != l_port ||
            s->nat_proto != protocol)
          continue;
      }

    if (is_lb_session (s))
      continue;
    if (!snat_is_session_static (s))
      continue;
    nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
    vec_add1 (indexes_to_free, s - tsm->sessions);
    if (!addr_only)
      break;
  });
  /* *INDENT-ON* */
  /* Second pass: delete the recorded sessions. */
  u32 *ses_index;
  vec_foreach (ses_index, indexes_to_free)
  {
    s = pool_elt_at_index (tsm->sessions, *ses_index);
    nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
  }
  vec_free (indexes_to_free);
}
909
910 int
911 snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
912                          u16 l_port, u16 e_port, u32 vrf_id, int addr_only,
913                          u32 sw_if_index, nat_protocol_t proto, int is_add,
914                          twice_nat_type_t twice_nat, u8 out2in_only, u8 * tag,
915                          u8 identity_nat, ip4_address_t pool_addr, int exact)
916 {
917   snat_main_t *sm = &snat_main;
918   snat_static_mapping_t *m;
919   clib_bihash_kv_8_8_t kv, value;
920   snat_address_t *a = 0;
921   u32 fib_index = ~0;
922   snat_interface_t *interface;
923   int i;
924   snat_main_per_thread_data_t *tsm;
925   snat_user_key_t u_key;
926   snat_user_t *u;
927   dlist_elt_t *head, *elt;
928   u32 elt_index, head_index;
929   u32 ses_index;
930   u64 user_index;
931   snat_session_t *s;
932   snat_static_map_resolve_t *rp, *rp_match = 0;
933   nat44_lb_addr_port_t *local;
934   u32 find = ~0;
935
936   if (!sm->endpoint_dependent)
937     {
938       if (twice_nat || out2in_only)
939         return VNET_API_ERROR_FEATURE_DISABLED;
940     }
941
942   /* If the external address is a specific interface address */
943   if (sw_if_index != ~0)
944     {
945       ip4_address_t *first_int_addr;
946
947       for (i = 0; i < vec_len (sm->to_resolve); i++)
948         {
949           rp = sm->to_resolve + i;
950           if (rp->sw_if_index != sw_if_index ||
951               rp->l_addr.as_u32 != l_addr.as_u32 ||
952               rp->vrf_id != vrf_id || rp->addr_only != addr_only)
953             continue;
954
955           if (!addr_only)
956             {
957               if ((rp->l_port != l_port && rp->e_port != e_port)
958                   || rp->proto != proto)
959                 continue;
960             }
961
962           rp_match = rp;
963           break;
964         }
965
966       /* Might be already set... */
967       first_int_addr = ip4_interface_first_address
968         (sm->ip4_main, sw_if_index, 0 /* just want the address */ );
969
970       if (is_add)
971         {
972           if (rp_match)
973             return VNET_API_ERROR_VALUE_EXIST;
974
975           snat_add_static_mapping_when_resolved
976             (sm, l_addr, l_port, sw_if_index, e_port, vrf_id, proto,
977              addr_only, is_add, tag, twice_nat, out2in_only,
978              identity_nat, pool_addr, exact);
979
980           /* DHCP resolution required? */
981           if (first_int_addr == 0)
982             {
983               return 0;
984             }
985           else
986             {
987               e_addr.as_u32 = first_int_addr->as_u32;
988               /* Identity mapping? */
989               if (l_addr.as_u32 == 0)
990                 l_addr.as_u32 = e_addr.as_u32;
991             }
992         }
993       else
994         {
995           if (!rp_match)
996             return VNET_API_ERROR_NO_SUCH_ENTRY;
997
998           vec_del1 (sm->to_resolve, i);
999
1000           if (first_int_addr)
1001             {
1002               e_addr.as_u32 = first_int_addr->as_u32;
1003               /* Identity mapping? */
1004               if (l_addr.as_u32 == 0)
1005                 l_addr.as_u32 = e_addr.as_u32;
1006             }
1007           else
1008             return 0;
1009         }
1010     }
1011
1012   init_nat_k (&kv, e_addr, addr_only ? 0 : e_port, 0, addr_only ? 0 : proto);
1013   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1014     m = 0;
1015   else
1016     m = pool_elt_at_index (sm->static_mappings, value.value);
1017
1018   if (is_add)
1019     {
1020       if (m)
1021         {
1022           if (is_identity_static_mapping (m))
1023             {
1024               /* *INDENT-OFF* */
1025               pool_foreach (local, m->locals,
1026               ({
1027                 if (local->vrf_id == vrf_id)
1028                   return VNET_API_ERROR_VALUE_EXIST;
1029               }));
1030               /* *INDENT-ON* */
1031               pool_get (m->locals, local);
1032               local->vrf_id = vrf_id;
1033               local->fib_index =
1034                 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1035                                                    nat_fib_src_low);
1036               init_nat_kv (&kv, m->local_addr, m->local_port,
1037                            local->fib_index, m->proto,
1038                            m - sm->static_mappings);
1039               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
1040               return 0;
1041             }
1042           else
1043             return VNET_API_ERROR_VALUE_EXIST;
1044         }
1045
1046       if (twice_nat && addr_only)
1047         return VNET_API_ERROR_UNSUPPORTED;
1048
1049       /* Convert VRF id to FIB index */
1050       if (vrf_id != ~0)
1051         fib_index =
1052           fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1053                                              nat_fib_src_low);
1054       /* If not specified use inside VRF id from SNAT plugin startup config */
1055       else
1056         {
1057           fib_index = sm->inside_fib_index;
1058           vrf_id = sm->inside_vrf_id;
1059           fib_table_lock (fib_index, FIB_PROTOCOL_IP4, nat_fib_src_low);
1060         }
1061
1062       if (!(out2in_only || identity_nat))
1063         {
1064           init_nat_k (&kv, l_addr, addr_only ? 0 : l_port, fib_index,
1065                       addr_only ? 0 : proto);
1066           if (!clib_bihash_search_8_8
1067               (&sm->static_mapping_by_local, &kv, &value))
1068             return VNET_API_ERROR_VALUE_EXIST;
1069         }
1070
1071       /* Find external address in allocated addresses and reserve port for
1072          address and port pair mapping when dynamic translations enabled */
1073       if (!(addr_only || sm->static_mapping_only || out2in_only))
1074         {
1075           for (i = 0; i < vec_len (sm->addresses); i++)
1076             {
1077               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1078                 {
1079                   a = sm->addresses + i;
1080                   /* External port must be unused */
1081                   switch (proto)
1082                     {
1083 #define _(N, j, n, s) \
1084                     case NAT_PROTOCOL_##N: \
1085                       if (a->busy_##n##_port_refcounts[e_port]) \
1086                         return VNET_API_ERROR_INVALID_VALUE; \
1087                       ++a->busy_##n##_port_refcounts[e_port]; \
1088                       if (e_port > 1024) \
1089                         { \
1090                           a->busy_##n##_ports++; \
1091                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
1092                         } \
1093                       break;
1094                       foreach_nat_protocol
1095 #undef _
1096                     default:
1097                       nat_elog_info ("unknown protocol");
1098                       return VNET_API_ERROR_INVALID_VALUE_2;
1099                     }
1100                   break;
1101                 }
1102             }
1103           /* External address must be allocated */
1104           if (!a && (l_addr.as_u32 != e_addr.as_u32))
1105             {
1106               if (sw_if_index != ~0)
1107                 {
1108                   for (i = 0; i < vec_len (sm->to_resolve); i++)
1109                     {
1110                       rp = sm->to_resolve + i;
1111                       if (rp->addr_only)
1112                         continue;
1113                       if (rp->sw_if_index != sw_if_index &&
1114                           rp->l_addr.as_u32 != l_addr.as_u32 &&
1115                           rp->vrf_id != vrf_id && rp->l_port != l_port &&
1116                           rp->e_port != e_port && rp->proto != proto)
1117                         continue;
1118
1119                       vec_del1 (sm->to_resolve, i);
1120                       break;
1121                     }
1122                 }
1123               return VNET_API_ERROR_NO_SUCH_ENTRY;
1124             }
1125         }
1126
1127       pool_get (sm->static_mappings, m);
1128       clib_memset (m, 0, sizeof (*m));
1129       m->tag = vec_dup (tag);
1130       m->local_addr = l_addr;
1131       m->external_addr = e_addr;
1132       m->twice_nat = twice_nat;
1133
1134       if (twice_nat == TWICE_NAT && exact)
1135         {
1136           m->flags |= NAT_STATIC_MAPPING_FLAG_EXACT_ADDRESS;
1137           m->pool_addr = pool_addr;
1138         }
1139
1140       if (out2in_only)
1141         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
1142       if (addr_only)
1143         m->flags |= NAT_STATIC_MAPPING_FLAG_ADDR_ONLY;
1144       if (identity_nat)
1145         {
1146           m->flags |= NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT;
1147           pool_get (m->locals, local);
1148           local->vrf_id = vrf_id;
1149           local->fib_index = fib_index;
1150         }
1151       else
1152         {
1153           m->vrf_id = vrf_id;
1154           m->fib_index = fib_index;
1155         }
1156       if (!addr_only)
1157         {
1158           m->local_port = l_port;
1159           m->external_port = e_port;
1160           m->proto = proto;
1161         }
1162
1163       if (sm->num_workers > 1)
1164         {
1165           ip4_header_t ip = {
1166             .src_address = m->local_addr,
1167           };
1168           vec_add1 (m->workers, sm->worker_in2out_cb (&ip, m->fib_index, 0));
1169           tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
1170         }
1171       else
1172         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1173
1174       init_nat_kv (&kv, m->local_addr, m->local_port, fib_index, m->proto,
1175                    m - sm->static_mappings);
1176       if (!out2in_only)
1177         clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
1178
1179       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto,
1180                    m - sm->static_mappings);
1181       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1);
1182
1183       /* Delete dynamic sessions matching local address (+ local port) */
1184       if (!(sm->static_mapping_only))
1185         {
1186           u_key.addr = m->local_addr;
1187           u_key.fib_index = m->fib_index;
1188           kv.key = u_key.as_u64;
1189           if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1190             {
1191               user_index = value.value;
1192               u = pool_elt_at_index (tsm->users, user_index);
1193               if (u->nsessions)
1194                 {
1195                   head_index = u->sessions_per_user_list_head_index;
1196                   head = pool_elt_at_index (tsm->list_pool, head_index);
1197                   elt_index = head->next;
1198                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1199                   ses_index = elt->value;
1200                   while (ses_index != ~0)
1201                     {
1202                       s = pool_elt_at_index (tsm->sessions, ses_index);
1203                       elt = pool_elt_at_index (tsm->list_pool, elt->next);
1204                       ses_index = elt->value;
1205
1206                       if (snat_is_session_static (s))
1207                         continue;
1208
1209                       if (!addr_only && s->in2out.port != m->local_port)
1210                         continue;
1211
1212                       nat_free_session_data (sm, s,
1213                                              tsm - sm->per_thread_data, 0);
1214                       nat44_delete_session (sm, s, tsm - sm->per_thread_data);
1215
1216                       if (!addr_only && !sm->endpoint_dependent)
1217                         break;
1218                     }
1219                 }
1220             }
1221         }
1222     }
1223   else
1224     {
1225       if (!m)
1226         {
1227           if (sw_if_index != ~0)
1228             return 0;
1229           else
1230             return VNET_API_ERROR_NO_SUCH_ENTRY;
1231         }
1232
1233       if (identity_nat)
1234         {
1235           if (vrf_id == ~0)
1236             vrf_id = sm->inside_vrf_id;
1237
1238           /* *INDENT-OFF* */
1239           pool_foreach (local, m->locals,
1240           ({
1241             if (local->vrf_id == vrf_id)
1242               find = local - m->locals;
1243           }));
1244           /* *INDENT-ON* */
1245           if (find == ~0)
1246             return VNET_API_ERROR_NO_SUCH_ENTRY;
1247
1248           local = pool_elt_at_index (m->locals, find);
1249           fib_index = local->fib_index;
1250           pool_put (m->locals, local);
1251         }
1252       else
1253         fib_index = m->fib_index;
1254
1255       /* Free external address port */
1256       if (!(addr_only || sm->static_mapping_only || out2in_only))
1257         {
1258           for (i = 0; i < vec_len (sm->addresses); i++)
1259             {
1260               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1261                 {
1262                   a = sm->addresses + i;
1263                   switch (proto)
1264                     {
1265 #define _(N, j, n, s) \
1266                     case NAT_PROTOCOL_##N: \
1267                       --a->busy_##n##_port_refcounts[e_port]; \
1268                       if (e_port > 1024) \
1269                         { \
1270                           a->busy_##n##_ports--; \
1271                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1272                         } \
1273                       break;
1274                       foreach_nat_protocol
1275 #undef _
1276                     default:
1277                       nat_elog_info ("unknown protocol");
1278                       return VNET_API_ERROR_INVALID_VALUE_2;
1279                     }
1280                   break;
1281                 }
1282             }
1283         }
1284
1285       if (sm->num_workers > 1)
1286         tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
1287       else
1288         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1289
1290       init_nat_k (&kv, m->local_addr, m->local_port, fib_index, m->proto);
1291       if (!out2in_only)
1292         clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0);
1293
1294       /* Delete session(s) for static mapping if exist */
1295       if (!(sm->static_mapping_only) ||
1296           (sm->static_mapping_only && sm->static_mapping_connection_tracking))
1297         {
1298           if (sm->endpoint_dependent)
1299             {
1300               snat_ed_static_mapping_del_sessions (sm, tsm, m->local_addr,
1301                                                    m->local_port, m->proto,
1302                                                    fib_index, addr_only,
1303                                                    e_addr, e_port);
1304             }
1305           else
1306             {
1307               u_key.addr = m->local_addr;
1308               u_key.fib_index = fib_index;
1309               kv.key = u_key.as_u64;
1310               snat_static_mapping_del_sessions (sm, tsm, u_key, addr_only,
1311                                                 e_addr, e_port);
1312             }
1313         }
1314
1315       fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, nat_fib_src_low);
1316       if (pool_elts (m->locals))
1317         return 0;
1318
1319       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
1320       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0);
1321
1322       vec_free (m->tag);
1323       vec_free (m->workers);
1324       /* Delete static mapping from pool */
1325       pool_put (sm->static_mappings, m);
1326     }
1327
1328   if (!addr_only || (l_addr.as_u32 == e_addr.as_u32))
1329     return 0;
1330
1331   /* Add/delete external address to FIB */
1332   /* *INDENT-OFF* */
1333   pool_foreach (interface, sm->interfaces,
1334   ({
1335     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1336       continue;
1337
1338     snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add);
1339     break;
1340   }));
1341   pool_foreach (interface, sm->output_feature_interfaces,
1342   ({
1343     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1344       continue;
1345
1346     snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add);
1347     break;
1348   }));
1349   /* *INDENT-ON* */
1350
1351   return 0;
1352 }
1353
1354 int
1355 nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
1356                                  nat_protocol_t proto,
1357                                  nat44_lb_addr_port_t * locals, u8 is_add,
1358                                  twice_nat_type_t twice_nat, u8 out2in_only,
1359                                  u8 * tag, u32 affinity)
1360 {
1361   snat_main_t *sm = &snat_main;
1362   snat_static_mapping_t *m;
1363   clib_bihash_kv_8_8_t kv, value;
1364   snat_address_t *a = 0;
1365   int i;
1366   nat44_lb_addr_port_t *local;
1367   snat_main_per_thread_data_t *tsm;
1368   snat_session_t *s;
1369   uword *bitmap = 0;
1370
1371   if (!sm->endpoint_dependent)
1372     return VNET_API_ERROR_FEATURE_DISABLED;
1373
1374   init_nat_k (&kv, e_addr, e_port, 0, proto);
1375   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1376     m = 0;
1377   else
1378     m = pool_elt_at_index (sm->static_mappings, value.value);
1379
1380   if (is_add)
1381     {
1382       if (m)
1383         return VNET_API_ERROR_VALUE_EXIST;
1384
1385       if (vec_len (locals) < 2)
1386         return VNET_API_ERROR_INVALID_VALUE;
1387
1388       /* Find external address in allocated addresses and reserve port for
1389          address and port pair mapping when dynamic translations enabled */
1390       if (!(sm->static_mapping_only || out2in_only))
1391         {
1392           for (i = 0; i < vec_len (sm->addresses); i++)
1393             {
1394               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1395                 {
1396                   a = sm->addresses + i;
1397                   /* External port must be unused */
1398                   switch (proto)
1399                     {
1400 #define _(N, j, n, s) \
1401                     case NAT_PROTOCOL_##N: \
1402                       if (a->busy_##n##_port_refcounts[e_port]) \
1403                         return VNET_API_ERROR_INVALID_VALUE; \
1404                       ++a->busy_##n##_port_refcounts[e_port]; \
1405                       if (e_port > 1024) \
1406                         { \
1407                           a->busy_##n##_ports++; \
1408                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
1409                         } \
1410                       break;
1411                       foreach_nat_protocol
1412 #undef _
1413                     default:
1414                       nat_elog_info ("unknown protocol");
1415                       return VNET_API_ERROR_INVALID_VALUE_2;
1416                     }
1417                   break;
1418                 }
1419             }
1420           /* External address must be allocated */
1421           if (!a)
1422             return VNET_API_ERROR_NO_SUCH_ENTRY;
1423         }
1424
1425       pool_get (sm->static_mappings, m);
1426       clib_memset (m, 0, sizeof (*m));
1427       m->tag = vec_dup (tag);
1428       m->external_addr = e_addr;
1429       m->external_port = e_port;
1430       m->proto = proto;
1431       m->twice_nat = twice_nat;
1432       m->flags |= NAT_STATIC_MAPPING_FLAG_LB;
1433       if (out2in_only)
1434         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
1435       m->affinity = affinity;
1436
1437       if (affinity)
1438         m->affinity_per_service_list_head_index =
1439           nat_affinity_get_per_service_list_head_index ();
1440       else
1441         m->affinity_per_service_list_head_index = ~0;
1442
1443       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto,
1444                    m - sm->static_mappings);
1445       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1))
1446         {
1447           nat_elog_err ("static_mapping_by_external key add failed");
1448           return VNET_API_ERROR_UNSPECIFIED;
1449         }
1450
1451       for (i = 0; i < vec_len (locals); i++)
1452         {
1453           locals[i].fib_index =
1454             fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
1455                                                locals[i].vrf_id,
1456                                                nat_fib_src_low);
1457           if (!out2in_only)
1458             {
1459               init_nat_kv (&kv, locals[i].addr, locals[i].port,
1460                            locals[i].fib_index, m->proto,
1461                            m - sm->static_mappings);
1462               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
1463             }
1464           locals[i].prefix = (i == 0) ? locals[i].probability :
1465             (locals[i - 1].prefix + locals[i].probability);
1466           pool_get (m->locals, local);
1467           *local = locals[i];
1468           if (sm->num_workers > 1)
1469             {
1470               ip4_header_t ip = {
1471                 .src_address = locals[i].addr,
1472               };
1473               bitmap =
1474                 clib_bitmap_set (bitmap,
1475                                  sm->worker_in2out_cb (&ip, m->fib_index, 0),
1476                                  1);
1477             }
1478         }
1479
1480       /* Assign workers */
1481       if (sm->num_workers > 1)
1482         {
1483           /* *INDENT-OFF* */
1484           clib_bitmap_foreach (i, bitmap,
1485             ({
1486                vec_add1(m->workers, i);
1487             }));
1488           /* *INDENT-ON* */
1489         }
1490     }
1491   else
1492     {
1493       if (!m)
1494         return VNET_API_ERROR_NO_SUCH_ENTRY;
1495
1496       if (!is_lb_static_mapping (m))
1497         return VNET_API_ERROR_INVALID_VALUE;
1498
1499       /* Free external address port */
1500       if (!(sm->static_mapping_only || out2in_only))
1501         {
1502           for (i = 0; i < vec_len (sm->addresses); i++)
1503             {
1504               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1505                 {
1506                   a = sm->addresses + i;
1507                   switch (proto)
1508                     {
1509 #define _(N, j, n, s) \
1510                     case NAT_PROTOCOL_##N: \
1511                       --a->busy_##n##_port_refcounts[e_port]; \
1512                       if (e_port > 1024) \
1513                         { \
1514                           a->busy_##n##_ports--; \
1515                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1516                         } \
1517                       break;
1518                       foreach_nat_protocol
1519 #undef _
1520                     default:
1521                       nat_elog_info ("unknown protocol");
1522                       return VNET_API_ERROR_INVALID_VALUE_2;
1523                     }
1524                   break;
1525                 }
1526             }
1527         }
1528
1529       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
1530       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0))
1531         {
1532           nat_elog_err ("static_mapping_by_external key del failed");
1533           return VNET_API_ERROR_UNSPECIFIED;
1534         }
1535
1536       /* *INDENT-OFF* */
1537       pool_foreach (local, m->locals,
1538       ({
1539           fib_table_unlock (local->fib_index, FIB_PROTOCOL_IP4,
1540                             nat_fib_src_low);
1541           if (!out2in_only)
1542             {
1543 init_nat_k(&              kv, local->addr, local->port, local->fib_index, m->proto);
1544               if (clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 0))
1545                 {
1546                   nat_elog_err ("static_mapping_by_local key del failed");
1547                   return VNET_API_ERROR_UNSPECIFIED;
1548                 }
1549             }
1550
1551           if (sm->num_workers > 1)
1552             {
1553               ip4_header_t ip = {
1554                 .src_address = local->addr,
1555               };
1556               tsm = vec_elt_at_index (sm->per_thread_data,
1557                                       sm->worker_in2out_cb (&ip, m->fib_index, 0));
1558             }
1559           else
1560             tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1561
1562           /* Delete sessions */
1563           pool_foreach (s, tsm->sessions, {
1564             if (!(is_lb_session (s)))
1565               continue;
1566
1567             if ((s->in2out.addr.as_u32 != local->addr.as_u32) ||
1568                 s->in2out.port != local->port)
1569               continue;
1570
1571             nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1572             nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1573           });
1574       }));
1575       /* *INDENT-ON* */
1576       if (m->affinity)
1577         nat_affinity_flush_service (m->affinity_per_service_list_head_index);
1578       pool_free (m->locals);
1579       vec_free (m->tag);
1580       vec_free (m->workers);
1581
1582       pool_put (sm->static_mappings, m);
1583     }
1584
1585   return 0;
1586 }
1587
1588 int
1589 nat44_lb_static_mapping_add_del_local (ip4_address_t e_addr, u16 e_port,
1590                                        ip4_address_t l_addr, u16 l_port,
1591                                        nat_protocol_t proto, u32 vrf_id,
1592                                        u8 probability, u8 is_add)
1593 {
1594   snat_main_t *sm = &snat_main;
1595   snat_static_mapping_t *m = 0;
1596   clib_bihash_kv_8_8_t kv, value;
1597   nat44_lb_addr_port_t *local, *prev_local, *match_local = 0;
1598   snat_main_per_thread_data_t *tsm;
1599   snat_session_t *s;
1600   u32 *locals = 0;
1601   uword *bitmap = 0;
1602   int i;
1603
1604   if (!sm->endpoint_dependent)
1605     return VNET_API_ERROR_FEATURE_DISABLED;
1606
1607   init_nat_k (&kv, e_addr, e_port, 0, proto);
1608   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1609     m = pool_elt_at_index (sm->static_mappings, value.value);
1610
1611   if (!m)
1612     return VNET_API_ERROR_NO_SUCH_ENTRY;
1613
1614   if (!is_lb_static_mapping (m))
1615     return VNET_API_ERROR_INVALID_VALUE;
1616
1617   /* *INDENT-OFF* */
1618   pool_foreach (local, m->locals,
1619   ({
1620     if ((local->addr.as_u32 == l_addr.as_u32) && (local->port == l_port) &&
1621         (local->vrf_id == vrf_id))
1622       {
1623         match_local = local;
1624         break;
1625       }
1626   }));
1627   /* *INDENT-ON* */
1628
1629   if (is_add)
1630     {
1631       if (match_local)
1632         return VNET_API_ERROR_VALUE_EXIST;
1633
1634       pool_get (m->locals, local);
1635       clib_memset (local, 0, sizeof (*local));
1636       local->addr.as_u32 = l_addr.as_u32;
1637       local->port = l_port;
1638       local->probability = probability;
1639       local->vrf_id = vrf_id;
1640       local->fib_index =
1641         fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1642                                            nat_fib_src_low);
1643
1644       if (!is_out2in_only_static_mapping (m))
1645         {
1646           init_nat_kv (&kv, l_addr, l_port, local->fib_index, proto,
1647                        m - sm->static_mappings);
1648           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1))
1649             nat_elog_err ("static_mapping_by_local key add failed");
1650         }
1651     }
1652   else
1653     {
1654       if (!match_local)
1655         return VNET_API_ERROR_NO_SUCH_ENTRY;
1656
1657       if (pool_elts (m->locals) < 3)
1658         return VNET_API_ERROR_UNSPECIFIED;
1659
1660       fib_table_unlock (match_local->fib_index, FIB_PROTOCOL_IP4,
1661                         nat_fib_src_low);
1662
1663       if (!is_out2in_only_static_mapping (m))
1664         {
1665           init_nat_k (&kv, l_addr, l_port, match_local->fib_index, proto);
1666           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0))
1667             nat_elog_err ("static_mapping_by_local key del failed");
1668         }
1669
1670       if (sm->num_workers > 1)
1671         {
1672           ip4_header_t ip = {
1673             .src_address = local->addr,
1674           };
1675           tsm = vec_elt_at_index (sm->per_thread_data,
1676                                   sm->worker_in2out_cb (&ip, m->fib_index,
1677                                                         0));
1678         }
1679       else
1680         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1681
1682       /* Delete sessions */
1683       /* *INDENT-OFF* */
1684       pool_foreach (s, tsm->sessions, {
1685         if (!(is_lb_session (s)))
1686           continue;
1687
1688         if ((s->in2out.addr.as_u32 != match_local->addr.as_u32) ||
1689             s->in2out.port != match_local->port)
1690           continue;
1691
1692         nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1693         nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1694       });
1695       /* *INDENT-ON* */
1696
1697       pool_put (m->locals, match_local);
1698     }
1699
1700   vec_free (m->workers);
1701
1702   /* *INDENT-OFF* */
1703   pool_foreach (local, m->locals,
1704   ({
1705     vec_add1 (locals, local - m->locals);
1706     if (sm->num_workers > 1)
1707       {
1708         ip4_header_t ip;
1709         ip.src_address.as_u32 = local->addr.as_u32,
1710         bitmap = clib_bitmap_set (bitmap,
1711                                   sm->worker_in2out_cb (&ip, local->fib_index, 0),
1712                                   1);
1713       }
1714   }));
1715   /* *INDENT-ON* */
1716
1717   ASSERT (vec_len (locals) > 1);
1718
1719   local = pool_elt_at_index (m->locals, locals[0]);
1720   local->prefix = local->probability;
1721   for (i = 1; i < vec_len (locals); i++)
1722     {
1723       local = pool_elt_at_index (m->locals, locals[i]);
1724       prev_local = pool_elt_at_index (m->locals, locals[i - 1]);
1725       local->prefix = local->probability + prev_local->prefix;
1726     }
1727
1728   /* Assign workers */
1729   if (sm->num_workers > 1)
1730     {
1731       /* *INDENT-OFF* */
1732       clib_bitmap_foreach (i, bitmap, ({ vec_add1(m->workers, i); }));
1733       /* *INDENT-ON* */
1734     }
1735
1736   return 0;
1737 }
1738
1739 int
1740 snat_del_address (snat_main_t * sm, ip4_address_t addr, u8 delete_sm,
1741                   u8 twice_nat)
1742 {
1743   snat_address_t *a = 0;
1744   snat_session_t *ses;
1745   u32 *ses_to_be_removed = 0, *ses_index;
1746   snat_main_per_thread_data_t *tsm;
1747   snat_static_mapping_t *m;
1748   snat_interface_t *interface;
1749   int i;
1750   snat_address_t *addresses =
1751     twice_nat ? sm->twice_nat_addresses : sm->addresses;
1752
1753   /* Find SNAT address */
1754   for (i = 0; i < vec_len (addresses); i++)
1755     {
1756       if (addresses[i].addr.as_u32 == addr.as_u32)
1757         {
1758           a = addresses + i;
1759           break;
1760         }
1761     }
1762   if (!a)
1763     {
1764       nat_log_err ("no such address");
1765       return VNET_API_ERROR_NO_SUCH_ENTRY;
1766     }
1767
1768   if (delete_sm)
1769     {
1770       ip4_address_t pool_addr = { 0 };
1771       /* *INDENT-OFF* */
1772       pool_foreach (m, sm->static_mappings,
1773       ({
1774           if (m->external_addr.as_u32 == addr.as_u32)
1775             (void) snat_add_static_mapping (m->local_addr, m->external_addr,
1776                                             m->local_port, m->external_port,
1777                                             m->vrf_id,
1778                                             is_addr_only_static_mapping(m), ~0,
1779                                             m->proto, 0 /* is_add */,
1780                                             m->twice_nat,
1781                                             is_out2in_only_static_mapping(m),
1782                                             m->tag,
1783                                             is_identity_static_mapping(m),
1784                                             pool_addr, 0);
1785       }));
1786       /* *INDENT-ON* */
1787     }
1788   else
1789     {
1790       /* Check if address is used in some static mapping */
1791       if (is_snat_address_used_in_static_mapping (sm, addr))
1792         {
1793           nat_log_err ("address used in static mapping");
1794           return VNET_API_ERROR_UNSPECIFIED;
1795         }
1796     }
1797
1798   if (a->fib_index != ~0)
1799     fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, nat_fib_src_low);
1800
1801   /* Delete sessions using address */
1802   if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports)
1803     {
1804       /* *INDENT-OFF* */
1805       vec_foreach (tsm, sm->per_thread_data)
1806         {
1807           pool_foreach (ses, tsm->sessions, ({
1808             if (ses->out2in.addr.as_u32 == addr.as_u32)
1809               {
1810                 nat_free_session_data (sm, ses, tsm - sm->per_thread_data, 0);
1811                 vec_add1 (ses_to_be_removed, ses - tsm->sessions);
1812               }
1813           }));
1814
1815           if (sm->endpoint_dependent){
1816               vec_foreach (ses_index, ses_to_be_removed)
1817                 {
1818                   ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
1819                   nat_ed_session_delete (sm, ses, tsm - sm->per_thread_data, 1);
1820                 }
1821           }else{
1822               vec_foreach (ses_index, ses_to_be_removed)
1823                 {
1824                   ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
1825                   nat44_delete_session (sm, ses, tsm - sm->per_thread_data);
1826                 }
1827           }
1828
1829           vec_free (ses_to_be_removed);
1830         }
1831       /* *INDENT-ON* */
1832     }
1833
1834 #define _(N, i, n, s) \
1835   vec_free (a->busy_##n##_ports_per_thread);
1836   foreach_nat_protocol
1837 #undef _
1838     if (twice_nat)
1839     {
1840       vec_del1 (sm->twice_nat_addresses, i);
1841       return 0;
1842     }
1843   else
1844     vec_del1 (sm->addresses, i);
1845
1846   /* Delete external address from FIB */
1847   /* *INDENT-OFF* */
1848   pool_foreach (interface, sm->interfaces,
1849   ({
1850     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1851       continue;
1852
1853     snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0);
1854     break;
1855   }));
1856   pool_foreach (interface, sm->output_feature_interfaces,
1857   ({
1858     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1859       continue;
1860
1861     snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0);
1862     break;
1863   }));
1864   /* *INDENT-ON* */
1865
1866   return 0;
1867 }
1868
1869 static void
1870 nat_validate_counters (snat_main_t * sm, u32 sw_if_index)
1871 {
1872 #define _(x)                                                                  \
1873   vlib_validate_simple_counter (&sm->counters.fastpath.in2out.x,              \
1874                                 sw_if_index);                                 \
1875   vlib_zero_simple_counter (&sm->counters.fastpath.in2out.x, sw_if_index);    \
1876   vlib_validate_simple_counter (&sm->counters.fastpath.out2in.x,              \
1877                                 sw_if_index);                                 \
1878   vlib_zero_simple_counter (&sm->counters.fastpath.out2in.x, sw_if_index);    \
1879   vlib_validate_simple_counter (&sm->counters.slowpath.in2out.x,              \
1880                                 sw_if_index);                                 \
1881   vlib_zero_simple_counter (&sm->counters.slowpath.in2out.x, sw_if_index);    \
1882   vlib_validate_simple_counter (&sm->counters.slowpath.out2in.x,              \
1883                                 sw_if_index);                                 \
1884   vlib_zero_simple_counter (&sm->counters.slowpath.out2in.x, sw_if_index);    \
1885   vlib_validate_simple_counter (&sm->counters.fastpath.in2out_ed.x,           \
1886                                 sw_if_index);                                 \
1887   vlib_zero_simple_counter (&sm->counters.fastpath.in2out_ed.x, sw_if_index); \
1888   vlib_validate_simple_counter (&sm->counters.fastpath.out2in_ed.x,           \
1889                                 sw_if_index);                                 \
1890   vlib_zero_simple_counter (&sm->counters.fastpath.out2in_ed.x, sw_if_index); \
1891   vlib_validate_simple_counter (&sm->counters.slowpath.in2out_ed.x,           \
1892                                 sw_if_index);                                 \
1893   vlib_zero_simple_counter (&sm->counters.slowpath.in2out_ed.x, sw_if_index); \
1894   vlib_validate_simple_counter (&sm->counters.slowpath.out2in_ed.x,           \
1895                                 sw_if_index);                                 \
1896   vlib_zero_simple_counter (&sm->counters.slowpath.out2in_ed.x, sw_if_index);
1897   foreach_nat_counter;
1898 #undef _
1899   vlib_validate_simple_counter (&sm->counters.hairpinning, sw_if_index);
1900   vlib_zero_simple_counter (&sm->counters.hairpinning, sw_if_index);
1901 }
1902
1903 void
1904 expire_per_vrf_sessions (u32 fib_index)
1905 {
1906   per_vrf_sessions_t *per_vrf_sessions;
1907   snat_main_per_thread_data_t *tsm;
1908   snat_main_t *sm = &snat_main;
1909
1910   /* *INDENT-OFF* */
1911   vec_foreach (tsm, sm->per_thread_data)
1912     {
1913       vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
1914         {
1915           if ((per_vrf_sessions->rx_fib_index == fib_index) ||
1916               (per_vrf_sessions->tx_fib_index == fib_index))
1917             {
1918               per_vrf_sessions->expired = 1;
1919             }
1920         }
1921     }
1922   /* *INDENT-ON* */
1923 }
1924
1925 void
1926 update_per_vrf_sessions_vec (u32 fib_index, int is_del)
1927 {
1928   snat_main_t *sm = &snat_main;
1929   nat_fib_t *fib;
1930
1931   // we don't care if it is outside/inside fib
1932   // we just care about their ref_count
1933   // if it reaches 0 sessions should expire
1934   // because the fib isn't valid for NAT anymore
1935
1936   vec_foreach (fib, sm->fibs)
1937   {
1938     if (fib->fib_index == fib_index)
1939       {
1940         if (is_del)
1941           {
1942             fib->ref_count--;
1943             if (!fib->ref_count)
1944               {
1945                 vec_del1 (sm->fibs, fib - sm->fibs);
1946                 expire_per_vrf_sessions (fib_index);
1947               }
1948             return;
1949           }
1950         else
1951           fib->ref_count++;
1952       }
1953   }
1954   if (!is_del)
1955     {
1956       vec_add2 (sm->fibs, fib, 1);
1957       fib->ref_count = 1;
1958       fib->fib_index = fib_index;
1959     }
1960 }
1961
1962 int
1963 snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
1964 {
1965   snat_main_t *sm = &snat_main;
1966   snat_interface_t *i;
1967   const char *feature_name, *del_feature_name;
1968   snat_address_t *ap;
1969   snat_static_mapping_t *m;
1970   nat_outside_fib_t *outside_fib;
1971   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1972                                                        sw_if_index);
1973
1974   if (!sm->enabled)
1975     {
1976       nat_log_err ("nat44 is disabled");
1977       return VNET_API_ERROR_UNSUPPORTED;
1978     }
1979
1980   if (sm->out2in_dpo && !is_inside)
1981     {
1982       nat_log_err ("error unsupported");
1983       return VNET_API_ERROR_UNSUPPORTED;
1984     }
1985
1986   /* *INDENT-OFF* */
1987   pool_foreach (i, sm->output_feature_interfaces,
1988   ({
1989     if (i->sw_if_index == sw_if_index)
1990       {
1991         nat_log_err ("error interface already configured");
1992         return VNET_API_ERROR_VALUE_EXIST;
1993       }
1994   }));
1995   /* *INDENT-ON* */
1996
1997   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
1998     feature_name = is_inside ? "nat44-in2out-fast" : "nat44-out2in-fast";
1999   else
2000     {
2001       if (sm->num_workers > 1)
2002         feature_name =
2003           is_inside ? "nat44-in2out-worker-handoff" :
2004           "nat44-out2in-worker-handoff";
2005       else if (sm->endpoint_dependent)
2006         {
2007           feature_name = is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
2008         }
2009       else
2010         feature_name = is_inside ? "nat44-in2out" : "nat44-out2in";
2011     }
2012
2013   if (sm->fq_in2out_index == ~0 && sm->num_workers > 1)
2014     sm->fq_in2out_index =
2015       vlib_frame_queue_main_init (sm->in2out_node_index, NAT_FQ_NELTS);
2016
2017   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
2018     sm->fq_out2in_index =
2019       vlib_frame_queue_main_init (sm->out2in_node_index, NAT_FQ_NELTS);
2020
2021   if (sm->endpoint_dependent)
2022     update_per_vrf_sessions_vec (fib_index, is_del);
2023
2024   if (!is_inside)
2025     {
2026       /* *INDENT-OFF* */
2027       vec_foreach (outside_fib, sm->outside_fibs)
2028         {
2029           if (outside_fib->fib_index == fib_index)
2030             {
2031               if (is_del)
2032                 {
2033                   outside_fib->refcount--;
2034                   if (!outside_fib->refcount)
2035                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
2036                 }
2037               else
2038                 outside_fib->refcount++;
2039               goto feature_set;
2040             }
2041         }
2042       /* *INDENT-ON* */
2043       if (!is_del)
2044         {
2045           vec_add2 (sm->outside_fibs, outside_fib, 1);
2046           outside_fib->refcount = 1;
2047           outside_fib->fib_index = fib_index;
2048         }
2049     }
2050
2051 feature_set:
2052   /* *INDENT-OFF* */
2053   pool_foreach (i, sm->interfaces,
2054   ({
2055     if (i->sw_if_index == sw_if_index)
2056       {
2057         if (is_del)
2058           {
2059             if (nat_interface_is_inside(i) && nat_interface_is_outside(i))
2060               {
2061                 if (is_inside)
2062                   i->flags &= ~NAT_INTERFACE_FLAG_IS_INSIDE;
2063                 else
2064                   i->flags &= ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
2065
2066                 if (sm->num_workers > 1)
2067                   {
2068                     del_feature_name = "nat44-handoff-classify";
2069                     feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
2070                                                  "nat44-out2in-worker-handoff";
2071                   }
2072                 else if (sm->endpoint_dependent)
2073                   {
2074                     del_feature_name = "nat44-ed-classify";
2075                     feature_name = !is_inside ?  "nat-pre-in2out" :
2076                                                  "nat-pre-out2in";
2077                   }
2078                 else
2079                   {
2080                     del_feature_name = "nat44-classify";
2081                     feature_name = !is_inside ?  "nat44-in2out" : "nat44-out2in";
2082                   }
2083
2084                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
2085                 if (rv)
2086                   return rv;
2087                 vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
2088                                              sw_if_index, 0, 0, 0);
2089                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
2090                                              sw_if_index, 1, 0, 0);
2091                 if (!is_inside)
2092                   {
2093                     if (sm->endpoint_dependent)
2094                       vnet_feature_enable_disable ("ip4-local",
2095                                                    "nat44-ed-hairpinning",
2096                                                    sw_if_index, 1, 0, 0);
2097                     else
2098                       vnet_feature_enable_disable ("ip4-local",
2099                                                    "nat44-hairpinning",
2100                                                    sw_if_index, 1, 0, 0);
2101                   }
2102               }
2103             else
2104               {
2105                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
2106                 if (rv)
2107                   return rv;
2108                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
2109                                              sw_if_index, 0, 0, 0);
2110                 pool_put (sm->interfaces, i);
2111                 if (is_inside)
2112                   {
2113                     if (sm->endpoint_dependent)
2114                       vnet_feature_enable_disable ("ip4-local",
2115                                                    "nat44-ed-hairpinning",
2116                                                    sw_if_index, 0, 0, 0);
2117                     else
2118                       vnet_feature_enable_disable ("ip4-local",
2119                                                    "nat44-hairpinning",
2120                                                    sw_if_index, 0, 0, 0);
2121                   }
2122               }
2123           }
2124         else
2125           {
2126             if ((nat_interface_is_inside(i) && is_inside) ||
2127                 (nat_interface_is_outside(i) && !is_inside))
2128               return 0;
2129
2130             if (sm->num_workers > 1)
2131               {
2132                 del_feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
2133                                                  "nat44-out2in-worker-handoff";
2134                 feature_name = "nat44-handoff-classify";
2135               }
2136             else if (sm->endpoint_dependent)
2137               {
2138                 del_feature_name = !is_inside ?  "nat-pre-in2out" :
2139                                                  "nat-pre-out2in";
2140
2141                 feature_name = "nat44-ed-classify";
2142               }
2143             else
2144               {
2145                 del_feature_name = !is_inside ?  "nat44-in2out" : "nat44-out2in";
2146                 feature_name = "nat44-classify";
2147               }
2148
2149             int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
2150             if (rv)
2151               return rv;
2152             vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
2153                                          sw_if_index, 0, 0, 0);
2154             vnet_feature_enable_disable ("ip4-unicast", feature_name,
2155                                          sw_if_index, 1, 0, 0);
2156             if (!is_inside)
2157               {
2158                 if (sm->endpoint_dependent)
2159                   vnet_feature_enable_disable ("ip4-local", "nat44-ed-hairpinning",
2160                                                sw_if_index, 0, 0, 0);
2161                 else
2162                   vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning",
2163                                                sw_if_index, 0, 0, 0);
2164               }
2165             goto set_flags;
2166           }
2167
2168         goto fib;
2169       }
2170   }));
2171   /* *INDENT-ON* */
2172
2173   if (is_del)
2174     {
2175       nat_log_err ("error interface couldn't be found");
2176       return VNET_API_ERROR_NO_SUCH_ENTRY;
2177     }
2178
2179   pool_get (sm->interfaces, i);
2180   i->sw_if_index = sw_if_index;
2181   i->flags = 0;
2182   nat_validate_counters (sm, sw_if_index);
2183
2184   vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1, 0,
2185                                0);
2186
2187   int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
2188   if (rv)
2189     return rv;
2190
2191   if (is_inside && !sm->out2in_dpo)
2192     {
2193       if (sm->endpoint_dependent)
2194         vnet_feature_enable_disable ("ip4-local", "nat44-ed-hairpinning",
2195                                      sw_if_index, 1, 0, 0);
2196       else
2197         vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning",
2198                                      sw_if_index, 1, 0, 0);
2199     }
2200
2201 set_flags:
2202   if (is_inside)
2203     {
2204       i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
2205       return 0;
2206     }
2207   else
2208     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
2209
2210   /* Add/delete external addresses to FIB */
2211 fib:
2212   /* *INDENT-OFF* */
2213   vec_foreach (ap, sm->addresses)
2214     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
2215
2216   pool_foreach (m, sm->static_mappings,
2217   ({
2218     if (!(is_addr_only_static_mapping(m)) || (m->local_addr.as_u32 == m->external_addr.as_u32))
2219       continue;
2220
2221     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
2222   }));
2223   /* *INDENT-ON* */
2224
2225   return 0;
2226 }
2227
2228 int
2229 snat_interface_add_del_output_feature (u32 sw_if_index,
2230                                        u8 is_inside, int is_del)
2231 {
2232   snat_main_t *sm = &snat_main;
2233   snat_interface_t *i;
2234   snat_address_t *ap;
2235   snat_static_mapping_t *m;
2236   nat_outside_fib_t *outside_fib;
2237   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2238                                                        sw_if_index);
2239
2240   if (!sm->enabled)
2241     {
2242       nat_log_err ("nat44 is disabled");
2243       return VNET_API_ERROR_UNSUPPORTED;
2244     }
2245
2246   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
2247     {
2248       nat_log_err ("error unsupported");
2249       return VNET_API_ERROR_UNSUPPORTED;
2250     }
2251
2252   /* *INDENT-OFF* */
2253   pool_foreach (i, sm->interfaces,
2254   ({
2255     if (i->sw_if_index == sw_if_index)
2256       {
2257         nat_log_err ("error interface already configured");
2258         return VNET_API_ERROR_VALUE_EXIST;
2259       }
2260   }));
2261   /* *INDENT-ON* */
2262
2263   if (sm->endpoint_dependent)
2264     update_per_vrf_sessions_vec (fib_index, is_del);
2265
2266   if (!is_inside)
2267     {
2268       /* *INDENT-OFF* */
2269       vec_foreach (outside_fib, sm->outside_fibs)
2270         {
2271           if (outside_fib->fib_index == fib_index)
2272             {
2273               if (is_del)
2274                 {
2275                   outside_fib->refcount--;
2276                   if (!outside_fib->refcount)
2277                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
2278                 }
2279               else
2280                 outside_fib->refcount++;
2281               goto feature_set;
2282             }
2283         }
2284       /* *INDENT-ON* */
2285       if (!is_del)
2286         {
2287           vec_add2 (sm->outside_fibs, outside_fib, 1);
2288           outside_fib->refcount = 1;
2289           outside_fib->fib_index = fib_index;
2290         }
2291     }
2292
2293 feature_set:
2294   if (is_inside)
2295     {
2296       if (sm->endpoint_dependent)
2297         {
2298           int rv =
2299             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2300           if (rv)
2301             return rv;
2302           rv =
2303             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2304                                                             !is_del);
2305           if (rv)
2306             return rv;
2307           vnet_feature_enable_disable ("ip4-unicast", "nat44-ed-hairpin-dst",
2308                                        sw_if_index, !is_del, 0, 0);
2309           vnet_feature_enable_disable ("ip4-output", "nat44-ed-hairpin-src",
2310                                        sw_if_index, !is_del, 0, 0);
2311         }
2312       else
2313         {
2314           int rv =
2315             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2316           if (rv)
2317             return rv;
2318           rv =
2319             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2320                                                             !is_del);
2321           if (rv)
2322             return rv;
2323           vnet_feature_enable_disable ("ip4-unicast", "nat44-hairpin-dst",
2324                                        sw_if_index, !is_del, 0, 0);
2325           vnet_feature_enable_disable ("ip4-output", "nat44-hairpin-src",
2326                                        sw_if_index, !is_del, 0, 0);
2327         }
2328       goto fq;
2329     }
2330
2331   if (sm->num_workers > 1)
2332     {
2333       int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2334       if (rv)
2335         return rv;
2336       rv =
2337         ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del);
2338       if (rv)
2339         return rv;
2340       vnet_feature_enable_disable ("ip4-unicast",
2341                                    "nat44-out2in-worker-handoff",
2342                                    sw_if_index, !is_del, 0, 0);
2343       vnet_feature_enable_disable ("ip4-output",
2344                                    "nat44-in2out-output-worker-handoff",
2345                                    sw_if_index, !is_del, 0, 0);
2346     }
2347   else
2348     {
2349       if (sm->endpoint_dependent)
2350         {
2351           int rv =
2352             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2353           if (rv)
2354             return rv;
2355           rv =
2356             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2357                                                             !is_del);
2358           if (rv)
2359             return rv;
2360           vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in",
2361                                        sw_if_index, !is_del, 0, 0);
2362           vnet_feature_enable_disable ("ip4-output", "nat-pre-in2out-output",
2363                                        sw_if_index, !is_del, 0, 0);
2364         }
2365       else
2366         {
2367           int rv =
2368             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2369           if (rv)
2370             return rv;
2371           rv =
2372             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2373                                                             !is_del);
2374           if (rv)
2375             return rv;
2376           vnet_feature_enable_disable ("ip4-unicast", "nat44-out2in",
2377                                        sw_if_index, !is_del, 0, 0);
2378           vnet_feature_enable_disable ("ip4-output", "nat44-in2out-output",
2379                                        sw_if_index, !is_del, 0, 0);
2380         }
2381     }
2382
2383 fq:
2384   if (sm->fq_in2out_output_index == ~0 && sm->num_workers > 1)
2385     sm->fq_in2out_output_index =
2386       vlib_frame_queue_main_init (sm->in2out_output_node_index, 0);
2387
2388   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
2389     sm->fq_out2in_index =
2390       vlib_frame_queue_main_init (sm->out2in_node_index, 0);
2391
2392   /* *INDENT-OFF* */
2393   pool_foreach (i, sm->output_feature_interfaces,
2394   ({
2395     if (i->sw_if_index == sw_if_index)
2396       {
2397         if (is_del)
2398           pool_put (sm->output_feature_interfaces, i);
2399         else
2400           return VNET_API_ERROR_VALUE_EXIST;
2401
2402         goto fib;
2403       }
2404   }));
2405   /* *INDENT-ON* */
2406
2407   if (is_del)
2408     {
2409       nat_log_err ("error interface couldn't be found");
2410       return VNET_API_ERROR_NO_SUCH_ENTRY;
2411     }
2412
2413   pool_get (sm->output_feature_interfaces, i);
2414   i->sw_if_index = sw_if_index;
2415   i->flags = 0;
2416   nat_validate_counters (sm, sw_if_index);
2417   if (is_inside)
2418     i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
2419   else
2420     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
2421
2422   /* Add/delete external addresses to FIB */
2423 fib:
2424   if (is_inside)
2425     return 0;
2426
2427   /* *INDENT-OFF* */
2428   vec_foreach (ap, sm->addresses)
2429     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
2430
2431   pool_foreach (m, sm->static_mappings,
2432   ({
2433     if (!((is_addr_only_static_mapping(m)))  || (m->local_addr.as_u32 == m->external_addr.as_u32))
2434       continue;
2435
2436     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
2437   }));
2438   /* *INDENT-ON* */
2439
2440   return 0;
2441 }
2442
2443 int
2444 snat_set_workers (uword * bitmap)
2445 {
2446   snat_main_t *sm = &snat_main;
2447   int i, j = 0;
2448
2449   if (sm->num_workers < 2)
2450     return VNET_API_ERROR_FEATURE_DISABLED;
2451
2452   if (clib_bitmap_last_set (bitmap) >= sm->num_workers)
2453     return VNET_API_ERROR_INVALID_WORKER;
2454
2455   vec_free (sm->workers);
2456   /* *INDENT-OFF* */
2457   clib_bitmap_foreach (i, bitmap,
2458     ({
2459       vec_add1(sm->workers, i);
2460       sm->per_thread_data[sm->first_worker_index + i].snat_thread_index = j;
2461       sm->per_thread_data[sm->first_worker_index + i].thread_index = i;
2462       j++;
2463     }));
2464   /* *INDENT-ON* */
2465
2466   sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
2467
2468   return 0;
2469 }
2470
2471 static void
2472 snat_update_outside_fib (ip4_main_t * im, uword opaque,
2473                          u32 sw_if_index, u32 new_fib_index,
2474                          u32 old_fib_index)
2475 {
2476   snat_main_t *sm = &snat_main;
2477   nat_outside_fib_t *outside_fib;
2478   snat_interface_t *i;
2479   u8 is_add = 1;
2480   u8 match = 0;
2481
2482   if (!sm->enabled || (new_fib_index == old_fib_index)
2483       || (!vec_len (sm->outside_fibs)))
2484     {
2485       return;
2486     }
2487
2488   /* *INDENT-OFF* */
2489   pool_foreach (i, sm->interfaces,
2490     ({
2491       if (i->sw_if_index == sw_if_index)
2492         {
2493           if (!(nat_interface_is_outside (i)))
2494             return;
2495           match = 1;
2496         }
2497     }));
2498
2499   pool_foreach (i, sm->output_feature_interfaces,
2500     ({
2501       if (i->sw_if_index == sw_if_index)
2502         {
2503           if (!(nat_interface_is_outside (i)))
2504             return;
2505           match = 1;
2506         }
2507     }));
2508   /* *INDENT-ON* */
2509
2510   if (!match)
2511     return;
2512
2513   vec_foreach (outside_fib, sm->outside_fibs)
2514   {
2515     if (outside_fib->fib_index == old_fib_index)
2516       {
2517         outside_fib->refcount--;
2518         if (!outside_fib->refcount)
2519           vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
2520         break;
2521       }
2522   }
2523
2524   vec_foreach (outside_fib, sm->outside_fibs)
2525   {
2526     if (outside_fib->fib_index == new_fib_index)
2527       {
2528         outside_fib->refcount++;
2529         is_add = 0;
2530         break;
2531       }
2532   }
2533
2534   if (is_add)
2535     {
2536       vec_add2 (sm->outside_fibs, outside_fib, 1);
2537       outside_fib->refcount = 1;
2538       outside_fib->fib_index = new_fib_index;
2539     }
2540 }
2541
2542 static void
2543 snat_update_outside_fib (ip4_main_t * im, uword opaque,
2544                          u32 sw_if_index, u32 new_fib_index,
2545                          u32 old_fib_index);
2546
2547 static void
2548 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
2549                                        uword opaque,
2550                                        u32 sw_if_index,
2551                                        ip4_address_t * address,
2552                                        u32 address_length,
2553                                        u32 if_address_index, u32 is_delete);
2554
2555 static void
2556 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
2557                                  uword opaque,
2558                                  u32 sw_if_index,
2559                                  ip4_address_t * address,
2560                                  u32 address_length,
2561                                  u32 if_address_index, u32 is_delete);
2562
2563 static int
2564 nat_alloc_addr_and_port_default (snat_address_t * addresses, u32 fib_index,
2565                                  u32 thread_index, nat_protocol_t proto,
2566                                  ip4_address_t * addr, u16 * port,
2567                                  u16 port_per_thread, u32 snat_thread_index);
2568
2569 void
2570 test_key_calc_split ()
2571 {
2572   ip4_address_t l_addr;
2573   l_addr.as_u8[0] = 1;
2574   l_addr.as_u8[1] = 1;
2575   l_addr.as_u8[2] = 1;
2576   l_addr.as_u8[3] = 1;
2577   ip4_address_t r_addr;
2578   r_addr.as_u8[0] = 2;
2579   r_addr.as_u8[1] = 2;
2580   r_addr.as_u8[2] = 2;
2581   r_addr.as_u8[3] = 2;
2582   u16 l_port = 40001;
2583   u16 r_port = 40301;
2584   u8 proto = 9;
2585   u32 fib_index = 9000001;
2586   u32 thread_index = 3000000001;
2587   u32 session_index = 3000000221;
2588   clib_bihash_kv_16_8_t kv;
2589   init_ed_kv (&kv, l_addr, l_port, r_addr, r_port, fib_index, proto,
2590               thread_index, session_index);
2591   ip4_address_t l_addr2;
2592   ip4_address_t r_addr2;
2593   clib_memset (&l_addr2, 0, sizeof (l_addr2));
2594   clib_memset (&r_addr2, 0, sizeof (r_addr2));
2595   u16 l_port2 = 0;
2596   u16 r_port2 = 0;
2597   u8 proto2 = 0;
2598   u32 fib_index2 = 0;
2599   split_ed_kv (&kv, &l_addr2, &r_addr2, &proto2, &fib_index2, &l_port2,
2600                &r_port2);
2601   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
2602   ASSERT (r_addr.as_u32 == r_addr2.as_u32);
2603   ASSERT (l_port == l_port2);
2604   ASSERT (r_port == r_port2);
2605   ASSERT (proto == proto2);
2606   ASSERT (fib_index == fib_index2);
2607   ASSERT (thread_index == ed_value_get_thread_index (&kv));
2608   ASSERT (session_index == ed_value_get_session_index (&kv));
2609
2610   fib_index = 7001;
2611   proto = 5;
2612   nat_protocol_t proto3 = ~0;
2613   u64 key = calc_nat_key (l_addr, l_port, fib_index, proto);
2614   split_nat_key (key, &l_addr2, &l_port2, &fib_index2, &proto3);
2615   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
2616   ASSERT (l_port == l_port2);
2617   ASSERT (proto == proto3);
2618   ASSERT (fib_index == fib_index2);
2619 }
2620
2621 static clib_error_t *
2622 nat_ip_table_add_del (vnet_main_t * vnm, u32 table_id, u32 is_add)
2623 {
2624   snat_main_t *sm = &snat_main;
2625   u32 fib_index;
2626
2627   if (sm->endpoint_dependent)
2628     {
2629       // TODO: consider removing all NAT interfaces
2630
2631       if (!is_add)
2632         {
2633           fib_index = ip4_fib_index_from_table_id (table_id);
2634           if (fib_index != ~0)
2635             expire_per_vrf_sessions (fib_index);
2636         }
2637     }
2638   return 0;
2639 }
2640
2641 VNET_IP_TABLE_ADD_DEL_FUNCTION (nat_ip_table_add_del);
2642
2643 void
2644 nat44_set_node_indexes (snat_main_t * sm, vlib_main_t * vm)
2645 {
2646   vlib_node_t *node;
2647
2648   node = vlib_get_node_by_name (vm, (u8 *) "nat44-out2in");
2649   sm->ei_out2in_node_index = node->index;
2650   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out");
2651   sm->ei_in2out_node_index = node->index;
2652   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-output");
2653   sm->ei_in2out_output_node_index = node->index;
2654
2655   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
2656   sm->ed_out2in_node_index = node->index;
2657   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
2658   sm->ed_in2out_node_index = node->index;
2659   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-output");
2660   sm->ed_in2out_output_node_index = node->index;
2661
2662   node = vlib_get_node_by_name (vm, (u8 *) "error-drop");
2663   sm->error_node_index = node->index;
2664   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-in2out");
2665   sm->pre_in2out_node_index = node->index;
2666   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-out2in");
2667   sm->pre_out2in_node_index = node->index;
2668   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-in2out");
2669   sm->pre_in2out_node_index = node->index;
2670   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-out2in");
2671   sm->pre_out2in_node_index = node->index;
2672   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-fast");
2673   sm->in2out_fast_node_index = node->index;
2674   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-slowpath");
2675   sm->in2out_slowpath_node_index = node->index;
2676   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-output-slowpath");
2677   sm->in2out_slowpath_output_node_index = node->index;
2678   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-slowpath");
2679   sm->ed_in2out_slowpath_node_index = node->index;
2680   node = vlib_get_node_by_name (vm, (u8 *) "nat44-out2in-fast");
2681   sm->out2in_fast_node_index = node->index;
2682   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in-slowpath");
2683   sm->ed_out2in_slowpath_node_index = node->index;
2684   node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpinning");
2685   sm->hairpinning_node_index = node->index;
2686   node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpin-dst");
2687   sm->hairpin_dst_node_index = node->index;
2688   node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpin-src");
2689   sm->hairpin_src_node_index = node->index;
2690   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpinning");
2691   sm->ed_hairpinning_node_index = node->index;
2692   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpin-dst");
2693   sm->ed_hairpin_dst_node_index = node->index;
2694   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpin-src");
2695   sm->ed_hairpin_src_node_index = node->index;
2696 }
2697
/*
 * Set the name and stat segment name of simple counter `c`, then validate
 * and zero its index 0.
 *
 * The expansion deliberately ends without a semicolon: the caller's own
 * `;` completes the do/while (0) statement, so the macro can be used as
 * the body of an unbraced if/else.
 */
#define nat_init_simple_counter(c, n, sn)   \
do                                          \
  {                                         \
    (c).name = n;                           \
    (c).stat_segment_name = sn;             \
    vlib_validate_simple_counter (&(c), 0); \
    vlib_zero_simple_counter (&(c), 0);     \
  } while (0)
2706
/*
 * Plugin init function (registered through VLIB_INIT_FUNCTION below).
 *
 * Zeroes snat_main and then sets up the state that does not depend on the
 * run-time enable/disable configuration: cached main pointers, graph node
 * indices, logging, counter names, worker thread bookkeeping, IPv4
 * address/table-bind callbacks, FIB sources and the DPO, affinity and HA
 * sub-modules.
 *
 * @param vm  vlib main.
 * @return    whatever snat_api_init() returns.
 */
static clib_error_t *
nat_init (vlib_main_t * vm)
{
  snat_main_t *sm = &snat_main;
  clib_error_t *error = 0;
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  vlib_thread_registration_t *tr;
  ip4_add_del_interface_address_callback_t cbi = { 0 };
  ip4_table_bind_callback_t cbt = { 0 };
  u32 i, num_threads = 0;
  uword *p, *bitmap = 0;

  /* start from a clean slate; everything below fills fields in again */
  clib_memset (sm, 0, sizeof (*sm));

  // required
  sm->vnet_main = vnet_get_main ();
  // convenience
  sm->ip4_main = &ip4_main;
  sm->api_main = vlibapi_get_main ();
  sm->ip4_lookup_main = &ip4_main.lookup_main;

  // frame queue indices used for handoff (~0 marks them as not yet set)
  sm->fq_out2in_index = ~0;
  sm->fq_in2out_index = ~0;
  sm->fq_in2out_output_index = ~0;

  sm->log_level = SNAT_LOG_ERROR;

  nat44_set_node_indexes (sm, vm);
  sm->log_class = vlib_log_register_class ("nat", 0);
  nat_ipfix_logging_init (vm);

  /* name, validate and zero the global simple counters */
  nat_init_simple_counter (sm->total_users, "total-users",
                           "/nat44/total-users");
  nat_init_simple_counter (sm->total_sessions, "total-sessions",
                           "/nat44/total-sessions");
  nat_init_simple_counter (sm->user_limit_reached, "user-limit-reached",
                           "/nat44/user-limit-reached");

  /*
   * For every counter listed by foreach_nat_counter, set the name and the
   * stat segment path of its fastpath/slowpath, in2out/out2in and
   * endpoint-dependent (_ed) variants.  Only names are assigned here.
   */
#define _(x)                                            \
  sm->counters.fastpath.in2out.x.name = #x;             \
  sm->counters.fastpath.in2out.x.stat_segment_name =    \
      "/nat44/in2out/fastpath/" #x;                     \
  sm->counters.slowpath.in2out.x.name = #x;             \
  sm->counters.slowpath.in2out.x.stat_segment_name =    \
      "/nat44/in2out/slowpath/" #x;                     \
  sm->counters.fastpath.out2in.x.name = #x;             \
  sm->counters.fastpath.out2in.x.stat_segment_name =    \
      "/nat44/out2in/fastpath/" #x;                     \
  sm->counters.slowpath.out2in.x.name = #x;             \
  sm->counters.slowpath.out2in.x.stat_segment_name =    \
      "/nat44/out2in/slowpath/" #x;                     \
  sm->counters.fastpath.in2out_ed.x.name = #x;          \
  sm->counters.fastpath.in2out_ed.x.stat_segment_name = \
      "/nat44/ed/in2out/fastpath/" #x;                  \
  sm->counters.slowpath.in2out_ed.x.name = #x;          \
  sm->counters.slowpath.in2out_ed.x.stat_segment_name = \
      "/nat44/ed/in2out/slowpath/" #x;                  \
  sm->counters.fastpath.out2in_ed.x.name = #x;          \
  sm->counters.fastpath.out2in_ed.x.stat_segment_name = \
      "/nat44/ed/out2in/fastpath/" #x;                  \
  sm->counters.slowpath.out2in_ed.x.name = #x;          \
  sm->counters.slowpath.out2in_ed.x.stat_segment_name = \
      "/nat44/ed/out2in/slowpath/" #x;
  foreach_nat_counter;
#undef _
  sm->counters.hairpinning.name = "hairpinning";
  sm->counters.hairpinning.stat_segment_name = "/nat44/hairpinning";

  /* pick up worker count and first worker index from the "workers"
   * thread registration, when there is one */
  p = hash_get_mem (tm->thread_registrations_by_name, "workers");
  if (p)
    {
      tr = (vlib_thread_registration_t *) p[0];
      if (tr)
        {
          sm->num_workers = tr->count;
          sm->first_worker_index = tr->first_index;
        }
    }
  num_threads = tm->n_vlib_mains - 1;
  sm->port_per_thread = 0xffff - 1024;
  /* makes index num_threads valid, i.e. one entry per vlib main */
  vec_validate (sm->per_thread_data, num_threads);

  /* Use all available workers by default */
  if (sm->num_workers > 1)
    {

      for (i = 0; i < sm->num_workers; i++)
        bitmap = clib_bitmap_set (bitmap, i, 1);
      snat_set_workers (bitmap);
      clib_bitmap_free (bitmap);
    }
  else
    sm->per_thread_data[0].snat_thread_index = 0;

  /* callbacks to call when interface address changes. */
  cbi.function = snat_ip4_add_del_interface_address_cb;
  vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
  cbi.function = nat_ip4_add_del_addr_only_sm_cb;
  vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);

  /* callbacks to call when interface to table binding changes */
  cbt.function = snat_update_outside_fib;
  vec_add1 (sm->ip4_main->table_bind_callbacks, cbt);

  // TODO: is it possible to move it into snat_main ?
  nat_fib_src_low =
    fib_source_allocate ("nat-low", FIB_SOURCE_PRIORITY_LOW,
                         FIB_SOURCE_BH_SIMPLE);
  nat_fib_src_hi =
    fib_source_allocate ("nat-hi", FIB_SOURCE_PRIORITY_HI,
                         FIB_SOURCE_BH_SIMPLE);

  /* used only by out2in-dpo feature */
  nat_dpo_module_init ();

  nat_affinity_init (vm);
  nat_ha_init (vm, sm->num_workers, num_threads);

  /* key pack/unpack self check; it reports problems through ASSERT */
  test_key_calc_split ();
  error = snat_api_init (vm, sm);
  return error;
}
2830
2831 VLIB_INIT_FUNCTION (nat_init);
2832
/*
 * Enable the NAT44 plugin with run-time configuration `c`.
 *
 * Resets allocator and timeouts to their defaults, copies the feature
 * flags and sizing parameters from `c` (substituting defaults for zero
 * values), locks the inside/outside FIB tables, selects the
 * endpoint-dependent or endpoint-independent callbacks and node indices,
 * initializes the session databases and static mapping hashes, zeroes the
 * global counters and finally marks the plugin enabled.
 *
 * @param c  requested configuration (passed by value; defaults are filled
 *           into the local copy).
 * @return   0 on success, 1 if the plugin is already enabled or the
 *           requested combination of options is not supported.
 */
int
nat44_plugin_enable (nat44_config_t c)
{
  snat_main_t *sm = &snat_main;
  u32 static_mapping_buckets = 1024;
  u32 static_mapping_memory_size = 64 << 20;

  if (sm->enabled)
    {
      nat_log_err ("nat44 is enabled");
      return 1;
    }

  // c.static_mapping_only + c.connection_tracking
  //  - supported in NAT EI & NAT ED
  // c.out2in_dpo, c.static_mapping_only
  //  - supported in NAT EI

  if (c.endpoint_dependent)
    {
      /* ED rejects static-mapping-only without connection tracking, and
       * out2in-dpo */
      if ((c.static_mapping_only && !c.connection_tracking) || c.out2in_dpo)
        {
          nat_log_err ("unsupported combination of configuration");
          return 1;
        }
      /* ED also rejects any explicit users / user_sessions setting */
      if (c.users || c.user_sessions)
        {
          nat_log_err ("unsupported combination of configuration");
          return 1;
        }
    }

  // reset to defaults:
  sm->alloc_addr_and_port = nat_alloc_addr_and_port_default;
  sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_DEFAULT;
  //
  sm->udp_timeout = SNAT_UDP_TIMEOUT;
  sm->icmp_timeout = SNAT_ICMP_TIMEOUT;
  sm->tcp_transitory_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
  sm->tcp_established_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;

  // nat44 feature configuration
  sm->endpoint_dependent = c.endpoint_dependent;
  sm->static_mapping_only = c.static_mapping_only;
  sm->static_mapping_connection_tracking = c.connection_tracking;
  sm->forwarding_enabled = 0;
  sm->mss_clamping = 0;

  /* users: a value of 0 selects the default of 1024 */
  if (!c.users)
    {
      c.users = 1024;
    }
  sm->max_users_per_thread = c.users;
  sm->user_buckets = nat_calc_bihash_buckets (c.users);

  /* user hash memory: derived from the user count when not given */
  if (!c.user_memory)
    {
      c.user_memory =
        nat_calc_bihash_memory (c.users, sizeof (clib_bihash_8_8_t));
    }
  sm->user_memory_size = c.user_memory;

  if (!c.sessions)
    {
      // default value based on legacy setting of load factor 10 * default
      // translation buckets 1024
      c.sessions = 10 * 1024;
    }
  sm->max_translations_per_thread = c.sessions;
  sm->translation_buckets = nat_calc_bihash_buckets (c.sessions);

  /* session hash memory: derived from the bucket count when not given */
  if (!c.session_memory)
    {
      c.session_memory =
        nat_calc_bihash_memory
        (sm->translation_buckets, sizeof (clib_bihash_16_8_t));
    }
  sm->translation_memory_size = c.session_memory;
  /* first per-fib limit entry gets the per-thread limit */
  vec_add1 (sm->max_translations_per_fib, sm->max_translations_per_thread);
  /* per-user limit falls back to the per-thread limit when unset */
  sm->max_translations_per_user
    = c.user_sessions ? c.user_sessions : sm->max_translations_per_thread;

  sm->outside_vrf_id = c.outside_vrf;
  sm->outside_fib_index =
    fib_table_find_or_create_and_lock
    (FIB_PROTOCOL_IP4, c.outside_vrf, nat_fib_src_hi);

  sm->inside_vrf_id = c.inside_vrf;
  sm->inside_fib_index =
    fib_table_find_or_create_and_lock
    (FIB_PROTOCOL_IP4, c.inside_vrf, nat_fib_src_hi);

  if (c.endpoint_dependent)
    {
      /* endpoint-dependent callbacks, node indices and out2in hash */
      sm->worker_out2in_cb = nat44_ed_get_worker_out2in_cb;
      sm->worker_in2out_cb = nat44_ed_get_worker_in2out_cb;
      sm->out2in_node_index = sm->ed_out2in_node_index;
      sm->in2out_node_index = sm->ed_in2out_node_index;
      sm->in2out_output_node_index = sm->ed_in2out_output_node_index;
      sm->icmp_match_out2in_cb = icmp_match_out2in_ed;
      sm->icmp_match_in2out_cb = icmp_match_in2out_ed;

      clib_bihash_init_16_8 (&sm->out2in_ed, "out2in-ed",
                             sm->translation_buckets,
                             sm->translation_memory_size);
      clib_bihash_set_kvp_format_fn_16_8 (&sm->out2in_ed,
                                          format_ed_session_kvp);


      nat_affinity_enable ();

      nat_ha_enable (nat_ha_sadd_ed_cb, nat_ha_sdel_ed_cb, nat_ha_sref_ed_cb);
    }
  else
    {
      /* endpoint-independent callbacks and node indices */
      sm->worker_out2in_cb = snat_get_worker_out2in_cb;
      sm->worker_in2out_cb = snat_get_worker_in2out_cb;
      sm->out2in_node_index = sm->ei_out2in_node_index;
      sm->in2out_node_index = sm->ei_in2out_node_index;
      sm->in2out_output_node_index = sm->ei_in2out_output_node_index;
      sm->icmp_match_out2in_cb = icmp_match_out2in_slow;
      sm->icmp_match_in2out_cb = icmp_match_in2out_slow;

      nat_ha_enable (nat_ha_sadd_cb, nat_ha_sdel_cb, nat_ha_sref_cb);
    }

  // c.static_mapping & c.connection_tracking require
  // session database
  if (!c.static_mapping_only
      || (c.static_mapping_only && c.connection_tracking))
    {
      snat_main_per_thread_data_t *tsm;
      /* *INDENT-OFF* */
      vec_foreach (tsm, sm->per_thread_data)
        {
          nat44_db_init (tsm);
        }
      /* *INDENT-ON* */
    }
  else
    {
      /* static mapping only, no connection tracking: no session database
       * is created and the "fast" ICMP match callbacks are used */
      sm->icmp_match_in2out_cb = icmp_match_in2out_fast;
      sm->icmp_match_out2in_cb = icmp_match_out2in_fast;
    }

  clib_bihash_init_8_8 (&sm->static_mapping_by_local,
                        "static_mapping_by_local", static_mapping_buckets,
                        static_mapping_memory_size);
  clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_local,
                                     format_static_mapping_kvp);

  clib_bihash_init_8_8 (&sm->static_mapping_by_external,
                        "static_mapping_by_external",
                        static_mapping_buckets, static_mapping_memory_size);
  clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_external,
                                     format_static_mapping_kvp);

  // last: reset counters
  vlib_zero_simple_counter (&sm->total_users, 0);
  vlib_zero_simple_counter (&sm->total_sessions, 0);
  vlib_zero_simple_counter (&sm->user_limit_reached, 0);

  sm->enabled = 1;


  /* NOTE(review): this is the success path but the message goes through
   * nat_log_err -- confirm that error level is really intended here */
  nat_log_err ("nat44 enable");

  return 0;
}
3002
3003 void
3004 nat44_addresses_free (snat_address_t ** addresses)
3005 {
3006   snat_address_t *ap;
3007   /* *INDENT-OFF* */
3008   vec_foreach (ap, *addresses)
3009     {
3010     #define _(N, i, n, s) \
3011       vec_free (ap->busy_##n##_ports_per_thread);
3012       foreach_nat_protocol
3013     #undef _
3014     }
3015   /* *INDENT-ON* */
3016   vec_free (*addresses);
3017   *addresses = 0;
3018 }
3019
3020 int
3021 nat44_plugin_disable ()
3022 {
3023   snat_main_t *sm = &snat_main;
3024   snat_interface_t *i, *vec;
3025   int error = 0;
3026
3027   if (!sm->enabled)
3028     {
3029       nat_log_err ("nat44 is disabled");
3030       return 1;
3031     }
3032
3033   // first unregister all nodes from interfaces
3034   vec = vec_dup (sm->interfaces);
3035   /* *INDENT-OFF* */
3036   vec_foreach (i, vec)
3037     {
3038       if (nat_interface_is_inside(i))
3039         error = snat_interface_add_del (i->sw_if_index, 1, 1);
3040       if (nat_interface_is_outside(i))
3041         error = snat_interface_add_del (i->sw_if_index, 0, 1);
3042
3043       if (error)
3044         {
3045           nat_log_err ("error occurred while removing interface %u",
3046                        i->sw_if_index);
3047         }
3048     }
3049   /* *INDENT-ON* */
3050   vec_free (vec);
3051   sm->interfaces = 0;
3052
3053   vec = vec_dup (sm->output_feature_interfaces);
3054   /* *INDENT-OFF* */
3055   vec_foreach (i, vec)
3056     {
3057       if (nat_interface_is_inside(i))
3058         error = snat_interface_add_del_output_feature (i->sw_if_index, 1, 1);
3059       if (nat_interface_is_outside(i))
3060         error = snat_interface_add_del_output_feature (i->sw_if_index, 0, 1);
3061
3062       if (error)
3063         {
3064           nat_log_err ("error occurred while removing interface %u",
3065                        i->sw_if_index);
3066         }
3067     }
3068   /* *INDENT-ON* */
3069   vec_free (vec);
3070   sm->output_feature_interfaces = 0;
3071
3072   vec_free (sm->max_translations_per_fib);
3073
3074   if (sm->endpoint_dependent)
3075     {
3076       nat_affinity_disable ();
3077       clib_bihash_free_16_8 (&sm->out2in_ed);
3078     }
3079
3080   clib_bihash_free_8_8 (&sm->static_mapping_by_local);
3081   clib_bihash_free_8_8 (&sm->static_mapping_by_external);
3082
3083   if (!sm->static_mapping_only ||
3084       (sm->static_mapping_only && sm->static_mapping_connection_tracking))
3085     {
3086       snat_main_per_thread_data_t *tsm;
3087      /* *INDENT-OFF* */
3088       vec_foreach (tsm, sm->per_thread_data)
3089         {
3090           nat44_db_free (tsm);
3091         }
3092       /* *INDENT-ON* */
3093     }
3094
3095   pool_free (sm->static_mappings);
3096
3097   nat44_addresses_free (&sm->addresses);
3098   nat44_addresses_free (&sm->twice_nat_addresses);
3099
3100
3101   vec_free (sm->to_resolve);
3102   vec_free (sm->auto_add_sw_if_indices);
3103   vec_free (sm->auto_add_sw_if_indices_twice_nat);
3104
3105   sm->to_resolve = 0;
3106   sm->auto_add_sw_if_indices = 0;
3107   sm->auto_add_sw_if_indices_twice_nat = 0;
3108
3109   sm->forwarding_enabled = 0;
3110
3111   sm->enabled = 0;
3112
3113   return 0;
3114 }
3115
3116 void
3117 snat_free_outside_address_and_port (snat_address_t * addresses,
3118                                     u32 thread_index,
3119                                     ip4_address_t * addr,
3120                                     u16 port, nat_protocol_t protocol)
3121 {
3122   snat_address_t *a;
3123   u32 address_index;
3124   u16 port_host_byte_order = clib_net_to_host_u16 (port);
3125
3126   for (address_index = 0; address_index < vec_len (addresses);
3127        address_index++)
3128     {
3129       if (addresses[address_index].addr.as_u32 == addr->as_u32)
3130         break;
3131     }
3132
3133   ASSERT (address_index < vec_len (addresses));
3134
3135   a = addresses + address_index;
3136
3137   switch (protocol)
3138     {
3139 #define _(N, i, n, s) \
3140     case NAT_PROTOCOL_##N: \
3141       ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \
3142       --a->busy_##n##_port_refcounts[port_host_byte_order]; \
3143       a->busy_##n##_ports--; \
3144       a->busy_##n##_ports_per_thread[thread_index]--; \
3145       break;
3146       foreach_nat_protocol
3147 #undef _
3148     default:
3149       nat_elog_info ("unknown protocol");
3150       return;
3151     }
3152 }
3153
3154 static int
3155 nat_set_outside_address_and_port (snat_address_t * addresses,
3156                                   u32 thread_index, ip4_address_t addr,
3157                                   u16 port, nat_protocol_t protocol)
3158 {
3159   snat_address_t *a = 0;
3160   u32 address_index;
3161   u16 port_host_byte_order = clib_net_to_host_u16 (port);
3162
3163   for (address_index = 0; address_index < vec_len (addresses);
3164        address_index++)
3165     {
3166       if (addresses[address_index].addr.as_u32 != addr.as_u32)
3167         continue;
3168
3169       a = addresses + address_index;
3170       switch (protocol)
3171         {
3172 #define _(N, j, n, s) \
3173         case NAT_PROTOCOL_##N: \
3174           if (a->busy_##n##_port_refcounts[port_host_byte_order]) \
3175             return VNET_API_ERROR_INSTANCE_IN_USE; \
3176           ++a->busy_##n##_port_refcounts[port_host_byte_order]; \
3177           a->busy_##n##_ports_per_thread[thread_index]++; \
3178           a->busy_##n##_ports++; \
3179           return 0;
3180           foreach_nat_protocol
3181 #undef _
3182         default:
3183           nat_elog_info ("unknown protocol");
3184           return 1;
3185         }
3186     }
3187
3188   return VNET_API_ERROR_NO_SUCH_ENTRY;
3189 }
3190
3191 int
3192 snat_static_mapping_match (snat_main_t * sm,
3193                            ip4_address_t match_addr,
3194                            u16 match_port,
3195                            u32 match_fib_index,
3196                            nat_protocol_t match_protocol,
3197                            ip4_address_t * mapping_addr,
3198                            u16 * mapping_port,
3199                            u32 * mapping_fib_index,
3200                            u8 by_external,
3201                            u8 * is_addr_only,
3202                            twice_nat_type_t * twice_nat,
3203                            lb_nat_type_t * lb, ip4_address_t * ext_host_addr,
3204                            u8 * is_identity_nat, snat_static_mapping_t ** out)
3205 {
3206   clib_bihash_kv_8_8_t kv, value;
3207   clib_bihash_8_8_t *mapping_hash;
3208   snat_static_mapping_t *m;
3209   u32 rand, lo = 0, hi, mid, *tmp = 0, i;
3210   nat44_lb_addr_port_t *local;
3211   u8 backend_index;
3212
3213   if (!by_external)
3214     {
3215       mapping_hash = &sm->static_mapping_by_local;
3216       init_nat_k (&kv, match_addr, match_port, match_fib_index,
3217                   match_protocol);
3218       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
3219         {
3220           /* Try address only mapping */
3221           init_nat_k (&kv, match_addr, 0, match_fib_index, 0);
3222           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
3223             return 1;
3224         }
3225     }
3226   else
3227     {
3228       mapping_hash = &sm->static_mapping_by_external;
3229       init_nat_k (&kv, match_addr, match_port, 0, match_protocol);
3230       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
3231         {
3232           /* Try address only mapping */
3233           init_nat_k (&kv, match_addr, 0, 0, 0);
3234           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
3235             return 1;
3236         }
3237     }
3238
3239   m = pool_elt_at_index (sm->static_mappings, value.value);
3240
3241   if (by_external)
3242     {
3243       if (is_lb_static_mapping (m))
3244         {
3245           if (PREDICT_FALSE (lb != 0))
3246             *lb = m->affinity ? AFFINITY_LB_NAT : LB_NAT;
3247           if (m->affinity && !nat_affinity_find_and_lock (ext_host_addr[0],
3248                                                           match_addr,
3249                                                           match_protocol,
3250                                                           match_port,
3251                                                           &backend_index))
3252             {
3253               local = pool_elt_at_index (m->locals, backend_index);
3254               *mapping_addr = local->addr;
3255               *mapping_port = local->port;
3256               *mapping_fib_index = local->fib_index;
3257               goto end;
3258             }
3259           // pick locals matching this worker
3260           if (PREDICT_FALSE (sm->num_workers > 1))
3261             {
3262               u32 thread_index = vlib_get_thread_index ();
3263               /* *INDENT-OFF* */
3264               pool_foreach_index (i, m->locals,
3265               ({
3266                 local = pool_elt_at_index (m->locals, i);
3267
3268                 ip4_header_t ip = {
3269                   .src_address = local->addr,
3270                 };
3271
3272                 if (sm->worker_in2out_cb (&ip, m->fib_index, 0) ==
3273                     thread_index)
3274                   {
3275                     vec_add1 (tmp, i);
3276                   }
3277               }));
3278               /* *INDENT-ON* */
3279               ASSERT (vec_len (tmp) != 0);
3280             }
3281           else
3282             {
3283               /* *INDENT-OFF* */
3284               pool_foreach_index (i, m->locals,
3285               ({
3286                 vec_add1 (tmp, i);
3287               }));
3288               /* *INDENT-ON* */
3289             }
3290           hi = vec_len (tmp) - 1;
3291           local = pool_elt_at_index (m->locals, tmp[hi]);
3292           rand = 1 + (random_u32 (&sm->random_seed) % local->prefix);
3293           while (lo < hi)
3294             {
3295               mid = ((hi - lo) >> 1) + lo;
3296               local = pool_elt_at_index (m->locals, tmp[mid]);
3297               (rand > local->prefix) ? (lo = mid + 1) : (hi = mid);
3298             }
3299           local = pool_elt_at_index (m->locals, tmp[lo]);
3300           if (!(local->prefix >= rand))
3301             return 1;
3302           *mapping_addr = local->addr;
3303           *mapping_port = local->port;
3304           *mapping_fib_index = local->fib_index;
3305           if (m->affinity)
3306             {
3307               if (nat_affinity_create_and_lock (ext_host_addr[0], match_addr,
3308                                                 match_protocol, match_port,
3309                                                 tmp[lo], m->affinity,
3310                                                 m->affinity_per_service_list_head_index))
3311                 nat_elog_info ("create affinity record failed");
3312             }
3313           vec_free (tmp);
3314         }
3315       else
3316         {
3317           if (PREDICT_FALSE (lb != 0))
3318             *lb = NO_LB_NAT;
3319           *mapping_fib_index = m->fib_index;
3320           *mapping_addr = m->local_addr;
3321           /* Address only mapping doesn't change port */
3322           *mapping_port = is_addr_only_static_mapping (m) ? match_port
3323             : m->local_port;
3324         }
3325     }
3326   else
3327     {
3328       *mapping_addr = m->external_addr;
3329       /* Address only mapping doesn't change port */
3330       *mapping_port = is_addr_only_static_mapping (m) ? match_port
3331         : m->external_port;
3332       *mapping_fib_index = sm->outside_fib_index;
3333     }
3334
3335 end:
3336   if (PREDICT_FALSE (is_addr_only != 0))
3337     *is_addr_only = is_addr_only_static_mapping (m);
3338
3339   if (PREDICT_FALSE (twice_nat != 0))
3340     *twice_nat = m->twice_nat;
3341
3342   if (PREDICT_FALSE (is_identity_nat != 0))
3343     *is_identity_nat = is_identity_static_mapping (m);
3344
3345   if (out != 0)
3346     *out = m;
3347
3348   return 0;
3349 }
3350
3351 int
3352 snat_alloc_outside_address_and_port (snat_address_t * addresses,
3353                                      u32 fib_index,
3354                                      u32 thread_index,
3355                                      nat_protocol_t proto,
3356                                      ip4_address_t * addr,
3357                                      u16 * port,
3358                                      u16 port_per_thread,
3359                                      u32 snat_thread_index)
3360 {
3361   snat_main_t *sm = &snat_main;
3362
3363   return sm->alloc_addr_and_port (addresses, fib_index, thread_index, proto,
3364                                   addr, port, port_per_thread,
3365                                   snat_thread_index);
3366 }
3367
3368 static int
3369 nat_alloc_addr_and_port_default (snat_address_t * addresses,
3370                                  u32 fib_index,
3371                                  u32 thread_index,
3372                                  nat_protocol_t proto,
3373                                  ip4_address_t * addr,
3374                                  u16 * port,
3375                                  u16 port_per_thread, u32 snat_thread_index)
3376 {
3377   int i;
3378   snat_address_t *a, *ga = 0;
3379   u32 portnum;
3380
3381   for (i = 0; i < vec_len (addresses); i++)
3382     {
3383       a = addresses + i;
3384       switch (proto)
3385         {
3386 #define _(N, j, n, s) \
3387         case NAT_PROTOCOL_##N: \
3388           if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \
3389             { \
3390               if (a->fib_index == fib_index) \
3391                 { \
3392                   while (1) \
3393                     { \
3394                       portnum = (port_per_thread * \
3395                         snat_thread_index) + \
3396                         snat_random_port(0, port_per_thread - 1) + 1024; \
3397                       if (a->busy_##n##_port_refcounts[portnum]) \
3398                         continue; \
3399                       --a->busy_##n##_port_refcounts[portnum]; \
3400                       a->busy_##n##_ports_per_thread[thread_index]++; \
3401                       a->busy_##n##_ports++; \
3402                       *addr = a->addr; \
3403                       *port = clib_host_to_net_u16(portnum); \
3404                       return 0; \
3405                     } \
3406                 } \
3407               else if (a->fib_index == ~0) \
3408                 { \
3409                   ga = a; \
3410                 } \
3411             } \
3412           break;
3413           foreach_nat_protocol
3414 #undef _
3415         default:
3416           nat_elog_info ("unknown protocol");
3417           return 1;
3418         }
3419
3420     }
3421
3422   if (ga)
3423     {
3424       a = ga;
3425       switch (proto)
3426         {
3427 #define _(N, j, n, s) \
3428         case NAT_PROTOCOL_##N: \
3429           while (1) \
3430             { \
3431               portnum = (port_per_thread * \
3432                 snat_thread_index) + \
3433                 snat_random_port(0, port_per_thread - 1) + 1024; \
3434               if (a->busy_##n##_port_refcounts[portnum]) \
3435                 continue; \
3436               ++a->busy_##n##_port_refcounts[portnum]; \
3437               a->busy_##n##_ports_per_thread[thread_index]++; \
3438               a->busy_##n##_ports++; \
3439               *addr = a->addr; \
3440               *port = clib_host_to_net_u16(portnum); \
3441               return 0; \
3442             }
3443           break;
3444           foreach_nat_protocol
3445 #undef _
3446         default:
3447           nat_elog_info ("unknown protocol");
3448           return 1;
3449         }
3450     }
3451
3452   /* Totally out of translations to use... */
3453   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
3454   return 1;
3455 }
3456
3457 static int
3458 nat_alloc_addr_and_port_mape (snat_address_t * addresses, u32 fib_index,
3459                               u32 thread_index, nat_protocol_t proto,
3460                               ip4_address_t * addr, u16 * port,
3461                               u16 port_per_thread, u32 snat_thread_index)
3462 {
3463   snat_main_t *sm = &snat_main;
3464   snat_address_t *a = addresses;
3465   u16 m, ports, portnum, A, j;
3466   m = 16 - (sm->psid_offset + sm->psid_length);
3467   ports = (1 << (16 - sm->psid_length)) - (1 << m);
3468
3469   if (!vec_len (addresses))
3470     goto exhausted;
3471
3472   switch (proto)
3473     {
3474 #define _(N, i, n, s) \
3475     case NAT_PROTOCOL_##N: \
3476       if (a->busy_##n##_ports < ports) \
3477         { \
3478           while (1) \
3479             { \
3480               A = snat_random_port(1, pow2_mask(sm->psid_offset)); \
3481               j = snat_random_port(0, pow2_mask(m)); \
3482               portnum = A | (sm->psid << sm->psid_offset) | (j << (16 - m)); \
3483               if (a->busy_##n##_port_refcounts[portnum]) \
3484                 continue; \
3485               ++a->busy_##n##_port_refcounts[portnum]; \
3486               a->busy_##n##_ports++; \
3487               *addr = a->addr; \
3488               *port = clib_host_to_net_u16 (portnum); \
3489               return 0; \
3490             } \
3491         } \
3492       break;
3493       foreach_nat_protocol
3494 #undef _
3495     default:
3496       nat_elog_info ("unknown protocol");
3497       return 1;
3498     }
3499
3500 exhausted:
3501   /* Totally out of translations to use... */
3502   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
3503   return 1;
3504 }
3505
3506 static int
3507 nat_alloc_addr_and_port_range (snat_address_t * addresses, u32 fib_index,
3508                                u32 thread_index, nat_protocol_t proto,
3509                                ip4_address_t * addr, u16 * port,
3510                                u16 port_per_thread, u32 snat_thread_index)
3511 {
3512   snat_main_t *sm = &snat_main;
3513   snat_address_t *a = addresses;
3514   u16 portnum, ports;
3515
3516   ports = sm->end_port - sm->start_port + 1;
3517
3518   if (!vec_len (addresses))
3519     goto exhausted;
3520
3521   switch (proto)
3522     {
3523 #define _(N, i, n, s) \
3524     case NAT_PROTOCOL_##N: \
3525       if (a->busy_##n##_ports < ports) \
3526         { \
3527           while (1) \
3528             { \
3529               portnum = snat_random_port(sm->start_port, sm->end_port); \
3530               if (a->busy_##n##_port_refcounts[portnum]) \
3531                 continue; \
3532               ++a->busy_##n##_port_refcounts[portnum]; \
3533               a->busy_##n##_ports++; \
3534               *addr = a->addr; \
3535               *port = clib_host_to_net_u16 (portnum); \
3536               return 0; \
3537             } \
3538         } \
3539       break;
3540       foreach_nat_protocol
3541 #undef _
3542     default:
3543       nat_elog_info ("unknown protocol");
3544       return 1;
3545     }
3546
3547 exhausted:
3548   /* Totally out of translations to use... */
3549   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
3550   return 1;
3551 }
3552
3553 void
3554 nat44_add_del_address_dpo (ip4_address_t addr, u8 is_add)
3555 {
3556   dpo_id_t dpo_v4 = DPO_INVALID;
3557   fib_prefix_t pfx = {
3558     .fp_proto = FIB_PROTOCOL_IP4,
3559     .fp_len = 32,
3560     .fp_addr.ip4.as_u32 = addr.as_u32,
3561   };
3562
3563   if (is_add)
3564     {
3565       nat_dpo_create (DPO_PROTO_IP4, 0, &dpo_v4);
3566       fib_table_entry_special_dpo_add (0, &pfx, nat_fib_src_hi,
3567                                        FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v4);
3568       dpo_reset (&dpo_v4);
3569     }
3570   else
3571     {
3572       fib_table_entry_special_remove (0, &pfx, nat_fib_src_hi);
3573     }
3574 }
3575
3576 u8 *
3577 format_session_kvp (u8 * s, va_list * args)
3578 {
3579   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
3580
3581   s = format (s, "%U session-index %llu", format_snat_key, v->key, v->value);
3582
3583   return s;
3584 }
3585
3586 u8 *
3587 format_static_mapping_kvp (u8 * s, va_list * args)
3588 {
3589   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
3590
3591   s = format (s, "%U static-mapping-index %llu",
3592               format_snat_key, v->key, v->value);
3593
3594   return s;
3595 }
3596
3597 u8 *
3598 format_user_kvp (u8 * s, va_list * args)
3599 {
3600   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
3601   snat_user_key_t k;
3602
3603   k.as_u64 = v->key;
3604
3605   s = format (s, "%U fib %d user-index %llu", format_ip4_address, &k.addr,
3606               k.fib_index, v->value);
3607
3608   return s;
3609 }
3610
3611 u8 *
3612 format_ed_session_kvp (u8 * s, va_list * args)
3613 {
3614   clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
3615
3616   u8 proto;
3617   u16 r_port, l_port;
3618   ip4_address_t l_addr, r_addr;
3619   u32 fib_index;
3620
3621   split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port);
3622   s =
3623     format (s,
3624             "local %U:%d remote %U:%d proto %U fib %d thread-index %u session-index %u",
3625             format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port),
3626             format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port),
3627             format_ip_protocol, proto, fib_index,
3628             ed_value_get_session_index (v), ed_value_get_thread_index (v));
3629
3630   return s;
3631 }
3632
3633 static u32
3634 snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0,
3635                            u8 is_output)
3636 {
3637   snat_main_t *sm = &snat_main;
3638   u32 next_worker_index = 0;
3639   u32 hash;
3640
3641   next_worker_index = sm->first_worker_index;
3642   hash = ip0->src_address.as_u32 + (ip0->src_address.as_u32 >> 8) +
3643     (ip0->src_address.as_u32 >> 16) + (ip0->src_address.as_u32 >> 24);
3644
3645   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
3646     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
3647   else
3648     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
3649
3650   return next_worker_index;
3651 }
3652
3653 static u32
3654 snat_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip0,
3655                            u32 rx_fib_index0, u8 is_output)
3656 {
3657   snat_main_t *sm = &snat_main;
3658   udp_header_t *udp;
3659   u16 port;
3660   clib_bihash_kv_8_8_t kv, value;
3661   snat_static_mapping_t *m;
3662   u32 proto;
3663   u32 next_worker_index = 0;
3664
3665   /* first try static mappings without port */
3666   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3667     {
3668       init_nat_k (&kv, ip0->dst_address, 0, rx_fib_index0, 0);
3669       if (!clib_bihash_search_8_8
3670           (&sm->static_mapping_by_external, &kv, &value))
3671         {
3672           m = pool_elt_at_index (sm->static_mappings, value.value);
3673           return m->workers[0];
3674         }
3675     }
3676
3677   proto = ip_proto_to_nat_proto (ip0->protocol);
3678   udp = ip4_next_header (ip0);
3679   port = udp->dst_port;
3680
3681   /* unknown protocol */
3682   if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
3683     {
3684       /* use current thread */
3685       return vlib_get_thread_index ();
3686     }
3687
3688   if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_ICMP))
3689     {
3690       icmp46_header_t *icmp = (icmp46_header_t *) udp;
3691       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3692       if (!icmp_type_is_error_message
3693           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
3694         port = vnet_buffer (b)->ip.reass.l4_src_port;
3695       else
3696         {
3697           /* if error message, then it's not fragmented and we can access it */
3698           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3699           proto = ip_proto_to_nat_proto (inner_ip->protocol);
3700           void *l4_header = ip4_next_header (inner_ip);
3701           switch (proto)
3702             {
3703             case NAT_PROTOCOL_ICMP:
3704               icmp = (icmp46_header_t *) l4_header;
3705               echo = (icmp_echo_header_t *) (icmp + 1);
3706               port = echo->identifier;
3707               break;
3708             case NAT_PROTOCOL_UDP:
3709             case NAT_PROTOCOL_TCP:
3710               port = ((tcp_udp_header_t *) l4_header)->src_port;
3711               break;
3712             default:
3713               return vlib_get_thread_index ();
3714             }
3715         }
3716     }
3717
3718   /* try static mappings with port */
3719   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3720     {
3721       init_nat_k (&kv, ip0->dst_address, port, rx_fib_index0, proto);
3722       if (!clib_bihash_search_8_8
3723           (&sm->static_mapping_by_external, &kv, &value))
3724         {
3725           m = pool_elt_at_index (sm->static_mappings, value.value);
3726           return m->workers[0];
3727         }
3728     }
3729
3730   /* worker by outside port */
3731   next_worker_index = sm->first_worker_index;
3732   next_worker_index +=
3733     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
3734   return next_worker_index;
3735 }
3736
3737 static u32
3738 nat44_ed_get_worker_in2out_cb (ip4_header_t * ip, u32 rx_fib_index,
3739                                u8 is_output)
3740 {
3741   snat_main_t *sm = &snat_main;
3742   u32 next_worker_index = sm->first_worker_index;
3743   u32 hash;
3744
3745   clib_bihash_kv_16_8_t kv16, value16;
3746   snat_main_per_thread_data_t *tsm;
3747   udp_header_t *udp;
3748
3749   if (PREDICT_FALSE (is_output))
3750     {
3751       u32 fib_index = sm->outside_fib_index;
3752       nat_outside_fib_t *outside_fib;
3753       fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
3754       fib_prefix_t pfx = {
3755         .fp_proto = FIB_PROTOCOL_IP4,
3756         .fp_len = 32,
3757         .fp_addr = {
3758                     .ip4.as_u32 = ip->dst_address.as_u32,
3759                     }
3760         ,
3761       };
3762
3763       udp = ip4_next_header (ip);
3764
3765       switch (vec_len (sm->outside_fibs))
3766         {
3767         case 0:
3768           fib_index = sm->outside_fib_index;
3769           break;
3770         case 1:
3771           fib_index = sm->outside_fibs[0].fib_index;
3772           break;
3773         default:
3774             /* *INDENT-OFF* */
3775             vec_foreach (outside_fib, sm->outside_fibs)
3776               {
3777                 fei = fib_table_lookup (outside_fib->fib_index, &pfx);
3778                 if (FIB_NODE_INDEX_INVALID != fei)
3779                   {
3780                     if (fib_entry_get_resolving_interface (fei) != ~0)
3781                       {
3782                         fib_index = outside_fib->fib_index;
3783                         break;
3784                       }
3785                   }
3786               }
3787             /* *INDENT-ON* */
3788           break;
3789         }
3790
3791       init_ed_k (&kv16, ip->src_address, udp->src_port, ip->dst_address,
3792                  udp->dst_port, fib_index, ip->protocol);
3793
3794       if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed,
3795                                                   &kv16, &value16)))
3796         {
3797           tsm =
3798             vec_elt_at_index (sm->per_thread_data,
3799                               ed_value_get_thread_index (&value16));
3800           next_worker_index += tsm->thread_index;
3801
3802           nat_elog_debug_handoff ("HANDOFF IN2OUT-OUTPUT-FEATURE (session)",
3803                                   next_worker_index, fib_index,
3804                                   clib_net_to_host_u32 (ip->
3805                                                         src_address.as_u32),
3806                                   clib_net_to_host_u32 (ip->
3807                                                         dst_address.as_u32));
3808
3809           return next_worker_index;
3810         }
3811     }
3812
3813   hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
3814     (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
3815
3816   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
3817     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
3818   else
3819     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
3820
3821   if (PREDICT_TRUE (!is_output))
3822     {
3823       nat_elog_debug_handoff ("HANDOFF IN2OUT",
3824                               next_worker_index, rx_fib_index,
3825                               clib_net_to_host_u32 (ip->src_address.as_u32),
3826                               clib_net_to_host_u32 (ip->dst_address.as_u32));
3827     }
3828   else
3829     {
3830       nat_elog_debug_handoff ("HANDOFF IN2OUT-OUTPUT-FEATURE",
3831                               next_worker_index, rx_fib_index,
3832                               clib_net_to_host_u32 (ip->src_address.as_u32),
3833                               clib_net_to_host_u32 (ip->dst_address.as_u32));
3834     }
3835
3836   return next_worker_index;
3837 }
3838
3839 static u32
3840 nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip,
3841                                u32 rx_fib_index, u8 is_output)
3842 {
3843   snat_main_t *sm = &snat_main;
3844   clib_bihash_kv_8_8_t kv, value;
3845   clib_bihash_kv_16_8_t kv16, value16;
3846   snat_main_per_thread_data_t *tsm;
3847
3848   u32 proto, next_worker_index = 0;
3849   udp_header_t *udp;
3850   u16 port;
3851   snat_static_mapping_t *m;
3852   u32 hash;
3853
3854   proto = ip_proto_to_nat_proto (ip->protocol);
3855
3856   if (PREDICT_TRUE (proto == NAT_PROTOCOL_UDP || proto == NAT_PROTOCOL_TCP))
3857     {
3858       udp = ip4_next_header (ip);
3859
3860       init_ed_k (&kv16, ip->dst_address, udp->dst_port, ip->src_address,
3861                  udp->src_port, rx_fib_index, ip->protocol);
3862
3863       if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed,
3864                                                   &kv16, &value16)))
3865         {
3866           tsm =
3867             vec_elt_at_index (sm->per_thread_data,
3868                               ed_value_get_thread_index (&value16));
3869           vnet_buffer2 (b)->nat.ed_out2in_nat_session_index =
3870             ed_value_get_session_index (&value16);
3871           next_worker_index = sm->first_worker_index + tsm->thread_index;
3872           nat_elog_debug_handoff ("HANDOFF OUT2IN (session)",
3873                                   next_worker_index, rx_fib_index,
3874                                   clib_net_to_host_u32 (ip->
3875                                                         src_address.as_u32),
3876                                   clib_net_to_host_u32 (ip->
3877                                                         dst_address.as_u32));
3878           return next_worker_index;
3879         }
3880     }
3881   else if (proto == NAT_PROTOCOL_ICMP)
3882     {
3883       if (!get_icmp_o2i_ed_key (b, ip, rx_fib_index, ~0, ~0, 0, 0, 0, &kv16))
3884         {
3885           if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed,
3886                                                       &kv16, &value16)))
3887             {
3888               tsm =
3889                 vec_elt_at_index (sm->per_thread_data,
3890                                   ed_value_get_thread_index (&value16));
3891               next_worker_index = sm->first_worker_index + tsm->thread_index;
3892               nat_elog_debug_handoff ("HANDOFF OUT2IN (session)",
3893                                       next_worker_index, rx_fib_index,
3894                                       clib_net_to_host_u32 (ip->
3895                                                             src_address.as_u32),
3896                                       clib_net_to_host_u32 (ip->
3897                                                             dst_address.as_u32));
3898               return next_worker_index;
3899             }
3900         }
3901     }
3902
3903   /* first try static mappings without port */
3904   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3905     {
3906       init_nat_k (&kv, ip->dst_address, 0, 0, 0);
3907       if (!clib_bihash_search_8_8
3908           (&sm->static_mapping_by_external, &kv, &value))
3909         {
3910           m = pool_elt_at_index (sm->static_mappings, value.value);
3911           next_worker_index = m->workers[0];
3912           goto done;
3913         }
3914     }
3915
3916   /* unknown protocol */
3917   if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
3918     {
3919       /* use current thread */
3920       next_worker_index = vlib_get_thread_index ();
3921       goto done;
3922     }
3923
3924   udp = ip4_next_header (ip);
3925   port = udp->dst_port;
3926
3927   if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
3928     {
3929       icmp46_header_t *icmp = (icmp46_header_t *) udp;
3930       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3931       if (!icmp_type_is_error_message
3932           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
3933         port = vnet_buffer (b)->ip.reass.l4_src_port;
3934       else
3935         {
3936           /* if error message, then it's not fragmented and we can access it */
3937           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3938           proto = ip_proto_to_nat_proto (inner_ip->protocol);
3939           void *l4_header = ip4_next_header (inner_ip);
3940           switch (proto)
3941             {
3942             case NAT_PROTOCOL_ICMP:
3943               icmp = (icmp46_header_t *) l4_header;
3944               echo = (icmp_echo_header_t *) (icmp + 1);
3945               port = echo->identifier;
3946               break;
3947             case NAT_PROTOCOL_UDP:
3948             case NAT_PROTOCOL_TCP:
3949               port = ((tcp_udp_header_t *) l4_header)->src_port;
3950               break;
3951             default:
3952               next_worker_index = vlib_get_thread_index ();
3953               goto done;
3954             }
3955         }
3956     }
3957
3958   /* try static mappings with port */
3959   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3960     {
3961       init_nat_k (&kv, ip->dst_address, port, 0, proto);
3962       if (!clib_bihash_search_8_8
3963           (&sm->static_mapping_by_external, &kv, &value))
3964         {
3965           m = pool_elt_at_index (sm->static_mappings, value.value);
3966           if (!is_lb_static_mapping (m))
3967             {
3968               next_worker_index = m->workers[0];
3969               goto done;
3970             }
3971
3972           hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
3973             (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
3974
3975           if (PREDICT_TRUE (is_pow2 (_vec_len (m->workers))))
3976             next_worker_index =
3977               m->workers[hash & (_vec_len (m->workers) - 1)];
3978           else
3979             next_worker_index = m->workers[hash % _vec_len (m->workers)];
3980           goto done;
3981         }
3982     }
3983
3984   /* worker by outside port */
3985   next_worker_index = sm->first_worker_index;
3986   next_worker_index +=
3987     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
3988
3989 done:
3990   nat_elog_debug_handoff ("HANDOFF OUT2IN", next_worker_index, rx_fib_index,
3991                           clib_net_to_host_u32 (ip->src_address.as_u32),
3992                           clib_net_to_host_u32 (ip->dst_address.as_u32));
3993   return next_worker_index;
3994 }
3995
3996 void
3997 nat_ha_sadd_cb (ip4_address_t * in_addr, u16 in_port,
3998                 ip4_address_t * out_addr, u16 out_port,
3999                 ip4_address_t * eh_addr, u16 eh_port,
4000                 ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
4001                 u32 fib_index, u16 flags, u32 thread_index)
4002 {
4003   snat_main_t *sm = &snat_main;
4004   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
4005   snat_user_t *u;
4006   snat_session_t *s;
4007   clib_bihash_kv_8_8_t kv;
4008   vlib_main_t *vm = vlib_get_main ();
4009   f64 now = vlib_time_now (vm);
4010   nat_outside_fib_t *outside_fib;
4011   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
4012   fib_prefix_t pfx = {
4013     .fp_proto = FIB_PROTOCOL_IP4,
4014     .fp_len = 32,
4015     .fp_addr = {
4016                 .ip4.as_u32 = eh_addr->as_u32,
4017                 },
4018   };
4019
4020   if (!(flags & SNAT_SESSION_FLAG_STATIC_MAPPING))
4021     {
4022       if (nat_set_outside_address_and_port
4023           (sm->addresses, thread_index, *out_addr, out_port, proto))
4024         return;
4025     }
4026
4027   u = nat_user_get_or_create (sm, in_addr, fib_index, thread_index);
4028   if (!u)
4029     return;
4030
4031   s = nat_session_alloc_or_recycle (sm, u, thread_index, now);
4032   if (!s)
4033     return;
4034
4035   if (sm->endpoint_dependent)
4036     {
4037       nat_ed_lru_insert (tsm, s, now, nat_proto_to_ip_proto (proto));
4038     }
4039
4040   s->out2in.addr.as_u32 = out_addr->as_u32;
4041   s->out2in.port = out_port;
4042   s->nat_proto = proto;
4043   s->last_heard = now;
4044   s->flags = flags;
4045   s->ext_host_addr.as_u32 = eh_addr->as_u32;
4046   s->ext_host_port = eh_port;
4047   user_session_increment (sm, u, snat_is_session_static (s));
4048   switch (vec_len (sm->outside_fibs))
4049     {
4050     case 0:
4051       s->out2in.fib_index = sm->outside_fib_index;
4052       break;
4053     case 1:
4054       s->out2in.fib_index = sm->outside_fibs[0].fib_index;
4055       break;
4056     default:
4057       /* *INDENT-OFF* */
4058       vec_foreach (outside_fib, sm->outside_fibs)
4059         {
4060           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
4061           if (FIB_NODE_INDEX_INVALID != fei)
4062             {
4063               if (fib_entry_get_resolving_interface (fei) != ~0)
4064                 {
4065                   s->out2in.fib_index = outside_fib->fib_index;
4066                   break;
4067                 }
4068             }
4069         }
4070       /* *INDENT-ON* */
4071       break;
4072     }
4073   init_nat_o2i_kv (&kv, s, s - tsm->sessions);
4074   if (clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 1))
4075     nat_elog_warn ("out2in key add failed");
4076
4077   s->in2out.addr.as_u32 = in_addr->as_u32;
4078   s->in2out.port = in_port;
4079   s->in2out.fib_index = fib_index;
4080   init_nat_i2o_kv (&kv, s, s - tsm->sessions);
4081   if (clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 1))
4082     nat_elog_warn ("in2out key add failed");
4083 }
4084
4085 void
4086 nat_ha_sdel_cb (ip4_address_t * out_addr, u16 out_port,
4087                 ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
4088                 u32 ti)
4089 {
4090   snat_main_t *sm = &snat_main;
4091   clib_bihash_kv_8_8_t kv, value;
4092   u32 thread_index;
4093   snat_session_t *s;
4094   snat_main_per_thread_data_t *tsm;
4095
4096   if (sm->num_workers > 1)
4097     thread_index =
4098       sm->first_worker_index +
4099       (sm->workers[(clib_net_to_host_u16 (out_port) -
4100                     1024) / sm->port_per_thread]);
4101   else
4102     thread_index = sm->num_workers;
4103   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
4104
4105   init_nat_k (&kv, *out_addr, out_port, fib_index, proto);
4106   if (clib_bihash_search_8_8 (&tsm->out2in, &kv, &value))
4107     return;
4108
4109   s = pool_elt_at_index (tsm->sessions, value.value);
4110   nat_free_session_data (sm, s, thread_index, 1);
4111   nat44_delete_session (sm, s, thread_index);
4112 }
4113
4114 void
4115 nat_ha_sref_cb (ip4_address_t * out_addr, u16 out_port,
4116                 ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
4117                 u32 total_pkts, u64 total_bytes, u32 thread_index)
4118 {
4119   snat_main_t *sm = &snat_main;
4120   clib_bihash_kv_8_8_t kv, value;
4121   snat_session_t *s;
4122   snat_main_per_thread_data_t *tsm;
4123
4124   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
4125
4126   init_nat_k (&kv, *out_addr, out_port, fib_index, proto);
4127   if (clib_bihash_search_8_8 (&tsm->out2in, &kv, &value))
4128     return;
4129
4130   s = pool_elt_at_index (tsm->sessions, value.value);
4131   s->total_pkts = total_pkts;
4132   s->total_bytes = total_bytes;
4133 }
4134
4135 void
4136 nat_ha_sadd_ed_cb (ip4_address_t * in_addr, u16 in_port,
4137                    ip4_address_t * out_addr, u16 out_port,
4138                    ip4_address_t * eh_addr, u16 eh_port,
4139                    ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
4140                    u32 fib_index, u16 flags, u32 thread_index)
4141 {
4142   snat_main_t *sm = &snat_main;
4143   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
4144   snat_session_t *s;
4145   clib_bihash_kv_16_8_t kv;
4146   vlib_main_t *vm = vlib_get_main ();
4147   f64 now = vlib_time_now (vm);
4148   nat_outside_fib_t *outside_fib;
4149   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
4150   fib_prefix_t pfx = {
4151     .fp_proto = FIB_PROTOCOL_IP4,
4152     .fp_len = 32,
4153     .fp_addr = {
4154                 .ip4.as_u32 = eh_addr->as_u32,
4155                 },
4156   };
4157
4158
4159   if (!(flags & SNAT_SESSION_FLAG_STATIC_MAPPING))
4160     {
4161       if (nat_set_outside_address_and_port
4162           (sm->addresses, thread_index, *out_addr, out_port, proto))
4163         return;
4164     }
4165
4166   if (flags & SNAT_SESSION_FLAG_TWICE_NAT)
4167     {
4168       if (nat_set_outside_address_and_port
4169           (sm->addresses, thread_index, *ehn_addr, ehn_port, proto))
4170         return;
4171     }
4172
4173   s = nat_ed_session_alloc (sm, thread_index, now, proto);
4174   if (!s)
4175     return;
4176
4177   s->last_heard = now;
4178   s->flags = flags;
4179   s->ext_host_nat_addr.as_u32 = s->ext_host_addr.as_u32 = eh_addr->as_u32;
4180   s->ext_host_nat_port = s->ext_host_port = eh_port;
4181   if (is_twice_nat_session (s))
4182     {
4183       s->ext_host_nat_addr.as_u32 = ehn_addr->as_u32;
4184       s->ext_host_nat_port = ehn_port;
4185     }
4186   switch (vec_len (sm->outside_fibs))
4187     {
4188     case 0:
4189       s->out2in.fib_index = sm->outside_fib_index;
4190       break;
4191     case 1:
4192       s->out2in.fib_index = sm->outside_fibs[0].fib_index;
4193       break;
4194     default:
4195       /* *INDENT-OFF* */
4196       vec_foreach (outside_fib, sm->outside_fibs)
4197         {
4198           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
4199           if (FIB_NODE_INDEX_INVALID != fei)
4200             {
4201               if (fib_entry_get_resolving_interface (fei) != ~0)
4202                 {
4203                   s->out2in.fib_index = outside_fib->fib_index;
4204                   break;
4205                 }
4206             }
4207         }
4208       /* *INDENT-ON* */
4209       break;
4210     }
4211   s->nat_proto = proto;
4212   s->out2in.addr.as_u32 = out_addr->as_u32;
4213   s->out2in.port = out_port;
4214
4215   s->in2out.addr.as_u32 = in_addr->as_u32;
4216   s->in2out.port = in_port;
4217   s->in2out.fib_index = fib_index;
4218
4219   init_ed_kv (&kv, *in_addr, in_port, s->ext_host_nat_addr,
4220               s->ext_host_nat_port, fib_index, nat_proto_to_ip_proto (proto),
4221               thread_index, s - tsm->sessions);
4222   if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &kv, 1))
4223     nat_elog_warn ("in2out key add failed");
4224
4225   init_ed_kv (&kv, *out_addr, out_port, *eh_addr, eh_port,
4226               s->out2in.fib_index, nat_proto_to_ip_proto (proto),
4227               thread_index, s - tsm->sessions);
4228   if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &kv, 1))
4229     nat_elog_warn ("out2in key add failed");
4230 }
4231
4232 void
4233 nat_ha_sdel_ed_cb (ip4_address_t * out_addr, u16 out_port,
4234                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
4235                    u32 fib_index, u32 ti)
4236 {
4237   snat_main_t *sm = &snat_main;
4238   clib_bihash_kv_16_8_t kv, value;
4239   u32 thread_index;
4240   snat_session_t *s;
4241   snat_main_per_thread_data_t *tsm;
4242
4243   if (sm->num_workers > 1)
4244     thread_index =
4245       sm->first_worker_index +
4246       (sm->workers[(clib_net_to_host_u16 (out_port) -
4247                     1024) / sm->port_per_thread]);
4248   else
4249     thread_index = sm->num_workers;
4250   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
4251
4252   init_ed_k (&kv, *out_addr, out_port, *eh_addr, eh_port, fib_index, proto);
4253   if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
4254     return;
4255
4256   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
4257   nat_free_session_data (sm, s, thread_index, 1);
4258   nat44_delete_session (sm, s, thread_index);
4259 }
4260
4261 void
4262 nat_ha_sref_ed_cb (ip4_address_t * out_addr, u16 out_port,
4263                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
4264                    u32 fib_index, u32 total_pkts, u64 total_bytes,
4265                    u32 thread_index)
4266 {
4267   snat_main_t *sm = &snat_main;
4268   clib_bihash_kv_16_8_t kv, value;
4269   snat_session_t *s;
4270   snat_main_per_thread_data_t *tsm;
4271
4272   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
4273
4274   init_ed_k (&kv, *out_addr, out_port, *eh_addr, eh_port, fib_index, proto);
4275   if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
4276     return;
4277
4278   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
4279   s->total_pkts = total_pkts;
4280   s->total_bytes = total_bytes;
4281 }
4282
4283 static u32
4284 nat_calc_bihash_buckets (u32 n_elts)
4285 {
4286   return 1 << (max_log2 (n_elts >> 1) + 1);
4287 }
4288
4289 static u32
4290 nat_calc_bihash_memory (u32 n_buckets, uword kv_size)
4291 {
4292   return n_buckets * (8 + kv_size * 4);
4293 }
4294
4295 u32
4296 nat44_get_max_session_limit ()
4297 {
4298   snat_main_t *sm = &snat_main;
4299   u32 max_limit = 0, len = 0;
4300
4301   for (; len < vec_len (sm->max_translations_per_fib); len++)
4302     {
4303       if (max_limit < sm->max_translations_per_fib[len])
4304         max_limit = sm->max_translations_per_fib[len];
4305     }
4306   return max_limit;
4307 }
4308
4309 int
4310 nat44_set_session_limit (u32 session_limit, u32 vrf_id)
4311 {
4312   snat_main_t *sm = &snat_main;
4313   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
4314   u32 len = vec_len (sm->max_translations_per_fib);
4315
4316   if (len <= fib_index)
4317     {
4318       vec_validate (sm->max_translations_per_fib, fib_index + 1);
4319
4320       for (; len < vec_len (sm->max_translations_per_fib); len++)
4321         sm->max_translations_per_fib[len] = sm->max_translations_per_thread;
4322     }
4323
4324   sm->max_translations_per_fib[fib_index] = session_limit;
4325   return 0;
4326 }
4327
4328 int
4329 nat44_update_session_limit (u32 session_limit, u32 vrf_id)
4330 {
4331   snat_main_t *sm = &snat_main;
4332
4333   if (nat44_set_session_limit (session_limit, vrf_id))
4334     return 1;
4335   sm->max_translations_per_thread = nat44_get_max_session_limit ();
4336
4337   sm->translation_buckets =
4338     nat_calc_bihash_buckets (sm->max_translations_per_thread);
4339
4340   if (!sm->translation_memory_size_set)
4341     {
4342       sm->translation_memory_size =
4343         nat_calc_bihash_memory (sm->translation_buckets,
4344                                 sizeof (clib_bihash_16_8_t));
4345     }
4346
4347   nat44_sessions_clear ();
4348   return 0;
4349 }
4350
4351 void
4352 nat44_db_init (snat_main_per_thread_data_t * tsm)
4353 {
4354   snat_main_t *sm = &snat_main;
4355
4356   pool_alloc (tsm->sessions, sm->max_translations_per_thread);
4357   pool_alloc (tsm->lru_pool, sm->max_translations_per_thread);
4358
4359   dlist_elt_t *head;
4360
4361   pool_get (tsm->lru_pool, head);
4362   tsm->tcp_trans_lru_head_index = head - tsm->lru_pool;
4363   clib_dlist_init (tsm->lru_pool, tsm->tcp_trans_lru_head_index);
4364
4365   pool_get (tsm->lru_pool, head);
4366   tsm->tcp_estab_lru_head_index = head - tsm->lru_pool;
4367   clib_dlist_init (tsm->lru_pool, tsm->tcp_estab_lru_head_index);
4368
4369   pool_get (tsm->lru_pool, head);
4370   tsm->udp_lru_head_index = head - tsm->lru_pool;
4371   clib_dlist_init (tsm->lru_pool, tsm->udp_lru_head_index);
4372
4373   pool_get (tsm->lru_pool, head);
4374   tsm->icmp_lru_head_index = head - tsm->lru_pool;
4375   clib_dlist_init (tsm->lru_pool, tsm->icmp_lru_head_index);
4376
4377   pool_get (tsm->lru_pool, head);
4378   tsm->unk_proto_lru_head_index = head - tsm->lru_pool;
4379   clib_dlist_init (tsm->lru_pool, tsm->unk_proto_lru_head_index);
4380
4381   if (sm->endpoint_dependent)
4382     {
4383       clib_bihash_init_16_8 (&tsm->in2out_ed, "in2out-ed",
4384                              sm->translation_buckets,
4385                              sm->translation_memory_size);
4386       clib_bihash_set_kvp_format_fn_16_8 (&tsm->in2out_ed,
4387                                           format_ed_session_kvp);
4388
4389     }
4390   else
4391     {
4392       clib_bihash_init_8_8 (&tsm->in2out, "in2out",
4393                             sm->translation_buckets,
4394                             sm->translation_memory_size);
4395       clib_bihash_set_kvp_format_fn_8_8 (&tsm->in2out, format_session_kvp);
4396       clib_bihash_init_8_8 (&tsm->out2in, "out2in",
4397                             sm->translation_buckets,
4398                             sm->translation_memory_size);
4399       clib_bihash_set_kvp_format_fn_8_8 (&tsm->out2in, format_session_kvp);
4400     }
4401
4402   // TODO: ED nat is not using these
4403   // before removal large refactor required
4404   pool_alloc (tsm->list_pool, sm->max_translations_per_thread);
4405   clib_bihash_init_8_8 (&tsm->user_hash, "users", sm->user_buckets,
4406                         sm->user_memory_size);
4407   clib_bihash_set_kvp_format_fn_8_8 (&tsm->user_hash, format_user_kvp);
4408 }
4409
4410 void
4411 nat44_db_free (snat_main_per_thread_data_t * tsm)
4412 {
4413   snat_main_t *sm = &snat_main;
4414
4415   pool_free (tsm->sessions);
4416   pool_free (tsm->lru_pool);
4417
4418   if (sm->endpoint_dependent)
4419     {
4420       clib_bihash_free_16_8 (&tsm->in2out_ed);
4421       vec_free (tsm->per_vrf_sessions_vec);
4422     }
4423   else
4424     {
4425       clib_bihash_free_8_8 (&tsm->in2out);
4426       clib_bihash_free_8_8 (&tsm->out2in);
4427     }
4428
4429   // TODO: resolve static mappings (put only to !ED)
4430   pool_free (tsm->users);
4431   pool_free (tsm->list_pool);
4432   clib_bihash_free_8_8 (&tsm->user_hash);
4433 }
4434
4435 void
4436 nat44_sessions_clear ()
4437 {
4438   snat_main_t *sm = &snat_main;
4439   snat_main_per_thread_data_t *tsm;
4440
4441   if (sm->endpoint_dependent)
4442     {
4443       clib_bihash_free_16_8 (&sm->out2in_ed);
4444       clib_bihash_init_16_8 (&sm->out2in_ed, "out2in-ed",
4445                              clib_max (1, sm->num_workers) *
4446                              sm->translation_buckets,
4447                              clib_max (1, sm->num_workers) *
4448                              sm->translation_memory_size);
4449       clib_bihash_set_kvp_format_fn_16_8 (&sm->out2in_ed,
4450                                           format_ed_session_kvp);
4451     }
4452
4453   /* *INDENT-OFF* */
4454   vec_foreach (tsm, sm->per_thread_data)
4455     {
4456       u32 ti;
4457
4458       nat44_db_free (tsm);
4459       nat44_db_init (tsm);
4460
4461       ti = tsm->snat_thread_index;
4462       vlib_set_simple_counter (&sm->total_users, ti, 0, 0);
4463       vlib_set_simple_counter (&sm->total_sessions, ti, 0, 0);
4464     }
4465   /* *INDENT-ON* */
4466 }
4467
4468 static void
4469 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
4470                                  uword opaque,
4471                                  u32 sw_if_index,
4472                                  ip4_address_t * address,
4473                                  u32 address_length,
4474                                  u32 if_address_index, u32 is_delete)
4475 {
4476   snat_main_t *sm = &snat_main;
4477   snat_static_map_resolve_t *rp;
4478   snat_static_mapping_t *m;
4479   clib_bihash_kv_8_8_t kv, value;
4480   int i, rv;
4481   ip4_address_t l_addr;
4482
4483   if (!sm->enabled)
4484     return;
4485
4486   for (i = 0; i < vec_len (sm->to_resolve); i++)
4487     {
4488       rp = sm->to_resolve + i;
4489       if (rp->addr_only == 0)
4490         continue;
4491       if (rp->sw_if_index == sw_if_index)
4492         goto match;
4493     }
4494
4495   return;
4496
4497 match:
4498   init_nat_k (&kv, *address, rp->addr_only ? 0 : rp->e_port,
4499               sm->outside_fib_index, rp->addr_only ? 0 : rp->proto);
4500   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
4501     m = 0;
4502   else
4503     m = pool_elt_at_index (sm->static_mappings, value.value);
4504
4505   if (!is_delete)
4506     {
4507       /* Don't trip over lease renewal, static config */
4508       if (m)
4509         return;
4510     }
4511   else
4512     {
4513       if (!m)
4514         return;
4515     }
4516
4517   /* Indetity mapping? */
4518   if (rp->l_addr.as_u32 == 0)
4519     l_addr.as_u32 = address[0].as_u32;
4520   else
4521     l_addr.as_u32 = rp->l_addr.as_u32;
4522   /* Add the static mapping */
4523   rv = snat_add_static_mapping (l_addr,
4524                                 address[0],
4525                                 rp->l_port,
4526                                 rp->e_port,
4527                                 rp->vrf_id,
4528                                 rp->addr_only, ~0 /* sw_if_index */ ,
4529                                 rp->proto, !is_delete, rp->twice_nat,
4530                                 rp->out2in_only, rp->tag, rp->identity_nat,
4531                                 rp->pool_addr, rp->exact);
4532   if (rv)
4533     nat_elog_notice_X1 ("snat_add_static_mapping returned %d", "i4", rv);
4534 }
4535
4536 static void
4537 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
4538                                        uword opaque,
4539                                        u32 sw_if_index,
4540                                        ip4_address_t * address,
4541                                        u32 address_length,
4542                                        u32 if_address_index, u32 is_delete)
4543 {
4544   snat_main_t *sm = &snat_main;
4545   snat_static_map_resolve_t *rp;
4546   ip4_address_t l_addr;
4547   int i, j;
4548   int rv;
4549   u8 twice_nat = 0;
4550   snat_address_t *addresses = sm->addresses;
4551
4552   if (!sm->enabled)
4553     return;
4554
4555   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices); i++)
4556     {
4557       if (sw_if_index == sm->auto_add_sw_if_indices[i])
4558         goto match;
4559     }
4560
4561   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices_twice_nat); i++)
4562     {
4563       twice_nat = 1;
4564       addresses = sm->twice_nat_addresses;
4565       if (sw_if_index == sm->auto_add_sw_if_indices_twice_nat[i])
4566         goto match;
4567     }
4568
4569   return;
4570
4571 match:
4572   if (!is_delete)
4573     {
4574       /* Don't trip over lease renewal, static config */
4575       for (j = 0; j < vec_len (addresses); j++)
4576         if (addresses[j].addr.as_u32 == address->as_u32)
4577           return;
4578
4579       (void) snat_add_address (sm, address, ~0, twice_nat);
4580       /* Scan static map resolution vector */
4581       for (j = 0; j < vec_len (sm->to_resolve); j++)
4582         {
4583           rp = sm->to_resolve + j;
4584           if (rp->addr_only)
4585             continue;
4586           /* On this interface? */
4587           if (rp->sw_if_index == sw_if_index)
4588             {
4589               /* Indetity mapping? */
4590               if (rp->l_addr.as_u32 == 0)
4591                 l_addr.as_u32 = address[0].as_u32;
4592               else
4593                 l_addr.as_u32 = rp->l_addr.as_u32;
4594               /* Add the static mapping */
4595               rv = snat_add_static_mapping (l_addr,
4596                                             address[0],
4597                                             rp->l_port,
4598                                             rp->e_port,
4599                                             rp->vrf_id,
4600                                             rp->addr_only,
4601                                             ~0 /* sw_if_index */ ,
4602                                             rp->proto,
4603                                             rp->is_add, rp->twice_nat,
4604                                             rp->out2in_only, rp->tag,
4605                                             rp->identity_nat,
4606                                             rp->pool_addr, rp->exact);
4607               if (rv)
4608                 nat_elog_notice_X1 ("snat_add_static_mapping returned %d",
4609                                     "i4", rv);
4610             }
4611         }
4612       return;
4613     }
4614   else
4615     {
4616       (void) snat_del_address (sm, address[0], 1, twice_nat);
4617       return;
4618     }
4619 }
4620
4621
4622 int
4623 snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del,
4624                             u8 twice_nat)
4625 {
4626   ip4_main_t *ip4_main = sm->ip4_main;
4627   ip4_address_t *first_int_addr;
4628   snat_static_map_resolve_t *rp;
4629   u32 *indices_to_delete = 0;
4630   int i, j;
4631   u32 *auto_add_sw_if_indices =
4632     twice_nat ? sm->
4633     auto_add_sw_if_indices_twice_nat : sm->auto_add_sw_if_indices;
4634
4635   first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0        /* just want the address */
4636     );
4637
4638   for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
4639     {
4640       if (auto_add_sw_if_indices[i] == sw_if_index)
4641         {
4642           if (is_del)
4643             {
4644               /* if have address remove it */
4645               if (first_int_addr)
4646                 (void) snat_del_address (sm, first_int_addr[0], 1, twice_nat);
4647               else
4648                 {
4649                   for (j = 0; j < vec_len (sm->to_resolve); j++)
4650                     {
4651                       rp = sm->to_resolve + j;
4652                       if (rp->sw_if_index == sw_if_index)
4653                         vec_add1 (indices_to_delete, j);
4654                     }
4655                   if (vec_len (indices_to_delete))
4656                     {
4657                       for (j = vec_len (indices_to_delete) - 1; j >= 0; j--)
4658                         vec_del1 (sm->to_resolve, j);
4659                       vec_free (indices_to_delete);
4660                     }
4661                 }
4662               if (twice_nat)
4663                 vec_del1 (sm->auto_add_sw_if_indices_twice_nat, i);
4664               else
4665                 vec_del1 (sm->auto_add_sw_if_indices, i);
4666             }
4667           else
4668             return VNET_API_ERROR_VALUE_EXIST;
4669
4670           return 0;
4671         }
4672     }
4673
4674   if (is_del)
4675     return VNET_API_ERROR_NO_SUCH_ENTRY;
4676
4677   /* add to the auto-address list */
4678   if (twice_nat)
4679     vec_add1 (sm->auto_add_sw_if_indices_twice_nat, sw_if_index);
4680   else
4681     vec_add1 (sm->auto_add_sw_if_indices, sw_if_index);
4682
4683   /* If the address is already bound - or static - add it now */
4684   if (first_int_addr)
4685     (void) snat_add_address (sm, first_int_addr, ~0, twice_nat);
4686
4687   return 0;
4688 }
4689
4690 int
4691 nat44_del_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
4692                    nat_protocol_t proto, u32 vrf_id, int is_in)
4693 {
4694   snat_main_per_thread_data_t *tsm;
4695   clib_bihash_kv_8_8_t kv, value;
4696   ip4_header_t ip;
4697   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
4698   snat_session_t *s;
4699   clib_bihash_8_8_t *t;
4700
4701   if (sm->endpoint_dependent)
4702     return VNET_API_ERROR_UNSUPPORTED;
4703
4704   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
4705   if (sm->num_workers > 1)
4706     tsm =
4707       vec_elt_at_index (sm->per_thread_data,
4708                         sm->worker_in2out_cb (&ip, fib_index, 0));
4709   else
4710     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
4711
4712   init_nat_k (&kv, *addr, port, fib_index, proto);
4713   t = is_in ? &tsm->in2out : &tsm->out2in;
4714   if (!clib_bihash_search_8_8 (t, &kv, &value))
4715     {
4716       if (pool_is_free_index (tsm->sessions, value.value))
4717         return VNET_API_ERROR_UNSPECIFIED;
4718
4719       s = pool_elt_at_index (tsm->sessions, value.value);
4720       nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
4721       nat44_delete_session (sm, s, tsm - sm->per_thread_data);
4722       return 0;
4723     }
4724
4725   return VNET_API_ERROR_NO_SUCH_ENTRY;
4726 }
4727
4728 int
4729 nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
4730                       ip4_address_t * eh_addr, u16 eh_port, u8 proto,
4731                       u32 vrf_id, int is_in)
4732 {
4733   ip4_header_t ip;
4734   clib_bihash_16_8_t *t;
4735   clib_bihash_kv_16_8_t kv, value;
4736   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
4737   snat_session_t *s;
4738   snat_main_per_thread_data_t *tsm;
4739
4740   if (!sm->endpoint_dependent)
4741     return VNET_API_ERROR_FEATURE_DISABLED;
4742
4743   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
4744   if (sm->num_workers > 1)
4745     tsm =
4746       vec_elt_at_index (sm->per_thread_data,
4747                         sm->worker_in2out_cb (&ip, fib_index, 0));
4748   else
4749     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
4750
4751   t = is_in ? &tsm->in2out_ed : &sm->out2in_ed;
4752   init_ed_k (&kv, *addr, port, *eh_addr, eh_port, fib_index, proto);
4753   if (clib_bihash_search_16_8 (t, &kv, &value))
4754     {
4755       return VNET_API_ERROR_NO_SUCH_ENTRY;
4756     }
4757
4758   if (pool_is_free_index (tsm->sessions, value.value))
4759     return VNET_API_ERROR_UNSPECIFIED;
4760   s = pool_elt_at_index (tsm->sessions, value.value);
4761   nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
4762   nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
4763   return 0;
4764 }
4765
4766 void
4767 nat_set_alloc_addr_and_port_mape (u16 psid, u16 psid_offset, u16 psid_length)
4768 {
4769   snat_main_t *sm = &snat_main;
4770
4771   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_MAPE;
4772   sm->alloc_addr_and_port = nat_alloc_addr_and_port_mape;
4773   sm->psid = psid;
4774   sm->psid_offset = psid_offset;
4775   sm->psid_length = psid_length;
4776 }
4777
4778 void
4779 nat_set_alloc_addr_and_port_range (u16 start_port, u16 end_port)
4780 {
4781   snat_main_t *sm = &snat_main;
4782
4783   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_RANGE;
4784   sm->alloc_addr_and_port = nat_alloc_addr_and_port_range;
4785   sm->start_port = start_port;
4786   sm->end_port = end_port;
4787 }
4788
4789 void
4790 nat_set_alloc_addr_and_port_default (void)
4791 {
4792   snat_main_t *sm = &snat_main;
4793
4794   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_DEFAULT;
4795   sm->alloc_addr_and_port = nat_alloc_addr_and_port_default;
4796 }
4797
/*
 * Node function of nat_default_node: it does not look at vm, node or frame
 * and simply returns 0.
 */
VLIB_NODE_FN (nat_default_node) (vlib_main_t * vm,
                                 vlib_node_runtime_t * node,
                                 vlib_frame_t * frame)
{
  return 0;
}
4804
4805 /* *INDENT-OFF* */
/*
 * Registration of nat_default_node as the internal node "nat-default".
 * It declares no errors and no trace formatter; its next_nodes table maps
 * each NAT_NEXT_* index to the name of the node listed for it.
 */
VLIB_REGISTER_NODE (nat_default_node) = {
  .name = "nat-default",
  .vector_size = sizeof (u32),
  .format_trace = 0,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = 0,
  .n_next_nodes = NAT_N_NEXT,
  .next_nodes = {
    /* Drop and ICMP error paths */
    [NAT_NEXT_DROP] = "error-drop",
    [NAT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    /* Endpoint-dependent in2out paths */
    [NAT_NEXT_IN2OUT_ED_FAST_PATH] = "nat44-ed-in2out",
    [NAT_NEXT_IN2OUT_ED_SLOW_PATH] = "nat44-ed-in2out-slowpath",
    [NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH] = "nat44-ed-in2out-output",
    [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
    /* Endpoint-dependent out2in paths */
    [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in",
    [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath",
    /* Worker handoff nodes */
    [NAT_NEXT_IN2OUT_CLASSIFY] = "nat44-in2out-worker-handoff",
    [NAT_NEXT_OUT2IN_CLASSIFY] = "nat44-out2in-worker-handoff",
  },
};
4826 /* *INDENT-ON* */
4827
4828 /*
4829  * fd.io coding-style-patch-verification: ON
4830  *
4831  * Local Variables:
4832  * eval: (c-set-style "gnu")
4833  * End:
4834  */