cb0e346c8fc8d054dba42c4f4c3bc26ce5127200
[vpp.git] / src / plugins / nat / nat.c
1 /*
2  * nat.c - simple nat plugin
3  *
4  * Copyright (c) 2016 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/vnet.h>
19 #include <vnet/ip/ip.h>
20 #include <vnet/ip/ip4.h>
21 #include <vnet/plugin/plugin.h>
22 #include <nat/nat.h>
23 #include <nat/nat_dpo.h>
24 #include <nat/lib/ipfix_logging.h>
25 #include <nat/nat_inlines.h>
26 #include <nat/nat44/inlines.h>
27 #include <nat/nat_affinity.h>
28 #include <nat/nat_syslog.h>
29 #include <nat/nat_ha.h>
30 #include <vnet/fib/fib_table.h>
31 #include <vnet/fib/ip4_fib.h>
32 #include <vnet/ip/reass/ip4_sv_reass.h>
33 #include <vppinfra/bihash_16_8.h>
34 #include <nat/nat44/ed_inlines.h>
35 #include <vnet/ip/ip_table.h>
36
37 #include <vpp/app/version.h>
38
39 snat_main_t snat_main;
40
41 fib_source_t nat_fib_src_hi;
42 fib_source_t nat_fib_src_low;
43
44 /* *INDENT-OFF* */
45 /* Hook up input features */
46 VNET_FEATURE_INIT (nat_pre_in2out, static) = {
47   .arc_name = "ip4-unicast",
48   .node_name = "nat-pre-in2out",
49   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
50                                "ip4-sv-reassembly-feature"),
51 };
52 VNET_FEATURE_INIT (nat_pre_out2in, static) = {
53   .arc_name = "ip4-unicast",
54   .node_name = "nat-pre-out2in",
55   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
56                                "ip4-dhcp-client-detect",
57                                "ip4-sv-reassembly-feature"),
58 };
59 VNET_FEATURE_INIT (snat_in2out_worker_handoff, static) = {
60   .arc_name = "ip4-unicast",
61   .node_name = "nat44-in2out-worker-handoff",
62   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
63 };
64 VNET_FEATURE_INIT (snat_out2in_worker_handoff, static) = {
65   .arc_name = "ip4-unicast",
66   .node_name = "nat44-out2in-worker-handoff",
67   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
68                                "ip4-dhcp-client-detect"),
69 };
70 VNET_FEATURE_INIT (ip4_snat_in2out, static) = {
71   .arc_name = "ip4-unicast",
72   .node_name = "nat44-in2out",
73   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
74 };
75 VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
76   .arc_name = "ip4-unicast",
77   .node_name = "nat44-out2in",
78   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
79                                "ip4-dhcp-client-detect"),
80 };
81 VNET_FEATURE_INIT (ip4_nat_classify, static) = {
82   .arc_name = "ip4-unicast",
83   .node_name = "nat44-classify",
84   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
85 };
86 VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = {
87   .arc_name = "ip4-unicast",
88   .node_name = "nat44-ed-in2out",
89   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
90 };
91 VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = {
92   .arc_name = "ip4-unicast",
93   .node_name = "nat44-ed-out2in",
94   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
95                                "ip4-dhcp-client-detect"),
96 };
97 VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
98   .arc_name = "ip4-unicast",
99   .node_name = "nat44-ed-classify",
100   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
101 };
102 VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
103   .arc_name = "ip4-unicast",
104   .node_name = "nat44-handoff-classify",
105   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
106 };
107 VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
108   .arc_name = "ip4-unicast",
109   .node_name = "nat44-in2out-fast",
110   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
111 };
112 VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = {
113   .arc_name = "ip4-unicast",
114   .node_name = "nat44-out2in-fast",
115   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
116                                "ip4-dhcp-client-detect"),
117 };
118 VNET_FEATURE_INIT (ip4_snat_hairpin_dst, static) = {
119   .arc_name = "ip4-unicast",
120   .node_name = "nat44-hairpin-dst",
121   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
122 };
123 VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_dst, static) = {
124   .arc_name = "ip4-unicast",
125   .node_name = "nat44-ed-hairpin-dst",
126   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
127 };
128
129 /* Hook up output features */
130 VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = {
131   .arc_name = "ip4-output",
132   .node_name = "nat44-in2out-output",
133   .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
134 };
135 VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = {
136   .arc_name = "ip4-output",
137   .node_name = "nat44-in2out-output-worker-handoff",
138   .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
139 };
140 VNET_FEATURE_INIT (ip4_snat_hairpin_src, static) = {
141   .arc_name = "ip4-output",
142   .node_name = "nat44-hairpin-src",
143   .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
144 };
145 VNET_FEATURE_INIT (nat_pre_in2out_output, static) = {
146   .arc_name = "ip4-output",
147   .node_name = "nat-pre-in2out-output",
148   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
149   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
150 };
151 VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = {
152   .arc_name = "ip4-output",
153   .node_name = "nat44-ed-in2out-output",
154   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
155   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
156 };
157 VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_src, static) = {
158   .arc_name = "ip4-output",
159   .node_name = "nat44-ed-hairpin-src",
160   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
161   .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
162 };
163
164 /* Hook up ip4-local features */
165 VNET_FEATURE_INIT (ip4_nat_hairpinning, static) =
166 {
167   .arc_name = "ip4-local",
168   .node_name = "nat44-hairpinning",
169   .runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
170 };
171 VNET_FEATURE_INIT (ip4_nat44_ed_hairpinning, static) =
172 {
173   .arc_name = "ip4-local",
174   .node_name = "nat44-ed-hairpinning",
175   .runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
176 };
177
178
179 VLIB_PLUGIN_REGISTER () = {
180     .version = VPP_BUILD_VER,
181     .description = "Network Address Translation (NAT)",
182 };
183 /* *INDENT-ON* */
184
185 static u32
186 nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip,
187                                u32 rx_fib_index, u8 is_output);
188
189 static u32
190 nat44_ed_get_worker_in2out_cb (ip4_header_t * ip, u32 rx_fib_index,
191                                u8 is_output);
192
193 static u32
194 snat_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip0,
195                            u32 rx_fib_index0, u8 is_output);
196
197 static u32
198 snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0,
199                            u8 is_output);
200
201 static u32 nat_calc_bihash_buckets (u32 n_elts);
202
203 static u32 nat_calc_bihash_memory (u32 n_buckets, uword kv_size);
204
205 u8 *format_static_mapping_kvp (u8 * s, va_list * args);
206
207 u8 *format_ed_session_kvp (u8 * s, va_list * args);
208
209 void
210 nat_ha_sadd_cb (ip4_address_t * in_addr, u16 in_port,
211                 ip4_address_t * out_addr, u16 out_port,
212                 ip4_address_t * eh_addr, u16 eh_port,
213                 ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
214                 u32 fib_index, u16 flags, u32 thread_index);
215
216 void
217 nat_ha_sdel_cb (ip4_address_t * out_addr, u16 out_port,
218                 ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
219                 u32 ti);
220
221 void
222 nat_ha_sref_cb (ip4_address_t * out_addr, u16 out_port,
223                 ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
224                 u32 total_pkts, u64 total_bytes, u32 thread_index);
225
226 void
227 nat_ha_sadd_ed_cb (ip4_address_t * in_addr, u16 in_port,
228                    ip4_address_t * out_addr, u16 out_port,
229                    ip4_address_t * eh_addr, u16 eh_port,
230                    ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
231                    u32 fib_index, u16 flags, u32 thread_index);
232
233 void
234 nat_ha_sdel_ed_cb (ip4_address_t * out_addr, u16 out_port,
235                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
236                    u32 fib_index, u32 ti);
237
238 void
239 nat_ha_sdel_ed_cb (ip4_address_t * out_addr, u16 out_port,
240                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
241                    u32 fib_index, u32 ti);
242
243 void
244 nat_ha_sref_ed_cb (ip4_address_t * out_addr, u16 out_port,
245                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
246                    u32 fib_index, u32 total_pkts, u64 total_bytes,
247                    u32 thread_index);
248
249 void
250 nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index,
251                        u8 is_ha)
252 {
253   clib_bihash_kv_8_8_t kv;
254   u8 proto;
255   u16 r_port, l_port;
256   ip4_address_t *l_addr, *r_addr;
257   u32 fib_index = 0;
258   clib_bihash_kv_16_8_t ed_kv;
259   snat_main_per_thread_data_t *tsm =
260     vec_elt_at_index (sm->per_thread_data, thread_index);
261
262   if (is_ed_session (s))
263     {
264       per_vrf_sessions_unregister_session (s, thread_index);
265     }
266
267   if (is_fwd_bypass_session (s))
268     {
269       if (snat_is_unk_proto_session (s))
270         {
271           init_ed_k (&ed_kv, s->in2out.addr, 0, s->ext_host_addr, 0, 0,
272                      s->in2out.port);
273         }
274       else
275         {
276           l_port = s->in2out.port;
277           r_port = s->ext_host_port;
278           l_addr = &s->in2out.addr;
279           r_addr = &s->ext_host_addr;
280           proto = nat_proto_to_ip_proto (s->nat_proto);
281           fib_index = s->in2out.fib_index;
282           init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index,
283                      proto);
284         }
285       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
286         nat_elog_warn ("in2out_ed key del failed");
287       return;
288     }
289
290   /* session lookup tables */
291   if (is_ed_session (s))
292     {
293       if (is_affinity_sessions (s))
294         nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
295                              s->nat_proto, s->out2in.port);
296       l_addr = &s->out2in.addr;
297       r_addr = &s->ext_host_addr;
298       fib_index = s->out2in.fib_index;
299       if (snat_is_unk_proto_session (s))
300         {
301           proto = s->in2out.port;
302           r_port = 0;
303           l_port = 0;
304         }
305       else
306         {
307           proto = nat_proto_to_ip_proto (s->nat_proto);
308           l_port = s->out2in.port;
309           r_port = s->ext_host_port;
310         }
311       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
312       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0))
313         nat_elog_warn ("out2in_ed key del failed");
314       l_addr = &s->in2out.addr;
315       fib_index = s->in2out.fib_index;
316       if (!snat_is_unk_proto_session (s))
317         l_port = s->in2out.port;
318       if (is_twice_nat_session (s))
319         {
320           r_addr = &s->ext_host_nat_addr;
321           r_port = s->ext_host_nat_port;
322         }
323       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
324       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
325         nat_elog_warn ("in2out_ed key del failed");
326
327       if (!is_ha)
328         nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
329                                &s->in2out.addr, s->in2out.port,
330                                &s->ext_host_nat_addr, s->ext_host_nat_port,
331                                &s->out2in.addr, s->out2in.port,
332                                &s->ext_host_addr, s->ext_host_port,
333                                s->nat_proto, is_twice_nat_session (s));
334     }
335   else
336     {
337       init_nat_i2o_k (&kv, s);
338       if (clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 0))
339         nat_elog_warn ("in2out key del failed");
340       init_nat_o2i_k (&kv, s);
341       if (clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 0))
342         nat_elog_warn ("out2in key del failed");
343
344       if (!is_ha)
345         nat_syslog_nat44_apmdel (s->user_index, s->in2out.fib_index,
346                                  &s->in2out.addr, s->in2out.port,
347                                  &s->out2in.addr, s->out2in.port,
348                                  s->nat_proto);
349     }
350
351   if (snat_is_unk_proto_session (s))
352     return;
353
354   if (!is_ha)
355     {
356       /* log NAT event */
357       nat_ipfix_logging_nat44_ses_delete (thread_index,
358                                           s->in2out.addr.as_u32,
359                                           s->out2in.addr.as_u32,
360                                           s->nat_proto,
361                                           s->in2out.port,
362                                           s->out2in.port,
363                                           s->in2out.fib_index);
364
365       nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
366                    s->ext_host_port, s->nat_proto, s->out2in.fib_index,
367                    thread_index);
368     }
369
370   /* Twice NAT address and port for external host */
371   if (is_twice_nat_session (s))
372     {
373       snat_free_outside_address_and_port (sm->twice_nat_addresses,
374                                           thread_index,
375                                           &s->ext_host_nat_addr,
376                                           s->ext_host_nat_port, s->nat_proto);
377     }
378
379   if (snat_is_session_static (s))
380     return;
381
382   snat_free_outside_address_and_port (sm->addresses, thread_index,
383                                       &s->out2in.addr, s->out2in.port,
384                                       s->nat_proto);
385 }
386
387 void
388 nat44_free_session_data (snat_main_t * sm, snat_session_t * s,
389                          u32 thread_index, u8 is_ha)
390 {
391   u8 proto;
392   u16 r_port, l_port;
393   ip4_address_t *l_addr, *r_addr;
394   u32 fib_index;
395   clib_bihash_kv_16_8_t ed_kv;
396   snat_main_per_thread_data_t *tsm =
397     vec_elt_at_index (sm->per_thread_data, thread_index);
398
399   if (is_fwd_bypass_session (s))
400     {
401       if (snat_is_unk_proto_session (s))
402         {
403           proto = s->in2out.port;
404           r_port = 0;
405           l_port = 0;
406         }
407       else
408         {
409           proto = nat_proto_to_ip_proto (s->nat_proto);
410           l_port = s->in2out.port;
411           r_port = s->ext_host_port;
412         }
413
414       l_addr = &s->in2out.addr;
415       r_addr = &s->ext_host_addr;
416       fib_index = 0;
417       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
418
419       if (PREDICT_FALSE
420           (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0)))
421         nat_elog_warn ("in2out_ed key del failed");
422       return;
423     }
424
425   /* session lookup tables */
426   if (is_affinity_sessions (s))
427     nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
428                          s->nat_proto, s->out2in.port);
429   l_addr = &s->out2in.addr;
430   r_addr = &s->ext_host_addr;
431   fib_index = s->out2in.fib_index;
432   if (snat_is_unk_proto_session (s))
433     {
434       proto = s->in2out.port;
435       r_port = 0;
436       l_port = 0;
437     }
438   else
439     {
440       proto = nat_proto_to_ip_proto (s->nat_proto);
441       l_port = s->out2in.port;
442       r_port = s->ext_host_port;
443     }
444   init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
445
446   if (PREDICT_FALSE (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0)))
447     nat_elog_warn ("out2in_ed key del failed");
448
449   l_addr = &s->in2out.addr;
450   fib_index = s->in2out.fib_index;
451
452   if (!snat_is_unk_proto_session (s))
453     l_port = s->in2out.port;
454
455   if (is_twice_nat_session (s))
456     {
457       r_addr = &s->ext_host_nat_addr;
458       r_port = s->ext_host_nat_port;
459     }
460   init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
461
462   if (PREDICT_FALSE (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0)))
463     nat_elog_warn ("in2out_ed key del failed");
464
465   if (!is_ha)
466     {
467       nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
468                              &s->in2out.addr, s->in2out.port,
469                              &s->ext_host_nat_addr, s->ext_host_nat_port,
470                              &s->out2in.addr, s->out2in.port,
471                              &s->ext_host_addr, s->ext_host_port,
472                              s->nat_proto, is_twice_nat_session (s));
473     }
474
475   if (snat_is_unk_proto_session (s))
476     return;
477
478   if (!is_ha)
479     {
480       nat_ipfix_logging_nat44_ses_delete (thread_index,
481                                           s->in2out.addr.as_u32,
482                                           s->out2in.addr.as_u32,
483                                           s->nat_proto,
484                                           s->in2out.port,
485                                           s->out2in.port,
486                                           s->in2out.fib_index);
487       nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
488                    s->ext_host_port, s->nat_proto, s->out2in.fib_index,
489                    thread_index);
490     }
491
492   /* Twice NAT address and port for external host */
493   if (is_twice_nat_session (s))
494     {
495       snat_free_outside_address_and_port (sm->twice_nat_addresses,
496                                           thread_index,
497                                           &s->ext_host_nat_addr,
498                                           s->ext_host_nat_port, s->nat_proto);
499     }
500
501   if (snat_is_session_static (s))
502     return;
503
504   snat_free_outside_address_and_port (sm->addresses, thread_index,
505                                       &s->out2in.addr, s->out2in.port,
506                                       s->nat_proto);
507 }
508
509
510 snat_user_t *
511 nat_user_get_or_create (snat_main_t * sm, ip4_address_t * addr, u32 fib_index,
512                         u32 thread_index)
513 {
514   snat_user_t *u = 0;
515   snat_user_key_t user_key;
516   clib_bihash_kv_8_8_t kv, value;
517   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
518   dlist_elt_t *per_user_list_head_elt;
519
520   user_key.addr.as_u32 = addr->as_u32;
521   user_key.fib_index = fib_index;
522   kv.key = user_key.as_u64;
523
524   /* Ever heard of the "user" = src ip4 address before? */
525   if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
526     {
527       if (pool_elts (tsm->users) >= sm->max_users_per_thread)
528         {
529           vlib_increment_simple_counter (&sm->user_limit_reached,
530                                          thread_index, 0, 1);
531           nat_elog_warn ("maximum user limit reached");
532           return NULL;
533         }
534       /* no, make a new one */
535       pool_get (tsm->users, u);
536       clib_memset (u, 0, sizeof (*u));
537
538       u->addr.as_u32 = addr->as_u32;
539       u->fib_index = fib_index;
540
541       pool_get (tsm->list_pool, per_user_list_head_elt);
542
543       u->sessions_per_user_list_head_index = per_user_list_head_elt -
544         tsm->list_pool;
545
546       clib_dlist_init (tsm->list_pool, u->sessions_per_user_list_head_index);
547
548       kv.value = u - tsm->users;
549
550       /* add user */
551       if (clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1))
552         {
553           nat_elog_warn ("user_hash key add failed");
554           nat44_delete_user_with_no_session (sm, u, thread_index);
555           return NULL;
556         }
557
558       vlib_set_simple_counter (&sm->total_users, thread_index, 0,
559                                pool_elts (tsm->users));
560     }
561   else
562     {
563       u = pool_elt_at_index (tsm->users, value.value);
564     }
565
566   return u;
567 }
568
569 snat_session_t *
570 nat_session_alloc_or_recycle (snat_main_t * sm, snat_user_t * u,
571                               u32 thread_index, f64 now)
572 {
573   snat_session_t *s;
574   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
575   u32 oldest_per_user_translation_list_index, session_index;
576   dlist_elt_t *oldest_per_user_translation_list_elt;
577   dlist_elt_t *per_user_translation_list_elt;
578
579   /* Over quota? Recycle the least recently used translation */
580   if ((u->nsessions + u->nstaticsessions) >= sm->max_translations_per_user)
581     {
582       oldest_per_user_translation_list_index =
583         clib_dlist_remove_head (tsm->list_pool,
584                                 u->sessions_per_user_list_head_index);
585
586       ASSERT (oldest_per_user_translation_list_index != ~0);
587
588       /* Add it back to the end of the LRU list */
589       clib_dlist_addtail (tsm->list_pool,
590                           u->sessions_per_user_list_head_index,
591                           oldest_per_user_translation_list_index);
592       /* Get the list element */
593       oldest_per_user_translation_list_elt =
594         pool_elt_at_index (tsm->list_pool,
595                            oldest_per_user_translation_list_index);
596
597       /* Get the session index from the list element */
598       session_index = oldest_per_user_translation_list_elt->value;
599
600       /* Get the session */
601       s = pool_elt_at_index (tsm->sessions, session_index);
602       nat_free_session_data (sm, s, thread_index, 0);
603       if (snat_is_session_static (s))
604         u->nstaticsessions--;
605       else
606         u->nsessions--;
607       s->flags = 0;
608       s->total_bytes = 0;
609       s->total_pkts = 0;
610       s->state = 0;
611       s->ext_host_addr.as_u32 = 0;
612       s->ext_host_port = 0;
613       s->ext_host_nat_addr.as_u32 = 0;
614       s->ext_host_nat_port = 0;
615     }
616   else
617     {
618       pool_get (tsm->sessions, s);
619       clib_memset (s, 0, sizeof (*s));
620
621       /* Create list elts */
622       pool_get (tsm->list_pool, per_user_translation_list_elt);
623       clib_dlist_init (tsm->list_pool,
624                        per_user_translation_list_elt - tsm->list_pool);
625
626       per_user_translation_list_elt->value = s - tsm->sessions;
627       s->per_user_index = per_user_translation_list_elt - tsm->list_pool;
628       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
629
630       clib_dlist_addtail (tsm->list_pool,
631                           s->per_user_list_head_index,
632                           per_user_translation_list_elt - tsm->list_pool);
633
634       s->user_index = u - tsm->users;
635       vlib_set_simple_counter (&sm->total_sessions, thread_index, 0,
636                                pool_elts (tsm->sessions));
637     }
638
639   s->ha_last_refreshed = now;
640
641   return s;
642 }
643
644 void
645 snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index,
646                           int is_add)
647 {
648   fib_prefix_t prefix = {
649     .fp_len = p_len,
650     .fp_proto = FIB_PROTOCOL_IP4,
651     .fp_addr = {
652                 .ip4.as_u32 = addr->as_u32,
653                 },
654   };
655   u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
656
657   if (is_add)
658     fib_table_entry_update_one_path (fib_index,
659                                      &prefix,
660                                      nat_fib_src_low,
661                                      (FIB_ENTRY_FLAG_CONNECTED |
662                                       FIB_ENTRY_FLAG_LOCAL |
663                                       FIB_ENTRY_FLAG_EXCLUSIVE),
664                                      DPO_PROTO_IP4,
665                                      NULL,
666                                      sw_if_index,
667                                      ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
668   else
669     fib_table_entry_delete (fib_index, &prefix, nat_fib_src_low);
670 }
671
672 int
673 snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id,
674                   u8 twice_nat)
675 {
676   snat_address_t *ap;
677   snat_interface_t *i;
678   vlib_thread_main_t *tm = vlib_get_thread_main ();
679
680   if (twice_nat && !sm->endpoint_dependent)
681     {
682       nat_log_err ("unsupported");
683       return VNET_API_ERROR_UNSUPPORTED;
684     }
685
686   /* Check if address already exists */
687   /* *INDENT-OFF* */
688   vec_foreach (ap, twice_nat ? sm->twice_nat_addresses : sm->addresses)
689     {
690       if (ap->addr.as_u32 == addr->as_u32)
691         {
692           nat_log_err ("address exist");
693           return VNET_API_ERROR_VALUE_EXIST;
694         }
695     }
696   /* *INDENT-ON* */
697
698   if (twice_nat)
699     vec_add2 (sm->twice_nat_addresses, ap, 1);
700   else
701     vec_add2 (sm->addresses, ap, 1);
702
703   ap->addr = *addr;
704   if (vrf_id != ~0)
705     ap->fib_index =
706       fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
707                                          nat_fib_src_low);
708   else
709     ap->fib_index = ~0;
710
711   /* *INDENT-OFF* */
712   #define _(N, i, n, s) \
713     clib_memset(ap->busy_##n##_port_refcounts, 0, sizeof(ap->busy_##n##_port_refcounts));\
714     ap->busy_##n##_ports = 0; \
715     ap->busy_##n##_ports_per_thread = 0;\
716     vec_validate_init_empty (ap->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
717     foreach_nat_protocol
718   #undef _
719   /* *INDENT-ON* */
720
721   if (twice_nat)
722     return 0;
723
724   /* Add external address to FIB */
725   /* *INDENT-OFF* */
726   pool_foreach (i, sm->interfaces,
727   ({
728     if (nat_interface_is_inside(i) || sm->out2in_dpo)
729       continue;
730
731     snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1);
732     break;
733   }));
734   pool_foreach (i, sm->output_feature_interfaces,
735   ({
736     if (nat_interface_is_inside(i) || sm->out2in_dpo)
737       continue;
738
739     snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1);
740     break;
741   }));
742   /* *INDENT-ON* */
743
744   return 0;
745 }
746
747 static int
748 is_snat_address_used_in_static_mapping (snat_main_t * sm, ip4_address_t addr)
749 {
750   snat_static_mapping_t *m;
751   /* *INDENT-OFF* */
752   pool_foreach (m, sm->static_mappings,
753   ({
754       if (is_addr_only_static_mapping (m) ||
755           is_out2in_only_static_mapping (m) ||
756           is_identity_static_mapping (m))
757         continue;
758       if (m->external_addr.as_u32 == addr.as_u32)
759         return 1;
760   }));
761   /* *INDENT-ON* */
762
763   return 0;
764 }
765
766 static void
767 snat_add_static_mapping_when_resolved (snat_main_t * sm,
768                                        ip4_address_t l_addr,
769                                        u16 l_port,
770                                        u32 sw_if_index,
771                                        u16 e_port,
772                                        u32 vrf_id,
773                                        nat_protocol_t proto,
774                                        int addr_only, int is_add, u8 * tag,
775                                        int twice_nat, int out2in_only,
776                                        int identity_nat,
777                                        ip4_address_t pool_addr, int exact)
778 {
779   snat_static_map_resolve_t *rp;
780
781   vec_add2 (sm->to_resolve, rp, 1);
782   rp->l_addr.as_u32 = l_addr.as_u32;
783   rp->l_port = l_port;
784   rp->sw_if_index = sw_if_index;
785   rp->e_port = e_port;
786   rp->vrf_id = vrf_id;
787   rp->proto = proto;
788   rp->addr_only = addr_only;
789   rp->is_add = is_add;
790   rp->twice_nat = twice_nat;
791   rp->out2in_only = out2in_only;
792   rp->identity_nat = identity_nat;
793   rp->tag = vec_dup (tag);
794   rp->pool_addr = pool_addr;
795   rp->exact = exact;
796 }
797
798 static u32
799 get_thread_idx_by_port (u16 e_port)
800 {
801   snat_main_t *sm = &snat_main;
802   u32 thread_idx = sm->num_workers;
803   if (sm->num_workers > 1)
804     {
805       thread_idx =
806         sm->first_worker_index +
807         sm->workers[(e_port - 1024) / sm->port_per_thread];
808     }
809   return thread_idx;
810 }
811
812 void
813 snat_static_mapping_del_sessions (snat_main_t * sm,
814                                   snat_main_per_thread_data_t * tsm,
815                                   snat_user_key_t u_key, int addr_only,
816                                   ip4_address_t e_addr, u16 e_port)
817 {
818   clib_bihash_kv_8_8_t kv, value;
819   kv.key = u_key.as_u64;
820   u64 user_index;
821   dlist_elt_t *head, *elt;
822   snat_user_t *u;
823   snat_session_t *s;
824   u32 elt_index, head_index, ses_index;
825   if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
826     {
827       user_index = value.value;
828       u = pool_elt_at_index (tsm->users, user_index);
829       if (u->nstaticsessions)
830         {
831           head_index = u->sessions_per_user_list_head_index;
832           head = pool_elt_at_index (tsm->list_pool, head_index);
833           elt_index = head->next;
834           elt = pool_elt_at_index (tsm->list_pool, elt_index);
835           ses_index = elt->value;
836           while (ses_index != ~0)
837             {
838               s = pool_elt_at_index (tsm->sessions, ses_index);
839               elt = pool_elt_at_index (tsm->list_pool, elt->next);
840               ses_index = elt->value;
841
842               if (!addr_only)
843                 {
844                   if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
845                       (s->out2in.port != e_port))
846                     continue;
847                 }
848
849               if (is_lb_session (s))
850                 continue;
851
852               if (!snat_is_session_static (s))
853                 continue;
854
855               nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
856               nat44_delete_session (sm, s, tsm - sm->per_thread_data);
857
858               if (!addr_only)
859                 break;
860             }
861         }
862     }
863 }
864
865 void
866 snat_ed_static_mapping_del_sessions (snat_main_t * sm,
867                                      snat_main_per_thread_data_t * tsm,
868                                      ip4_address_t l_addr,
869                                      u16 l_port,
870                                      u8 protocol,
871                                      u32 fib_index, int addr_only,
872                                      ip4_address_t e_addr, u16 e_port)
873 {
874   snat_session_t *s;
875   u32 *indexes_to_free = NULL;
876   /* *INDENT-OFF* */
877   pool_foreach (s, tsm->sessions, {
878     if (s->in2out.fib_index != fib_index ||
879         s->in2out.addr.as_u32 != l_addr.as_u32)
880       {
881         continue;
882       }
883     if (!addr_only)
884       {
885         if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
886             s->out2in.port != e_port ||
887             s->in2out.port != l_port ||
888             s->nat_proto != protocol)
889           continue;
890       }
891
892     if (is_lb_session (s))
893       continue;
894     if (!snat_is_session_static (s))
895       continue;
896     nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
897     vec_add1 (indexes_to_free, s - tsm->sessions);
898     if (!addr_only)
899       break;
900   });
901   /* *INDENT-ON* */
902   u32 *ses_index;
903   vec_foreach (ses_index, indexes_to_free)
904   {
905     s = pool_elt_at_index (tsm->sessions, *ses_index);
906     nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
907   }
908   vec_free (indexes_to_free);
909 }
910
911 int
912 snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
913                          u16 l_port, u16 e_port, u32 vrf_id, int addr_only,
914                          u32 sw_if_index, nat_protocol_t proto, int is_add,
915                          twice_nat_type_t twice_nat, u8 out2in_only, u8 * tag,
916                          u8 identity_nat, ip4_address_t pool_addr, int exact)
917 {
918   snat_main_t *sm = &snat_main;
919   snat_static_mapping_t *m;
920   clib_bihash_kv_8_8_t kv, value;
921   snat_address_t *a = 0;
922   u32 fib_index = ~0;
923   snat_interface_t *interface;
924   int i;
925   snat_main_per_thread_data_t *tsm;
926   snat_user_key_t u_key;
927   snat_user_t *u;
928   dlist_elt_t *head, *elt;
929   u32 elt_index, head_index;
930   u32 ses_index;
931   u64 user_index;
932   snat_session_t *s;
933   snat_static_map_resolve_t *rp, *rp_match = 0;
934   nat44_lb_addr_port_t *local;
935   u32 find = ~0;
936
937   if (!sm->endpoint_dependent)
938     {
939       if (twice_nat || out2in_only)
940         return VNET_API_ERROR_FEATURE_DISABLED;
941     }
942
943   /* If the external address is a specific interface address */
944   if (sw_if_index != ~0)
945     {
946       ip4_address_t *first_int_addr;
947
948       for (i = 0; i < vec_len (sm->to_resolve); i++)
949         {
950           rp = sm->to_resolve + i;
951           if (rp->sw_if_index != sw_if_index ||
952               rp->l_addr.as_u32 != l_addr.as_u32 ||
953               rp->vrf_id != vrf_id || rp->addr_only != addr_only)
954             continue;
955
956           if (!addr_only)
957             {
958               if ((rp->l_port != l_port && rp->e_port != e_port)
959                   || rp->proto != proto)
960                 continue;
961             }
962
963           rp_match = rp;
964           break;
965         }
966
967       /* Might be already set... */
968       first_int_addr = ip4_interface_first_address
969         (sm->ip4_main, sw_if_index, 0 /* just want the address */ );
970
971       if (is_add)
972         {
973           if (rp_match)
974             return VNET_API_ERROR_VALUE_EXIST;
975
976           snat_add_static_mapping_when_resolved
977             (sm, l_addr, l_port, sw_if_index, e_port, vrf_id, proto,
978              addr_only, is_add, tag, twice_nat, out2in_only,
979              identity_nat, pool_addr, exact);
980
981           /* DHCP resolution required? */
982           if (first_int_addr == 0)
983             {
984               return 0;
985             }
986           else
987             {
988               e_addr.as_u32 = first_int_addr->as_u32;
989               /* Identity mapping? */
990               if (l_addr.as_u32 == 0)
991                 l_addr.as_u32 = e_addr.as_u32;
992             }
993         }
994       else
995         {
996           if (!rp_match)
997             return VNET_API_ERROR_NO_SUCH_ENTRY;
998
999           vec_del1 (sm->to_resolve, i);
1000
1001           if (first_int_addr)
1002             {
1003               e_addr.as_u32 = first_int_addr->as_u32;
1004               /* Identity mapping? */
1005               if (l_addr.as_u32 == 0)
1006                 l_addr.as_u32 = e_addr.as_u32;
1007             }
1008           else
1009             return 0;
1010         }
1011     }
1012
1013   init_nat_k (&kv, e_addr, addr_only ? 0 : e_port, 0, addr_only ? 0 : proto);
1014   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1015     m = 0;
1016   else
1017     m = pool_elt_at_index (sm->static_mappings, value.value);
1018
1019   if (is_add)
1020     {
1021       if (m)
1022         {
1023           if (is_identity_static_mapping (m))
1024             {
1025               /* *INDENT-OFF* */
1026               pool_foreach (local, m->locals,
1027               ({
1028                 if (local->vrf_id == vrf_id)
1029                   return VNET_API_ERROR_VALUE_EXIST;
1030               }));
1031               /* *INDENT-ON* */
1032               pool_get (m->locals, local);
1033               local->vrf_id = vrf_id;
1034               local->fib_index =
1035                 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1036                                                    nat_fib_src_low);
1037               init_nat_kv (&kv, m->local_addr, m->local_port,
1038                            local->fib_index, m->proto,
1039                            m - sm->static_mappings);
1040               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
1041               return 0;
1042             }
1043           else
1044             return VNET_API_ERROR_VALUE_EXIST;
1045         }
1046
1047       if (twice_nat && addr_only)
1048         return VNET_API_ERROR_UNSUPPORTED;
1049
1050       /* Convert VRF id to FIB index */
1051       if (vrf_id != ~0)
1052         fib_index =
1053           fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1054                                              nat_fib_src_low);
1055       /* If not specified use inside VRF id from SNAT plugin startup config */
1056       else
1057         {
1058           fib_index = sm->inside_fib_index;
1059           vrf_id = sm->inside_vrf_id;
1060           fib_table_lock (fib_index, FIB_PROTOCOL_IP4, nat_fib_src_low);
1061         }
1062
1063       if (!(out2in_only || identity_nat))
1064         {
1065           init_nat_k (&kv, l_addr, addr_only ? 0 : l_port, fib_index,
1066                       addr_only ? 0 : proto);
1067           if (!clib_bihash_search_8_8
1068               (&sm->static_mapping_by_local, &kv, &value))
1069             return VNET_API_ERROR_VALUE_EXIST;
1070         }
1071
1072       /* Find external address in allocated addresses and reserve port for
1073          address and port pair mapping when dynamic translations enabled */
1074       if (!(addr_only || sm->static_mapping_only || out2in_only))
1075         {
1076           for (i = 0; i < vec_len (sm->addresses); i++)
1077             {
1078               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1079                 {
1080                   a = sm->addresses + i;
1081                   /* External port must be unused */
1082                   switch (proto)
1083                     {
1084 #define _(N, j, n, s) \
1085                     case NAT_PROTOCOL_##N: \
1086                       if (a->busy_##n##_port_refcounts[e_port]) \
1087                         return VNET_API_ERROR_INVALID_VALUE; \
1088                       ++a->busy_##n##_port_refcounts[e_port]; \
1089                       if (e_port > 1024) \
1090                         { \
1091                           a->busy_##n##_ports++; \
1092                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
1093                         } \
1094                       break;
1095                       foreach_nat_protocol
1096 #undef _
1097                     default:
1098                       nat_elog_info ("unknown protocol");
1099                       return VNET_API_ERROR_INVALID_VALUE_2;
1100                     }
1101                   break;
1102                 }
1103             }
1104           /* External address must be allocated */
1105           if (!a && (l_addr.as_u32 != e_addr.as_u32))
1106             {
1107               if (sw_if_index != ~0)
1108                 {
1109                   for (i = 0; i < vec_len (sm->to_resolve); i++)
1110                     {
1111                       rp = sm->to_resolve + i;
1112                       if (rp->addr_only)
1113                         continue;
1114                       if (rp->sw_if_index != sw_if_index &&
1115                           rp->l_addr.as_u32 != l_addr.as_u32 &&
1116                           rp->vrf_id != vrf_id && rp->l_port != l_port &&
1117                           rp->e_port != e_port && rp->proto != proto)
1118                         continue;
1119
1120                       vec_del1 (sm->to_resolve, i);
1121                       break;
1122                     }
1123                 }
1124               return VNET_API_ERROR_NO_SUCH_ENTRY;
1125             }
1126         }
1127
1128       pool_get (sm->static_mappings, m);
1129       clib_memset (m, 0, sizeof (*m));
1130       m->tag = vec_dup (tag);
1131       m->local_addr = l_addr;
1132       m->external_addr = e_addr;
1133       m->twice_nat = twice_nat;
1134
1135       if (twice_nat == TWICE_NAT && exact)
1136         {
1137           m->flags |= NAT_STATIC_MAPPING_FLAG_EXACT_ADDRESS;
1138           m->pool_addr = pool_addr;
1139         }
1140
1141       if (out2in_only)
1142         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
1143       if (addr_only)
1144         m->flags |= NAT_STATIC_MAPPING_FLAG_ADDR_ONLY;
1145       if (identity_nat)
1146         {
1147           m->flags |= NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT;
1148           pool_get (m->locals, local);
1149           local->vrf_id = vrf_id;
1150           local->fib_index = fib_index;
1151         }
1152       else
1153         {
1154           m->vrf_id = vrf_id;
1155           m->fib_index = fib_index;
1156         }
1157       if (!addr_only)
1158         {
1159           m->local_port = l_port;
1160           m->external_port = e_port;
1161           m->proto = proto;
1162         }
1163
1164       if (sm->num_workers > 1)
1165         {
1166           ip4_header_t ip = {
1167             .src_address = m->local_addr,
1168           };
1169           vec_add1 (m->workers, sm->worker_in2out_cb (&ip, m->fib_index, 0));
1170           tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
1171         }
1172       else
1173         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1174
1175       init_nat_kv (&kv, m->local_addr, m->local_port, fib_index, m->proto,
1176                    m - sm->static_mappings);
1177       if (!out2in_only)
1178         clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
1179
1180       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto,
1181                    m - sm->static_mappings);
1182       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1);
1183
1184       /* Delete dynamic sessions matching local address (+ local port) */
1185       if (!(sm->static_mapping_only))
1186         {
1187           u_key.addr = m->local_addr;
1188           u_key.fib_index = m->fib_index;
1189           kv.key = u_key.as_u64;
1190           if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1191             {
1192               user_index = value.value;
1193               u = pool_elt_at_index (tsm->users, user_index);
1194               if (u->nsessions)
1195                 {
1196                   head_index = u->sessions_per_user_list_head_index;
1197                   head = pool_elt_at_index (tsm->list_pool, head_index);
1198                   elt_index = head->next;
1199                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1200                   ses_index = elt->value;
1201                   while (ses_index != ~0)
1202                     {
1203                       s = pool_elt_at_index (tsm->sessions, ses_index);
1204                       elt = pool_elt_at_index (tsm->list_pool, elt->next);
1205                       ses_index = elt->value;
1206
1207                       if (snat_is_session_static (s))
1208                         continue;
1209
1210                       if (!addr_only && s->in2out.port != m->local_port)
1211                         continue;
1212
1213                       nat_free_session_data (sm, s,
1214                                              tsm - sm->per_thread_data, 0);
1215                       nat44_delete_session (sm, s, tsm - sm->per_thread_data);
1216
1217                       if (!addr_only && !sm->endpoint_dependent)
1218                         break;
1219                     }
1220                 }
1221             }
1222         }
1223     }
1224   else
1225     {
1226       if (!m)
1227         {
1228           if (sw_if_index != ~0)
1229             return 0;
1230           else
1231             return VNET_API_ERROR_NO_SUCH_ENTRY;
1232         }
1233
1234       if (identity_nat)
1235         {
1236           if (vrf_id == ~0)
1237             vrf_id = sm->inside_vrf_id;
1238
1239           /* *INDENT-OFF* */
1240           pool_foreach (local, m->locals,
1241           ({
1242             if (local->vrf_id == vrf_id)
1243               find = local - m->locals;
1244           }));
1245           /* *INDENT-ON* */
1246           if (find == ~0)
1247             return VNET_API_ERROR_NO_SUCH_ENTRY;
1248
1249           local = pool_elt_at_index (m->locals, find);
1250           fib_index = local->fib_index;
1251           pool_put (m->locals, local);
1252         }
1253       else
1254         fib_index = m->fib_index;
1255
1256       /* Free external address port */
1257       if (!(addr_only || sm->static_mapping_only || out2in_only))
1258         {
1259           for (i = 0; i < vec_len (sm->addresses); i++)
1260             {
1261               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1262                 {
1263                   a = sm->addresses + i;
1264                   switch (proto)
1265                     {
1266 #define _(N, j, n, s) \
1267                     case NAT_PROTOCOL_##N: \
1268                       --a->busy_##n##_port_refcounts[e_port]; \
1269                       if (e_port > 1024) \
1270                         { \
1271                           a->busy_##n##_ports--; \
1272                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1273                         } \
1274                       break;
1275                       foreach_nat_protocol
1276 #undef _
1277                     default:
1278                       nat_elog_info ("unknown protocol");
1279                       return VNET_API_ERROR_INVALID_VALUE_2;
1280                     }
1281                   break;
1282                 }
1283             }
1284         }
1285
1286       if (sm->num_workers > 1)
1287         tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
1288       else
1289         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1290
1291       init_nat_k (&kv, m->local_addr, m->local_port, fib_index, m->proto);
1292       if (!out2in_only)
1293         clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0);
1294
1295       /* Delete session(s) for static mapping if exist */
1296       if (!(sm->static_mapping_only) ||
1297           (sm->static_mapping_only && sm->static_mapping_connection_tracking))
1298         {
1299           if (sm->endpoint_dependent)
1300             {
1301               snat_ed_static_mapping_del_sessions (sm, tsm, m->local_addr,
1302                                                    m->local_port, m->proto,
1303                                                    fib_index, addr_only,
1304                                                    e_addr, e_port);
1305             }
1306           else
1307             {
1308               u_key.addr = m->local_addr;
1309               u_key.fib_index = fib_index;
1310               kv.key = u_key.as_u64;
1311               snat_static_mapping_del_sessions (sm, tsm, u_key, addr_only,
1312                                                 e_addr, e_port);
1313             }
1314         }
1315
1316       fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, nat_fib_src_low);
1317       if (pool_elts (m->locals))
1318         return 0;
1319
1320       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
1321       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0);
1322
1323       vec_free (m->tag);
1324       vec_free (m->workers);
1325       /* Delete static mapping from pool */
1326       pool_put (sm->static_mappings, m);
1327     }
1328
1329   if (!addr_only || (l_addr.as_u32 == e_addr.as_u32))
1330     return 0;
1331
1332   /* Add/delete external address to FIB */
1333   /* *INDENT-OFF* */
1334   pool_foreach (interface, sm->interfaces,
1335   ({
1336     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1337       continue;
1338
1339     snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add);
1340     break;
1341   }));
1342   pool_foreach (interface, sm->output_feature_interfaces,
1343   ({
1344     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1345       continue;
1346
1347     snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add);
1348     break;
1349   }));
1350   /* *INDENT-ON* */
1351
1352   return 0;
1353 }
1354
1355 int
1356 nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
1357                                  nat_protocol_t proto,
1358                                  nat44_lb_addr_port_t * locals, u8 is_add,
1359                                  twice_nat_type_t twice_nat, u8 out2in_only,
1360                                  u8 * tag, u32 affinity)
1361 {
1362   snat_main_t *sm = &snat_main;
1363   snat_static_mapping_t *m;
1364   clib_bihash_kv_8_8_t kv, value;
1365   snat_address_t *a = 0;
1366   int i;
1367   nat44_lb_addr_port_t *local;
1368   snat_main_per_thread_data_t *tsm;
1369   snat_session_t *s;
1370   uword *bitmap = 0;
1371
1372   if (!sm->endpoint_dependent)
1373     return VNET_API_ERROR_FEATURE_DISABLED;
1374
1375   init_nat_k (&kv, e_addr, e_port, 0, proto);
1376   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1377     m = 0;
1378   else
1379     m = pool_elt_at_index (sm->static_mappings, value.value);
1380
1381   if (is_add)
1382     {
1383       if (m)
1384         return VNET_API_ERROR_VALUE_EXIST;
1385
1386       if (vec_len (locals) < 2)
1387         return VNET_API_ERROR_INVALID_VALUE;
1388
1389       /* Find external address in allocated addresses and reserve port for
1390          address and port pair mapping when dynamic translations enabled */
1391       if (!(sm->static_mapping_only || out2in_only))
1392         {
1393           for (i = 0; i < vec_len (sm->addresses); i++)
1394             {
1395               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1396                 {
1397                   a = sm->addresses + i;
1398                   /* External port must be unused */
1399                   switch (proto)
1400                     {
1401 #define _(N, j, n, s) \
1402                     case NAT_PROTOCOL_##N: \
1403                       if (a->busy_##n##_port_refcounts[e_port]) \
1404                         return VNET_API_ERROR_INVALID_VALUE; \
1405                       ++a->busy_##n##_port_refcounts[e_port]; \
1406                       if (e_port > 1024) \
1407                         { \
1408                           a->busy_##n##_ports++; \
1409                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
1410                         } \
1411                       break;
1412                       foreach_nat_protocol
1413 #undef _
1414                     default:
1415                       nat_elog_info ("unknown protocol");
1416                       return VNET_API_ERROR_INVALID_VALUE_2;
1417                     }
1418                   break;
1419                 }
1420             }
1421           /* External address must be allocated */
1422           if (!a)
1423             return VNET_API_ERROR_NO_SUCH_ENTRY;
1424         }
1425
1426       pool_get (sm->static_mappings, m);
1427       clib_memset (m, 0, sizeof (*m));
1428       m->tag = vec_dup (tag);
1429       m->external_addr = e_addr;
1430       m->external_port = e_port;
1431       m->proto = proto;
1432       m->twice_nat = twice_nat;
1433       m->flags |= NAT_STATIC_MAPPING_FLAG_LB;
1434       if (out2in_only)
1435         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
1436       m->affinity = affinity;
1437
1438       if (affinity)
1439         m->affinity_per_service_list_head_index =
1440           nat_affinity_get_per_service_list_head_index ();
1441       else
1442         m->affinity_per_service_list_head_index = ~0;
1443
1444       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto,
1445                    m - sm->static_mappings);
1446       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1))
1447         {
1448           nat_elog_err ("static_mapping_by_external key add failed");
1449           return VNET_API_ERROR_UNSPECIFIED;
1450         }
1451
1452       for (i = 0; i < vec_len (locals); i++)
1453         {
1454           locals[i].fib_index =
1455             fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
1456                                                locals[i].vrf_id,
1457                                                nat_fib_src_low);
1458           if (!out2in_only)
1459             {
1460               init_nat_kv (&kv, locals[i].addr, locals[i].port,
1461                            locals[i].fib_index, m->proto,
1462                            m - sm->static_mappings);
1463               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
1464             }
1465           locals[i].prefix = (i == 0) ? locals[i].probability :
1466             (locals[i - 1].prefix + locals[i].probability);
1467           pool_get (m->locals, local);
1468           *local = locals[i];
1469           if (sm->num_workers > 1)
1470             {
1471               ip4_header_t ip = {
1472                 .src_address = locals[i].addr,
1473               };
1474               bitmap =
1475                 clib_bitmap_set (bitmap,
1476                                  sm->worker_in2out_cb (&ip, m->fib_index, 0),
1477                                  1);
1478             }
1479         }
1480
1481       /* Assign workers */
1482       if (sm->num_workers > 1)
1483         {
1484           /* *INDENT-OFF* */
1485           clib_bitmap_foreach (i, bitmap,
1486             ({
1487                vec_add1(m->workers, i);
1488             }));
1489           /* *INDENT-ON* */
1490         }
1491     }
1492   else
1493     {
1494       if (!m)
1495         return VNET_API_ERROR_NO_SUCH_ENTRY;
1496
1497       if (!is_lb_static_mapping (m))
1498         return VNET_API_ERROR_INVALID_VALUE;
1499
1500       /* Free external address port */
1501       if (!(sm->static_mapping_only || out2in_only))
1502         {
1503           for (i = 0; i < vec_len (sm->addresses); i++)
1504             {
1505               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1506                 {
1507                   a = sm->addresses + i;
1508                   switch (proto)
1509                     {
1510 #define _(N, j, n, s) \
1511                     case NAT_PROTOCOL_##N: \
1512                       --a->busy_##n##_port_refcounts[e_port]; \
1513                       if (e_port > 1024) \
1514                         { \
1515                           a->busy_##n##_ports--; \
1516                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1517                         } \
1518                       break;
1519                       foreach_nat_protocol
1520 #undef _
1521                     default:
1522                       nat_elog_info ("unknown protocol");
1523                       return VNET_API_ERROR_INVALID_VALUE_2;
1524                     }
1525                   break;
1526                 }
1527             }
1528         }
1529
1530       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
1531       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0))
1532         {
1533           nat_elog_err ("static_mapping_by_external key del failed");
1534           return VNET_API_ERROR_UNSPECIFIED;
1535         }
1536
1537       /* *INDENT-OFF* */
1538       pool_foreach (local, m->locals,
1539       ({
1540           fib_table_unlock (local->fib_index, FIB_PROTOCOL_IP4,
1541                             nat_fib_src_low);
1542           if (!out2in_only)
1543             {
1544 init_nat_k(&              kv, local->addr, local->port, local->fib_index, m->proto);
1545               if (clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 0))
1546                 {
1547                   nat_elog_err ("static_mapping_by_local key del failed");
1548                   return VNET_API_ERROR_UNSPECIFIED;
1549                 }
1550             }
1551
1552           if (sm->num_workers > 1)
1553             {
1554               ip4_header_t ip = {
1555                 .src_address = local->addr,
1556               };
1557               tsm = vec_elt_at_index (sm->per_thread_data,
1558                                       sm->worker_in2out_cb (&ip, m->fib_index, 0));
1559             }
1560           else
1561             tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1562
1563           /* Delete sessions */
1564           pool_foreach (s, tsm->sessions, {
1565             if (!(is_lb_session (s)))
1566               continue;
1567
1568             if ((s->in2out.addr.as_u32 != local->addr.as_u32) ||
1569                 s->in2out.port != local->port)
1570               continue;
1571
1572             nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1573             nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1574           });
1575       }));
1576       /* *INDENT-ON* */
1577       if (m->affinity)
1578         nat_affinity_flush_service (m->affinity_per_service_list_head_index);
1579       pool_free (m->locals);
1580       vec_free (m->tag);
1581       vec_free (m->workers);
1582
1583       pool_put (sm->static_mappings, m);
1584     }
1585
1586   return 0;
1587 }
1588
1589 int
1590 nat44_lb_static_mapping_add_del_local (ip4_address_t e_addr, u16 e_port,
1591                                        ip4_address_t l_addr, u16 l_port,
1592                                        nat_protocol_t proto, u32 vrf_id,
1593                                        u8 probability, u8 is_add)
1594 {
1595   snat_main_t *sm = &snat_main;
1596   snat_static_mapping_t *m = 0;
1597   clib_bihash_kv_8_8_t kv, value;
1598   nat44_lb_addr_port_t *local, *prev_local, *match_local = 0;
1599   snat_main_per_thread_data_t *tsm;
1600   snat_session_t *s;
1601   u32 *locals = 0;
1602   uword *bitmap = 0;
1603   int i;
1604
1605   if (!sm->endpoint_dependent)
1606     return VNET_API_ERROR_FEATURE_DISABLED;
1607
1608   init_nat_k (&kv, e_addr, e_port, 0, proto);
1609   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1610     m = pool_elt_at_index (sm->static_mappings, value.value);
1611
1612   if (!m)
1613     return VNET_API_ERROR_NO_SUCH_ENTRY;
1614
1615   if (!is_lb_static_mapping (m))
1616     return VNET_API_ERROR_INVALID_VALUE;
1617
1618   /* *INDENT-OFF* */
1619   pool_foreach (local, m->locals,
1620   ({
1621     if ((local->addr.as_u32 == l_addr.as_u32) && (local->port == l_port) &&
1622         (local->vrf_id == vrf_id))
1623       {
1624         match_local = local;
1625         break;
1626       }
1627   }));
1628   /* *INDENT-ON* */
1629
1630   if (is_add)
1631     {
1632       if (match_local)
1633         return VNET_API_ERROR_VALUE_EXIST;
1634
1635       pool_get (m->locals, local);
1636       clib_memset (local, 0, sizeof (*local));
1637       local->addr.as_u32 = l_addr.as_u32;
1638       local->port = l_port;
1639       local->probability = probability;
1640       local->vrf_id = vrf_id;
1641       local->fib_index =
1642         fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1643                                            nat_fib_src_low);
1644
1645       if (!is_out2in_only_static_mapping (m))
1646         {
1647           init_nat_kv (&kv, l_addr, l_port, local->fib_index, proto,
1648                        m - sm->static_mappings);
1649           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1))
1650             nat_elog_err ("static_mapping_by_local key add failed");
1651         }
1652     }
1653   else
1654     {
1655       if (!match_local)
1656         return VNET_API_ERROR_NO_SUCH_ENTRY;
1657
1658       if (pool_elts (m->locals) < 3)
1659         return VNET_API_ERROR_UNSPECIFIED;
1660
1661       fib_table_unlock (match_local->fib_index, FIB_PROTOCOL_IP4,
1662                         nat_fib_src_low);
1663
1664       if (!is_out2in_only_static_mapping (m))
1665         {
1666           init_nat_k (&kv, l_addr, l_port, match_local->fib_index, proto);
1667           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0))
1668             nat_elog_err ("static_mapping_by_local key del failed");
1669         }
1670
1671       if (sm->num_workers > 1)
1672         {
1673           ip4_header_t ip = {
1674             .src_address = local->addr,
1675           };
1676           tsm = vec_elt_at_index (sm->per_thread_data,
1677                                   sm->worker_in2out_cb (&ip, m->fib_index,
1678                                                         0));
1679         }
1680       else
1681         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1682
1683       /* Delete sessions */
1684       /* *INDENT-OFF* */
1685       pool_foreach (s, tsm->sessions, {
1686         if (!(is_lb_session (s)))
1687           continue;
1688
1689         if ((s->in2out.addr.as_u32 != match_local->addr.as_u32) ||
1690             s->in2out.port != match_local->port)
1691           continue;
1692
1693         nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1694         nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1695       });
1696       /* *INDENT-ON* */
1697
1698       pool_put (m->locals, match_local);
1699     }
1700
1701   vec_free (m->workers);
1702
1703   /* *INDENT-OFF* */
1704   pool_foreach (local, m->locals,
1705   ({
1706     vec_add1 (locals, local - m->locals);
1707     if (sm->num_workers > 1)
1708       {
1709         ip4_header_t ip;
1710         ip.src_address.as_u32 = local->addr.as_u32,
1711         bitmap = clib_bitmap_set (bitmap,
1712                                   sm->worker_in2out_cb (&ip, local->fib_index, 0),
1713                                   1);
1714       }
1715   }));
1716   /* *INDENT-ON* */
1717
1718   ASSERT (vec_len (locals) > 1);
1719
1720   local = pool_elt_at_index (m->locals, locals[0]);
1721   local->prefix = local->probability;
1722   for (i = 1; i < vec_len (locals); i++)
1723     {
1724       local = pool_elt_at_index (m->locals, locals[i]);
1725       prev_local = pool_elt_at_index (m->locals, locals[i - 1]);
1726       local->prefix = local->probability + prev_local->prefix;
1727     }
1728
1729   /* Assign workers */
1730   if (sm->num_workers > 1)
1731     {
1732       /* *INDENT-OFF* */
1733       clib_bitmap_foreach (i, bitmap, ({ vec_add1(m->workers, i); }));
1734       /* *INDENT-ON* */
1735     }
1736
1737   return 0;
1738 }
1739
1740 int
1741 snat_del_address (snat_main_t * sm, ip4_address_t addr, u8 delete_sm,
1742                   u8 twice_nat)
1743 {
1744   snat_address_t *a = 0;
1745   snat_session_t *ses;
1746   u32 *ses_to_be_removed = 0, *ses_index;
1747   snat_main_per_thread_data_t *tsm;
1748   snat_static_mapping_t *m;
1749   snat_interface_t *interface;
1750   int i;
1751   snat_address_t *addresses =
1752     twice_nat ? sm->twice_nat_addresses : sm->addresses;
1753
1754   /* Find SNAT address */
1755   for (i = 0; i < vec_len (addresses); i++)
1756     {
1757       if (addresses[i].addr.as_u32 == addr.as_u32)
1758         {
1759           a = addresses + i;
1760           break;
1761         }
1762     }
1763   if (!a)
1764     {
1765       nat_log_err ("no such address");
1766       return VNET_API_ERROR_NO_SUCH_ENTRY;
1767     }
1768
1769   if (delete_sm)
1770     {
1771       ip4_address_t pool_addr = { 0 };
1772       /* *INDENT-OFF* */
1773       pool_foreach (m, sm->static_mappings,
1774       ({
1775           if (m->external_addr.as_u32 == addr.as_u32)
1776             (void) snat_add_static_mapping (m->local_addr, m->external_addr,
1777                                             m->local_port, m->external_port,
1778                                             m->vrf_id,
1779                                             is_addr_only_static_mapping(m), ~0,
1780                                             m->proto, 0 /* is_add */,
1781                                             m->twice_nat,
1782                                             is_out2in_only_static_mapping(m),
1783                                             m->tag,
1784                                             is_identity_static_mapping(m),
1785                                             pool_addr, 0);
1786       }));
1787       /* *INDENT-ON* */
1788     }
1789   else
1790     {
1791       /* Check if address is used in some static mapping */
1792       if (is_snat_address_used_in_static_mapping (sm, addr))
1793         {
1794           nat_log_err ("address used in static mapping");
1795           return VNET_API_ERROR_UNSPECIFIED;
1796         }
1797     }
1798
1799   if (a->fib_index != ~0)
1800     fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, nat_fib_src_low);
1801
1802   /* Delete sessions using address */
1803   if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports)
1804     {
1805       /* *INDENT-OFF* */
1806       vec_foreach (tsm, sm->per_thread_data)
1807         {
1808           pool_foreach (ses, tsm->sessions, ({
1809             if (ses->out2in.addr.as_u32 == addr.as_u32)
1810               {
1811                 nat_free_session_data (sm, ses, tsm - sm->per_thread_data, 0);
1812                 vec_add1 (ses_to_be_removed, ses - tsm->sessions);
1813               }
1814           }));
1815
1816           if (sm->endpoint_dependent){
1817               vec_foreach (ses_index, ses_to_be_removed)
1818                 {
1819                   ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
1820                   nat_ed_session_delete (sm, ses, tsm - sm->per_thread_data, 1);
1821                 }
1822           }else{
1823               vec_foreach (ses_index, ses_to_be_removed)
1824                 {
1825                   ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
1826                   nat44_delete_session (sm, ses, tsm - sm->per_thread_data);
1827                 }
1828           }
1829
1830           vec_free (ses_to_be_removed);
1831         }
1832       /* *INDENT-ON* */
1833     }
1834
1835 #define _(N, i, n, s) \
1836   vec_free (a->busy_##n##_ports_per_thread);
1837   foreach_nat_protocol
1838 #undef _
1839     if (twice_nat)
1840     {
1841       vec_del1 (sm->twice_nat_addresses, i);
1842       return 0;
1843     }
1844   else
1845     vec_del1 (sm->addresses, i);
1846
1847   /* Delete external address from FIB */
1848   /* *INDENT-OFF* */
1849   pool_foreach (interface, sm->interfaces,
1850   ({
1851     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1852       continue;
1853
1854     snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0);
1855     break;
1856   }));
1857   pool_foreach (interface, sm->output_feature_interfaces,
1858   ({
1859     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1860       continue;
1861
1862     snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0);
1863     break;
1864   }));
1865   /* *INDENT-ON* */
1866
1867   return 0;
1868 }
1869
1870 static void
1871 nat_validate_counters (snat_main_t * sm, u32 sw_if_index)
1872 {
1873 #define _(x)                                                                  \
1874   vlib_validate_simple_counter (&sm->counters.fastpath.in2out.x,              \
1875                                 sw_if_index);                                 \
1876   vlib_zero_simple_counter (&sm->counters.fastpath.in2out.x, sw_if_index);    \
1877   vlib_validate_simple_counter (&sm->counters.fastpath.out2in.x,              \
1878                                 sw_if_index);                                 \
1879   vlib_zero_simple_counter (&sm->counters.fastpath.out2in.x, sw_if_index);    \
1880   vlib_validate_simple_counter (&sm->counters.slowpath.in2out.x,              \
1881                                 sw_if_index);                                 \
1882   vlib_zero_simple_counter (&sm->counters.slowpath.in2out.x, sw_if_index);    \
1883   vlib_validate_simple_counter (&sm->counters.slowpath.out2in.x,              \
1884                                 sw_if_index);                                 \
1885   vlib_zero_simple_counter (&sm->counters.slowpath.out2in.x, sw_if_index);    \
1886   vlib_validate_simple_counter (&sm->counters.fastpath.in2out_ed.x,           \
1887                                 sw_if_index);                                 \
1888   vlib_zero_simple_counter (&sm->counters.fastpath.in2out_ed.x, sw_if_index); \
1889   vlib_validate_simple_counter (&sm->counters.fastpath.out2in_ed.x,           \
1890                                 sw_if_index);                                 \
1891   vlib_zero_simple_counter (&sm->counters.fastpath.out2in_ed.x, sw_if_index); \
1892   vlib_validate_simple_counter (&sm->counters.slowpath.in2out_ed.x,           \
1893                                 sw_if_index);                                 \
1894   vlib_zero_simple_counter (&sm->counters.slowpath.in2out_ed.x, sw_if_index); \
1895   vlib_validate_simple_counter (&sm->counters.slowpath.out2in_ed.x,           \
1896                                 sw_if_index);                                 \
1897   vlib_zero_simple_counter (&sm->counters.slowpath.out2in_ed.x, sw_if_index);
1898   foreach_nat_counter;
1899 #undef _
1900   vlib_validate_simple_counter (&sm->counters.hairpinning, sw_if_index);
1901   vlib_zero_simple_counter (&sm->counters.hairpinning, sw_if_index);
1902 }
1903
1904 void
1905 expire_per_vrf_sessions (u32 fib_index)
1906 {
1907   per_vrf_sessions_t *per_vrf_sessions;
1908   snat_main_per_thread_data_t *tsm;
1909   snat_main_t *sm = &snat_main;
1910
1911   /* *INDENT-OFF* */
1912   vec_foreach (tsm, sm->per_thread_data)
1913     {
1914       vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
1915         {
1916           if ((per_vrf_sessions->rx_fib_index == fib_index) ||
1917               (per_vrf_sessions->tx_fib_index == fib_index))
1918             {
1919               per_vrf_sessions->expired = 1;
1920             }
1921         }
1922     }
1923   /* *INDENT-ON* */
1924 }
1925
1926 void
1927 update_per_vrf_sessions_vec (u32 fib_index, int is_del)
1928 {
1929   snat_main_t *sm = &snat_main;
1930   nat_fib_t *fib;
1931
1932   // we don't care if it is outside/inside fib
1933   // we just care about their ref_count
1934   // if it reaches 0 sessions should expire
1935   // because the fib isn't valid for NAT anymore
1936
1937   vec_foreach (fib, sm->fibs)
1938   {
1939     if (fib->fib_index == fib_index)
1940       {
1941         if (is_del)
1942           {
1943             fib->ref_count--;
1944             if (!fib->ref_count)
1945               {
1946                 vec_del1 (sm->fibs, fib - sm->fibs);
1947                 expire_per_vrf_sessions (fib_index);
1948               }
1949             return;
1950           }
1951         else
1952           fib->ref_count++;
1953       }
1954   }
1955   if (!is_del)
1956     {
1957       vec_add2 (sm->fibs, fib, 1);
1958       fib->ref_count = 1;
1959       fib->fib_index = fib_index;
1960     }
1961 }
1962
1963 int
1964 snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
1965 {
1966   snat_main_t *sm = &snat_main;
1967   snat_interface_t *i;
1968   const char *feature_name, *del_feature_name;
1969   snat_address_t *ap;
1970   snat_static_mapping_t *m;
1971   nat_outside_fib_t *outside_fib;
1972   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1973                                                        sw_if_index);
1974
1975   if (!sm->enabled)
1976     {
1977       nat_log_err ("nat44 is disabled");
1978       return VNET_API_ERROR_UNSUPPORTED;
1979     }
1980
1981   if (sm->out2in_dpo && !is_inside)
1982     {
1983       nat_log_err ("error unsupported");
1984       return VNET_API_ERROR_UNSUPPORTED;
1985     }
1986
1987   /* *INDENT-OFF* */
1988   pool_foreach (i, sm->output_feature_interfaces,
1989   ({
1990     if (i->sw_if_index == sw_if_index)
1991       {
1992         nat_log_err ("error interface already configured");
1993         return VNET_API_ERROR_VALUE_EXIST;
1994       }
1995   }));
1996   /* *INDENT-ON* */
1997
1998   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
1999     feature_name = is_inside ? "nat44-in2out-fast" : "nat44-out2in-fast";
2000   else
2001     {
2002       if (sm->num_workers > 1)
2003         feature_name =
2004           is_inside ? "nat44-in2out-worker-handoff" :
2005           "nat44-out2in-worker-handoff";
2006       else if (sm->endpoint_dependent)
2007         {
2008           feature_name = is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
2009         }
2010       else
2011         feature_name = is_inside ? "nat44-in2out" : "nat44-out2in";
2012     }
2013
2014   if (sm->fq_in2out_index == ~0 && sm->num_workers > 1)
2015     sm->fq_in2out_index =
2016       vlib_frame_queue_main_init (sm->in2out_node_index, NAT_FQ_NELTS);
2017
2018   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
2019     sm->fq_out2in_index =
2020       vlib_frame_queue_main_init (sm->out2in_node_index, NAT_FQ_NELTS);
2021
2022   if (sm->endpoint_dependent)
2023     update_per_vrf_sessions_vec (fib_index, is_del);
2024
2025   if (!is_inside)
2026     {
2027       /* *INDENT-OFF* */
2028       vec_foreach (outside_fib, sm->outside_fibs)
2029         {
2030           if (outside_fib->fib_index == fib_index)
2031             {
2032               if (is_del)
2033                 {
2034                   outside_fib->refcount--;
2035                   if (!outside_fib->refcount)
2036                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
2037                 }
2038               else
2039                 outside_fib->refcount++;
2040               goto feature_set;
2041             }
2042         }
2043       /* *INDENT-ON* */
2044       if (!is_del)
2045         {
2046           vec_add2 (sm->outside_fibs, outside_fib, 1);
2047           outside_fib->refcount = 1;
2048           outside_fib->fib_index = fib_index;
2049         }
2050     }
2051
2052 feature_set:
2053   /* *INDENT-OFF* */
2054   pool_foreach (i, sm->interfaces,
2055   ({
2056     if (i->sw_if_index == sw_if_index)
2057       {
2058         if (is_del)
2059           {
2060             if (nat_interface_is_inside(i) && nat_interface_is_outside(i))
2061               {
2062                 if (is_inside)
2063                   i->flags &= ~NAT_INTERFACE_FLAG_IS_INSIDE;
2064                 else
2065                   i->flags &= ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
2066
2067                 if (sm->num_workers > 1)
2068                   {
2069                     del_feature_name = "nat44-handoff-classify";
2070                     feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
2071                                                  "nat44-out2in-worker-handoff";
2072                   }
2073                 else if (sm->endpoint_dependent)
2074                   {
2075                     del_feature_name = "nat44-ed-classify";
2076                     feature_name = !is_inside ?  "nat-pre-in2out" :
2077                                                  "nat-pre-out2in";
2078                   }
2079                 else
2080                   {
2081                     del_feature_name = "nat44-classify";
2082                     feature_name = !is_inside ?  "nat44-in2out" : "nat44-out2in";
2083                   }
2084
2085                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
2086                 if (rv)
2087                   return rv;
2088                 vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
2089                                              sw_if_index, 0, 0, 0);
2090                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
2091                                              sw_if_index, 1, 0, 0);
2092                 if (!is_inside)
2093                   {
2094                     if (sm->endpoint_dependent)
2095                       vnet_feature_enable_disable ("ip4-local",
2096                                                    "nat44-ed-hairpinning",
2097                                                    sw_if_index, 1, 0, 0);
2098                     else
2099                       vnet_feature_enable_disable ("ip4-local",
2100                                                    "nat44-hairpinning",
2101                                                    sw_if_index, 1, 0, 0);
2102                   }
2103               }
2104             else
2105               {
2106                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
2107                 if (rv)
2108                   return rv;
2109                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
2110                                              sw_if_index, 0, 0, 0);
2111                 pool_put (sm->interfaces, i);
2112                 if (is_inside)
2113                   {
2114                     if (sm->endpoint_dependent)
2115                       vnet_feature_enable_disable ("ip4-local",
2116                                                    "nat44-ed-hairpinning",
2117                                                    sw_if_index, 0, 0, 0);
2118                     else
2119                       vnet_feature_enable_disable ("ip4-local",
2120                                                    "nat44-hairpinning",
2121                                                    sw_if_index, 0, 0, 0);
2122                   }
2123               }
2124           }
2125         else
2126           {
2127             if ((nat_interface_is_inside(i) && is_inside) ||
2128                 (nat_interface_is_outside(i) && !is_inside))
2129               return 0;
2130
2131             if (sm->num_workers > 1)
2132               {
2133                 del_feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
2134                                                  "nat44-out2in-worker-handoff";
2135                 feature_name = "nat44-handoff-classify";
2136               }
2137             else if (sm->endpoint_dependent)
2138               {
2139                 del_feature_name = !is_inside ?  "nat-pre-in2out" :
2140                                                  "nat-pre-out2in";
2141
2142                 feature_name = "nat44-ed-classify";
2143               }
2144             else
2145               {
2146                 del_feature_name = !is_inside ?  "nat44-in2out" : "nat44-out2in";
2147                 feature_name = "nat44-classify";
2148               }
2149
2150             int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
2151             if (rv)
2152               return rv;
2153             vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
2154                                          sw_if_index, 0, 0, 0);
2155             vnet_feature_enable_disable ("ip4-unicast", feature_name,
2156                                          sw_if_index, 1, 0, 0);
2157             if (!is_inside)
2158               {
2159                 if (sm->endpoint_dependent)
2160                   vnet_feature_enable_disable ("ip4-local", "nat44-ed-hairpinning",
2161                                                sw_if_index, 0, 0, 0);
2162                 else
2163                   vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning",
2164                                                sw_if_index, 0, 0, 0);
2165               }
2166             goto set_flags;
2167           }
2168
2169         goto fib;
2170       }
2171   }));
2172   /* *INDENT-ON* */
2173
2174   if (is_del)
2175     {
2176       nat_log_err ("error interface couldn't be found");
2177       return VNET_API_ERROR_NO_SUCH_ENTRY;
2178     }
2179
2180   pool_get (sm->interfaces, i);
2181   i->sw_if_index = sw_if_index;
2182   i->flags = 0;
2183   nat_validate_counters (sm, sw_if_index);
2184
2185   vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1, 0,
2186                                0);
2187
2188   int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
2189   if (rv)
2190     return rv;
2191
2192   if (is_inside && !sm->out2in_dpo)
2193     {
2194       if (sm->endpoint_dependent)
2195         vnet_feature_enable_disable ("ip4-local", "nat44-ed-hairpinning",
2196                                      sw_if_index, 1, 0, 0);
2197       else
2198         vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning",
2199                                      sw_if_index, 1, 0, 0);
2200     }
2201
2202 set_flags:
2203   if (is_inside)
2204     {
2205       i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
2206       return 0;
2207     }
2208   else
2209     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
2210
2211   /* Add/delete external addresses to FIB */
2212 fib:
2213   /* *INDENT-OFF* */
2214   vec_foreach (ap, sm->addresses)
2215     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
2216
2217   pool_foreach (m, sm->static_mappings,
2218   ({
2219     if (!(is_addr_only_static_mapping(m)) || (m->local_addr.as_u32 == m->external_addr.as_u32))
2220       continue;
2221
2222     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
2223   }));
2224   /* *INDENT-ON* */
2225
2226   return 0;
2227 }
2228
2229 int
2230 snat_interface_add_del_output_feature (u32 sw_if_index,
2231                                        u8 is_inside, int is_del)
2232 {
2233   snat_main_t *sm = &snat_main;
2234   snat_interface_t *i;
2235   snat_address_t *ap;
2236   snat_static_mapping_t *m;
2237   nat_outside_fib_t *outside_fib;
2238   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2239                                                        sw_if_index);
2240
2241   if (!sm->enabled)
2242     {
2243       nat_log_err ("nat44 is disabled");
2244       return VNET_API_ERROR_UNSUPPORTED;
2245     }
2246
2247   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
2248     {
2249       nat_log_err ("error unsupported");
2250       return VNET_API_ERROR_UNSUPPORTED;
2251     }
2252
2253   /* *INDENT-OFF* */
2254   pool_foreach (i, sm->interfaces,
2255   ({
2256     if (i->sw_if_index == sw_if_index)
2257       {
2258         nat_log_err ("error interface already configured");
2259         return VNET_API_ERROR_VALUE_EXIST;
2260       }
2261   }));
2262   /* *INDENT-ON* */
2263
2264   if (sm->endpoint_dependent)
2265     update_per_vrf_sessions_vec (fib_index, is_del);
2266
2267   if (!is_inside)
2268     {
2269       /* *INDENT-OFF* */
2270       vec_foreach (outside_fib, sm->outside_fibs)
2271         {
2272           if (outside_fib->fib_index == fib_index)
2273             {
2274               if (is_del)
2275                 {
2276                   outside_fib->refcount--;
2277                   if (!outside_fib->refcount)
2278                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
2279                 }
2280               else
2281                 outside_fib->refcount++;
2282               goto feature_set;
2283             }
2284         }
2285       /* *INDENT-ON* */
2286       if (!is_del)
2287         {
2288           vec_add2 (sm->outside_fibs, outside_fib, 1);
2289           outside_fib->refcount = 1;
2290           outside_fib->fib_index = fib_index;
2291         }
2292     }
2293
2294 feature_set:
2295   if (is_inside)
2296     {
2297       if (sm->endpoint_dependent)
2298         {
2299           int rv =
2300             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2301           if (rv)
2302             return rv;
2303           rv =
2304             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2305                                                             !is_del);
2306           if (rv)
2307             return rv;
2308           vnet_feature_enable_disable ("ip4-unicast", "nat44-ed-hairpin-dst",
2309                                        sw_if_index, !is_del, 0, 0);
2310           vnet_feature_enable_disable ("ip4-output", "nat44-ed-hairpin-src",
2311                                        sw_if_index, !is_del, 0, 0);
2312         }
2313       else
2314         {
2315           int rv =
2316             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2317           if (rv)
2318             return rv;
2319           rv =
2320             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2321                                                             !is_del);
2322           if (rv)
2323             return rv;
2324           vnet_feature_enable_disable ("ip4-unicast", "nat44-hairpin-dst",
2325                                        sw_if_index, !is_del, 0, 0);
2326           vnet_feature_enable_disable ("ip4-output", "nat44-hairpin-src",
2327                                        sw_if_index, !is_del, 0, 0);
2328         }
2329       goto fq;
2330     }
2331
2332   if (sm->num_workers > 1)
2333     {
2334       int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2335       if (rv)
2336         return rv;
2337       rv =
2338         ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del);
2339       if (rv)
2340         return rv;
2341       vnet_feature_enable_disable ("ip4-unicast",
2342                                    "nat44-out2in-worker-handoff",
2343                                    sw_if_index, !is_del, 0, 0);
2344       vnet_feature_enable_disable ("ip4-output",
2345                                    "nat44-in2out-output-worker-handoff",
2346                                    sw_if_index, !is_del, 0, 0);
2347     }
2348   else
2349     {
2350       if (sm->endpoint_dependent)
2351         {
2352           int rv =
2353             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2354           if (rv)
2355             return rv;
2356           rv =
2357             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2358                                                             !is_del);
2359           if (rv)
2360             return rv;
2361           vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in",
2362                                        sw_if_index, !is_del, 0, 0);
2363           vnet_feature_enable_disable ("ip4-output", "nat-pre-in2out-output",
2364                                        sw_if_index, !is_del, 0, 0);
2365         }
2366       else
2367         {
2368           int rv =
2369             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2370           if (rv)
2371             return rv;
2372           rv =
2373             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2374                                                             !is_del);
2375           if (rv)
2376             return rv;
2377           vnet_feature_enable_disable ("ip4-unicast", "nat44-out2in",
2378                                        sw_if_index, !is_del, 0, 0);
2379           vnet_feature_enable_disable ("ip4-output", "nat44-in2out-output",
2380                                        sw_if_index, !is_del, 0, 0);
2381         }
2382     }
2383
2384 fq:
2385   if (sm->fq_in2out_output_index == ~0 && sm->num_workers > 1)
2386     sm->fq_in2out_output_index =
2387       vlib_frame_queue_main_init (sm->in2out_output_node_index, 0);
2388
2389   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
2390     sm->fq_out2in_index =
2391       vlib_frame_queue_main_init (sm->out2in_node_index, 0);
2392
2393   /* *INDENT-OFF* */
2394   pool_foreach (i, sm->output_feature_interfaces,
2395   ({
2396     if (i->sw_if_index == sw_if_index)
2397       {
2398         if (is_del)
2399           pool_put (sm->output_feature_interfaces, i);
2400         else
2401           return VNET_API_ERROR_VALUE_EXIST;
2402
2403         goto fib;
2404       }
2405   }));
2406   /* *INDENT-ON* */
2407
2408   if (is_del)
2409     {
2410       nat_log_err ("error interface couldn't be found");
2411       return VNET_API_ERROR_NO_SUCH_ENTRY;
2412     }
2413
2414   pool_get (sm->output_feature_interfaces, i);
2415   i->sw_if_index = sw_if_index;
2416   i->flags = 0;
2417   nat_validate_counters (sm, sw_if_index);
2418   if (is_inside)
2419     i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
2420   else
2421     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
2422
2423   /* Add/delete external addresses to FIB */
2424 fib:
2425   if (is_inside)
2426     return 0;
2427
2428   /* *INDENT-OFF* */
2429   vec_foreach (ap, sm->addresses)
2430     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
2431
2432   pool_foreach (m, sm->static_mappings,
2433   ({
2434     if (!((is_addr_only_static_mapping(m)))  || (m->local_addr.as_u32 == m->external_addr.as_u32))
2435       continue;
2436
2437     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
2438   }));
2439   /* *INDENT-ON* */
2440
2441   return 0;
2442 }
2443
2444 int
2445 snat_set_workers (uword * bitmap)
2446 {
2447   snat_main_t *sm = &snat_main;
2448   int i, j = 0;
2449
2450   if (sm->num_workers < 2)
2451     return VNET_API_ERROR_FEATURE_DISABLED;
2452
2453   if (clib_bitmap_last_set (bitmap) >= sm->num_workers)
2454     return VNET_API_ERROR_INVALID_WORKER;
2455
2456   vec_free (sm->workers);
2457   /* *INDENT-OFF* */
2458   clib_bitmap_foreach (i, bitmap,
2459     ({
2460       vec_add1(sm->workers, i);
2461       sm->per_thread_data[sm->first_worker_index + i].snat_thread_index = j;
2462       sm->per_thread_data[sm->first_worker_index + i].thread_index = i;
2463       j++;
2464     }));
2465   /* *INDENT-ON* */
2466
2467   sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
2468
2469   return 0;
2470 }
2471
2472 static void
2473 snat_update_outside_fib (ip4_main_t * im, uword opaque,
2474                          u32 sw_if_index, u32 new_fib_index,
2475                          u32 old_fib_index)
2476 {
2477   snat_main_t *sm = &snat_main;
2478   nat_outside_fib_t *outside_fib;
2479   snat_interface_t *i;
2480   u8 is_add = 1;
2481   u8 match = 0;
2482
2483   if (!sm->enabled || (new_fib_index == old_fib_index)
2484       || (!vec_len (sm->outside_fibs)))
2485     {
2486       return;
2487     }
2488
2489   /* *INDENT-OFF* */
2490   pool_foreach (i, sm->interfaces,
2491     ({
2492       if (i->sw_if_index == sw_if_index)
2493         {
2494           if (!(nat_interface_is_outside (i)))
2495             return;
2496           match = 1;
2497         }
2498     }));
2499
2500   pool_foreach (i, sm->output_feature_interfaces,
2501     ({
2502       if (i->sw_if_index == sw_if_index)
2503         {
2504           if (!(nat_interface_is_outside (i)))
2505             return;
2506           match = 1;
2507         }
2508     }));
2509   /* *INDENT-ON* */
2510
2511   if (!match)
2512     return;
2513
2514   vec_foreach (outside_fib, sm->outside_fibs)
2515   {
2516     if (outside_fib->fib_index == old_fib_index)
2517       {
2518         outside_fib->refcount--;
2519         if (!outside_fib->refcount)
2520           vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
2521         break;
2522       }
2523   }
2524
2525   vec_foreach (outside_fib, sm->outside_fibs)
2526   {
2527     if (outside_fib->fib_index == new_fib_index)
2528       {
2529         outside_fib->refcount++;
2530         is_add = 0;
2531         break;
2532       }
2533   }
2534
2535   if (is_add)
2536     {
2537       vec_add2 (sm->outside_fibs, outside_fib, 1);
2538       outside_fib->refcount = 1;
2539       outside_fib->fib_index = new_fib_index;
2540     }
2541 }
2542
2543 static void
2544 snat_update_outside_fib (ip4_main_t * im, uword opaque,
2545                          u32 sw_if_index, u32 new_fib_index,
2546                          u32 old_fib_index);
2547
2548 static void
2549 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
2550                                        uword opaque,
2551                                        u32 sw_if_index,
2552                                        ip4_address_t * address,
2553                                        u32 address_length,
2554                                        u32 if_address_index, u32 is_delete);
2555
2556 static void
2557 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
2558                                  uword opaque,
2559                                  u32 sw_if_index,
2560                                  ip4_address_t * address,
2561                                  u32 address_length,
2562                                  u32 if_address_index, u32 is_delete);
2563
2564 static int
2565 nat_alloc_addr_and_port_default (snat_address_t * addresses, u32 fib_index,
2566                                  u32 thread_index, nat_protocol_t proto,
2567                                  ip4_address_t * addr, u16 * port,
2568                                  u16 port_per_thread, u32 snat_thread_index);
2569
2570 void
2571 test_key_calc_split ()
2572 {
2573   ip4_address_t l_addr;
2574   l_addr.as_u8[0] = 1;
2575   l_addr.as_u8[1] = 1;
2576   l_addr.as_u8[2] = 1;
2577   l_addr.as_u8[3] = 1;
2578   ip4_address_t r_addr;
2579   r_addr.as_u8[0] = 2;
2580   r_addr.as_u8[1] = 2;
2581   r_addr.as_u8[2] = 2;
2582   r_addr.as_u8[3] = 2;
2583   u16 l_port = 40001;
2584   u16 r_port = 40301;
2585   u8 proto = 9;
2586   u32 fib_index = 9000001;
2587   u32 thread_index = 3000000001;
2588   u32 session_index = 3000000221;
2589   clib_bihash_kv_16_8_t kv;
2590   init_ed_kv (&kv, l_addr, l_port, r_addr, r_port, fib_index, proto,
2591               thread_index, session_index);
2592   ip4_address_t l_addr2;
2593   ip4_address_t r_addr2;
2594   clib_memset (&l_addr2, 0, sizeof (l_addr2));
2595   clib_memset (&r_addr2, 0, sizeof (r_addr2));
2596   u16 l_port2 = 0;
2597   u16 r_port2 = 0;
2598   u8 proto2 = 0;
2599   u32 fib_index2 = 0;
2600   split_ed_kv (&kv, &l_addr2, &r_addr2, &proto2, &fib_index2, &l_port2,
2601                &r_port2);
2602   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
2603   ASSERT (r_addr.as_u32 == r_addr2.as_u32);
2604   ASSERT (l_port == l_port2);
2605   ASSERT (r_port == r_port2);
2606   ASSERT (proto == proto2);
2607   ASSERT (fib_index == fib_index2);
2608   ASSERT (thread_index == ed_value_get_thread_index (&kv));
2609   ASSERT (session_index == ed_value_get_session_index (&kv));
2610
2611   fib_index = 7001;
2612   proto = 5;
2613   nat_protocol_t proto3 = ~0;
2614   u64 key = calc_nat_key (l_addr, l_port, fib_index, proto);
2615   split_nat_key (key, &l_addr2, &l_port2, &fib_index2, &proto3);
2616   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
2617   ASSERT (l_port == l_port2);
2618   ASSERT (proto == proto3);
2619   ASSERT (fib_index == fib_index2);
2620 }
2621
2622 static clib_error_t *
2623 nat_ip_table_add_del (vnet_main_t * vnm, u32 table_id, u32 is_add)
2624 {
2625   snat_main_t *sm = &snat_main;
2626   u32 fib_index;
2627
2628   if (sm->endpoint_dependent)
2629     {
2630       // TODO: consider removing all NAT interfaces
2631
2632       if (!is_add)
2633         {
2634           fib_index = ip4_fib_index_from_table_id (table_id);
2635           if (fib_index != ~0)
2636             expire_per_vrf_sessions (fib_index);
2637         }
2638     }
2639   return 0;
2640 }
2641
2642 VNET_IP_TABLE_ADD_DEL_FUNCTION (nat_ip_table_add_del);
2643
2644 void
2645 nat44_set_node_indexes (snat_main_t * sm, vlib_main_t * vm)
2646 {
2647   vlib_node_t *node;
2648
2649   node = vlib_get_node_by_name (vm, (u8 *) "nat44-out2in");
2650   sm->ei_out2in_node_index = node->index;
2651   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out");
2652   sm->ei_in2out_node_index = node->index;
2653   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-output");
2654   sm->ei_in2out_output_node_index = node->index;
2655
2656   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
2657   sm->ed_out2in_node_index = node->index;
2658   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
2659   sm->ed_in2out_node_index = node->index;
2660   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-output");
2661   sm->ed_in2out_output_node_index = node->index;
2662
2663   node = vlib_get_node_by_name (vm, (u8 *) "error-drop");
2664   sm->error_node_index = node->index;
2665   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-in2out");
2666   sm->pre_in2out_node_index = node->index;
2667   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-out2in");
2668   sm->pre_out2in_node_index = node->index;
2669   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-in2out");
2670   sm->pre_in2out_node_index = node->index;
2671   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-out2in");
2672   sm->pre_out2in_node_index = node->index;
2673   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-fast");
2674   sm->in2out_fast_node_index = node->index;
2675   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-slowpath");
2676   sm->in2out_slowpath_node_index = node->index;
2677   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-output-slowpath");
2678   sm->in2out_slowpath_output_node_index = node->index;
2679   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-slowpath");
2680   sm->ed_in2out_slowpath_node_index = node->index;
2681   node = vlib_get_node_by_name (vm, (u8 *) "nat44-out2in-fast");
2682   sm->out2in_fast_node_index = node->index;
2683   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in-slowpath");
2684   sm->ed_out2in_slowpath_node_index = node->index;
2685   node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpinning");
2686   sm->hairpinning_node_index = node->index;
2687   node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpin-dst");
2688   sm->hairpin_dst_node_index = node->index;
2689   node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpin-src");
2690   sm->hairpin_src_node_index = node->index;
2691   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpinning");
2692   sm->ed_hairpinning_node_index = node->index;
2693   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpin-dst");
2694   sm->ed_hairpin_dst_node_index = node->index;
2695   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpin-src");
2696   sm->ed_hairpin_src_node_index = node->index;
2697 }
2698
/*
 * Name a simple counter and prepare its storage.
 *
 *   c   counter object (an lvalue, not a pointer)
 *   n   value stored in c.name
 *   sn  value stored in c.stat_segment_name
 *
 * After naming, the counter is validated and zeroed at index 0.
 *
 * The expansion is a single do/while (0) statement WITHOUT a trailing
 * semicolon: the caller supplies the ';', which keeps the macro usable as
 * the body of an unbraced if/else.
 */
#define nat_init_simple_counter(c, n, sn)   \
do                                          \
  {                                         \
    (c).name = (n);                         \
    (c).stat_segment_name = (sn);           \
    vlib_validate_simple_counter (&(c), 0); \
    vlib_zero_simple_counter (&(c), 0);     \
  } while (0)
2707
2708 static clib_error_t *
2709 nat_init (vlib_main_t * vm)
2710 {
2711   snat_main_t *sm = &snat_main;
2712   clib_error_t *error = 0;
2713   vlib_thread_main_t *tm = vlib_get_thread_main ();
2714   vlib_thread_registration_t *tr;
2715   ip4_add_del_interface_address_callback_t cbi = { 0 };
2716   ip4_table_bind_callback_t cbt = { 0 };
2717   u32 i, num_threads = 0;
2718   uword *p, *bitmap = 0;
2719
2720   clib_memset (sm, 0, sizeof (*sm));
2721
2722   // required
2723   sm->vnet_main = vnet_get_main ();
2724   // convenience
2725   sm->ip4_main = &ip4_main;
2726   sm->api_main = vlibapi_get_main ();
2727   sm->ip4_lookup_main = &ip4_main.lookup_main;
2728
2729   // frame queue indices used for handoff
2730   sm->fq_out2in_index = ~0;
2731   sm->fq_in2out_index = ~0;
2732   sm->fq_in2out_output_index = ~0;
2733
2734   sm->log_level = SNAT_LOG_ERROR;
2735
2736   nat44_set_node_indexes (sm, vm);
2737   sm->log_class = vlib_log_register_class ("nat", 0);
2738   nat_ipfix_logging_init (vm);
2739
2740   nat_init_simple_counter (sm->total_users, "total-users",
2741                            "/nat44/total-users");
2742   nat_init_simple_counter (sm->total_sessions, "total-sessions",
2743                            "/nat44/total-sessions");
2744   nat_init_simple_counter (sm->user_limit_reached, "user-limit-reached",
2745                            "/nat44/user-limit-reached");
2746
2747 #define _(x)                                            \
2748   sm->counters.fastpath.in2out.x.name = #x;             \
2749   sm->counters.fastpath.in2out.x.stat_segment_name =    \
2750       "/nat44/in2out/fastpath/" #x;                     \
2751   sm->counters.slowpath.in2out.x.name = #x;             \
2752   sm->counters.slowpath.in2out.x.stat_segment_name =    \
2753       "/nat44/in2out/slowpath/" #x;                     \
2754   sm->counters.fastpath.out2in.x.name = #x;             \
2755   sm->counters.fastpath.out2in.x.stat_segment_name =    \
2756       "/nat44/out2in/fastpath/" #x;                     \
2757   sm->counters.slowpath.out2in.x.name = #x;             \
2758   sm->counters.slowpath.out2in.x.stat_segment_name =    \
2759       "/nat44/out2in/slowpath/" #x;                     \
2760   sm->counters.fastpath.in2out_ed.x.name = #x;          \
2761   sm->counters.fastpath.in2out_ed.x.stat_segment_name = \
2762       "/nat44/ed/in2out/fastpath/" #x;                  \
2763   sm->counters.slowpath.in2out_ed.x.name = #x;          \
2764   sm->counters.slowpath.in2out_ed.x.stat_segment_name = \
2765       "/nat44/ed/in2out/slowpath/" #x;                  \
2766   sm->counters.fastpath.out2in_ed.x.name = #x;          \
2767   sm->counters.fastpath.out2in_ed.x.stat_segment_name = \
2768       "/nat44/ed/out2in/fastpath/" #x;                  \
2769   sm->counters.slowpath.out2in_ed.x.name = #x;          \
2770   sm->counters.slowpath.out2in_ed.x.stat_segment_name = \
2771       "/nat44/ed/out2in/slowpath/" #x;
2772   foreach_nat_counter;
2773 #undef _
2774   sm->counters.hairpinning.name = "hairpinning";
2775   sm->counters.hairpinning.stat_segment_name = "/nat44/hairpinning";
2776
2777   p = hash_get_mem (tm->thread_registrations_by_name, "workers");
2778   if (p)
2779     {
2780       tr = (vlib_thread_registration_t *) p[0];
2781       if (tr)
2782         {
2783           sm->num_workers = tr->count;
2784           sm->first_worker_index = tr->first_index;
2785         }
2786     }
2787   num_threads = tm->n_vlib_mains - 1;
2788   sm->port_per_thread = 0xffff - 1024;
2789   vec_validate (sm->per_thread_data, num_threads);
2790
2791   /* Use all available workers by default */
2792   if (sm->num_workers > 1)
2793     {
2794
2795       for (i = 0; i < sm->num_workers; i++)
2796         bitmap = clib_bitmap_set (bitmap, i, 1);
2797       snat_set_workers (bitmap);
2798       clib_bitmap_free (bitmap);
2799     }
2800   else
2801     sm->per_thread_data[0].snat_thread_index = 0;
2802
2803   /* callbacks to call when interface address changes. */
2804   cbi.function = snat_ip4_add_del_interface_address_cb;
2805   vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
2806   cbi.function = nat_ip4_add_del_addr_only_sm_cb;
2807   vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
2808
2809   /* callbacks to call when interface to table biding changes */
2810   cbt.function = snat_update_outside_fib;
2811   vec_add1 (sm->ip4_main->table_bind_callbacks, cbt);
2812
2813   // TODO: is it possible to move it into snat_main ?
2814   nat_fib_src_low =
2815     fib_source_allocate ("nat-low", FIB_SOURCE_PRIORITY_LOW,
2816                          FIB_SOURCE_BH_SIMPLE);
2817   nat_fib_src_hi =
2818     fib_source_allocate ("nat-hi", FIB_SOURCE_PRIORITY_HI,
2819                          FIB_SOURCE_BH_SIMPLE);
2820
2821   /* used only by out2in-dpo feature */
2822   nat_dpo_module_init ();
2823
2824   nat_affinity_init (vm);
2825   nat_ha_init (vm, sm->num_workers, num_threads);
2826
2827   test_key_calc_split ();
2828   error = snat_api_init (vm, sm);
2829   return error;
2830 }
2831
2832 VLIB_INIT_FUNCTION (nat_init);
2833
2834 int
2835 nat44_plugin_enable (nat44_config_t c)
2836 {
2837   snat_main_t *sm = &snat_main;
2838   u32 static_mapping_buckets = 1024;
2839   u32 static_mapping_memory_size = 64 << 20;
2840
2841   if (sm->enabled)
2842     {
2843       nat_log_err ("nat44 is enabled");
2844       return 1;
2845     }
2846
2847   // c.static_mapping_only + c.connection_tracking
2848   //  - supported in NAT EI & NAT ED
2849   // c.out2in_dpo, c.static_mapping_only
2850   //  - supported in NAT EI
2851
2852   if (c.endpoint_dependent)
2853     {
2854       if ((c.static_mapping_only && !c.connection_tracking) || c.out2in_dpo)
2855         {
2856           nat_log_err ("unsupported combination of configuration");
2857           return 1;
2858         }
2859       if (c.users || c.user_sessions)
2860         {
2861           nat_log_err ("unsupported combination of configuration");
2862           return 1;
2863         }
2864     }
2865
2866   // reset to defaults:
2867   sm->alloc_addr_and_port = nat_alloc_addr_and_port_default;
2868   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_DEFAULT;
2869   //
2870   sm->udp_timeout = SNAT_UDP_TIMEOUT;
2871   sm->icmp_timeout = SNAT_ICMP_TIMEOUT;
2872   sm->tcp_transitory_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
2873   sm->tcp_established_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;
2874
2875   // nat44 feature configuration
2876   sm->endpoint_dependent = c.endpoint_dependent;
2877   sm->static_mapping_only = c.static_mapping_only;
2878   sm->static_mapping_connection_tracking = c.connection_tracking;
2879   sm->forwarding_enabled = 0;
2880   sm->mss_clamping = 0;
2881
2882   if (!c.users)
2883     {
2884       c.users = 1024;
2885     }
2886   sm->max_users_per_thread = c.users;
2887   sm->user_buckets = nat_calc_bihash_buckets (c.users);
2888
2889   if (!c.user_memory)
2890     {
2891       c.user_memory =
2892         nat_calc_bihash_memory (c.users, sizeof (clib_bihash_8_8_t));
2893     }
2894   sm->user_memory_size = c.user_memory;
2895
2896   if (!c.sessions)
2897     {
2898       // default value based on legacy setting of load factor 10 * default
2899       // translation buckets 1024
2900       c.sessions = 10 * 1024;
2901     }
2902   sm->max_translations_per_thread = c.sessions;
2903   sm->translation_buckets = nat_calc_bihash_buckets (c.sessions);
2904
2905   if (!c.session_memory)
2906     {
2907       c.session_memory =
2908         nat_calc_bihash_memory
2909         (sm->translation_buckets, sizeof (clib_bihash_16_8_t));
2910     }
2911   sm->translation_memory_size = c.session_memory;
2912   vec_add1 (sm->max_translations_per_fib, sm->max_translations_per_thread);
2913   sm->max_translations_per_user
2914     = c.user_sessions ? c.user_sessions : sm->max_translations_per_thread;
2915
2916   sm->outside_vrf_id = c.outside_vrf;
2917   sm->outside_fib_index =
2918     fib_table_find_or_create_and_lock
2919     (FIB_PROTOCOL_IP4, c.outside_vrf, nat_fib_src_hi);
2920
2921   sm->inside_vrf_id = c.inside_vrf;
2922   sm->inside_fib_index =
2923     fib_table_find_or_create_and_lock
2924     (FIB_PROTOCOL_IP4, c.inside_vrf, nat_fib_src_hi);
2925
2926   if (c.endpoint_dependent)
2927     {
2928       sm->worker_out2in_cb = nat44_ed_get_worker_out2in_cb;
2929       sm->worker_in2out_cb = nat44_ed_get_worker_in2out_cb;
2930       sm->out2in_node_index = sm->ed_out2in_node_index;
2931       sm->in2out_node_index = sm->ed_in2out_node_index;
2932       sm->in2out_output_node_index = sm->ed_in2out_output_node_index;
2933       sm->icmp_match_out2in_cb = icmp_match_out2in_ed;
2934       sm->icmp_match_in2out_cb = icmp_match_in2out_ed;
2935
2936       clib_bihash_init_16_8 (&sm->out2in_ed, "out2in-ed",
2937                              sm->translation_buckets,
2938                              sm->translation_memory_size);
2939       clib_bihash_set_kvp_format_fn_16_8 (&sm->out2in_ed,
2940                                           format_ed_session_kvp);
2941
2942
2943       nat_affinity_enable ();
2944
2945       nat_ha_enable (nat_ha_sadd_ed_cb, nat_ha_sdel_ed_cb, nat_ha_sref_ed_cb);
2946     }
2947   else
2948     {
2949       sm->worker_out2in_cb = snat_get_worker_out2in_cb;
2950       sm->worker_in2out_cb = snat_get_worker_in2out_cb;
2951       sm->out2in_node_index = sm->ei_out2in_node_index;
2952       sm->in2out_node_index = sm->ei_in2out_node_index;
2953       sm->in2out_output_node_index = sm->ei_in2out_output_node_index;
2954       sm->icmp_match_out2in_cb = icmp_match_out2in_slow;
2955       sm->icmp_match_in2out_cb = icmp_match_in2out_slow;
2956
2957       nat_ha_enable (nat_ha_sadd_cb, nat_ha_sdel_cb, nat_ha_sref_cb);
2958     }
2959
2960   // c.static_mapping & c.connection_tracking require
2961   // session database
2962   if (!c.static_mapping_only
2963       || (c.static_mapping_only && c.connection_tracking))
2964     {
2965       snat_main_per_thread_data_t *tsm;
2966       /* *INDENT-OFF* */
2967       vec_foreach (tsm, sm->per_thread_data)
2968         {
2969           nat44_db_init (tsm);
2970         }
2971       /* *INDENT-ON* */
2972     }
2973   else
2974     {
2975       sm->icmp_match_in2out_cb = icmp_match_in2out_fast;
2976       sm->icmp_match_out2in_cb = icmp_match_out2in_fast;
2977     }
2978
2979   clib_bihash_init_8_8 (&sm->static_mapping_by_local,
2980                         "static_mapping_by_local", static_mapping_buckets,
2981                         static_mapping_memory_size);
2982   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_local,
2983                                      format_static_mapping_kvp);
2984
2985   clib_bihash_init_8_8 (&sm->static_mapping_by_external,
2986                         "static_mapping_by_external",
2987                         static_mapping_buckets, static_mapping_memory_size);
2988   clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_external,
2989                                      format_static_mapping_kvp);
2990
2991   // last: reset counters
2992   vlib_zero_simple_counter (&sm->total_users, 0);
2993   vlib_zero_simple_counter (&sm->total_sessions, 0);
2994   vlib_zero_simple_counter (&sm->user_limit_reached, 0);
2995
2996   sm->enabled = 1;
2997
2998   nat_log_info ("nat44 enable");
2999
3000   return 0;
3001 }
3002
3003 void
3004 nat44_addresses_free (snat_address_t ** addresses)
3005 {
3006   snat_address_t *ap;
3007   /* *INDENT-OFF* */
3008   vec_foreach (ap, *addresses)
3009     {
3010     #define _(N, i, n, s) \
3011       vec_free (ap->busy_##n##_ports_per_thread);
3012       foreach_nat_protocol
3013     #undef _
3014     }
3015   /* *INDENT-ON* */
3016   vec_free (*addresses);
3017   *addresses = 0;
3018 }
3019
3020 int
3021 nat44_plugin_disable ()
3022 {
3023   snat_main_t *sm = &snat_main;
3024   snat_interface_t *i, *vec;
3025   int error = 0;
3026
3027   if (!sm->enabled)
3028     {
3029       nat_log_err ("nat44 is disabled");
3030       return 1;
3031     }
3032
3033   // first unregister all nodes from interfaces
3034   vec = vec_dup (sm->interfaces);
3035   /* *INDENT-OFF* */
3036   vec_foreach (i, vec)
3037     {
3038       if (nat_interface_is_inside(i))
3039         error = snat_interface_add_del (i->sw_if_index, 1, 1);
3040       if (nat_interface_is_outside(i))
3041         error = snat_interface_add_del (i->sw_if_index, 0, 1);
3042
3043       if (error)
3044         {
3045           nat_log_err ("error occurred while removing interface %u",
3046                        i->sw_if_index);
3047         }
3048     }
3049   /* *INDENT-ON* */
3050   vec_free (vec);
3051   sm->interfaces = 0;
3052
3053   vec = vec_dup (sm->output_feature_interfaces);
3054   /* *INDENT-OFF* */
3055   vec_foreach (i, vec)
3056     {
3057       if (nat_interface_is_inside(i))
3058         error = snat_interface_add_del_output_feature (i->sw_if_index, 1, 1);
3059       if (nat_interface_is_outside(i))
3060         error = snat_interface_add_del_output_feature (i->sw_if_index, 0, 1);
3061
3062       if (error)
3063         {
3064           nat_log_err ("error occurred while removing interface %u",
3065                        i->sw_if_index);
3066         }
3067     }
3068   /* *INDENT-ON* */
3069   vec_free (vec);
3070   sm->output_feature_interfaces = 0;
3071
3072   vec_free (sm->max_translations_per_fib);
3073
3074   if (sm->endpoint_dependent)
3075     {
3076       nat_affinity_disable ();
3077       clib_bihash_free_16_8 (&sm->out2in_ed);
3078     }
3079
3080   clib_bihash_free_8_8 (&sm->static_mapping_by_local);
3081   clib_bihash_free_8_8 (&sm->static_mapping_by_external);
3082
3083   if (!sm->static_mapping_only ||
3084       (sm->static_mapping_only && sm->static_mapping_connection_tracking))
3085     {
3086       snat_main_per_thread_data_t *tsm;
3087      /* *INDENT-OFF* */
3088       vec_foreach (tsm, sm->per_thread_data)
3089         {
3090           nat44_db_free (tsm);
3091         }
3092       /* *INDENT-ON* */
3093     }
3094
3095   pool_free (sm->static_mappings);
3096
3097   nat44_addresses_free (&sm->addresses);
3098   nat44_addresses_free (&sm->twice_nat_addresses);
3099
3100
3101   vec_free (sm->to_resolve);
3102   vec_free (sm->auto_add_sw_if_indices);
3103   vec_free (sm->auto_add_sw_if_indices_twice_nat);
3104
3105   sm->to_resolve = 0;
3106   sm->auto_add_sw_if_indices = 0;
3107   sm->auto_add_sw_if_indices_twice_nat = 0;
3108
3109   sm->forwarding_enabled = 0;
3110
3111   sm->enabled = 0;
3112
3113   return 0;
3114 }
3115
3116 void
3117 snat_free_outside_address_and_port (snat_address_t * addresses,
3118                                     u32 thread_index,
3119                                     ip4_address_t * addr,
3120                                     u16 port, nat_protocol_t protocol)
3121 {
3122   snat_address_t *a;
3123   u32 address_index;
3124   u16 port_host_byte_order = clib_net_to_host_u16 (port);
3125
3126   for (address_index = 0; address_index < vec_len (addresses);
3127        address_index++)
3128     {
3129       if (addresses[address_index].addr.as_u32 == addr->as_u32)
3130         break;
3131     }
3132
3133   ASSERT (address_index < vec_len (addresses));
3134
3135   a = addresses + address_index;
3136
3137   switch (protocol)
3138     {
3139 #define _(N, i, n, s) \
3140     case NAT_PROTOCOL_##N: \
3141       ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \
3142       --a->busy_##n##_port_refcounts[port_host_byte_order]; \
3143       a->busy_##n##_ports--; \
3144       a->busy_##n##_ports_per_thread[thread_index]--; \
3145       break;
3146       foreach_nat_protocol
3147 #undef _
3148     default:
3149       nat_elog_info ("unknown protocol");
3150       return;
3151     }
3152 }
3153
3154 static int
3155 nat_set_outside_address_and_port (snat_address_t * addresses,
3156                                   u32 thread_index, ip4_address_t addr,
3157                                   u16 port, nat_protocol_t protocol)
3158 {
3159   snat_address_t *a = 0;
3160   u32 address_index;
3161   u16 port_host_byte_order = clib_net_to_host_u16 (port);
3162
3163   for (address_index = 0; address_index < vec_len (addresses);
3164        address_index++)
3165     {
3166       if (addresses[address_index].addr.as_u32 != addr.as_u32)
3167         continue;
3168
3169       a = addresses + address_index;
3170       switch (protocol)
3171         {
3172 #define _(N, j, n, s) \
3173         case NAT_PROTOCOL_##N: \
3174           if (a->busy_##n##_port_refcounts[port_host_byte_order]) \
3175             return VNET_API_ERROR_INSTANCE_IN_USE; \
3176           ++a->busy_##n##_port_refcounts[port_host_byte_order]; \
3177           a->busy_##n##_ports_per_thread[thread_index]++; \
3178           a->busy_##n##_ports++; \
3179           return 0;
3180           foreach_nat_protocol
3181 #undef _
3182         default:
3183           nat_elog_info ("unknown protocol");
3184           return 1;
3185         }
3186     }
3187
3188   return VNET_API_ERROR_NO_SUCH_ENTRY;
3189 }
3190
3191 int
3192 snat_static_mapping_match (snat_main_t * sm,
3193                            ip4_address_t match_addr,
3194                            u16 match_port,
3195                            u32 match_fib_index,
3196                            nat_protocol_t match_protocol,
3197                            ip4_address_t * mapping_addr,
3198                            u16 * mapping_port,
3199                            u32 * mapping_fib_index,
3200                            u8 by_external,
3201                            u8 * is_addr_only,
3202                            twice_nat_type_t * twice_nat,
3203                            lb_nat_type_t * lb, ip4_address_t * ext_host_addr,
3204                            u8 * is_identity_nat, snat_static_mapping_t ** out)
3205 {
3206   clib_bihash_kv_8_8_t kv, value;
3207   clib_bihash_8_8_t *mapping_hash;
3208   snat_static_mapping_t *m;
3209   u32 rand, lo = 0, hi, mid, *tmp = 0, i;
3210   nat44_lb_addr_port_t *local;
3211   u8 backend_index;
3212
3213   if (!by_external)
3214     {
3215       mapping_hash = &sm->static_mapping_by_local;
3216       init_nat_k (&kv, match_addr, match_port, match_fib_index,
3217                   match_protocol);
3218       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
3219         {
3220           /* Try address only mapping */
3221           init_nat_k (&kv, match_addr, 0, match_fib_index, 0);
3222           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
3223             return 1;
3224         }
3225     }
3226   else
3227     {
3228       mapping_hash = &sm->static_mapping_by_external;
3229       init_nat_k (&kv, match_addr, match_port, 0, match_protocol);
3230       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
3231         {
3232           /* Try address only mapping */
3233           init_nat_k (&kv, match_addr, 0, 0, 0);
3234           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
3235             return 1;
3236         }
3237     }
3238
3239   m = pool_elt_at_index (sm->static_mappings, value.value);
3240
3241   if (by_external)
3242     {
3243       if (is_lb_static_mapping (m))
3244         {
3245           if (PREDICT_FALSE (lb != 0))
3246             *lb = m->affinity ? AFFINITY_LB_NAT : LB_NAT;
3247           if (m->affinity && !nat_affinity_find_and_lock (ext_host_addr[0],
3248                                                           match_addr,
3249                                                           match_protocol,
3250                                                           match_port,
3251                                                           &backend_index))
3252             {
3253               local = pool_elt_at_index (m->locals, backend_index);
3254               *mapping_addr = local->addr;
3255               *mapping_port = local->port;
3256               *mapping_fib_index = local->fib_index;
3257               goto end;
3258             }
3259           // pick locals matching this worker
3260           if (PREDICT_FALSE (sm->num_workers > 1))
3261             {
3262               u32 thread_index = vlib_get_thread_index ();
3263               /* *INDENT-OFF* */
3264               pool_foreach_index (i, m->locals,
3265               ({
3266                 local = pool_elt_at_index (m->locals, i);
3267
3268                 ip4_header_t ip = {
3269                   .src_address = local->addr,
3270                 };
3271
3272                 if (sm->worker_in2out_cb (&ip, m->fib_index, 0) ==
3273                     thread_index)
3274                   {
3275                     vec_add1 (tmp, i);
3276                   }
3277               }));
3278               /* *INDENT-ON* */
3279               ASSERT (vec_len (tmp) != 0);
3280             }
3281           else
3282             {
3283               /* *INDENT-OFF* */
3284               pool_foreach_index (i, m->locals,
3285               ({
3286                 vec_add1 (tmp, i);
3287               }));
3288               /* *INDENT-ON* */
3289             }
3290           hi = vec_len (tmp) - 1;
3291           local = pool_elt_at_index (m->locals, tmp[hi]);
3292           rand = 1 + (random_u32 (&sm->random_seed) % local->prefix);
3293           while (lo < hi)
3294             {
3295               mid = ((hi - lo) >> 1) + lo;
3296               local = pool_elt_at_index (m->locals, tmp[mid]);
3297               (rand > local->prefix) ? (lo = mid + 1) : (hi = mid);
3298             }
3299           local = pool_elt_at_index (m->locals, tmp[lo]);
3300           if (!(local->prefix >= rand))
3301             return 1;
3302           *mapping_addr = local->addr;
3303           *mapping_port = local->port;
3304           *mapping_fib_index = local->fib_index;
3305           if (m->affinity)
3306             {
3307               if (nat_affinity_create_and_lock (ext_host_addr[0], match_addr,
3308                                                 match_protocol, match_port,
3309                                                 tmp[lo], m->affinity,
3310                                                 m->affinity_per_service_list_head_index))
3311                 nat_elog_info ("create affinity record failed");
3312             }
3313           vec_free (tmp);
3314         }
3315       else
3316         {
3317           if (PREDICT_FALSE (lb != 0))
3318             *lb = NO_LB_NAT;
3319           *mapping_fib_index = m->fib_index;
3320           *mapping_addr = m->local_addr;
3321           /* Address only mapping doesn't change port */
3322           *mapping_port = is_addr_only_static_mapping (m) ? match_port
3323             : m->local_port;
3324         }
3325     }
3326   else
3327     {
3328       *mapping_addr = m->external_addr;
3329       /* Address only mapping doesn't change port */
3330       *mapping_port = is_addr_only_static_mapping (m) ? match_port
3331         : m->external_port;
3332       *mapping_fib_index = sm->outside_fib_index;
3333     }
3334
3335 end:
3336   if (PREDICT_FALSE (is_addr_only != 0))
3337     *is_addr_only = is_addr_only_static_mapping (m);
3338
3339   if (PREDICT_FALSE (twice_nat != 0))
3340     *twice_nat = m->twice_nat;
3341
3342   if (PREDICT_FALSE (is_identity_nat != 0))
3343     *is_identity_nat = is_identity_static_mapping (m);
3344
3345   if (out != 0)
3346     *out = m;
3347
3348   return 0;
3349 }
3350
3351 int
3352 snat_alloc_outside_address_and_port (snat_address_t * addresses,
3353                                      u32 fib_index,
3354                                      u32 thread_index,
3355                                      nat_protocol_t proto,
3356                                      ip4_address_t * addr,
3357                                      u16 * port,
3358                                      u16 port_per_thread,
3359                                      u32 snat_thread_index)
3360 {
3361   snat_main_t *sm = &snat_main;
3362
3363   return sm->alloc_addr_and_port (addresses, fib_index, thread_index, proto,
3364                                   addr, port, port_per_thread,
3365                                   snat_thread_index);
3366 }
3367
3368 static int
3369 nat_alloc_addr_and_port_default (snat_address_t * addresses,
3370                                  u32 fib_index,
3371                                  u32 thread_index,
3372                                  nat_protocol_t proto,
3373                                  ip4_address_t * addr,
3374                                  u16 * port,
3375                                  u16 port_per_thread, u32 snat_thread_index)
3376 {
3377   int i;
3378   snat_address_t *a, *ga = 0;
3379   u32 portnum;
3380
3381   for (i = 0; i < vec_len (addresses); i++)
3382     {
3383       a = addresses + i;
3384       switch (proto)
3385         {
3386 #define _(N, j, n, s) \
3387         case NAT_PROTOCOL_##N: \
3388           if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \
3389             { \
3390               if (a->fib_index == fib_index) \
3391                 { \
3392                   while (1) \
3393                     { \
3394                       portnum = (port_per_thread * \
3395                         snat_thread_index) + \
3396                         snat_random_port(0, port_per_thread - 1) + 1024; \
3397                       if (a->busy_##n##_port_refcounts[portnum]) \
3398                         continue; \
3399                       --a->busy_##n##_port_refcounts[portnum]; \
3400                       a->busy_##n##_ports_per_thread[thread_index]++; \
3401                       a->busy_##n##_ports++; \
3402                       *addr = a->addr; \
3403                       *port = clib_host_to_net_u16(portnum); \
3404                       return 0; \
3405                     } \
3406                 } \
3407               else if (a->fib_index == ~0) \
3408                 { \
3409                   ga = a; \
3410                 } \
3411             } \
3412           break;
3413           foreach_nat_protocol
3414 #undef _
3415         default:
3416           nat_elog_info ("unknown protocol");
3417           return 1;
3418         }
3419
3420     }
3421
3422   if (ga)
3423     {
3424       a = ga;
3425       switch (proto)
3426         {
3427 #define _(N, j, n, s) \
3428         case NAT_PROTOCOL_##N: \
3429           while (1) \
3430             { \
3431               portnum = (port_per_thread * \
3432                 snat_thread_index) + \
3433                 snat_random_port(0, port_per_thread - 1) + 1024; \
3434               if (a->busy_##n##_port_refcounts[portnum]) \
3435                 continue; \
3436               ++a->busy_##n##_port_refcounts[portnum]; \
3437               a->busy_##n##_ports_per_thread[thread_index]++; \
3438               a->busy_##n##_ports++; \
3439               *addr = a->addr; \
3440               *port = clib_host_to_net_u16(portnum); \
3441               return 0; \
3442             }
3443           break;
3444           foreach_nat_protocol
3445 #undef _
3446         default:
3447           nat_elog_info ("unknown protocol");
3448           return 1;
3449         }
3450     }
3451
3452   /* Totally out of translations to use... */
3453   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
3454   return 1;
3455 }
3456
3457 static int
3458 nat_alloc_addr_and_port_mape (snat_address_t * addresses, u32 fib_index,
3459                               u32 thread_index, nat_protocol_t proto,
3460                               ip4_address_t * addr, u16 * port,
3461                               u16 port_per_thread, u32 snat_thread_index)
3462 {
3463   snat_main_t *sm = &snat_main;
3464   snat_address_t *a = addresses;
3465   u16 m, ports, portnum, A, j;
3466   m = 16 - (sm->psid_offset + sm->psid_length);
3467   ports = (1 << (16 - sm->psid_length)) - (1 << m);
3468
3469   if (!vec_len (addresses))
3470     goto exhausted;
3471
3472   switch (proto)
3473     {
3474 #define _(N, i, n, s) \
3475     case NAT_PROTOCOL_##N: \
3476       if (a->busy_##n##_ports < ports) \
3477         { \
3478           while (1) \
3479             { \
3480               A = snat_random_port(1, pow2_mask(sm->psid_offset)); \
3481               j = snat_random_port(0, pow2_mask(m)); \
3482               portnum = A | (sm->psid << sm->psid_offset) | (j << (16 - m)); \
3483               if (a->busy_##n##_port_refcounts[portnum]) \
3484                 continue; \
3485               ++a->busy_##n##_port_refcounts[portnum]; \
3486               a->busy_##n##_ports++; \
3487               *addr = a->addr; \
3488               *port = clib_host_to_net_u16 (portnum); \
3489               return 0; \
3490             } \
3491         } \
3492       break;
3493       foreach_nat_protocol
3494 #undef _
3495     default:
3496       nat_elog_info ("unknown protocol");
3497       return 1;
3498     }
3499
3500 exhausted:
3501   /* Totally out of translations to use... */
3502   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
3503   return 1;
3504 }
3505
3506 static int
3507 nat_alloc_addr_and_port_range (snat_address_t * addresses, u32 fib_index,
3508                                u32 thread_index, nat_protocol_t proto,
3509                                ip4_address_t * addr, u16 * port,
3510                                u16 port_per_thread, u32 snat_thread_index)
3511 {
3512   snat_main_t *sm = &snat_main;
3513   snat_address_t *a = addresses;
3514   u16 portnum, ports;
3515
3516   ports = sm->end_port - sm->start_port + 1;
3517
3518   if (!vec_len (addresses))
3519     goto exhausted;
3520
3521   switch (proto)
3522     {
3523 #define _(N, i, n, s) \
3524     case NAT_PROTOCOL_##N: \
3525       if (a->busy_##n##_ports < ports) \
3526         { \
3527           while (1) \
3528             { \
3529               portnum = snat_random_port(sm->start_port, sm->end_port); \
3530               if (a->busy_##n##_port_refcounts[portnum]) \
3531                 continue; \
3532               ++a->busy_##n##_port_refcounts[portnum]; \
3533               a->busy_##n##_ports++; \
3534               *addr = a->addr; \
3535               *port = clib_host_to_net_u16 (portnum); \
3536               return 0; \
3537             } \
3538         } \
3539       break;
3540       foreach_nat_protocol
3541 #undef _
3542     default:
3543       nat_elog_info ("unknown protocol");
3544       return 1;
3545     }
3546
3547 exhausted:
3548   /* Totally out of translations to use... */
3549   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
3550   return 1;
3551 }
3552
3553 void
3554 nat44_add_del_address_dpo (ip4_address_t addr, u8 is_add)
3555 {
3556   dpo_id_t dpo_v4 = DPO_INVALID;
3557   fib_prefix_t pfx = {
3558     .fp_proto = FIB_PROTOCOL_IP4,
3559     .fp_len = 32,
3560     .fp_addr.ip4.as_u32 = addr.as_u32,
3561   };
3562
3563   if (is_add)
3564     {
3565       nat_dpo_create (DPO_PROTO_IP4, 0, &dpo_v4);
3566       fib_table_entry_special_dpo_add (0, &pfx, nat_fib_src_hi,
3567                                        FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v4);
3568       dpo_reset (&dpo_v4);
3569     }
3570   else
3571     {
3572       fib_table_entry_special_remove (0, &pfx, nat_fib_src_hi);
3573     }
3574 }
3575
3576 u8 *
3577 format_session_kvp (u8 * s, va_list * args)
3578 {
3579   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
3580
3581   s = format (s, "%U session-index %llu", format_snat_key, v->key, v->value);
3582
3583   return s;
3584 }
3585
3586 u8 *
3587 format_static_mapping_kvp (u8 * s, va_list * args)
3588 {
3589   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
3590
3591   s = format (s, "%U static-mapping-index %llu",
3592               format_snat_key, v->key, v->value);
3593
3594   return s;
3595 }
3596
3597 u8 *
3598 format_user_kvp (u8 * s, va_list * args)
3599 {
3600   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
3601   snat_user_key_t k;
3602
3603   k.as_u64 = v->key;
3604
3605   s = format (s, "%U fib %d user-index %llu", format_ip4_address, &k.addr,
3606               k.fib_index, v->value);
3607
3608   return s;
3609 }
3610
3611 u8 *
3612 format_ed_session_kvp (u8 * s, va_list * args)
3613 {
3614   clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
3615
3616   u8 proto;
3617   u16 r_port, l_port;
3618   ip4_address_t l_addr, r_addr;
3619   u32 fib_index;
3620
3621   split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port);
3622   s =
3623     format (s,
3624             "local %U:%d remote %U:%d proto %U fib %d thread-index %u session-index %u",
3625             format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port),
3626             format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port),
3627             format_ip_protocol, proto, fib_index,
3628             ed_value_get_session_index (v), ed_value_get_thread_index (v));
3629
3630   return s;
3631 }
3632
3633 static u32
3634 snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0,
3635                            u8 is_output)
3636 {
3637   snat_main_t *sm = &snat_main;
3638   u32 next_worker_index = 0;
3639   u32 hash;
3640
3641   next_worker_index = sm->first_worker_index;
3642   hash = ip0->src_address.as_u32 + (ip0->src_address.as_u32 >> 8) +
3643     (ip0->src_address.as_u32 >> 16) + (ip0->src_address.as_u32 >> 24);
3644
3645   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
3646     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
3647   else
3648     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
3649
3650   return next_worker_index;
3651 }
3652
3653 static u32
3654 snat_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip0,
3655                            u32 rx_fib_index0, u8 is_output)
3656 {
3657   snat_main_t *sm = &snat_main;
3658   udp_header_t *udp;
3659   u16 port;
3660   clib_bihash_kv_8_8_t kv, value;
3661   snat_static_mapping_t *m;
3662   u32 proto;
3663   u32 next_worker_index = 0;
3664
3665   /* first try static mappings without port */
3666   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3667     {
3668       init_nat_k (&kv, ip0->dst_address, 0, rx_fib_index0, 0);
3669       if (!clib_bihash_search_8_8
3670           (&sm->static_mapping_by_external, &kv, &value))
3671         {
3672           m = pool_elt_at_index (sm->static_mappings, value.value);
3673           return m->workers[0];
3674         }
3675     }
3676
3677   proto = ip_proto_to_nat_proto (ip0->protocol);
3678   udp = ip4_next_header (ip0);
3679   port = udp->dst_port;
3680
3681   /* unknown protocol */
3682   if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
3683     {
3684       /* use current thread */
3685       return vlib_get_thread_index ();
3686     }
3687
3688   if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_ICMP))
3689     {
3690       icmp46_header_t *icmp = (icmp46_header_t *) udp;
3691       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3692       if (!icmp_type_is_error_message
3693           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
3694         port = vnet_buffer (b)->ip.reass.l4_src_port;
3695       else
3696         {
3697           /* if error message, then it's not fragmented and we can access it */
3698           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3699           proto = ip_proto_to_nat_proto (inner_ip->protocol);
3700           void *l4_header = ip4_next_header (inner_ip);
3701           switch (proto)
3702             {
3703             case NAT_PROTOCOL_ICMP:
3704               icmp = (icmp46_header_t *) l4_header;
3705               echo = (icmp_echo_header_t *) (icmp + 1);
3706               port = echo->identifier;
3707               break;
3708             case NAT_PROTOCOL_UDP:
3709             case NAT_PROTOCOL_TCP:
3710               port = ((tcp_udp_header_t *) l4_header)->src_port;
3711               break;
3712             default:
3713               return vlib_get_thread_index ();
3714             }
3715         }
3716     }
3717
3718   /* try static mappings with port */
3719   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3720     {
3721       init_nat_k (&kv, ip0->dst_address, port, rx_fib_index0, proto);
3722       if (!clib_bihash_search_8_8
3723           (&sm->static_mapping_by_external, &kv, &value))
3724         {
3725           m = pool_elt_at_index (sm->static_mappings, value.value);
3726           return m->workers[0];
3727         }
3728     }
3729
3730   /* worker by outside port */
3731   next_worker_index = sm->first_worker_index;
3732   next_worker_index +=
3733     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
3734   return next_worker_index;
3735 }
3736
3737 static u32
3738 nat44_ed_get_worker_in2out_cb (ip4_header_t * ip, u32 rx_fib_index,
3739                                u8 is_output)
3740 {
3741   snat_main_t *sm = &snat_main;
3742   u32 next_worker_index = sm->first_worker_index;
3743   u32 hash;
3744
3745   clib_bihash_kv_16_8_t kv16, value16;
3746   snat_main_per_thread_data_t *tsm;
3747   udp_header_t *udp;
3748
3749   if (PREDICT_FALSE (is_output))
3750     {
3751       u32 fib_index = sm->outside_fib_index;
3752       nat_outside_fib_t *outside_fib;
3753       fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
3754       fib_prefix_t pfx = {
3755         .fp_proto = FIB_PROTOCOL_IP4,
3756         .fp_len = 32,
3757         .fp_addr = {
3758                     .ip4.as_u32 = ip->dst_address.as_u32,
3759                     }
3760         ,
3761       };
3762
3763       udp = ip4_next_header (ip);
3764
3765       switch (vec_len (sm->outside_fibs))
3766         {
3767         case 0:
3768           fib_index = sm->outside_fib_index;
3769           break;
3770         case 1:
3771           fib_index = sm->outside_fibs[0].fib_index;
3772           break;
3773         default:
3774             /* *INDENT-OFF* */
3775             vec_foreach (outside_fib, sm->outside_fibs)
3776               {
3777                 fei = fib_table_lookup (outside_fib->fib_index, &pfx);
3778                 if (FIB_NODE_INDEX_INVALID != fei)
3779                   {
3780                     if (fib_entry_get_resolving_interface (fei) != ~0)
3781                       {
3782                         fib_index = outside_fib->fib_index;
3783                         break;
3784                       }
3785                   }
3786               }
3787             /* *INDENT-ON* */
3788           break;
3789         }
3790
3791       init_ed_k (&kv16, ip->src_address, udp->src_port, ip->dst_address,
3792                  udp->dst_port, fib_index, ip->protocol);
3793
3794       if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed,
3795                                                   &kv16, &value16)))
3796         {
3797           tsm =
3798             vec_elt_at_index (sm->per_thread_data,
3799                               ed_value_get_thread_index (&value16));
3800           next_worker_index += tsm->thread_index;
3801
3802           nat_elog_debug_handoff ("HANDOFF IN2OUT-OUTPUT-FEATURE (session)",
3803                                   next_worker_index, fib_index,
3804                                   clib_net_to_host_u32 (ip->
3805                                                         src_address.as_u32),
3806                                   clib_net_to_host_u32 (ip->
3807                                                         dst_address.as_u32));
3808
3809           return next_worker_index;
3810         }
3811     }
3812
3813   hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
3814     (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
3815
3816   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
3817     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
3818   else
3819     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
3820
3821   if (PREDICT_TRUE (!is_output))
3822     {
3823       nat_elog_debug_handoff ("HANDOFF IN2OUT",
3824                               next_worker_index, rx_fib_index,
3825                               clib_net_to_host_u32 (ip->src_address.as_u32),
3826                               clib_net_to_host_u32 (ip->dst_address.as_u32));
3827     }
3828   else
3829     {
3830       nat_elog_debug_handoff ("HANDOFF IN2OUT-OUTPUT-FEATURE",
3831                               next_worker_index, rx_fib_index,
3832                               clib_net_to_host_u32 (ip->src_address.as_u32),
3833                               clib_net_to_host_u32 (ip->dst_address.as_u32));
3834     }
3835
3836   return next_worker_index;
3837 }
3838
3839 static u32
3840 nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip,
3841                                u32 rx_fib_index, u8 is_output)
3842 {
3843   snat_main_t *sm = &snat_main;
3844   clib_bihash_kv_8_8_t kv, value;
3845   clib_bihash_kv_16_8_t kv16, value16;
3846   snat_main_per_thread_data_t *tsm;
3847
3848   u32 proto, next_worker_index = 0;
3849   udp_header_t *udp;
3850   u16 port;
3851   snat_static_mapping_t *m;
3852   u32 hash;
3853
3854   proto = ip_proto_to_nat_proto (ip->protocol);
3855
3856   if (PREDICT_TRUE (proto == NAT_PROTOCOL_UDP || proto == NAT_PROTOCOL_TCP))
3857     {
3858       udp = ip4_next_header (ip);
3859
3860       init_ed_k (&kv16, ip->dst_address, udp->dst_port, ip->src_address,
3861                  udp->src_port, rx_fib_index, ip->protocol);
3862
3863       if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed,
3864                                                   &kv16, &value16)))
3865         {
3866           tsm =
3867             vec_elt_at_index (sm->per_thread_data,
3868                               ed_value_get_thread_index (&value16));
3869           vnet_buffer2 (b)->nat.ed_out2in_nat_session_index =
3870             ed_value_get_session_index (&value16);
3871           next_worker_index = sm->first_worker_index + tsm->thread_index;
3872           nat_elog_debug_handoff ("HANDOFF OUT2IN (session)",
3873                                   next_worker_index, rx_fib_index,
3874                                   clib_net_to_host_u32 (ip->
3875                                                         src_address.as_u32),
3876                                   clib_net_to_host_u32 (ip->
3877                                                         dst_address.as_u32));
3878           return next_worker_index;
3879         }
3880     }
3881   else if (proto == NAT_PROTOCOL_ICMP)
3882     {
3883       if (!get_icmp_o2i_ed_key (b, ip, rx_fib_index, ~0, ~0, 0, 0, 0, &kv16))
3884         {
3885           if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed,
3886                                                       &kv16, &value16)))
3887             {
3888               tsm =
3889                 vec_elt_at_index (sm->per_thread_data,
3890                                   ed_value_get_thread_index (&value16));
3891               next_worker_index = sm->first_worker_index + tsm->thread_index;
3892               nat_elog_debug_handoff ("HANDOFF OUT2IN (session)",
3893                                       next_worker_index, rx_fib_index,
3894                                       clib_net_to_host_u32 (ip->
3895                                                             src_address.as_u32),
3896                                       clib_net_to_host_u32 (ip->
3897                                                             dst_address.as_u32));
3898               return next_worker_index;
3899             }
3900         }
3901     }
3902
3903   /* first try static mappings without port */
3904   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3905     {
3906       init_nat_k (&kv, ip->dst_address, 0, 0, 0);
3907       if (!clib_bihash_search_8_8
3908           (&sm->static_mapping_by_external, &kv, &value))
3909         {
3910           m = pool_elt_at_index (sm->static_mappings, value.value);
3911           next_worker_index = m->workers[0];
3912           goto done;
3913         }
3914     }
3915
3916   /* unknown protocol */
3917   if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
3918     {
3919       /* use current thread */
3920       next_worker_index = vlib_get_thread_index ();
3921       goto done;
3922     }
3923
3924   udp = ip4_next_header (ip);
3925   port = udp->dst_port;
3926
3927   if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
3928     {
3929       icmp46_header_t *icmp = (icmp46_header_t *) udp;
3930       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3931       if (!icmp_type_is_error_message
3932           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
3933         port = vnet_buffer (b)->ip.reass.l4_src_port;
3934       else
3935         {
3936           /* if error message, then it's not fragmented and we can access it */
3937           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3938           proto = ip_proto_to_nat_proto (inner_ip->protocol);
3939           void *l4_header = ip4_next_header (inner_ip);
3940           switch (proto)
3941             {
3942             case NAT_PROTOCOL_ICMP:
3943               icmp = (icmp46_header_t *) l4_header;
3944               echo = (icmp_echo_header_t *) (icmp + 1);
3945               port = echo->identifier;
3946               break;
3947             case NAT_PROTOCOL_UDP:
3948             case NAT_PROTOCOL_TCP:
3949               port = ((tcp_udp_header_t *) l4_header)->src_port;
3950               break;
3951             default:
3952               next_worker_index = vlib_get_thread_index ();
3953               goto done;
3954             }
3955         }
3956     }
3957
3958   /* try static mappings with port */
3959   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3960     {
3961       init_nat_k (&kv, ip->dst_address, port, 0, proto);
3962       if (!clib_bihash_search_8_8
3963           (&sm->static_mapping_by_external, &kv, &value))
3964         {
3965           m = pool_elt_at_index (sm->static_mappings, value.value);
3966           if (!is_lb_static_mapping (m))
3967             {
3968               next_worker_index = m->workers[0];
3969               goto done;
3970             }
3971
3972           hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
3973             (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
3974
3975           if (PREDICT_TRUE (is_pow2 (_vec_len (m->workers))))
3976             next_worker_index =
3977               m->workers[hash & (_vec_len (m->workers) - 1)];
3978           else
3979             next_worker_index = m->workers[hash % _vec_len (m->workers)];
3980           goto done;
3981         }
3982     }
3983
3984   /* worker by outside port */
3985   next_worker_index = sm->first_worker_index;
3986   next_worker_index +=
3987     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
3988
3989 done:
3990   nat_elog_debug_handoff ("HANDOFF OUT2IN", next_worker_index, rx_fib_index,
3991                           clib_net_to_host_u32 (ip->src_address.as_u32),
3992                           clib_net_to_host_u32 (ip->dst_address.as_u32));
3993   return next_worker_index;
3994 }
3995
3996 void
3997 nat_ha_sadd_cb (ip4_address_t * in_addr, u16 in_port,
3998                 ip4_address_t * out_addr, u16 out_port,
3999                 ip4_address_t * eh_addr, u16 eh_port,
4000                 ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
4001                 u32 fib_index, u16 flags, u32 thread_index)
4002 {
4003   snat_main_t *sm = &snat_main;
4004   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
4005   snat_user_t *u;
4006   snat_session_t *s;
4007   clib_bihash_kv_8_8_t kv;
4008   vlib_main_t *vm = vlib_get_main ();
4009   f64 now = vlib_time_now (vm);
4010   nat_outside_fib_t *outside_fib;
4011   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
4012   fib_prefix_t pfx = {
4013     .fp_proto = FIB_PROTOCOL_IP4,
4014     .fp_len = 32,
4015     .fp_addr = {
4016                 .ip4.as_u32 = eh_addr->as_u32,
4017                 },
4018   };
4019
4020   if (!(flags & SNAT_SESSION_FLAG_STATIC_MAPPING))
4021     {
4022       if (nat_set_outside_address_and_port
4023           (sm->addresses, thread_index, *out_addr, out_port, proto))
4024         return;
4025     }
4026
4027   u = nat_user_get_or_create (sm, in_addr, fib_index, thread_index);
4028   if (!u)
4029     return;
4030
4031   s = nat_session_alloc_or_recycle (sm, u, thread_index, now);
4032   if (!s)
4033     return;
4034
4035   if (sm->endpoint_dependent)
4036     {
4037       nat_ed_lru_insert (tsm, s, now, nat_proto_to_ip_proto (proto));
4038     }
4039
4040   s->out2in.addr.as_u32 = out_addr->as_u32;
4041   s->out2in.port = out_port;
4042   s->nat_proto = proto;
4043   s->last_heard = now;
4044   s->flags = flags;
4045   s->ext_host_addr.as_u32 = eh_addr->as_u32;
4046   s->ext_host_port = eh_port;
4047   user_session_increment (sm, u, snat_is_session_static (s));
4048   switch (vec_len (sm->outside_fibs))
4049     {
4050     case 0:
4051       s->out2in.fib_index = sm->outside_fib_index;
4052       break;
4053     case 1:
4054       s->out2in.fib_index = sm->outside_fibs[0].fib_index;
4055       break;
4056     default:
4057       /* *INDENT-OFF* */
4058       vec_foreach (outside_fib, sm->outside_fibs)
4059         {
4060           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
4061           if (FIB_NODE_INDEX_INVALID != fei)
4062             {
4063               if (fib_entry_get_resolving_interface (fei) != ~0)
4064                 {
4065                   s->out2in.fib_index = outside_fib->fib_index;
4066                   break;
4067                 }
4068             }
4069         }
4070       /* *INDENT-ON* */
4071       break;
4072     }
4073   init_nat_o2i_kv (&kv, s, s - tsm->sessions);
4074   if (clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 1))
4075     nat_elog_warn ("out2in key add failed");
4076
4077   s->in2out.addr.as_u32 = in_addr->as_u32;
4078   s->in2out.port = in_port;
4079   s->in2out.fib_index = fib_index;
4080   init_nat_i2o_kv (&kv, s, s - tsm->sessions);
4081   if (clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 1))
4082     nat_elog_warn ("in2out key add failed");
4083 }
4084
4085 void
4086 nat_ha_sdel_cb (ip4_address_t * out_addr, u16 out_port,
4087                 ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
4088                 u32 ti)
4089 {
4090   snat_main_t *sm = &snat_main;
4091   clib_bihash_kv_8_8_t kv, value;
4092   u32 thread_index;
4093   snat_session_t *s;
4094   snat_main_per_thread_data_t *tsm;
4095
4096   if (sm->num_workers > 1)
4097     thread_index =
4098       sm->first_worker_index +
4099       (sm->workers[(clib_net_to_host_u16 (out_port) -
4100                     1024) / sm->port_per_thread]);
4101   else
4102     thread_index = sm->num_workers;
4103   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
4104
4105   init_nat_k (&kv, *out_addr, out_port, fib_index, proto);
4106   if (clib_bihash_search_8_8 (&tsm->out2in, &kv, &value))
4107     return;
4108
4109   s = pool_elt_at_index (tsm->sessions, value.value);
4110   nat_free_session_data (sm, s, thread_index, 1);
4111   nat44_delete_session (sm, s, thread_index);
4112 }
4113
4114 void
4115 nat_ha_sref_cb (ip4_address_t * out_addr, u16 out_port,
4116                 ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
4117                 u32 total_pkts, u64 total_bytes, u32 thread_index)
4118 {
4119   snat_main_t *sm = &snat_main;
4120   clib_bihash_kv_8_8_t kv, value;
4121   snat_session_t *s;
4122   snat_main_per_thread_data_t *tsm;
4123
4124   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
4125
4126   init_nat_k (&kv, *out_addr, out_port, fib_index, proto);
4127   if (clib_bihash_search_8_8 (&tsm->out2in, &kv, &value))
4128     return;
4129
4130   s = pool_elt_at_index (tsm->sessions, value.value);
4131   s->total_pkts = total_pkts;
4132   s->total_bytes = total_bytes;
4133 }
4134
4135 void
4136 nat_ha_sadd_ed_cb (ip4_address_t * in_addr, u16 in_port,
4137                    ip4_address_t * out_addr, u16 out_port,
4138                    ip4_address_t * eh_addr, u16 eh_port,
4139                    ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
4140                    u32 fib_index, u16 flags, u32 thread_index)
4141 {
4142   snat_main_t *sm = &snat_main;
4143   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
4144   snat_session_t *s;
4145   clib_bihash_kv_16_8_t kv;
4146   vlib_main_t *vm = vlib_get_main ();
4147   f64 now = vlib_time_now (vm);
4148   nat_outside_fib_t *outside_fib;
4149   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
4150   fib_prefix_t pfx = {
4151     .fp_proto = FIB_PROTOCOL_IP4,
4152     .fp_len = 32,
4153     .fp_addr = {
4154                 .ip4.as_u32 = eh_addr->as_u32,
4155                 },
4156   };
4157
4158
4159   if (!(flags & SNAT_SESSION_FLAG_STATIC_MAPPING))
4160     {
4161       if (nat_set_outside_address_and_port
4162           (sm->addresses, thread_index, *out_addr, out_port, proto))
4163         return;
4164     }
4165
4166   if (flags & SNAT_SESSION_FLAG_TWICE_NAT)
4167     {
4168       if (nat_set_outside_address_and_port
4169           (sm->addresses, thread_index, *ehn_addr, ehn_port, proto))
4170         return;
4171     }
4172
4173   s = nat_ed_session_alloc (sm, thread_index, now, proto);
4174   if (!s)
4175     return;
4176
4177   s->last_heard = now;
4178   s->flags = flags;
4179   s->ext_host_nat_addr.as_u32 = s->ext_host_addr.as_u32 = eh_addr->as_u32;
4180   s->ext_host_nat_port = s->ext_host_port = eh_port;
4181   if (is_twice_nat_session (s))
4182     {
4183       s->ext_host_nat_addr.as_u32 = ehn_addr->as_u32;
4184       s->ext_host_nat_port = ehn_port;
4185     }
4186   switch (vec_len (sm->outside_fibs))
4187     {
4188     case 0:
4189       s->out2in.fib_index = sm->outside_fib_index;
4190       break;
4191     case 1:
4192       s->out2in.fib_index = sm->outside_fibs[0].fib_index;
4193       break;
4194     default:
4195       /* *INDENT-OFF* */
4196       vec_foreach (outside_fib, sm->outside_fibs)
4197         {
4198           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
4199           if (FIB_NODE_INDEX_INVALID != fei)
4200             {
4201               if (fib_entry_get_resolving_interface (fei) != ~0)
4202                 {
4203                   s->out2in.fib_index = outside_fib->fib_index;
4204                   break;
4205                 }
4206             }
4207         }
4208       /* *INDENT-ON* */
4209       break;
4210     }
4211   s->nat_proto = proto;
4212   s->out2in.addr.as_u32 = out_addr->as_u32;
4213   s->out2in.port = out_port;
4214
4215   s->in2out.addr.as_u32 = in_addr->as_u32;
4216   s->in2out.port = in_port;
4217   s->in2out.fib_index = fib_index;
4218
4219   init_ed_kv (&kv, *in_addr, in_port, s->ext_host_nat_addr,
4220               s->ext_host_nat_port, fib_index, nat_proto_to_ip_proto (proto),
4221               thread_index, s - tsm->sessions);
4222   if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &kv, 1))
4223     nat_elog_warn ("in2out key add failed");
4224
4225   init_ed_kv (&kv, *out_addr, out_port, *eh_addr, eh_port,
4226               s->out2in.fib_index, nat_proto_to_ip_proto (proto),
4227               thread_index, s - tsm->sessions);
4228   if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &kv, 1))
4229     nat_elog_warn ("out2in key add failed");
4230 }
4231
4232 void
4233 nat_ha_sdel_ed_cb (ip4_address_t * out_addr, u16 out_port,
4234                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
4235                    u32 fib_index, u32 ti)
4236 {
4237   snat_main_t *sm = &snat_main;
4238   clib_bihash_kv_16_8_t kv, value;
4239   u32 thread_index;
4240   snat_session_t *s;
4241   snat_main_per_thread_data_t *tsm;
4242
4243   if (sm->num_workers > 1)
4244     thread_index =
4245       sm->first_worker_index +
4246       (sm->workers[(clib_net_to_host_u16 (out_port) -
4247                     1024) / sm->port_per_thread]);
4248   else
4249     thread_index = sm->num_workers;
4250   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
4251
4252   init_ed_k (&kv, *out_addr, out_port, *eh_addr, eh_port, fib_index, proto);
4253   if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
4254     return;
4255
4256   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
4257   nat_free_session_data (sm, s, thread_index, 1);
4258   nat44_delete_session (sm, s, thread_index);
4259 }
4260
4261 void
4262 nat_ha_sref_ed_cb (ip4_address_t * out_addr, u16 out_port,
4263                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
4264                    u32 fib_index, u32 total_pkts, u64 total_bytes,
4265                    u32 thread_index)
4266 {
4267   snat_main_t *sm = &snat_main;
4268   clib_bihash_kv_16_8_t kv, value;
4269   snat_session_t *s;
4270   snat_main_per_thread_data_t *tsm;
4271
4272   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
4273
4274   init_ed_k (&kv, *out_addr, out_port, *eh_addr, eh_port, fib_index, proto);
4275   if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
4276     return;
4277
4278   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
4279   s->total_pkts = total_pkts;
4280   s->total_bytes = total_bytes;
4281 }
4282
4283 static u32
4284 nat_calc_bihash_buckets (u32 n_elts)
4285 {
4286   return 1 << (max_log2 (n_elts >> 1) + 1);
4287 }
4288
4289 static u32
4290 nat_calc_bihash_memory (u32 n_buckets, uword kv_size)
4291 {
4292   return n_buckets * (8 + kv_size * 4);
4293 }
4294
4295 u32
4296 nat44_get_max_session_limit ()
4297 {
4298   snat_main_t *sm = &snat_main;
4299   u32 max_limit = 0, len = 0;
4300
4301   for (; len < vec_len (sm->max_translations_per_fib); len++)
4302     {
4303       if (max_limit < sm->max_translations_per_fib[len])
4304         max_limit = sm->max_translations_per_fib[len];
4305     }
4306   return max_limit;
4307 }
4308
4309 int
4310 nat44_set_session_limit (u32 session_limit, u32 vrf_id)
4311 {
4312   snat_main_t *sm = &snat_main;
4313   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
4314   u32 len = vec_len (sm->max_translations_per_fib);
4315
4316   if (len <= fib_index)
4317     {
4318       vec_validate (sm->max_translations_per_fib, fib_index + 1);
4319
4320       for (; len < vec_len (sm->max_translations_per_fib); len++)
4321         sm->max_translations_per_fib[len] = sm->max_translations_per_thread;
4322     }
4323
4324   sm->max_translations_per_fib[fib_index] = session_limit;
4325   return 0;
4326 }
4327
4328 int
4329 nat44_update_session_limit (u32 session_limit, u32 vrf_id)
4330 {
4331   snat_main_t *sm = &snat_main;
4332
4333   if (nat44_set_session_limit (session_limit, vrf_id))
4334     return 1;
4335   sm->max_translations_per_thread = nat44_get_max_session_limit ();
4336
4337   sm->translation_buckets =
4338     nat_calc_bihash_buckets (sm->max_translations_per_thread);
4339
4340   if (!sm->translation_memory_size_set)
4341     {
4342       sm->translation_memory_size =
4343         nat_calc_bihash_memory (sm->translation_buckets,
4344                                 sizeof (clib_bihash_16_8_t));
4345     }
4346
4347   nat44_sessions_clear ();
4348   return 0;
4349 }
4350
4351 void
4352 nat44_db_init (snat_main_per_thread_data_t * tsm)
4353 {
4354   snat_main_t *sm = &snat_main;
4355
4356   pool_alloc (tsm->sessions, sm->max_translations_per_thread);
4357   pool_alloc (tsm->lru_pool, sm->max_translations_per_thread);
4358
4359   dlist_elt_t *head;
4360
4361   pool_get (tsm->lru_pool, head);
4362   tsm->tcp_trans_lru_head_index = head - tsm->lru_pool;
4363   clib_dlist_init (tsm->lru_pool, tsm->tcp_trans_lru_head_index);
4364
4365   pool_get (tsm->lru_pool, head);
4366   tsm->tcp_estab_lru_head_index = head - tsm->lru_pool;
4367   clib_dlist_init (tsm->lru_pool, tsm->tcp_estab_lru_head_index);
4368
4369   pool_get (tsm->lru_pool, head);
4370   tsm->udp_lru_head_index = head - tsm->lru_pool;
4371   clib_dlist_init (tsm->lru_pool, tsm->udp_lru_head_index);
4372
4373   pool_get (tsm->lru_pool, head);
4374   tsm->icmp_lru_head_index = head - tsm->lru_pool;
4375   clib_dlist_init (tsm->lru_pool, tsm->icmp_lru_head_index);
4376
4377   pool_get (tsm->lru_pool, head);
4378   tsm->unk_proto_lru_head_index = head - tsm->lru_pool;
4379   clib_dlist_init (tsm->lru_pool, tsm->unk_proto_lru_head_index);
4380
4381   if (sm->endpoint_dependent)
4382     {
4383       clib_bihash_init_16_8 (&tsm->in2out_ed, "in2out-ed",
4384                              sm->translation_buckets,
4385                              sm->translation_memory_size);
4386       clib_bihash_set_kvp_format_fn_16_8 (&tsm->in2out_ed,
4387                                           format_ed_session_kvp);
4388
4389     }
4390   else
4391     {
4392       clib_bihash_init_8_8 (&tsm->in2out, "in2out",
4393                             sm->translation_buckets,
4394                             sm->translation_memory_size);
4395       clib_bihash_set_kvp_format_fn_8_8 (&tsm->in2out, format_session_kvp);
4396       clib_bihash_init_8_8 (&tsm->out2in, "out2in",
4397                             sm->translation_buckets,
4398                             sm->translation_memory_size);
4399       clib_bihash_set_kvp_format_fn_8_8 (&tsm->out2in, format_session_kvp);
4400     }
4401
4402   // TODO: ED nat is not using these
4403   // before removal large refactor required
4404   pool_alloc (tsm->list_pool, sm->max_translations_per_thread);
4405   clib_bihash_init_8_8 (&tsm->user_hash, "users", sm->user_buckets,
4406                         sm->user_memory_size);
4407   clib_bihash_set_kvp_format_fn_8_8 (&tsm->user_hash, format_user_kvp);
4408 }
4409
4410 void
4411 nat44_db_free (snat_main_per_thread_data_t * tsm)
4412 {
4413   snat_main_t *sm = &snat_main;
4414
4415   pool_free (tsm->sessions);
4416   pool_free (tsm->lru_pool);
4417
4418   if (sm->endpoint_dependent)
4419     {
4420       clib_bihash_free_16_8 (&tsm->in2out_ed);
4421       vec_free (tsm->per_vrf_sessions_vec);
4422     }
4423   else
4424     {
4425       clib_bihash_free_8_8 (&tsm->in2out);
4426       clib_bihash_free_8_8 (&tsm->out2in);
4427     }
4428
4429   // TODO: resolve static mappings (put only to !ED)
4430   pool_free (tsm->users);
4431   pool_free (tsm->list_pool);
4432   clib_bihash_free_8_8 (&tsm->user_hash);
4433 }
4434
4435 void
4436 nat44_sessions_clear ()
4437 {
4438   snat_main_t *sm = &snat_main;
4439   snat_main_per_thread_data_t *tsm;
4440
4441   if (sm->endpoint_dependent)
4442     {
4443       clib_bihash_free_16_8 (&sm->out2in_ed);
4444       clib_bihash_init_16_8 (&sm->out2in_ed, "out2in-ed",
4445                              clib_max (1, sm->num_workers) *
4446                              sm->translation_buckets,
4447                              clib_max (1, sm->num_workers) *
4448                              sm->translation_memory_size);
4449       clib_bihash_set_kvp_format_fn_16_8 (&sm->out2in_ed,
4450                                           format_ed_session_kvp);
4451     }
4452
4453   /* *INDENT-OFF* */
4454   vec_foreach (tsm, sm->per_thread_data)
4455     {
4456       u32 ti;
4457
4458       nat44_db_free (tsm);
4459       nat44_db_init (tsm);
4460
4461       ti = tsm->snat_thread_index;
4462       vlib_set_simple_counter (&sm->total_users, ti, 0, 0);
4463       vlib_set_simple_counter (&sm->total_sessions, ti, 0, 0);
4464     }
4465   /* *INDENT-ON* */
4466 }
4467
4468 static void
4469 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
4470                                  uword opaque,
4471                                  u32 sw_if_index,
4472                                  ip4_address_t * address,
4473                                  u32 address_length,
4474                                  u32 if_address_index, u32 is_delete)
4475 {
4476   snat_main_t *sm = &snat_main;
4477   snat_static_map_resolve_t *rp;
4478   snat_static_mapping_t *m;
4479   clib_bihash_kv_8_8_t kv, value;
4480   int i, rv;
4481   ip4_address_t l_addr;
4482
4483   if (!sm->enabled)
4484     return;
4485
4486   for (i = 0; i < vec_len (sm->to_resolve); i++)
4487     {
4488       rp = sm->to_resolve + i;
4489       if (rp->addr_only == 0)
4490         continue;
4491       if (rp->sw_if_index == sw_if_index)
4492         goto match;
4493     }
4494
4495   return;
4496
4497 match:
4498   init_nat_k (&kv, *address, rp->addr_only ? 0 : rp->e_port,
4499               sm->outside_fib_index, rp->addr_only ? 0 : rp->proto);
4500   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
4501     m = 0;
4502   else
4503     m = pool_elt_at_index (sm->static_mappings, value.value);
4504
4505   if (!is_delete)
4506     {
4507       /* Don't trip over lease renewal, static config */
4508       if (m)
4509         return;
4510     }
4511   else
4512     {
4513       if (!m)
4514         return;
4515     }
4516
4517   /* Indetity mapping? */
4518   if (rp->l_addr.as_u32 == 0)
4519     l_addr.as_u32 = address[0].as_u32;
4520   else
4521     l_addr.as_u32 = rp->l_addr.as_u32;
4522   /* Add the static mapping */
4523   rv = snat_add_static_mapping (l_addr,
4524                                 address[0],
4525                                 rp->l_port,
4526                                 rp->e_port,
4527                                 rp->vrf_id,
4528                                 rp->addr_only, ~0 /* sw_if_index */ ,
4529                                 rp->proto, !is_delete, rp->twice_nat,
4530                                 rp->out2in_only, rp->tag, rp->identity_nat,
4531                                 rp->pool_addr, rp->exact);
4532   if (rv)
4533     nat_elog_notice_X1 ("snat_add_static_mapping returned %d", "i4", rv);
4534 }
4535
4536 static void
4537 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
4538                                        uword opaque,
4539                                        u32 sw_if_index,
4540                                        ip4_address_t * address,
4541                                        u32 address_length,
4542                                        u32 if_address_index, u32 is_delete)
4543 {
4544   snat_main_t *sm = &snat_main;
4545   snat_static_map_resolve_t *rp;
4546   ip4_address_t l_addr;
4547   int i, j;
4548   int rv;
4549   u8 twice_nat = 0;
4550   snat_address_t *addresses = sm->addresses;
4551
4552   if (!sm->enabled)
4553     return;
4554
4555   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices); i++)
4556     {
4557       if (sw_if_index == sm->auto_add_sw_if_indices[i])
4558         goto match;
4559     }
4560
4561   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices_twice_nat); i++)
4562     {
4563       twice_nat = 1;
4564       addresses = sm->twice_nat_addresses;
4565       if (sw_if_index == sm->auto_add_sw_if_indices_twice_nat[i])
4566         goto match;
4567     }
4568
4569   return;
4570
4571 match:
4572   if (!is_delete)
4573     {
4574       /* Don't trip over lease renewal, static config */
4575       for (j = 0; j < vec_len (addresses); j++)
4576         if (addresses[j].addr.as_u32 == address->as_u32)
4577           return;
4578
4579       (void) snat_add_address (sm, address, ~0, twice_nat);
4580       /* Scan static map resolution vector */
4581       for (j = 0; j < vec_len (sm->to_resolve); j++)
4582         {
4583           rp = sm->to_resolve + j;
4584           if (rp->addr_only)
4585             continue;
4586           /* On this interface? */
4587           if (rp->sw_if_index == sw_if_index)
4588             {
4589               /* Indetity mapping? */
4590               if (rp->l_addr.as_u32 == 0)
4591                 l_addr.as_u32 = address[0].as_u32;
4592               else
4593                 l_addr.as_u32 = rp->l_addr.as_u32;
4594               /* Add the static mapping */
4595               rv = snat_add_static_mapping (l_addr,
4596                                             address[0],
4597                                             rp->l_port,
4598                                             rp->e_port,
4599                                             rp->vrf_id,
4600                                             rp->addr_only,
4601                                             ~0 /* sw_if_index */ ,
4602                                             rp->proto,
4603                                             rp->is_add, rp->twice_nat,
4604                                             rp->out2in_only, rp->tag,
4605                                             rp->identity_nat,
4606                                             rp->pool_addr, rp->exact);
4607               if (rv)
4608                 nat_elog_notice_X1 ("snat_add_static_mapping returned %d",
4609                                     "i4", rv);
4610             }
4611         }
4612       return;
4613     }
4614   else
4615     {
4616       (void) snat_del_address (sm, address[0], 1, twice_nat);
4617       return;
4618     }
4619 }
4620
4621
4622 int
4623 snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del,
4624                             u8 twice_nat)
4625 {
4626   ip4_main_t *ip4_main = sm->ip4_main;
4627   ip4_address_t *first_int_addr;
4628   snat_static_map_resolve_t *rp;
4629   u32 *indices_to_delete = 0;
4630   int i, j;
4631   u32 *auto_add_sw_if_indices =
4632     twice_nat ? sm->
4633     auto_add_sw_if_indices_twice_nat : sm->auto_add_sw_if_indices;
4634
4635   first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0        /* just want the address */
4636     );
4637
4638   for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
4639     {
4640       if (auto_add_sw_if_indices[i] == sw_if_index)
4641         {
4642           if (is_del)
4643             {
4644               /* if have address remove it */
4645               if (first_int_addr)
4646                 (void) snat_del_address (sm, first_int_addr[0], 1, twice_nat);
4647               else
4648                 {
4649                   for (j = 0; j < vec_len (sm->to_resolve); j++)
4650                     {
4651                       rp = sm->to_resolve + j;
4652                       if (rp->sw_if_index == sw_if_index)
4653                         vec_add1 (indices_to_delete, j);
4654                     }
4655                   if (vec_len (indices_to_delete))
4656                     {
4657                       for (j = vec_len (indices_to_delete) - 1; j >= 0; j--)
4658                         vec_del1 (sm->to_resolve, j);
4659                       vec_free (indices_to_delete);
4660                     }
4661                 }
4662               if (twice_nat)
4663                 vec_del1 (sm->auto_add_sw_if_indices_twice_nat, i);
4664               else
4665                 vec_del1 (sm->auto_add_sw_if_indices, i);
4666             }
4667           else
4668             return VNET_API_ERROR_VALUE_EXIST;
4669
4670           return 0;
4671         }
4672     }
4673
4674   if (is_del)
4675     return VNET_API_ERROR_NO_SUCH_ENTRY;
4676
4677   /* add to the auto-address list */
4678   if (twice_nat)
4679     vec_add1 (sm->auto_add_sw_if_indices_twice_nat, sw_if_index);
4680   else
4681     vec_add1 (sm->auto_add_sw_if_indices, sw_if_index);
4682
4683   /* If the address is already bound - or static - add it now */
4684   if (first_int_addr)
4685     (void) snat_add_address (sm, first_int_addr, ~0, twice_nat);
4686
4687   return 0;
4688 }
4689
4690 int
4691 nat44_del_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
4692                    nat_protocol_t proto, u32 vrf_id, int is_in)
4693 {
4694   snat_main_per_thread_data_t *tsm;
4695   clib_bihash_kv_8_8_t kv, value;
4696   ip4_header_t ip;
4697   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
4698   snat_session_t *s;
4699   clib_bihash_8_8_t *t;
4700
4701   if (sm->endpoint_dependent)
4702     return VNET_API_ERROR_UNSUPPORTED;
4703
4704   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
4705   if (sm->num_workers > 1)
4706     tsm =
4707       vec_elt_at_index (sm->per_thread_data,
4708                         sm->worker_in2out_cb (&ip, fib_index, 0));
4709   else
4710     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
4711
4712   init_nat_k (&kv, *addr, port, fib_index, proto);
4713   t = is_in ? &tsm->in2out : &tsm->out2in;
4714   if (!clib_bihash_search_8_8 (t, &kv, &value))
4715     {
4716       if (pool_is_free_index (tsm->sessions, value.value))
4717         return VNET_API_ERROR_UNSPECIFIED;
4718
4719       s = pool_elt_at_index (tsm->sessions, value.value);
4720       nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
4721       nat44_delete_session (sm, s, tsm - sm->per_thread_data);
4722       return 0;
4723     }
4724
4725   return VNET_API_ERROR_NO_SUCH_ENTRY;
4726 }
4727
4728 int
4729 nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
4730                       ip4_address_t * eh_addr, u16 eh_port, u8 proto,
4731                       u32 vrf_id, int is_in)
4732 {
4733   ip4_header_t ip;
4734   clib_bihash_16_8_t *t;
4735   clib_bihash_kv_16_8_t kv, value;
4736   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
4737   snat_session_t *s;
4738   snat_main_per_thread_data_t *tsm;
4739
4740   if (!sm->endpoint_dependent)
4741     return VNET_API_ERROR_FEATURE_DISABLED;
4742
4743   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
4744   if (sm->num_workers > 1)
4745     tsm =
4746       vec_elt_at_index (sm->per_thread_data,
4747                         sm->worker_in2out_cb (&ip, fib_index, 0));
4748   else
4749     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
4750
4751   t = is_in ? &tsm->in2out_ed : &sm->out2in_ed;
4752   init_ed_k (&kv, *addr, port, *eh_addr, eh_port, fib_index, proto);
4753   if (clib_bihash_search_16_8 (t, &kv, &value))
4754     {
4755       return VNET_API_ERROR_NO_SUCH_ENTRY;
4756     }
4757
4758   if (pool_is_free_index (tsm->sessions, value.value))
4759     return VNET_API_ERROR_UNSPECIFIED;
4760   s = pool_elt_at_index (tsm->sessions, value.value);
4761   nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
4762   nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
4763   return 0;
4764 }
4765
4766 void
4767 nat_set_alloc_addr_and_port_mape (u16 psid, u16 psid_offset, u16 psid_length)
4768 {
4769   snat_main_t *sm = &snat_main;
4770
4771   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_MAPE;
4772   sm->alloc_addr_and_port = nat_alloc_addr_and_port_mape;
4773   sm->psid = psid;
4774   sm->psid_offset = psid_offset;
4775   sm->psid_length = psid_length;
4776 }
4777
4778 void
4779 nat_set_alloc_addr_and_port_range (u16 start_port, u16 end_port)
4780 {
4781   snat_main_t *sm = &snat_main;
4782
4783   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_RANGE;
4784   sm->alloc_addr_and_port = nat_alloc_addr_and_port_range;
4785   sm->start_port = start_port;
4786   sm->end_port = end_port;
4787 }
4788
4789 void
4790 nat_set_alloc_addr_and_port_default (void)
4791 {
4792   snat_main_t *sm = &snat_main;
4793
4794   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_DEFAULT;
4795   sm->alloc_addr_and_port = nat_alloc_addr_and_port_default;
4796 }
4797
/*
 * Node function of the "nat-default" node.  It does not touch the
 * frame and always returns 0.
 */
VLIB_NODE_FN (nat_default_node) (vlib_main_t * vm,
                                 vlib_node_runtime_t * node,
                                 vlib_frame_t * frame)
{
  return 0;
}
4804
/*
 * Registration of the "nat-default" internal node.  It declares no
 * errors and no trace formatter; its next-node table maps each
 * NAT_NEXT_* index to the graph node named below.
 */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat_default_node) = {
  .name = "nat-default",
  .vector_size = sizeof (u32),
  .format_trace = 0,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = 0,
  .n_next_nodes = NAT_N_NEXT,
  .next_nodes = {
    [NAT_NEXT_DROP] = "error-drop",
    [NAT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    [NAT_NEXT_IN2OUT_ED_FAST_PATH] = "nat44-ed-in2out",
    [NAT_NEXT_IN2OUT_ED_SLOW_PATH] = "nat44-ed-in2out-slowpath",
    [NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH] = "nat44-ed-in2out-output",
    [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
    [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in",
    [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath",
    [NAT_NEXT_IN2OUT_CLASSIFY] = "nat44-in2out-worker-handoff",
    [NAT_NEXT_OUT2IN_CLASSIFY] = "nat44-out2in-worker-handoff",
  },
};
/* *INDENT-ON* */
4827
4828 /*
4829  * fd.io coding-style-patch-verification: ON
4830  *
4831  * Local Variables:
4832  * eval: (c-set-style "gnu")
4833  * End:
4834  */