nat: bihash: fix buckets calc and remove mem param
[vpp.git] / src / plugins / nat / nat.c
1 /*
2  * snat.c - simple nat plugin
3  *
4  * Copyright (c) 2016 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/vnet.h>
19 #include <vnet/ip/ip.h>
20 #include <vnet/ip/ip4.h>
21 #include <vnet/plugin/plugin.h>
22 #include <nat/nat.h>
23 #include <nat/nat_dpo.h>
24 #include <nat/lib/ipfix_logging.h>
25 #include <nat/nat_inlines.h>
26 #include <nat/nat44/inlines.h>
27 #include <nat/nat_affinity.h>
28 #include <nat/nat_syslog.h>
29 #include <nat/nat_ha.h>
30 #include <vnet/fib/fib_table.h>
31 #include <vnet/fib/ip4_fib.h>
32 #include <vnet/ip/reass/ip4_sv_reass.h>
33 #include <vppinfra/bihash_16_8.h>
34 #include <nat/nat44/ed_inlines.h>
35 #include <vnet/ip/ip_table.h>
36
37 #include <vpp/app/version.h>
38
/* Plugin-wide NAT state; functions in this file reach it via &snat_main. */
snat_main_t snat_main;

/*
 * FIB sources owned by the NAT plugin.  nat_fib_src_low is the source
 * passed to the fib_table_* calls made in this file.
 */
fib_source_t nat_fib_src_hi;
fib_source_t nat_fib_src_low;
43
44 /* *INDENT-OFF* */
/*
 * Hook up input features.
 *
 * Every entry below attaches one NAT graph node (.node_name) to the
 * "ip4-unicast" feature arc.  .runs_after lists the features the node
 * is ordered behind on that arc.
 */
VNET_FEATURE_INIT (nat_pre_in2out, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat-pre-in2out",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (nat_pre_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat-pre-out2in",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-dhcp-client-detect",
                               "ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (snat_in2out_worker_handoff, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-in2out-worker-handoff",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
};
VNET_FEATURE_INIT (snat_out2in_worker_handoff, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-out2in-worker-handoff",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
                               "ip4-dhcp-client-detect"),
};
VNET_FEATURE_INIT (ip4_snat_in2out, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-in2out",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-out2in",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
                               "ip4-dhcp-client-detect"),
};
VNET_FEATURE_INIT (ip4_nat_classify, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-classify",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-in2out",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-out2in",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
                               "ip4-dhcp-client-detect"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-classify",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-handoff-classify",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-in2out-fast",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-out2in-fast",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
                               "ip4-dhcp-client-detect"),
};
VNET_FEATURE_INIT (ip4_snat_hairpin_dst, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-hairpin-dst",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_dst, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat44-ed-hairpin-dst",
  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
};
128
/*
 * Hook up output features.
 *
 * Every entry below attaches one NAT graph node to the "ip4-output"
 * feature arc.  Note the two ordering styles: the first three nodes are
 * placed after "acl-plugin-out-ip4-fa", while the last three are placed
 * after output reassembly but before "acl-plugin-out-ip4-fa".
 */
VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-in2out-output",
  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
};
VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-in2out-output-worker-handoff",
  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
};
VNET_FEATURE_INIT (ip4_snat_hairpin_src, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-hairpin-src",
  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
};
VNET_FEATURE_INIT (nat_pre_in2out_output, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat-pre-in2out-output",
  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
  .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-ed-in2out-output",
  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
  .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_src, static) = {
  .arc_name = "ip4-output",
  .node_name = "nat44-ed-hairpin-src",
  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
  .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
163
/*
 * Hook up ip4-local features.
 *
 * Both hairpinning nodes sit on the "ip4-local" arc and are ordered
 * before "ip4-local-end-of-arc".
 */
VNET_FEATURE_INIT (ip4_nat_hairpinning, static) =
{
  .arc_name = "ip4-local",
  .node_name = "nat44-hairpinning",
  .runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
};
VNET_FEATURE_INIT (ip4_nat44_ed_hairpinning, static) =
{
  .arc_name = "ip4-local",
  .node_name = "nat44-ed-hairpinning",
  .runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
};
177
178
/* Plugin registration record: build version string and description. */
VLIB_PLUGIN_REGISTER () = {
    .version = VPP_BUILD_VER,
    .description = "Network Address Translation (NAT)",
};
183 /* *INDENT-ON* */
184
/*
 * Forward declarations.
 *
 * The four *_get_worker_* callbacks and nat_calc_bihash_buckets are
 * static, so their definitions must live elsewhere in this translation
 * unit.  The out2in variants additionally receive the vlib buffer.
 */
static u32
nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip,
                               u32 rx_fib_index, u8 is_output);

static u32
nat44_ed_get_worker_in2out_cb (ip4_header_t * ip, u32 rx_fib_index,
                               u8 is_output);

static u32
snat_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip0,
                           u32 rx_fib_index0, u8 is_output);

static u32
snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0,
                           u8 is_output);

static u32 nat_calc_bihash_buckets (u32 n_elts);

/* format()-style helpers (u8 * s, va_list * args); not static. */
u8 *format_static_mapping_kvp (u8 * s, va_list * args);

u8 *format_ed_session_kvp (u8 * s, va_list * args);
206
207 void
208 nat_ha_sadd_cb (ip4_address_t * in_addr, u16 in_port,
209                 ip4_address_t * out_addr, u16 out_port,
210                 ip4_address_t * eh_addr, u16 eh_port,
211                 ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
212                 u32 fib_index, u16 flags, u32 thread_index);
213
214 void
215 nat_ha_sdel_cb (ip4_address_t * out_addr, u16 out_port,
216                 ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
217                 u32 ti);
218
219 void
220 nat_ha_sref_cb (ip4_address_t * out_addr, u16 out_port,
221                 ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
222                 u32 total_pkts, u64 total_bytes, u32 thread_index);
223
224 void
225 nat_ha_sadd_ed_cb (ip4_address_t * in_addr, u16 in_port,
226                    ip4_address_t * out_addr, u16 out_port,
227                    ip4_address_t * eh_addr, u16 eh_port,
228                    ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
229                    u32 fib_index, u16 flags, u32 thread_index);
230
231 void
232 nat_ha_sdel_ed_cb (ip4_address_t * out_addr, u16 out_port,
233                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
234                    u32 fib_index, u32 ti);
235
236 void
237 nat_ha_sdel_ed_cb (ip4_address_t * out_addr, u16 out_port,
238                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
239                    u32 fib_index, u32 ti);
240
241 void
242 nat_ha_sref_ed_cb (ip4_address_t * out_addr, u16 out_port,
243                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
244                    u32 fib_index, u32 total_pkts, u64 total_bytes,
245                    u32 thread_index);
246
247 void
248 nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index,
249                        u8 is_ha)
250 {
251   clib_bihash_kv_8_8_t kv;
252   u8 proto;
253   u16 r_port, l_port;
254   ip4_address_t *l_addr, *r_addr;
255   u32 fib_index = 0;
256   clib_bihash_kv_16_8_t ed_kv;
257   snat_main_per_thread_data_t *tsm =
258     vec_elt_at_index (sm->per_thread_data, thread_index);
259
260   if (is_ed_session (s))
261     {
262       per_vrf_sessions_unregister_session (s, thread_index);
263     }
264
265   if (is_fwd_bypass_session (s))
266     {
267       if (snat_is_unk_proto_session (s))
268         {
269           init_ed_k (&ed_kv, s->in2out.addr, 0, s->ext_host_addr, 0, 0,
270                      s->in2out.port);
271         }
272       else
273         {
274           l_port = s->in2out.port;
275           r_port = s->ext_host_port;
276           l_addr = &s->in2out.addr;
277           r_addr = &s->ext_host_addr;
278           proto = nat_proto_to_ip_proto (s->nat_proto);
279           fib_index = s->in2out.fib_index;
280           init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index,
281                      proto);
282         }
283       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
284         nat_elog_warn ("in2out_ed key del failed");
285       return;
286     }
287
288   /* session lookup tables */
289   if (is_ed_session (s))
290     {
291       if (is_affinity_sessions (s))
292         nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
293                              s->nat_proto, s->out2in.port);
294       l_addr = &s->out2in.addr;
295       r_addr = &s->ext_host_addr;
296       fib_index = s->out2in.fib_index;
297       if (snat_is_unk_proto_session (s))
298         {
299           proto = s->in2out.port;
300           r_port = 0;
301           l_port = 0;
302         }
303       else
304         {
305           proto = nat_proto_to_ip_proto (s->nat_proto);
306           l_port = s->out2in.port;
307           r_port = s->ext_host_port;
308         }
309       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
310       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0))
311         nat_elog_warn ("out2in_ed key del failed");
312       l_addr = &s->in2out.addr;
313       fib_index = s->in2out.fib_index;
314       if (!snat_is_unk_proto_session (s))
315         l_port = s->in2out.port;
316       if (is_twice_nat_session (s))
317         {
318           r_addr = &s->ext_host_nat_addr;
319           r_port = s->ext_host_nat_port;
320         }
321       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
322       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
323         nat_elog_warn ("in2out_ed key del failed");
324
325       if (!is_ha)
326         nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
327                                &s->in2out.addr, s->in2out.port,
328                                &s->ext_host_nat_addr, s->ext_host_nat_port,
329                                &s->out2in.addr, s->out2in.port,
330                                &s->ext_host_addr, s->ext_host_port,
331                                s->nat_proto, is_twice_nat_session (s));
332     }
333   else
334     {
335       init_nat_i2o_k (&kv, s);
336       if (clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 0))
337         nat_elog_warn ("in2out key del failed");
338       init_nat_o2i_k (&kv, s);
339       if (clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 0))
340         nat_elog_warn ("out2in key del failed");
341
342       if (!is_ha)
343         nat_syslog_nat44_apmdel (s->user_index, s->in2out.fib_index,
344                                  &s->in2out.addr, s->in2out.port,
345                                  &s->out2in.addr, s->out2in.port,
346                                  s->nat_proto);
347     }
348
349   if (snat_is_unk_proto_session (s))
350     return;
351
352   if (!is_ha)
353     {
354       /* log NAT event */
355       nat_ipfix_logging_nat44_ses_delete (thread_index,
356                                           s->in2out.addr.as_u32,
357                                           s->out2in.addr.as_u32,
358                                           s->nat_proto,
359                                           s->in2out.port,
360                                           s->out2in.port,
361                                           s->in2out.fib_index);
362
363       nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
364                    s->ext_host_port, s->nat_proto, s->out2in.fib_index,
365                    thread_index);
366     }
367
368   /* Twice NAT address and port for external host */
369   if (is_twice_nat_session (s))
370     {
371       snat_free_outside_address_and_port (sm->twice_nat_addresses,
372                                           thread_index,
373                                           &s->ext_host_nat_addr,
374                                           s->ext_host_nat_port, s->nat_proto);
375     }
376
377   if (snat_is_session_static (s))
378     return;
379
380   snat_free_outside_address_and_port (sm->addresses, thread_index,
381                                       &s->out2in.addr, s->out2in.port,
382                                       s->nat_proto);
383 }
384
385 void
386 nat44_free_session_data (snat_main_t * sm, snat_session_t * s,
387                          u32 thread_index, u8 is_ha)
388 {
389   u8 proto;
390   u16 r_port, l_port;
391   ip4_address_t *l_addr, *r_addr;
392   u32 fib_index;
393   clib_bihash_kv_16_8_t ed_kv;
394   snat_main_per_thread_data_t *tsm =
395     vec_elt_at_index (sm->per_thread_data, thread_index);
396
397   if (is_fwd_bypass_session (s))
398     {
399       if (snat_is_unk_proto_session (s))
400         {
401           proto = s->in2out.port;
402           r_port = 0;
403           l_port = 0;
404         }
405       else
406         {
407           proto = nat_proto_to_ip_proto (s->nat_proto);
408           l_port = s->in2out.port;
409           r_port = s->ext_host_port;
410         }
411
412       l_addr = &s->in2out.addr;
413       r_addr = &s->ext_host_addr;
414       fib_index = 0;
415       init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
416
417       if (PREDICT_FALSE
418           (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0)))
419         nat_elog_warn ("in2out_ed key del failed");
420       return;
421     }
422
423   /* session lookup tables */
424   if (is_affinity_sessions (s))
425     nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
426                          s->nat_proto, s->out2in.port);
427   l_addr = &s->out2in.addr;
428   r_addr = &s->ext_host_addr;
429   fib_index = s->out2in.fib_index;
430   if (snat_is_unk_proto_session (s))
431     {
432       proto = s->in2out.port;
433       r_port = 0;
434       l_port = 0;
435     }
436   else
437     {
438       proto = nat_proto_to_ip_proto (s->nat_proto);
439       l_port = s->out2in.port;
440       r_port = s->ext_host_port;
441     }
442   init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
443
444   if (PREDICT_FALSE (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0)))
445     nat_elog_warn ("out2in_ed key del failed");
446
447   l_addr = &s->in2out.addr;
448   fib_index = s->in2out.fib_index;
449
450   if (!snat_is_unk_proto_session (s))
451     l_port = s->in2out.port;
452
453   if (is_twice_nat_session (s))
454     {
455       r_addr = &s->ext_host_nat_addr;
456       r_port = s->ext_host_nat_port;
457     }
458   init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
459
460   if (PREDICT_FALSE (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0)))
461     nat_elog_warn ("in2out_ed key del failed");
462
463   if (!is_ha)
464     {
465       nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
466                              &s->in2out.addr, s->in2out.port,
467                              &s->ext_host_nat_addr, s->ext_host_nat_port,
468                              &s->out2in.addr, s->out2in.port,
469                              &s->ext_host_addr, s->ext_host_port,
470                              s->nat_proto, is_twice_nat_session (s));
471     }
472
473   if (snat_is_unk_proto_session (s))
474     return;
475
476   if (!is_ha)
477     {
478       nat_ipfix_logging_nat44_ses_delete (thread_index,
479                                           s->in2out.addr.as_u32,
480                                           s->out2in.addr.as_u32,
481                                           s->nat_proto,
482                                           s->in2out.port,
483                                           s->out2in.port,
484                                           s->in2out.fib_index);
485       nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
486                    s->ext_host_port, s->nat_proto, s->out2in.fib_index,
487                    thread_index);
488     }
489
490   /* Twice NAT address and port for external host */
491   if (is_twice_nat_session (s))
492     {
493       snat_free_outside_address_and_port (sm->twice_nat_addresses,
494                                           thread_index,
495                                           &s->ext_host_nat_addr,
496                                           s->ext_host_nat_port, s->nat_proto);
497     }
498
499   if (snat_is_session_static (s))
500     return;
501
502   snat_free_outside_address_and_port (sm->addresses, thread_index,
503                                       &s->out2in.addr, s->out2in.port,
504                                       s->nat_proto);
505 }
506
507
508 snat_user_t *
509 nat_user_get_or_create (snat_main_t * sm, ip4_address_t * addr, u32 fib_index,
510                         u32 thread_index)
511 {
512   snat_user_t *u = 0;
513   snat_user_key_t user_key;
514   clib_bihash_kv_8_8_t kv, value;
515   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
516   dlist_elt_t *per_user_list_head_elt;
517
518   user_key.addr.as_u32 = addr->as_u32;
519   user_key.fib_index = fib_index;
520   kv.key = user_key.as_u64;
521
522   /* Ever heard of the "user" = src ip4 address before? */
523   if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
524     {
525       if (pool_elts (tsm->users) >= sm->max_users_per_thread)
526         {
527           vlib_increment_simple_counter (&sm->user_limit_reached,
528                                          thread_index, 0, 1);
529           nat_elog_warn ("maximum user limit reached");
530           return NULL;
531         }
532       /* no, make a new one */
533       pool_get (tsm->users, u);
534       clib_memset (u, 0, sizeof (*u));
535
536       u->addr.as_u32 = addr->as_u32;
537       u->fib_index = fib_index;
538
539       pool_get (tsm->list_pool, per_user_list_head_elt);
540
541       u->sessions_per_user_list_head_index = per_user_list_head_elt -
542         tsm->list_pool;
543
544       clib_dlist_init (tsm->list_pool, u->sessions_per_user_list_head_index);
545
546       kv.value = u - tsm->users;
547
548       /* add user */
549       if (clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1))
550         {
551           nat_elog_warn ("user_hash key add failed");
552           nat44_delete_user_with_no_session (sm, u, thread_index);
553           return NULL;
554         }
555
556       vlib_set_simple_counter (&sm->total_users, thread_index, 0,
557                                pool_elts (tsm->users));
558     }
559   else
560     {
561       u = pool_elt_at_index (tsm->users, value.value);
562     }
563
564   return u;
565 }
566
567 snat_session_t *
568 nat_session_alloc_or_recycle (snat_main_t * sm, snat_user_t * u,
569                               u32 thread_index, f64 now)
570 {
571   snat_session_t *s;
572   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
573   u32 oldest_per_user_translation_list_index, session_index;
574   dlist_elt_t *oldest_per_user_translation_list_elt;
575   dlist_elt_t *per_user_translation_list_elt;
576
577   /* Over quota? Recycle the least recently used translation */
578   if ((u->nsessions + u->nstaticsessions) >= sm->max_translations_per_user)
579     {
580       oldest_per_user_translation_list_index =
581         clib_dlist_remove_head (tsm->list_pool,
582                                 u->sessions_per_user_list_head_index);
583
584       ASSERT (oldest_per_user_translation_list_index != ~0);
585
586       /* Add it back to the end of the LRU list */
587       clib_dlist_addtail (tsm->list_pool,
588                           u->sessions_per_user_list_head_index,
589                           oldest_per_user_translation_list_index);
590       /* Get the list element */
591       oldest_per_user_translation_list_elt =
592         pool_elt_at_index (tsm->list_pool,
593                            oldest_per_user_translation_list_index);
594
595       /* Get the session index from the list element */
596       session_index = oldest_per_user_translation_list_elt->value;
597
598       /* Get the session */
599       s = pool_elt_at_index (tsm->sessions, session_index);
600       nat_free_session_data (sm, s, thread_index, 0);
601       if (snat_is_session_static (s))
602         u->nstaticsessions--;
603       else
604         u->nsessions--;
605       s->flags = 0;
606       s->total_bytes = 0;
607       s->total_pkts = 0;
608       s->state = 0;
609       s->ext_host_addr.as_u32 = 0;
610       s->ext_host_port = 0;
611       s->ext_host_nat_addr.as_u32 = 0;
612       s->ext_host_nat_port = 0;
613     }
614   else
615     {
616       pool_get (tsm->sessions, s);
617       clib_memset (s, 0, sizeof (*s));
618
619       /* Create list elts */
620       pool_get (tsm->list_pool, per_user_translation_list_elt);
621       clib_dlist_init (tsm->list_pool,
622                        per_user_translation_list_elt - tsm->list_pool);
623
624       per_user_translation_list_elt->value = s - tsm->sessions;
625       s->per_user_index = per_user_translation_list_elt - tsm->list_pool;
626       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
627
628       clib_dlist_addtail (tsm->list_pool,
629                           s->per_user_list_head_index,
630                           per_user_translation_list_elt - tsm->list_pool);
631
632       s->user_index = u - tsm->users;
633       vlib_set_simple_counter (&sm->total_sessions, thread_index, 0,
634                                pool_elts (tsm->sessions));
635     }
636
637   s->ha_last_refreshed = now;
638
639   return s;
640 }
641
642 void
643 snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index,
644                           int is_add)
645 {
646   fib_prefix_t prefix = {
647     .fp_len = p_len,
648     .fp_proto = FIB_PROTOCOL_IP4,
649     .fp_addr = {
650                 .ip4.as_u32 = addr->as_u32,
651                 },
652   };
653   u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
654
655   if (is_add)
656     fib_table_entry_update_one_path (fib_index,
657                                      &prefix,
658                                      nat_fib_src_low,
659                                      (FIB_ENTRY_FLAG_CONNECTED |
660                                       FIB_ENTRY_FLAG_LOCAL |
661                                       FIB_ENTRY_FLAG_EXCLUSIVE),
662                                      DPO_PROTO_IP4,
663                                      NULL,
664                                      sw_if_index,
665                                      ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
666   else
667     fib_table_entry_delete (fib_index, &prefix, nat_fib_src_low);
668 }
669
670 int
671 snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id,
672                   u8 twice_nat)
673 {
674   snat_address_t *ap;
675   snat_interface_t *i;
676   vlib_thread_main_t *tm = vlib_get_thread_main ();
677
678   if (twice_nat && !sm->endpoint_dependent)
679     {
680       nat_log_err ("unsupported");
681       return VNET_API_ERROR_UNSUPPORTED;
682     }
683
684   /* Check if address already exists */
685   /* *INDENT-OFF* */
686   vec_foreach (ap, twice_nat ? sm->twice_nat_addresses : sm->addresses)
687     {
688       if (ap->addr.as_u32 == addr->as_u32)
689         {
690           nat_log_err ("address exist");
691           return VNET_API_ERROR_VALUE_EXIST;
692         }
693     }
694   /* *INDENT-ON* */
695
696   if (twice_nat)
697     vec_add2 (sm->twice_nat_addresses, ap, 1);
698   else
699     vec_add2 (sm->addresses, ap, 1);
700
701   ap->addr = *addr;
702   if (vrf_id != ~0)
703     ap->fib_index =
704       fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
705                                          nat_fib_src_low);
706   else
707     ap->fib_index = ~0;
708
709   /* *INDENT-OFF* */
710   #define _(N, i, n, s) \
711     clib_memset(ap->busy_##n##_port_refcounts, 0, sizeof(ap->busy_##n##_port_refcounts));\
712     ap->busy_##n##_ports = 0; \
713     ap->busy_##n##_ports_per_thread = 0;\
714     vec_validate_init_empty (ap->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
715     foreach_nat_protocol
716   #undef _
717   /* *INDENT-ON* */
718
719   if (twice_nat)
720     return 0;
721
722   /* Add external address to FIB */
723   /* *INDENT-OFF* */
724   pool_foreach (i, sm->interfaces,
725   ({
726     if (nat_interface_is_inside(i) || sm->out2in_dpo)
727       continue;
728
729     snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1);
730     break;
731   }));
732   pool_foreach (i, sm->output_feature_interfaces,
733   ({
734     if (nat_interface_is_inside(i) || sm->out2in_dpo)
735       continue;
736
737     snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1);
738     break;
739   }));
740   /* *INDENT-ON* */
741
742   return 0;
743 }
744
745 static int
746 is_snat_address_used_in_static_mapping (snat_main_t * sm, ip4_address_t addr)
747 {
748   snat_static_mapping_t *m;
749   /* *INDENT-OFF* */
750   pool_foreach (m, sm->static_mappings,
751   ({
752       if (is_addr_only_static_mapping (m) ||
753           is_out2in_only_static_mapping (m) ||
754           is_identity_static_mapping (m))
755         continue;
756       if (m->external_addr.as_u32 == addr.as_u32)
757         return 1;
758   }));
759   /* *INDENT-ON* */
760
761   return 0;
762 }
763
764 static void
765 snat_add_static_mapping_when_resolved (snat_main_t * sm,
766                                        ip4_address_t l_addr,
767                                        u16 l_port,
768                                        u32 sw_if_index,
769                                        u16 e_port,
770                                        u32 vrf_id,
771                                        nat_protocol_t proto,
772                                        int addr_only, int is_add, u8 * tag,
773                                        int twice_nat, int out2in_only,
774                                        int identity_nat,
775                                        ip4_address_t pool_addr, int exact)
776 {
777   snat_static_map_resolve_t *rp;
778
779   vec_add2 (sm->to_resolve, rp, 1);
780   rp->l_addr.as_u32 = l_addr.as_u32;
781   rp->l_port = l_port;
782   rp->sw_if_index = sw_if_index;
783   rp->e_port = e_port;
784   rp->vrf_id = vrf_id;
785   rp->proto = proto;
786   rp->addr_only = addr_only;
787   rp->is_add = is_add;
788   rp->twice_nat = twice_nat;
789   rp->out2in_only = out2in_only;
790   rp->identity_nat = identity_nat;
791   rp->tag = vec_dup (tag);
792   rp->pool_addr = pool_addr;
793   rp->exact = exact;
794 }
795
796 static u32
797 get_thread_idx_by_port (u16 e_port)
798 {
799   snat_main_t *sm = &snat_main;
800   u32 thread_idx = sm->num_workers;
801   if (sm->num_workers > 1)
802     {
803       thread_idx =
804         sm->first_worker_index +
805         sm->workers[(e_port - 1024) / sm->port_per_thread];
806     }
807   return thread_idx;
808 }
809
810 void
811 snat_static_mapping_del_sessions (snat_main_t * sm,
812                                   snat_main_per_thread_data_t * tsm,
813                                   snat_user_key_t u_key, int addr_only,
814                                   ip4_address_t e_addr, u16 e_port)
815 {
816   clib_bihash_kv_8_8_t kv, value;
817   kv.key = u_key.as_u64;
818   u64 user_index;
819   dlist_elt_t *head, *elt;
820   snat_user_t *u;
821   snat_session_t *s;
822   u32 elt_index, head_index, ses_index;
823   if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
824     {
825       user_index = value.value;
826       u = pool_elt_at_index (tsm->users, user_index);
827       if (u->nstaticsessions)
828         {
829           head_index = u->sessions_per_user_list_head_index;
830           head = pool_elt_at_index (tsm->list_pool, head_index);
831           elt_index = head->next;
832           elt = pool_elt_at_index (tsm->list_pool, elt_index);
833           ses_index = elt->value;
834           while (ses_index != ~0)
835             {
836               s = pool_elt_at_index (tsm->sessions, ses_index);
837               elt = pool_elt_at_index (tsm->list_pool, elt->next);
838               ses_index = elt->value;
839
840               if (!addr_only)
841                 {
842                   if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
843                       (s->out2in.port != e_port))
844                     continue;
845                 }
846
847               if (is_lb_session (s))
848                 continue;
849
850               if (!snat_is_session_static (s))
851                 continue;
852
853               nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
854               nat44_delete_session (sm, s, tsm - sm->per_thread_data);
855
856               if (!addr_only)
857                 break;
858             }
859         }
860     }
861 }
862
863 void
864 snat_ed_static_mapping_del_sessions (snat_main_t * sm,
865                                      snat_main_per_thread_data_t * tsm,
866                                      ip4_address_t l_addr,
867                                      u16 l_port,
868                                      u8 protocol,
869                                      u32 fib_index, int addr_only,
870                                      ip4_address_t e_addr, u16 e_port)
871 {
872   snat_session_t *s;
873   u32 *indexes_to_free = NULL;
874   /* *INDENT-OFF* */
875   pool_foreach (s, tsm->sessions, {
876     if (s->in2out.fib_index != fib_index ||
877         s->in2out.addr.as_u32 != l_addr.as_u32)
878       {
879         continue;
880       }
881     if (!addr_only)
882       {
883         if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
884             s->out2in.port != e_port ||
885             s->in2out.port != l_port ||
886             s->nat_proto != protocol)
887           continue;
888       }
889
890     if (is_lb_session (s))
891       continue;
892     if (!snat_is_session_static (s))
893       continue;
894     nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
895     vec_add1 (indexes_to_free, s - tsm->sessions);
896     if (!addr_only)
897       break;
898   });
899   /* *INDENT-ON* */
900   u32 *ses_index;
901   vec_foreach (ses_index, indexes_to_free)
902   {
903     s = pool_elt_at_index (tsm->sessions, *ses_index);
904     nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
905   }
906   vec_free (indexes_to_free);
907 }
908
909 int
910 snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
911                          u16 l_port, u16 e_port, u32 vrf_id, int addr_only,
912                          u32 sw_if_index, nat_protocol_t proto, int is_add,
913                          twice_nat_type_t twice_nat, u8 out2in_only, u8 * tag,
914                          u8 identity_nat, ip4_address_t pool_addr, int exact)
915 {
916   snat_main_t *sm = &snat_main;
917   snat_static_mapping_t *m;
918   clib_bihash_kv_8_8_t kv, value;
919   snat_address_t *a = 0;
920   u32 fib_index = ~0;
921   snat_interface_t *interface;
922   int i;
923   snat_main_per_thread_data_t *tsm;
924   snat_user_key_t u_key;
925   snat_user_t *u;
926   dlist_elt_t *head, *elt;
927   u32 elt_index, head_index;
928   u32 ses_index;
929   u64 user_index;
930   snat_session_t *s;
931   snat_static_map_resolve_t *rp, *rp_match = 0;
932   nat44_lb_addr_port_t *local;
933   u32 find = ~0;
934
935   if (!sm->endpoint_dependent)
936     {
937       if (twice_nat || out2in_only)
938         return VNET_API_ERROR_FEATURE_DISABLED;
939     }
940
941   /* If the external address is a specific interface address */
942   if (sw_if_index != ~0)
943     {
944       ip4_address_t *first_int_addr;
945
946       for (i = 0; i < vec_len (sm->to_resolve); i++)
947         {
948           rp = sm->to_resolve + i;
949           if (rp->sw_if_index != sw_if_index ||
950               rp->l_addr.as_u32 != l_addr.as_u32 ||
951               rp->vrf_id != vrf_id || rp->addr_only != addr_only)
952             continue;
953
954           if (!addr_only)
955             {
956               if ((rp->l_port != l_port && rp->e_port != e_port)
957                   || rp->proto != proto)
958                 continue;
959             }
960
961           rp_match = rp;
962           break;
963         }
964
965       /* Might be already set... */
966       first_int_addr = ip4_interface_first_address
967         (sm->ip4_main, sw_if_index, 0 /* just want the address */ );
968
969       if (is_add)
970         {
971           if (rp_match)
972             return VNET_API_ERROR_VALUE_EXIST;
973
974           snat_add_static_mapping_when_resolved
975             (sm, l_addr, l_port, sw_if_index, e_port, vrf_id, proto,
976              addr_only, is_add, tag, twice_nat, out2in_only,
977              identity_nat, pool_addr, exact);
978
979           /* DHCP resolution required? */
980           if (first_int_addr == 0)
981             {
982               return 0;
983             }
984           else
985             {
986               e_addr.as_u32 = first_int_addr->as_u32;
987               /* Identity mapping? */
988               if (l_addr.as_u32 == 0)
989                 l_addr.as_u32 = e_addr.as_u32;
990             }
991         }
992       else
993         {
994           if (!rp_match)
995             return VNET_API_ERROR_NO_SUCH_ENTRY;
996
997           vec_del1 (sm->to_resolve, i);
998
999           if (first_int_addr)
1000             {
1001               e_addr.as_u32 = first_int_addr->as_u32;
1002               /* Identity mapping? */
1003               if (l_addr.as_u32 == 0)
1004                 l_addr.as_u32 = e_addr.as_u32;
1005             }
1006           else
1007             return 0;
1008         }
1009     }
1010
1011   init_nat_k (&kv, e_addr, addr_only ? 0 : e_port, 0, addr_only ? 0 : proto);
1012   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1013     m = 0;
1014   else
1015     m = pool_elt_at_index (sm->static_mappings, value.value);
1016
1017   if (is_add)
1018     {
1019       if (m)
1020         {
1021           if (is_identity_static_mapping (m))
1022             {
1023               /* *INDENT-OFF* */
1024               pool_foreach (local, m->locals,
1025               ({
1026                 if (local->vrf_id == vrf_id)
1027                   return VNET_API_ERROR_VALUE_EXIST;
1028               }));
1029               /* *INDENT-ON* */
1030               pool_get (m->locals, local);
1031               local->vrf_id = vrf_id;
1032               local->fib_index =
1033                 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1034                                                    nat_fib_src_low);
1035               init_nat_kv (&kv, m->local_addr, m->local_port,
1036                            local->fib_index, m->proto,
1037                            m - sm->static_mappings);
1038               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
1039               return 0;
1040             }
1041           else
1042             return VNET_API_ERROR_VALUE_EXIST;
1043         }
1044
1045       if (twice_nat && addr_only)
1046         return VNET_API_ERROR_UNSUPPORTED;
1047
1048       /* Convert VRF id to FIB index */
1049       if (vrf_id != ~0)
1050         fib_index =
1051           fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1052                                              nat_fib_src_low);
1053       /* If not specified use inside VRF id from SNAT plugin startup config */
1054       else
1055         {
1056           fib_index = sm->inside_fib_index;
1057           vrf_id = sm->inside_vrf_id;
1058           fib_table_lock (fib_index, FIB_PROTOCOL_IP4, nat_fib_src_low);
1059         }
1060
1061       if (!(out2in_only || identity_nat))
1062         {
1063           init_nat_k (&kv, l_addr, addr_only ? 0 : l_port, fib_index,
1064                       addr_only ? 0 : proto);
1065           if (!clib_bihash_search_8_8
1066               (&sm->static_mapping_by_local, &kv, &value))
1067             return VNET_API_ERROR_VALUE_EXIST;
1068         }
1069
1070       /* Find external address in allocated addresses and reserve port for
1071          address and port pair mapping when dynamic translations enabled */
1072       if (!(addr_only || sm->static_mapping_only || out2in_only))
1073         {
1074           for (i = 0; i < vec_len (sm->addresses); i++)
1075             {
1076               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1077                 {
1078                   a = sm->addresses + i;
1079                   /* External port must be unused */
1080                   switch (proto)
1081                     {
1082 #define _(N, j, n, s) \
1083                     case NAT_PROTOCOL_##N: \
1084                       if (a->busy_##n##_port_refcounts[e_port]) \
1085                         return VNET_API_ERROR_INVALID_VALUE; \
1086                       ++a->busy_##n##_port_refcounts[e_port]; \
1087                       if (e_port > 1024) \
1088                         { \
1089                           a->busy_##n##_ports++; \
1090                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
1091                         } \
1092                       break;
1093                       foreach_nat_protocol
1094 #undef _
1095                     default:
1096                       nat_elog_info ("unknown protocol");
1097                       return VNET_API_ERROR_INVALID_VALUE_2;
1098                     }
1099                   break;
1100                 }
1101             }
1102           /* External address must be allocated */
1103           if (!a && (l_addr.as_u32 != e_addr.as_u32))
1104             {
1105               if (sw_if_index != ~0)
1106                 {
1107                   for (i = 0; i < vec_len (sm->to_resolve); i++)
1108                     {
1109                       rp = sm->to_resolve + i;
1110                       if (rp->addr_only)
1111                         continue;
1112                       if (rp->sw_if_index != sw_if_index &&
1113                           rp->l_addr.as_u32 != l_addr.as_u32 &&
1114                           rp->vrf_id != vrf_id && rp->l_port != l_port &&
1115                           rp->e_port != e_port && rp->proto != proto)
1116                         continue;
1117
1118                       vec_del1 (sm->to_resolve, i);
1119                       break;
1120                     }
1121                 }
1122               return VNET_API_ERROR_NO_SUCH_ENTRY;
1123             }
1124         }
1125
1126       pool_get (sm->static_mappings, m);
1127       clib_memset (m, 0, sizeof (*m));
1128       m->tag = vec_dup (tag);
1129       m->local_addr = l_addr;
1130       m->external_addr = e_addr;
1131       m->twice_nat = twice_nat;
1132
1133       if (twice_nat == TWICE_NAT && exact)
1134         {
1135           m->flags |= NAT_STATIC_MAPPING_FLAG_EXACT_ADDRESS;
1136           m->pool_addr = pool_addr;
1137         }
1138
1139       if (out2in_only)
1140         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
1141       if (addr_only)
1142         m->flags |= NAT_STATIC_MAPPING_FLAG_ADDR_ONLY;
1143       if (identity_nat)
1144         {
1145           m->flags |= NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT;
1146           pool_get (m->locals, local);
1147           local->vrf_id = vrf_id;
1148           local->fib_index = fib_index;
1149         }
1150       else
1151         {
1152           m->vrf_id = vrf_id;
1153           m->fib_index = fib_index;
1154         }
1155       if (!addr_only)
1156         {
1157           m->local_port = l_port;
1158           m->external_port = e_port;
1159           m->proto = proto;
1160         }
1161
1162       if (sm->num_workers > 1)
1163         {
1164           ip4_header_t ip = {
1165             .src_address = m->local_addr,
1166           };
1167           vec_add1 (m->workers, sm->worker_in2out_cb (&ip, m->fib_index, 0));
1168           tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
1169         }
1170       else
1171         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1172
1173       init_nat_kv (&kv, m->local_addr, m->local_port, fib_index, m->proto,
1174                    m - sm->static_mappings);
1175       if (!out2in_only)
1176         clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
1177
1178       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto,
1179                    m - sm->static_mappings);
1180       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1);
1181
1182       /* Delete dynamic sessions matching local address (+ local port) */
1183       if (!(sm->static_mapping_only))
1184         {
1185           u_key.addr = m->local_addr;
1186           u_key.fib_index = m->fib_index;
1187           kv.key = u_key.as_u64;
1188           if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
1189             {
1190               user_index = value.value;
1191               u = pool_elt_at_index (tsm->users, user_index);
1192               if (u->nsessions)
1193                 {
1194                   head_index = u->sessions_per_user_list_head_index;
1195                   head = pool_elt_at_index (tsm->list_pool, head_index);
1196                   elt_index = head->next;
1197                   elt = pool_elt_at_index (tsm->list_pool, elt_index);
1198                   ses_index = elt->value;
1199                   while (ses_index != ~0)
1200                     {
1201                       s = pool_elt_at_index (tsm->sessions, ses_index);
1202                       elt = pool_elt_at_index (tsm->list_pool, elt->next);
1203                       ses_index = elt->value;
1204
1205                       if (snat_is_session_static (s))
1206                         continue;
1207
1208                       if (!addr_only && s->in2out.port != m->local_port)
1209                         continue;
1210
1211                       nat_free_session_data (sm, s,
1212                                              tsm - sm->per_thread_data, 0);
1213                       nat44_delete_session (sm, s, tsm - sm->per_thread_data);
1214
1215                       if (!addr_only && !sm->endpoint_dependent)
1216                         break;
1217                     }
1218                 }
1219             }
1220         }
1221     }
1222   else
1223     {
1224       if (!m)
1225         {
1226           if (sw_if_index != ~0)
1227             return 0;
1228           else
1229             return VNET_API_ERROR_NO_SUCH_ENTRY;
1230         }
1231
1232       if (identity_nat)
1233         {
1234           if (vrf_id == ~0)
1235             vrf_id = sm->inside_vrf_id;
1236
1237           /* *INDENT-OFF* */
1238           pool_foreach (local, m->locals,
1239           ({
1240             if (local->vrf_id == vrf_id)
1241               find = local - m->locals;
1242           }));
1243           /* *INDENT-ON* */
1244           if (find == ~0)
1245             return VNET_API_ERROR_NO_SUCH_ENTRY;
1246
1247           local = pool_elt_at_index (m->locals, find);
1248           fib_index = local->fib_index;
1249           pool_put (m->locals, local);
1250         }
1251       else
1252         fib_index = m->fib_index;
1253
1254       /* Free external address port */
1255       if (!(addr_only || sm->static_mapping_only || out2in_only))
1256         {
1257           for (i = 0; i < vec_len (sm->addresses); i++)
1258             {
1259               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1260                 {
1261                   a = sm->addresses + i;
1262                   switch (proto)
1263                     {
1264 #define _(N, j, n, s) \
1265                     case NAT_PROTOCOL_##N: \
1266                       --a->busy_##n##_port_refcounts[e_port]; \
1267                       if (e_port > 1024) \
1268                         { \
1269                           a->busy_##n##_ports--; \
1270                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1271                         } \
1272                       break;
1273                       foreach_nat_protocol
1274 #undef _
1275                     default:
1276                       nat_elog_info ("unknown protocol");
1277                       return VNET_API_ERROR_INVALID_VALUE_2;
1278                     }
1279                   break;
1280                 }
1281             }
1282         }
1283
1284       if (sm->num_workers > 1)
1285         tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
1286       else
1287         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1288
1289       init_nat_k (&kv, m->local_addr, m->local_port, fib_index, m->proto);
1290       if (!out2in_only)
1291         clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0);
1292
1293       /* Delete session(s) for static mapping if exist */
1294       if (!(sm->static_mapping_only) ||
1295           (sm->static_mapping_only && sm->static_mapping_connection_tracking))
1296         {
1297           if (sm->endpoint_dependent)
1298             {
1299               snat_ed_static_mapping_del_sessions (sm, tsm, m->local_addr,
1300                                                    m->local_port, m->proto,
1301                                                    fib_index, addr_only,
1302                                                    e_addr, e_port);
1303             }
1304           else
1305             {
1306               u_key.addr = m->local_addr;
1307               u_key.fib_index = fib_index;
1308               kv.key = u_key.as_u64;
1309               snat_static_mapping_del_sessions (sm, tsm, u_key, addr_only,
1310                                                 e_addr, e_port);
1311             }
1312         }
1313
1314       fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, nat_fib_src_low);
1315       if (pool_elts (m->locals))
1316         return 0;
1317
1318       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
1319       clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0);
1320
1321       vec_free (m->tag);
1322       vec_free (m->workers);
1323       /* Delete static mapping from pool */
1324       pool_put (sm->static_mappings, m);
1325     }
1326
1327   if (!addr_only || (l_addr.as_u32 == e_addr.as_u32))
1328     return 0;
1329
1330   /* Add/delete external address to FIB */
1331   /* *INDENT-OFF* */
1332   pool_foreach (interface, sm->interfaces,
1333   ({
1334     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1335       continue;
1336
1337     snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add);
1338     break;
1339   }));
1340   pool_foreach (interface, sm->output_feature_interfaces,
1341   ({
1342     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1343       continue;
1344
1345     snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add);
1346     break;
1347   }));
1348   /* *INDENT-ON* */
1349
1350   return 0;
1351 }
1352
1353 int
1354 nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
1355                                  nat_protocol_t proto,
1356                                  nat44_lb_addr_port_t * locals, u8 is_add,
1357                                  twice_nat_type_t twice_nat, u8 out2in_only,
1358                                  u8 * tag, u32 affinity)
1359 {
1360   snat_main_t *sm = &snat_main;
1361   snat_static_mapping_t *m;
1362   clib_bihash_kv_8_8_t kv, value;
1363   snat_address_t *a = 0;
1364   int i;
1365   nat44_lb_addr_port_t *local;
1366   snat_main_per_thread_data_t *tsm;
1367   snat_session_t *s;
1368   uword *bitmap = 0;
1369
1370   if (!sm->endpoint_dependent)
1371     return VNET_API_ERROR_FEATURE_DISABLED;
1372
1373   init_nat_k (&kv, e_addr, e_port, 0, proto);
1374   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1375     m = 0;
1376   else
1377     m = pool_elt_at_index (sm->static_mappings, value.value);
1378
1379   if (is_add)
1380     {
1381       if (m)
1382         return VNET_API_ERROR_VALUE_EXIST;
1383
1384       if (vec_len (locals) < 2)
1385         return VNET_API_ERROR_INVALID_VALUE;
1386
1387       /* Find external address in allocated addresses and reserve port for
1388          address and port pair mapping when dynamic translations enabled */
1389       if (!(sm->static_mapping_only || out2in_only))
1390         {
1391           for (i = 0; i < vec_len (sm->addresses); i++)
1392             {
1393               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1394                 {
1395                   a = sm->addresses + i;
1396                   /* External port must be unused */
1397                   switch (proto)
1398                     {
1399 #define _(N, j, n, s) \
1400                     case NAT_PROTOCOL_##N: \
1401                       if (a->busy_##n##_port_refcounts[e_port]) \
1402                         return VNET_API_ERROR_INVALID_VALUE; \
1403                       ++a->busy_##n##_port_refcounts[e_port]; \
1404                       if (e_port > 1024) \
1405                         { \
1406                           a->busy_##n##_ports++; \
1407                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \
1408                         } \
1409                       break;
1410                       foreach_nat_protocol
1411 #undef _
1412                     default:
1413                       nat_elog_info ("unknown protocol");
1414                       return VNET_API_ERROR_INVALID_VALUE_2;
1415                     }
1416                   break;
1417                 }
1418             }
1419           /* External address must be allocated */
1420           if (!a)
1421             return VNET_API_ERROR_NO_SUCH_ENTRY;
1422         }
1423
1424       pool_get (sm->static_mappings, m);
1425       clib_memset (m, 0, sizeof (*m));
1426       m->tag = vec_dup (tag);
1427       m->external_addr = e_addr;
1428       m->external_port = e_port;
1429       m->proto = proto;
1430       m->twice_nat = twice_nat;
1431       m->flags |= NAT_STATIC_MAPPING_FLAG_LB;
1432       if (out2in_only)
1433         m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY;
1434       m->affinity = affinity;
1435
1436       if (affinity)
1437         m->affinity_per_service_list_head_index =
1438           nat_affinity_get_per_service_list_head_index ();
1439       else
1440         m->affinity_per_service_list_head_index = ~0;
1441
1442       init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto,
1443                    m - sm->static_mappings);
1444       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1))
1445         {
1446           nat_elog_err ("static_mapping_by_external key add failed");
1447           return VNET_API_ERROR_UNSPECIFIED;
1448         }
1449
1450       for (i = 0; i < vec_len (locals); i++)
1451         {
1452           locals[i].fib_index =
1453             fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
1454                                                locals[i].vrf_id,
1455                                                nat_fib_src_low);
1456           if (!out2in_only)
1457             {
1458               init_nat_kv (&kv, locals[i].addr, locals[i].port,
1459                            locals[i].fib_index, m->proto,
1460                            m - sm->static_mappings);
1461               clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
1462             }
1463           locals[i].prefix = (i == 0) ? locals[i].probability :
1464             (locals[i - 1].prefix + locals[i].probability);
1465           pool_get (m->locals, local);
1466           *local = locals[i];
1467           if (sm->num_workers > 1)
1468             {
1469               ip4_header_t ip = {
1470                 .src_address = locals[i].addr,
1471               };
1472               bitmap =
1473                 clib_bitmap_set (bitmap,
1474                                  sm->worker_in2out_cb (&ip, m->fib_index, 0),
1475                                  1);
1476             }
1477         }
1478
1479       /* Assign workers */
1480       if (sm->num_workers > 1)
1481         {
1482           /* *INDENT-OFF* */
1483           clib_bitmap_foreach (i, bitmap,
1484             ({
1485                vec_add1(m->workers, i);
1486             }));
1487           /* *INDENT-ON* */
1488         }
1489     }
1490   else
1491     {
1492       if (!m)
1493         return VNET_API_ERROR_NO_SUCH_ENTRY;
1494
1495       if (!is_lb_static_mapping (m))
1496         return VNET_API_ERROR_INVALID_VALUE;
1497
1498       /* Free external address port */
1499       if (!(sm->static_mapping_only || out2in_only))
1500         {
1501           for (i = 0; i < vec_len (sm->addresses); i++)
1502             {
1503               if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
1504                 {
1505                   a = sm->addresses + i;
1506                   switch (proto)
1507                     {
1508 #define _(N, j, n, s) \
1509                     case NAT_PROTOCOL_##N: \
1510                       --a->busy_##n##_port_refcounts[e_port]; \
1511                       if (e_port > 1024) \
1512                         { \
1513                           a->busy_##n##_ports--; \
1514                           a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \
1515                         } \
1516                       break;
1517                       foreach_nat_protocol
1518 #undef _
1519                     default:
1520                       nat_elog_info ("unknown protocol");
1521                       return VNET_API_ERROR_INVALID_VALUE_2;
1522                     }
1523                   break;
1524                 }
1525             }
1526         }
1527
1528       init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
1529       if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0))
1530         {
1531           nat_elog_err ("static_mapping_by_external key del failed");
1532           return VNET_API_ERROR_UNSPECIFIED;
1533         }
1534
1535       /* *INDENT-OFF* */
1536       pool_foreach (local, m->locals,
1537       ({
1538           fib_table_unlock (local->fib_index, FIB_PROTOCOL_IP4,
1539                             nat_fib_src_low);
1540           if (!out2in_only)
1541             {
1542 init_nat_k(&              kv, local->addr, local->port, local->fib_index, m->proto);
1543               if (clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 0))
1544                 {
1545                   nat_elog_err ("static_mapping_by_local key del failed");
1546                   return VNET_API_ERROR_UNSPECIFIED;
1547                 }
1548             }
1549
1550           if (sm->num_workers > 1)
1551             {
1552               ip4_header_t ip = {
1553                 .src_address = local->addr,
1554               };
1555               tsm = vec_elt_at_index (sm->per_thread_data,
1556                                       sm->worker_in2out_cb (&ip, m->fib_index, 0));
1557             }
1558           else
1559             tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1560
1561           /* Delete sessions */
1562           pool_foreach (s, tsm->sessions, {
1563             if (!(is_lb_session (s)))
1564               continue;
1565
1566             if ((s->in2out.addr.as_u32 != local->addr.as_u32) ||
1567                 s->in2out.port != local->port)
1568               continue;
1569
1570             nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1571             nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1572           });
1573       }));
1574       /* *INDENT-ON* */
1575       if (m->affinity)
1576         nat_affinity_flush_service (m->affinity_per_service_list_head_index);
1577       pool_free (m->locals);
1578       vec_free (m->tag);
1579       vec_free (m->workers);
1580
1581       pool_put (sm->static_mappings, m);
1582     }
1583
1584   return 0;
1585 }
1586
1587 int
1588 nat44_lb_static_mapping_add_del_local (ip4_address_t e_addr, u16 e_port,
1589                                        ip4_address_t l_addr, u16 l_port,
1590                                        nat_protocol_t proto, u32 vrf_id,
1591                                        u8 probability, u8 is_add)
1592 {
1593   snat_main_t *sm = &snat_main;
1594   snat_static_mapping_t *m = 0;
1595   clib_bihash_kv_8_8_t kv, value;
1596   nat44_lb_addr_port_t *local, *prev_local, *match_local = 0;
1597   snat_main_per_thread_data_t *tsm;
1598   snat_session_t *s;
1599   u32 *locals = 0;
1600   uword *bitmap = 0;
1601   int i;
1602
1603   if (!sm->endpoint_dependent)
1604     return VNET_API_ERROR_FEATURE_DISABLED;
1605
1606   init_nat_k (&kv, e_addr, e_port, 0, proto);
1607   if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1608     m = pool_elt_at_index (sm->static_mappings, value.value);
1609
1610   if (!m)
1611     return VNET_API_ERROR_NO_SUCH_ENTRY;
1612
1613   if (!is_lb_static_mapping (m))
1614     return VNET_API_ERROR_INVALID_VALUE;
1615
1616   /* *INDENT-OFF* */
1617   pool_foreach (local, m->locals,
1618   ({
1619     if ((local->addr.as_u32 == l_addr.as_u32) && (local->port == l_port) &&
1620         (local->vrf_id == vrf_id))
1621       {
1622         match_local = local;
1623         break;
1624       }
1625   }));
1626   /* *INDENT-ON* */
1627
1628   if (is_add)
1629     {
1630       if (match_local)
1631         return VNET_API_ERROR_VALUE_EXIST;
1632
1633       pool_get (m->locals, local);
1634       clib_memset (local, 0, sizeof (*local));
1635       local->addr.as_u32 = l_addr.as_u32;
1636       local->port = l_port;
1637       local->probability = probability;
1638       local->vrf_id = vrf_id;
1639       local->fib_index =
1640         fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
1641                                            nat_fib_src_low);
1642
1643       if (!is_out2in_only_static_mapping (m))
1644         {
1645           init_nat_kv (&kv, l_addr, l_port, local->fib_index, proto,
1646                        m - sm->static_mappings);
1647           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1))
1648             nat_elog_err ("static_mapping_by_local key add failed");
1649         }
1650     }
1651   else
1652     {
1653       if (!match_local)
1654         return VNET_API_ERROR_NO_SUCH_ENTRY;
1655
1656       if (pool_elts (m->locals) < 3)
1657         return VNET_API_ERROR_UNSPECIFIED;
1658
1659       fib_table_unlock (match_local->fib_index, FIB_PROTOCOL_IP4,
1660                         nat_fib_src_low);
1661
1662       if (!is_out2in_only_static_mapping (m))
1663         {
1664           init_nat_k (&kv, l_addr, l_port, match_local->fib_index, proto);
1665           if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0))
1666             nat_elog_err ("static_mapping_by_local key del failed");
1667         }
1668
1669       if (sm->num_workers > 1)
1670         {
1671           ip4_header_t ip = {
1672             .src_address = local->addr,
1673           };
1674           tsm = vec_elt_at_index (sm->per_thread_data,
1675                                   sm->worker_in2out_cb (&ip, m->fib_index,
1676                                                         0));
1677         }
1678       else
1679         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
1680
1681       /* Delete sessions */
1682       /* *INDENT-OFF* */
1683       pool_foreach (s, tsm->sessions, {
1684         if (!(is_lb_session (s)))
1685           continue;
1686
1687         if ((s->in2out.addr.as_u32 != match_local->addr.as_u32) ||
1688             s->in2out.port != match_local->port)
1689           continue;
1690
1691         nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
1692         nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
1693       });
1694       /* *INDENT-ON* */
1695
1696       pool_put (m->locals, match_local);
1697     }
1698
1699   vec_free (m->workers);
1700
1701   /* *INDENT-OFF* */
1702   pool_foreach (local, m->locals,
1703   ({
1704     vec_add1 (locals, local - m->locals);
1705     if (sm->num_workers > 1)
1706       {
1707         ip4_header_t ip;
1708         ip.src_address.as_u32 = local->addr.as_u32,
1709         bitmap = clib_bitmap_set (bitmap,
1710                                   sm->worker_in2out_cb (&ip, local->fib_index, 0),
1711                                   1);
1712       }
1713   }));
1714   /* *INDENT-ON* */
1715
1716   ASSERT (vec_len (locals) > 1);
1717
1718   local = pool_elt_at_index (m->locals, locals[0]);
1719   local->prefix = local->probability;
1720   for (i = 1; i < vec_len (locals); i++)
1721     {
1722       local = pool_elt_at_index (m->locals, locals[i]);
1723       prev_local = pool_elt_at_index (m->locals, locals[i - 1]);
1724       local->prefix = local->probability + prev_local->prefix;
1725     }
1726
1727   /* Assign workers */
1728   if (sm->num_workers > 1)
1729     {
1730       /* *INDENT-OFF* */
1731       clib_bitmap_foreach (i, bitmap, ({ vec_add1(m->workers, i); }));
1732       /* *INDENT-ON* */
1733     }
1734
1735   return 0;
1736 }
1737
1738 int
1739 snat_del_address (snat_main_t * sm, ip4_address_t addr, u8 delete_sm,
1740                   u8 twice_nat)
1741 {
1742   snat_address_t *a = 0;
1743   snat_session_t *ses;
1744   u32 *ses_to_be_removed = 0, *ses_index;
1745   snat_main_per_thread_data_t *tsm;
1746   snat_static_mapping_t *m;
1747   snat_interface_t *interface;
1748   int i;
1749   snat_address_t *addresses =
1750     twice_nat ? sm->twice_nat_addresses : sm->addresses;
1751
1752   /* Find SNAT address */
1753   for (i = 0; i < vec_len (addresses); i++)
1754     {
1755       if (addresses[i].addr.as_u32 == addr.as_u32)
1756         {
1757           a = addresses + i;
1758           break;
1759         }
1760     }
1761   if (!a)
1762     {
1763       nat_log_err ("no such address");
1764       return VNET_API_ERROR_NO_SUCH_ENTRY;
1765     }
1766
1767   if (delete_sm)
1768     {
1769       ip4_address_t pool_addr = { 0 };
1770       /* *INDENT-OFF* */
1771       pool_foreach (m, sm->static_mappings,
1772       ({
1773           if (m->external_addr.as_u32 == addr.as_u32)
1774             (void) snat_add_static_mapping (m->local_addr, m->external_addr,
1775                                             m->local_port, m->external_port,
1776                                             m->vrf_id,
1777                                             is_addr_only_static_mapping(m), ~0,
1778                                             m->proto, 0 /* is_add */,
1779                                             m->twice_nat,
1780                                             is_out2in_only_static_mapping(m),
1781                                             m->tag,
1782                                             is_identity_static_mapping(m),
1783                                             pool_addr, 0);
1784       }));
1785       /* *INDENT-ON* */
1786     }
1787   else
1788     {
1789       /* Check if address is used in some static mapping */
1790       if (is_snat_address_used_in_static_mapping (sm, addr))
1791         {
1792           nat_log_err ("address used in static mapping");
1793           return VNET_API_ERROR_UNSPECIFIED;
1794         }
1795     }
1796
1797   if (a->fib_index != ~0)
1798     fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, nat_fib_src_low);
1799
1800   /* Delete sessions using address */
1801   if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports)
1802     {
1803       /* *INDENT-OFF* */
1804       vec_foreach (tsm, sm->per_thread_data)
1805         {
1806           pool_foreach (ses, tsm->sessions, ({
1807             if (ses->out2in.addr.as_u32 == addr.as_u32)
1808               {
1809                 nat_free_session_data (sm, ses, tsm - sm->per_thread_data, 0);
1810                 vec_add1 (ses_to_be_removed, ses - tsm->sessions);
1811               }
1812           }));
1813
1814           if (sm->endpoint_dependent){
1815               vec_foreach (ses_index, ses_to_be_removed)
1816                 {
1817                   ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
1818                   nat_ed_session_delete (sm, ses, tsm - sm->per_thread_data, 1);
1819                 }
1820           }else{
1821               vec_foreach (ses_index, ses_to_be_removed)
1822                 {
1823                   ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
1824                   nat44_delete_session (sm, ses, tsm - sm->per_thread_data);
1825                 }
1826           }
1827
1828           vec_free (ses_to_be_removed);
1829         }
1830       /* *INDENT-ON* */
1831     }
1832
1833 #define _(N, i, n, s) \
1834   vec_free (a->busy_##n##_ports_per_thread);
1835   foreach_nat_protocol
1836 #undef _
1837     if (twice_nat)
1838     {
1839       vec_del1 (sm->twice_nat_addresses, i);
1840       return 0;
1841     }
1842   else
1843     vec_del1 (sm->addresses, i);
1844
1845   /* Delete external address from FIB */
1846   /* *INDENT-OFF* */
1847   pool_foreach (interface, sm->interfaces,
1848   ({
1849     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1850       continue;
1851
1852     snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0);
1853     break;
1854   }));
1855   pool_foreach (interface, sm->output_feature_interfaces,
1856   ({
1857     if (nat_interface_is_inside(interface) || sm->out2in_dpo)
1858       continue;
1859
1860     snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0);
1861     break;
1862   }));
1863   /* *INDENT-ON* */
1864
1865   return 0;
1866 }
1867
1868 static void
1869 nat_validate_counters (snat_main_t * sm, u32 sw_if_index)
1870 {
1871 #define _(x)                                                                  \
1872   vlib_validate_simple_counter (&sm->counters.fastpath.in2out.x,              \
1873                                 sw_if_index);                                 \
1874   vlib_zero_simple_counter (&sm->counters.fastpath.in2out.x, sw_if_index);    \
1875   vlib_validate_simple_counter (&sm->counters.fastpath.out2in.x,              \
1876                                 sw_if_index);                                 \
1877   vlib_zero_simple_counter (&sm->counters.fastpath.out2in.x, sw_if_index);    \
1878   vlib_validate_simple_counter (&sm->counters.slowpath.in2out.x,              \
1879                                 sw_if_index);                                 \
1880   vlib_zero_simple_counter (&sm->counters.slowpath.in2out.x, sw_if_index);    \
1881   vlib_validate_simple_counter (&sm->counters.slowpath.out2in.x,              \
1882                                 sw_if_index);                                 \
1883   vlib_zero_simple_counter (&sm->counters.slowpath.out2in.x, sw_if_index);    \
1884   vlib_validate_simple_counter (&sm->counters.fastpath.in2out_ed.x,           \
1885                                 sw_if_index);                                 \
1886   vlib_zero_simple_counter (&sm->counters.fastpath.in2out_ed.x, sw_if_index); \
1887   vlib_validate_simple_counter (&sm->counters.fastpath.out2in_ed.x,           \
1888                                 sw_if_index);                                 \
1889   vlib_zero_simple_counter (&sm->counters.fastpath.out2in_ed.x, sw_if_index); \
1890   vlib_validate_simple_counter (&sm->counters.slowpath.in2out_ed.x,           \
1891                                 sw_if_index);                                 \
1892   vlib_zero_simple_counter (&sm->counters.slowpath.in2out_ed.x, sw_if_index); \
1893   vlib_validate_simple_counter (&sm->counters.slowpath.out2in_ed.x,           \
1894                                 sw_if_index);                                 \
1895   vlib_zero_simple_counter (&sm->counters.slowpath.out2in_ed.x, sw_if_index);
1896   foreach_nat_counter;
1897 #undef _
1898   vlib_validate_simple_counter (&sm->counters.hairpinning, sw_if_index);
1899   vlib_zero_simple_counter (&sm->counters.hairpinning, sw_if_index);
1900 }
1901
1902 void
1903 expire_per_vrf_sessions (u32 fib_index)
1904 {
1905   per_vrf_sessions_t *per_vrf_sessions;
1906   snat_main_per_thread_data_t *tsm;
1907   snat_main_t *sm = &snat_main;
1908
1909   /* *INDENT-OFF* */
1910   vec_foreach (tsm, sm->per_thread_data)
1911     {
1912       vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
1913         {
1914           if ((per_vrf_sessions->rx_fib_index == fib_index) ||
1915               (per_vrf_sessions->tx_fib_index == fib_index))
1916             {
1917               per_vrf_sessions->expired = 1;
1918             }
1919         }
1920     }
1921   /* *INDENT-ON* */
1922 }
1923
1924 void
1925 update_per_vrf_sessions_vec (u32 fib_index, int is_del)
1926 {
1927   snat_main_t *sm = &snat_main;
1928   nat_fib_t *fib;
1929
1930   // we don't care if it is outside/inside fib
1931   // we just care about their ref_count
1932   // if it reaches 0 sessions should expire
1933   // because the fib isn't valid for NAT anymore
1934
1935   vec_foreach (fib, sm->fibs)
1936   {
1937     if (fib->fib_index == fib_index)
1938       {
1939         if (is_del)
1940           {
1941             fib->ref_count--;
1942             if (!fib->ref_count)
1943               {
1944                 vec_del1 (sm->fibs, fib - sm->fibs);
1945                 expire_per_vrf_sessions (fib_index);
1946               }
1947             return;
1948           }
1949         else
1950           fib->ref_count++;
1951       }
1952   }
1953   if (!is_del)
1954     {
1955       vec_add2 (sm->fibs, fib, 1);
1956       fib->ref_count = 1;
1957       fib->fib_index = fib_index;
1958     }
1959 }
1960
1961 int
1962 snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
1963 {
1964   snat_main_t *sm = &snat_main;
1965   snat_interface_t *i;
1966   const char *feature_name, *del_feature_name;
1967   snat_address_t *ap;
1968   snat_static_mapping_t *m;
1969   nat_outside_fib_t *outside_fib;
1970   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
1971                                                        sw_if_index);
1972
1973   if (!sm->enabled)
1974     {
1975       nat_log_err ("nat44 is disabled");
1976       return VNET_API_ERROR_UNSUPPORTED;
1977     }
1978
1979   if (sm->out2in_dpo && !is_inside)
1980     {
1981       nat_log_err ("error unsupported");
1982       return VNET_API_ERROR_UNSUPPORTED;
1983     }
1984
1985   /* *INDENT-OFF* */
1986   pool_foreach (i, sm->output_feature_interfaces,
1987   ({
1988     if (i->sw_if_index == sw_if_index)
1989       {
1990         nat_log_err ("error interface already configured");
1991         return VNET_API_ERROR_VALUE_EXIST;
1992       }
1993   }));
1994   /* *INDENT-ON* */
1995
1996   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
1997     feature_name = is_inside ? "nat44-in2out-fast" : "nat44-out2in-fast";
1998   else
1999     {
2000       if (sm->num_workers > 1)
2001         feature_name =
2002           is_inside ? "nat44-in2out-worker-handoff" :
2003           "nat44-out2in-worker-handoff";
2004       else if (sm->endpoint_dependent)
2005         {
2006           feature_name = is_inside ? "nat-pre-in2out" : "nat-pre-out2in";
2007         }
2008       else
2009         feature_name = is_inside ? "nat44-in2out" : "nat44-out2in";
2010     }
2011
2012   if (sm->fq_in2out_index == ~0 && sm->num_workers > 1)
2013     sm->fq_in2out_index =
2014       vlib_frame_queue_main_init (sm->in2out_node_index, NAT_FQ_NELTS);
2015
2016   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
2017     sm->fq_out2in_index =
2018       vlib_frame_queue_main_init (sm->out2in_node_index, NAT_FQ_NELTS);
2019
2020   if (sm->endpoint_dependent)
2021     update_per_vrf_sessions_vec (fib_index, is_del);
2022
2023   if (!is_inside)
2024     {
2025       /* *INDENT-OFF* */
2026       vec_foreach (outside_fib, sm->outside_fibs)
2027         {
2028           if (outside_fib->fib_index == fib_index)
2029             {
2030               if (is_del)
2031                 {
2032                   outside_fib->refcount--;
2033                   if (!outside_fib->refcount)
2034                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
2035                 }
2036               else
2037                 outside_fib->refcount++;
2038               goto feature_set;
2039             }
2040         }
2041       /* *INDENT-ON* */
2042       if (!is_del)
2043         {
2044           vec_add2 (sm->outside_fibs, outside_fib, 1);
2045           outside_fib->refcount = 1;
2046           outside_fib->fib_index = fib_index;
2047         }
2048     }
2049
2050 feature_set:
2051   /* *INDENT-OFF* */
2052   pool_foreach (i, sm->interfaces,
2053   ({
2054     if (i->sw_if_index == sw_if_index)
2055       {
2056         if (is_del)
2057           {
2058             if (nat_interface_is_inside(i) && nat_interface_is_outside(i))
2059               {
2060                 if (is_inside)
2061                   i->flags &= ~NAT_INTERFACE_FLAG_IS_INSIDE;
2062                 else
2063                   i->flags &= ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
2064
2065                 if (sm->num_workers > 1)
2066                   {
2067                     del_feature_name = "nat44-handoff-classify";
2068                     feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
2069                                                  "nat44-out2in-worker-handoff";
2070                   }
2071                 else if (sm->endpoint_dependent)
2072                   {
2073                     del_feature_name = "nat44-ed-classify";
2074                     feature_name = !is_inside ?  "nat-pre-in2out" :
2075                                                  "nat-pre-out2in";
2076                   }
2077                 else
2078                   {
2079                     del_feature_name = "nat44-classify";
2080                     feature_name = !is_inside ?  "nat44-in2out" : "nat44-out2in";
2081                   }
2082
2083                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
2084                 if (rv)
2085                   return rv;
2086                 vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
2087                                              sw_if_index, 0, 0, 0);
2088                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
2089                                              sw_if_index, 1, 0, 0);
2090                 if (!is_inside)
2091                   {
2092                     if (sm->endpoint_dependent)
2093                       vnet_feature_enable_disable ("ip4-local",
2094                                                    "nat44-ed-hairpinning",
2095                                                    sw_if_index, 1, 0, 0);
2096                     else
2097                       vnet_feature_enable_disable ("ip4-local",
2098                                                    "nat44-hairpinning",
2099                                                    sw_if_index, 1, 0, 0);
2100                   }
2101               }
2102             else
2103               {
2104                 int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
2105                 if (rv)
2106                   return rv;
2107                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
2108                                              sw_if_index, 0, 0, 0);
2109                 pool_put (sm->interfaces, i);
2110                 if (is_inside)
2111                   {
2112                     if (sm->endpoint_dependent)
2113                       vnet_feature_enable_disable ("ip4-local",
2114                                                    "nat44-ed-hairpinning",
2115                                                    sw_if_index, 0, 0, 0);
2116                     else
2117                       vnet_feature_enable_disable ("ip4-local",
2118                                                    "nat44-hairpinning",
2119                                                    sw_if_index, 0, 0, 0);
2120                   }
2121               }
2122           }
2123         else
2124           {
2125             if ((nat_interface_is_inside(i) && is_inside) ||
2126                 (nat_interface_is_outside(i) && !is_inside))
2127               return 0;
2128
2129             if (sm->num_workers > 1)
2130               {
2131                 del_feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
2132                                                  "nat44-out2in-worker-handoff";
2133                 feature_name = "nat44-handoff-classify";
2134               }
2135             else if (sm->endpoint_dependent)
2136               {
2137                 del_feature_name = !is_inside ?  "nat-pre-in2out" :
2138                                                  "nat-pre-out2in";
2139
2140                 feature_name = "nat44-ed-classify";
2141               }
2142             else
2143               {
2144                 del_feature_name = !is_inside ?  "nat44-in2out" : "nat44-out2in";
2145                 feature_name = "nat44-classify";
2146               }
2147
2148             int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
2149             if (rv)
2150               return rv;
2151             vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
2152                                          sw_if_index, 0, 0, 0);
2153             vnet_feature_enable_disable ("ip4-unicast", feature_name,
2154                                          sw_if_index, 1, 0, 0);
2155             if (!is_inside)
2156               {
2157                 if (sm->endpoint_dependent)
2158                   vnet_feature_enable_disable ("ip4-local", "nat44-ed-hairpinning",
2159                                                sw_if_index, 0, 0, 0);
2160                 else
2161                   vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning",
2162                                                sw_if_index, 0, 0, 0);
2163               }
2164             goto set_flags;
2165           }
2166
2167         goto fib;
2168       }
2169   }));
2170   /* *INDENT-ON* */
2171
2172   if (is_del)
2173     {
2174       nat_log_err ("error interface couldn't be found");
2175       return VNET_API_ERROR_NO_SUCH_ENTRY;
2176     }
2177
2178   pool_get (sm->interfaces, i);
2179   i->sw_if_index = sw_if_index;
2180   i->flags = 0;
2181   nat_validate_counters (sm, sw_if_index);
2182
2183   vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1, 0,
2184                                0);
2185
2186   int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
2187   if (rv)
2188     return rv;
2189
2190   if (is_inside && !sm->out2in_dpo)
2191     {
2192       if (sm->endpoint_dependent)
2193         vnet_feature_enable_disable ("ip4-local", "nat44-ed-hairpinning",
2194                                      sw_if_index, 1, 0, 0);
2195       else
2196         vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning",
2197                                      sw_if_index, 1, 0, 0);
2198     }
2199
2200 set_flags:
2201   if (is_inside)
2202     {
2203       i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
2204       return 0;
2205     }
2206   else
2207     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
2208
2209   /* Add/delete external addresses to FIB */
2210 fib:
2211   /* *INDENT-OFF* */
2212   vec_foreach (ap, sm->addresses)
2213     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
2214
2215   pool_foreach (m, sm->static_mappings,
2216   ({
2217     if (!(is_addr_only_static_mapping(m)) || (m->local_addr.as_u32 == m->external_addr.as_u32))
2218       continue;
2219
2220     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
2221   }));
2222   /* *INDENT-ON* */
2223
2224   return 0;
2225 }
2226
2227 int
2228 snat_interface_add_del_output_feature (u32 sw_if_index,
2229                                        u8 is_inside, int is_del)
2230 {
2231   snat_main_t *sm = &snat_main;
2232   snat_interface_t *i;
2233   snat_address_t *ap;
2234   snat_static_mapping_t *m;
2235   nat_outside_fib_t *outside_fib;
2236   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2237                                                        sw_if_index);
2238
2239   if (!sm->enabled)
2240     {
2241       nat_log_err ("nat44 is disabled");
2242       return VNET_API_ERROR_UNSUPPORTED;
2243     }
2244
2245   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
2246     {
2247       nat_log_err ("error unsupported");
2248       return VNET_API_ERROR_UNSUPPORTED;
2249     }
2250
2251   /* *INDENT-OFF* */
2252   pool_foreach (i, sm->interfaces,
2253   ({
2254     if (i->sw_if_index == sw_if_index)
2255       {
2256         nat_log_err ("error interface already configured");
2257         return VNET_API_ERROR_VALUE_EXIST;
2258       }
2259   }));
2260   /* *INDENT-ON* */
2261
2262   if (sm->endpoint_dependent)
2263     update_per_vrf_sessions_vec (fib_index, is_del);
2264
2265   if (!is_inside)
2266     {
2267       /* *INDENT-OFF* */
2268       vec_foreach (outside_fib, sm->outside_fibs)
2269         {
2270           if (outside_fib->fib_index == fib_index)
2271             {
2272               if (is_del)
2273                 {
2274                   outside_fib->refcount--;
2275                   if (!outside_fib->refcount)
2276                     vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
2277                 }
2278               else
2279                 outside_fib->refcount++;
2280               goto feature_set;
2281             }
2282         }
2283       /* *INDENT-ON* */
2284       if (!is_del)
2285         {
2286           vec_add2 (sm->outside_fibs, outside_fib, 1);
2287           outside_fib->refcount = 1;
2288           outside_fib->fib_index = fib_index;
2289         }
2290     }
2291
2292 feature_set:
2293   if (is_inside)
2294     {
2295       if (sm->endpoint_dependent)
2296         {
2297           int rv =
2298             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2299           if (rv)
2300             return rv;
2301           rv =
2302             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2303                                                             !is_del);
2304           if (rv)
2305             return rv;
2306           vnet_feature_enable_disable ("ip4-unicast", "nat44-ed-hairpin-dst",
2307                                        sw_if_index, !is_del, 0, 0);
2308           vnet_feature_enable_disable ("ip4-output", "nat44-ed-hairpin-src",
2309                                        sw_if_index, !is_del, 0, 0);
2310         }
2311       else
2312         {
2313           int rv =
2314             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2315           if (rv)
2316             return rv;
2317           rv =
2318             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2319                                                             !is_del);
2320           if (rv)
2321             return rv;
2322           vnet_feature_enable_disable ("ip4-unicast", "nat44-hairpin-dst",
2323                                        sw_if_index, !is_del, 0, 0);
2324           vnet_feature_enable_disable ("ip4-output", "nat44-hairpin-src",
2325                                        sw_if_index, !is_del, 0, 0);
2326         }
2327       goto fq;
2328     }
2329
2330   if (sm->num_workers > 1)
2331     {
2332       int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2333       if (rv)
2334         return rv;
2335       rv =
2336         ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del);
2337       if (rv)
2338         return rv;
2339       vnet_feature_enable_disable ("ip4-unicast",
2340                                    "nat44-out2in-worker-handoff",
2341                                    sw_if_index, !is_del, 0, 0);
2342       vnet_feature_enable_disable ("ip4-output",
2343                                    "nat44-in2out-output-worker-handoff",
2344                                    sw_if_index, !is_del, 0, 0);
2345     }
2346   else
2347     {
2348       if (sm->endpoint_dependent)
2349         {
2350           int rv =
2351             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2352           if (rv)
2353             return rv;
2354           rv =
2355             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2356                                                             !is_del);
2357           if (rv)
2358             return rv;
2359           vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in",
2360                                        sw_if_index, !is_del, 0, 0);
2361           vnet_feature_enable_disable ("ip4-output", "nat-pre-in2out-output",
2362                                        sw_if_index, !is_del, 0, 0);
2363         }
2364       else
2365         {
2366           int rv =
2367             ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
2368           if (rv)
2369             return rv;
2370           rv =
2371             ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
2372                                                             !is_del);
2373           if (rv)
2374             return rv;
2375           vnet_feature_enable_disable ("ip4-unicast", "nat44-out2in",
2376                                        sw_if_index, !is_del, 0, 0);
2377           vnet_feature_enable_disable ("ip4-output", "nat44-in2out-output",
2378                                        sw_if_index, !is_del, 0, 0);
2379         }
2380     }
2381
2382 fq:
2383   if (sm->fq_in2out_output_index == ~0 && sm->num_workers > 1)
2384     sm->fq_in2out_output_index =
2385       vlib_frame_queue_main_init (sm->in2out_output_node_index, 0);
2386
2387   if (sm->fq_out2in_index == ~0 && sm->num_workers > 1)
2388     sm->fq_out2in_index =
2389       vlib_frame_queue_main_init (sm->out2in_node_index, 0);
2390
2391   /* *INDENT-OFF* */
2392   pool_foreach (i, sm->output_feature_interfaces,
2393   ({
2394     if (i->sw_if_index == sw_if_index)
2395       {
2396         if (is_del)
2397           pool_put (sm->output_feature_interfaces, i);
2398         else
2399           return VNET_API_ERROR_VALUE_EXIST;
2400
2401         goto fib;
2402       }
2403   }));
2404   /* *INDENT-ON* */
2405
2406   if (is_del)
2407     {
2408       nat_log_err ("error interface couldn't be found");
2409       return VNET_API_ERROR_NO_SUCH_ENTRY;
2410     }
2411
2412   pool_get (sm->output_feature_interfaces, i);
2413   i->sw_if_index = sw_if_index;
2414   i->flags = 0;
2415   nat_validate_counters (sm, sw_if_index);
2416   if (is_inside)
2417     i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
2418   else
2419     i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
2420
2421   /* Add/delete external addresses to FIB */
2422 fib:
2423   if (is_inside)
2424     return 0;
2425
2426   /* *INDENT-OFF* */
2427   vec_foreach (ap, sm->addresses)
2428     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
2429
2430   pool_foreach (m, sm->static_mappings,
2431   ({
2432     if (!((is_addr_only_static_mapping(m)))  || (m->local_addr.as_u32 == m->external_addr.as_u32))
2433       continue;
2434
2435     snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del);
2436   }));
2437   /* *INDENT-ON* */
2438
2439   return 0;
2440 }
2441
2442 int
2443 snat_set_workers (uword * bitmap)
2444 {
2445   snat_main_t *sm = &snat_main;
2446   int i, j = 0;
2447
2448   if (sm->num_workers < 2)
2449     return VNET_API_ERROR_FEATURE_DISABLED;
2450
2451   if (clib_bitmap_last_set (bitmap) >= sm->num_workers)
2452     return VNET_API_ERROR_INVALID_WORKER;
2453
2454   vec_free (sm->workers);
2455   /* *INDENT-OFF* */
2456   clib_bitmap_foreach (i, bitmap,
2457     ({
2458       vec_add1(sm->workers, i);
2459       sm->per_thread_data[sm->first_worker_index + i].snat_thread_index = j;
2460       sm->per_thread_data[sm->first_worker_index + i].thread_index = i;
2461       j++;
2462     }));
2463   /* *INDENT-ON* */
2464
2465   sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
2466
2467   return 0;
2468 }
2469
2470 static void
2471 snat_update_outside_fib (ip4_main_t * im, uword opaque,
2472                          u32 sw_if_index, u32 new_fib_index,
2473                          u32 old_fib_index)
2474 {
2475   snat_main_t *sm = &snat_main;
2476   nat_outside_fib_t *outside_fib;
2477   snat_interface_t *i;
2478   u8 is_add = 1;
2479   u8 match = 0;
2480
2481   if (!sm->enabled || (new_fib_index == old_fib_index)
2482       || (!vec_len (sm->outside_fibs)))
2483     {
2484       return;
2485     }
2486
2487   /* *INDENT-OFF* */
2488   pool_foreach (i, sm->interfaces,
2489     ({
2490       if (i->sw_if_index == sw_if_index)
2491         {
2492           if (!(nat_interface_is_outside (i)))
2493             return;
2494           match = 1;
2495         }
2496     }));
2497
2498   pool_foreach (i, sm->output_feature_interfaces,
2499     ({
2500       if (i->sw_if_index == sw_if_index)
2501         {
2502           if (!(nat_interface_is_outside (i)))
2503             return;
2504           match = 1;
2505         }
2506     }));
2507   /* *INDENT-ON* */
2508
2509   if (!match)
2510     return;
2511
2512   vec_foreach (outside_fib, sm->outside_fibs)
2513   {
2514     if (outside_fib->fib_index == old_fib_index)
2515       {
2516         outside_fib->refcount--;
2517         if (!outside_fib->refcount)
2518           vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
2519         break;
2520       }
2521   }
2522
2523   vec_foreach (outside_fib, sm->outside_fibs)
2524   {
2525     if (outside_fib->fib_index == new_fib_index)
2526       {
2527         outside_fib->refcount++;
2528         is_add = 0;
2529         break;
2530       }
2531   }
2532
2533   if (is_add)
2534     {
2535       vec_add2 (sm->outside_fibs, outside_fib, 1);
2536       outside_fib->refcount = 1;
2537       outside_fib->fib_index = new_fib_index;
2538     }
2539 }
2540
2541 static void
2542 snat_update_outside_fib (ip4_main_t * im, uword opaque,
2543                          u32 sw_if_index, u32 new_fib_index,
2544                          u32 old_fib_index);
2545
2546 static void
2547 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
2548                                        uword opaque,
2549                                        u32 sw_if_index,
2550                                        ip4_address_t * address,
2551                                        u32 address_length,
2552                                        u32 if_address_index, u32 is_delete);
2553
2554 static void
2555 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
2556                                  uword opaque,
2557                                  u32 sw_if_index,
2558                                  ip4_address_t * address,
2559                                  u32 address_length,
2560                                  u32 if_address_index, u32 is_delete);
2561
2562 static int
2563 nat_alloc_addr_and_port_default (snat_address_t * addresses, u32 fib_index,
2564                                  u32 thread_index, nat_protocol_t proto,
2565                                  ip4_address_t * addr, u16 * port,
2566                                  u16 port_per_thread, u32 snat_thread_index);
2567
2568 void
2569 test_key_calc_split ()
2570 {
2571   ip4_address_t l_addr;
2572   l_addr.as_u8[0] = 1;
2573   l_addr.as_u8[1] = 1;
2574   l_addr.as_u8[2] = 1;
2575   l_addr.as_u8[3] = 1;
2576   ip4_address_t r_addr;
2577   r_addr.as_u8[0] = 2;
2578   r_addr.as_u8[1] = 2;
2579   r_addr.as_u8[2] = 2;
2580   r_addr.as_u8[3] = 2;
2581   u16 l_port = 40001;
2582   u16 r_port = 40301;
2583   u8 proto = 9;
2584   u32 fib_index = 9000001;
2585   u32 thread_index = 3000000001;
2586   u32 session_index = 3000000221;
2587   clib_bihash_kv_16_8_t kv;
2588   init_ed_kv (&kv, l_addr, l_port, r_addr, r_port, fib_index, proto,
2589               thread_index, session_index);
2590   ip4_address_t l_addr2;
2591   ip4_address_t r_addr2;
2592   clib_memset (&l_addr2, 0, sizeof (l_addr2));
2593   clib_memset (&r_addr2, 0, sizeof (r_addr2));
2594   u16 l_port2 = 0;
2595   u16 r_port2 = 0;
2596   u8 proto2 = 0;
2597   u32 fib_index2 = 0;
2598   split_ed_kv (&kv, &l_addr2, &r_addr2, &proto2, &fib_index2, &l_port2,
2599                &r_port2);
2600   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
2601   ASSERT (r_addr.as_u32 == r_addr2.as_u32);
2602   ASSERT (l_port == l_port2);
2603   ASSERT (r_port == r_port2);
2604   ASSERT (proto == proto2);
2605   ASSERT (fib_index == fib_index2);
2606   ASSERT (thread_index == ed_value_get_thread_index (&kv));
2607   ASSERT (session_index == ed_value_get_session_index (&kv));
2608
2609   fib_index = 7001;
2610   proto = 5;
2611   nat_protocol_t proto3 = ~0;
2612   u64 key = calc_nat_key (l_addr, l_port, fib_index, proto);
2613   split_nat_key (key, &l_addr2, &l_port2, &fib_index2, &proto3);
2614   ASSERT (l_addr.as_u32 == l_addr2.as_u32);
2615   ASSERT (l_port == l_port2);
2616   ASSERT (proto == proto3);
2617   ASSERT (fib_index == fib_index2);
2618 }
2619
2620 static clib_error_t *
2621 nat_ip_table_add_del (vnet_main_t * vnm, u32 table_id, u32 is_add)
2622 {
2623   snat_main_t *sm = &snat_main;
2624   u32 fib_index;
2625
2626   if (sm->endpoint_dependent)
2627     {
2628       // TODO: consider removing all NAT interfaces
2629
2630       if (!is_add)
2631         {
2632           fib_index = ip4_fib_index_from_table_id (table_id);
2633           if (fib_index != ~0)
2634             expire_per_vrf_sessions (fib_index);
2635         }
2636     }
2637   return 0;
2638 }
2639
2640 VNET_IP_TABLE_ADD_DEL_FUNCTION (nat_ip_table_add_del);
2641
2642 void
2643 nat44_set_node_indexes (snat_main_t * sm, vlib_main_t * vm)
2644 {
2645   vlib_node_t *node;
2646
2647   node = vlib_get_node_by_name (vm, (u8 *) "nat44-out2in");
2648   sm->ei_out2in_node_index = node->index;
2649   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out");
2650   sm->ei_in2out_node_index = node->index;
2651   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-output");
2652   sm->ei_in2out_output_node_index = node->index;
2653
2654   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
2655   sm->ed_out2in_node_index = node->index;
2656   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
2657   sm->ed_in2out_node_index = node->index;
2658   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-output");
2659   sm->ed_in2out_output_node_index = node->index;
2660
2661   node = vlib_get_node_by_name (vm, (u8 *) "error-drop");
2662   sm->error_node_index = node->index;
2663   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-in2out");
2664   sm->pre_in2out_node_index = node->index;
2665   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-out2in");
2666   sm->pre_out2in_node_index = node->index;
2667   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-in2out");
2668   sm->pre_in2out_node_index = node->index;
2669   node = vlib_get_node_by_name (vm, (u8 *) "nat-pre-out2in");
2670   sm->pre_out2in_node_index = node->index;
2671   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-fast");
2672   sm->in2out_fast_node_index = node->index;
2673   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-slowpath");
2674   sm->in2out_slowpath_node_index = node->index;
2675   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-output-slowpath");
2676   sm->in2out_slowpath_output_node_index = node->index;
2677   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-slowpath");
2678   sm->ed_in2out_slowpath_node_index = node->index;
2679   node = vlib_get_node_by_name (vm, (u8 *) "nat44-out2in-fast");
2680   sm->out2in_fast_node_index = node->index;
2681   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in-slowpath");
2682   sm->ed_out2in_slowpath_node_index = node->index;
2683   node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpinning");
2684   sm->hairpinning_node_index = node->index;
2685   node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpin-dst");
2686   sm->hairpin_dst_node_index = node->index;
2687   node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpin-src");
2688   sm->hairpin_src_node_index = node->index;
2689   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpinning");
2690   sm->ed_hairpinning_node_index = node->index;
2691   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpin-dst");
2692   sm->ed_hairpin_dst_node_index = node->index;
2693   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpin-src");
2694   sm->ed_hairpin_src_node_index = node->index;
2695 }
2696
/*
 * Name a simple counter, then validate and zero its index 0.
 *
 *   c  - the counter object (an lvalue, not a pointer)
 *   n  - value stored in c.name
 *   sn - value stored in c.stat_segment_name
 *
 * Expands to a single statement: the caller supplies the terminating ';',
 * which keeps the macro usable in an unbraced if/else.
 */
#define nat_init_simple_counter(c, n, sn)   \
do                                          \
  {                                         \
    (c).name = n;                           \
    (c).stat_segment_name = sn;             \
    vlib_validate_simple_counter (&(c), 0); \
    vlib_zero_simple_counter (&(c), 0);     \
  } while (0)
2705
/*
 * Plugin init function (registered with VLIB_INIT_FUNCTION below).
 *
 * Zeroes snat_main and then, in order: caches the vnet/ip4/api main
 * pointers, resolves node indexes, registers the "nat" log class, names
 * the counters, sizes the per-thread data, sets up worker assignment,
 * registers the ip4 address and table-bind callbacks, allocates the two
 * NAT FIB sources, initialises the DPO, affinity and HA sub-modules, runs
 * the key self check and finally calls snat_api_init ().
 *
 * Returns whatever snat_api_init () returns.
 */
static clib_error_t *
nat_init (vlib_main_t * vm)
{
  snat_main_t *sm = &snat_main;
  clib_error_t *error = 0;
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  vlib_thread_registration_t *tr;
  ip4_add_del_interface_address_callback_t cbi = { 0 };
  ip4_table_bind_callback_t cbt = { 0 };
  u32 i, num_threads = 0;
  uword *p, *bitmap = 0;

  /* Must come first: everything below writes into *sm. */
  clib_memset (sm, 0, sizeof (*sm));

  // required
  sm->vnet_main = vnet_get_main ();
  // convenience
  sm->ip4_main = &ip4_main;
  sm->api_main = vlibapi_get_main ();
  sm->ip4_lookup_main = &ip4_main.lookup_main;

  // frame queue indices used for handoff
  sm->fq_out2in_index = ~0;
  sm->fq_in2out_index = ~0;
  sm->fq_in2out_output_index = ~0;

  sm->log_level = SNAT_LOG_ERROR;

  nat44_set_node_indexes (sm, vm);
  sm->log_class = vlib_log_register_class ("nat", 0);
  nat_ipfix_logging_init (vm);

  nat_init_simple_counter (sm->total_users, "total-users",
                           "/nat44/total-users");
  nat_init_simple_counter (sm->total_sessions, "total-sessions",
                           "/nat44/total-sessions");
  nat_init_simple_counter (sm->user_limit_reached, "user-limit-reached",
                           "/nat44/user-limit-reached");

  /* For every counter in foreach_nat_counter, set the name and stat
     segment path of its fastpath/slowpath, in2out/out2in, EI/ED variants. */
#define _(x)                                            \
  sm->counters.fastpath.in2out.x.name = #x;             \
  sm->counters.fastpath.in2out.x.stat_segment_name =    \
      "/nat44/in2out/fastpath/" #x;                     \
  sm->counters.slowpath.in2out.x.name = #x;             \
  sm->counters.slowpath.in2out.x.stat_segment_name =    \
      "/nat44/in2out/slowpath/" #x;                     \
  sm->counters.fastpath.out2in.x.name = #x;             \
  sm->counters.fastpath.out2in.x.stat_segment_name =    \
      "/nat44/out2in/fastpath/" #x;                     \
  sm->counters.slowpath.out2in.x.name = #x;             \
  sm->counters.slowpath.out2in.x.stat_segment_name =    \
      "/nat44/out2in/slowpath/" #x;                     \
  sm->counters.fastpath.in2out_ed.x.name = #x;          \
  sm->counters.fastpath.in2out_ed.x.stat_segment_name = \
      "/nat44/ed/in2out/fastpath/" #x;                  \
  sm->counters.slowpath.in2out_ed.x.name = #x;          \
  sm->counters.slowpath.in2out_ed.x.stat_segment_name = \
      "/nat44/ed/in2out/slowpath/" #x;                  \
  sm->counters.fastpath.out2in_ed.x.name = #x;          \
  sm->counters.fastpath.out2in_ed.x.stat_segment_name = \
      "/nat44/ed/out2in/fastpath/" #x;                  \
  sm->counters.slowpath.out2in_ed.x.name = #x;          \
  sm->counters.slowpath.out2in_ed.x.stat_segment_name = \
      "/nat44/ed/out2in/slowpath/" #x;
  foreach_nat_counter;
#undef _
  sm->counters.hairpinning.name = "hairpinning";
  sm->counters.hairpinning.stat_segment_name = "/nat44/hairpinning";

  /* Pick up worker count and first worker index from the "workers" thread
     registration, when there is one. */
  p = hash_get_mem (tm->thread_registrations_by_name, "workers");
  if (p)
    {
      tr = (vlib_thread_registration_t *) p[0];
      if (tr)
        {
          sm->num_workers = tr->count;
          sm->first_worker_index = tr->first_index;
        }
    }
  num_threads = tm->n_vlib_mains - 1;
  /* Port space handed out per thread: everything above the first 1024. */
  sm->port_per_thread = 0xffff - 1024;
  /* Makes indexes 0..num_threads of per_thread_data valid. */
  vec_validate (sm->per_thread_data, num_threads);

  /* Use all available workers by default */
  if (sm->num_workers > 1)
    {

      for (i = 0; i < sm->num_workers; i++)
        bitmap = clib_bitmap_set (bitmap, i, 1);
      snat_set_workers (bitmap);
      clib_bitmap_free (bitmap);
    }
  else
    sm->per_thread_data[0].snat_thread_index = 0;

  /* callbacks to call when interface address changes. */
  cbi.function = snat_ip4_add_del_interface_address_cb;
  vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
  cbi.function = nat_ip4_add_del_addr_only_sm_cb;
  vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);

  /* callbacks to call when interface to table binding changes */
  cbt.function = snat_update_outside_fib;
  vec_add1 (sm->ip4_main->table_bind_callbacks, cbt);

  // TODO: is it possible to move it into snat_main ?
  nat_fib_src_low =
    fib_source_allocate ("nat-low", FIB_SOURCE_PRIORITY_LOW,
                         FIB_SOURCE_BH_SIMPLE);
  nat_fib_src_hi =
    fib_source_allocate ("nat-hi", FIB_SOURCE_PRIORITY_HI,
                         FIB_SOURCE_BH_SIMPLE);

  /* used only by out2in-dpo feature */
  nat_dpo_module_init ();

  nat_affinity_init (vm);
  nat_ha_init (vm, sm->num_workers, num_threads);

  /* Key pack/unpack self check; it only uses ASSERT () to report. */
  test_key_calc_split ();
  error = snat_api_init (vm, sm);
  return error;
}

VLIB_INIT_FUNCTION (nat_init);
2831
/*
 * Enable the NAT44 feature with the given configuration.
 *
 * @param c configuration, passed by value; zero users/sessions are
 *          replaced by defaults on the local copy.
 *
 * @return 0 on success, 1 when NAT44 is already enabled or the requested
 *         option combination is rejected (nothing is modified in that case).
 */
int
nat44_plugin_enable (nat44_config_t c)
{
  snat_main_t *sm = &snat_main;
  u32 static_mapping_buckets = 1024;
  u32 static_mapping_memory_size = 64 << 20;

  if (sm->enabled)
    {
      nat_log_err ("nat44 is enabled");
      return 1;
    }

  // c.static_mapping_only + c.connection_tracking
  //  - supported in NAT EI & NAT ED
  // c.out2in_dpo, c.static_mapping_only
  //  - supported in NAT EI

  /* Reject options that endpoint-dependent mode does not accept. */
  if (c.endpoint_dependent)
    {
      if ((c.static_mapping_only && !c.connection_tracking) || c.out2in_dpo)
        {
          nat_log_err ("unsupported combination of configuration");
          return 1;
        }
      if (c.users || c.user_sessions)
        {
          nat_log_err ("unsupported combination of configuration");
          return 1;
        }
    }

  // reset to defaults:
  sm->alloc_addr_and_port = nat_alloc_addr_and_port_default;
  sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_DEFAULT;
  //
  sm->udp_timeout = SNAT_UDP_TIMEOUT;
  sm->icmp_timeout = SNAT_ICMP_TIMEOUT;
  sm->tcp_transitory_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
  sm->tcp_established_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;

  // nat44 feature configuration
  sm->endpoint_dependent = c.endpoint_dependent;
  sm->static_mapping_only = c.static_mapping_only;
  sm->static_mapping_connection_tracking = c.connection_tracking;
  sm->forwarding_enabled = 0;
  sm->mss_clamping = 0;

  /* Default user limit when none was configured. */
  if (!c.users)
    {
      c.users = 1024;
    }
  sm->max_users_per_thread = c.users;
  sm->user_buckets = nat_calc_bihash_buckets (c.users);

  if (!c.sessions)
    {
      // default value based on legacy setting of load factor 10 * default
      // translation buckets 1024
      c.sessions = 10 * 1024;
    }
  sm->max_translations_per_thread = c.sessions;
  sm->translation_buckets = nat_calc_bihash_buckets (c.sessions);

  /* First per-FIB limit entry is the per-thread limit; the per-user limit
     falls back to the per-thread limit when not configured. */
  vec_add1 (sm->max_translations_per_fib, sm->max_translations_per_thread);
  sm->max_translations_per_user
    = c.user_sessions ? c.user_sessions : sm->max_translations_per_thread;

  sm->outside_vrf_id = c.outside_vrf;
  sm->outside_fib_index =
    fib_table_find_or_create_and_lock
    (FIB_PROTOCOL_IP4, c.outside_vrf, nat_fib_src_hi);

  sm->inside_vrf_id = c.inside_vrf;
  sm->inside_fib_index =
    fib_table_find_or_create_and_lock
    (FIB_PROTOCOL_IP4, c.inside_vrf, nat_fib_src_hi);

  /* Select the ED or EI flavour of worker callbacks, node indexes and ICMP
     matchers. */
  if (c.endpoint_dependent)
    {
      sm->worker_out2in_cb = nat44_ed_get_worker_out2in_cb;
      sm->worker_in2out_cb = nat44_ed_get_worker_in2out_cb;
      sm->out2in_node_index = sm->ed_out2in_node_index;
      sm->in2out_node_index = sm->ed_in2out_node_index;
      sm->in2out_output_node_index = sm->ed_in2out_output_node_index;
      sm->icmp_match_out2in_cb = icmp_match_out2in_ed;
      sm->icmp_match_in2out_cb = icmp_match_in2out_ed;

      clib_bihash_init_16_8 (&sm->out2in_ed, "out2in-ed",
                             sm->translation_buckets, 0);
      clib_bihash_set_kvp_format_fn_16_8 (&sm->out2in_ed,
                                          format_ed_session_kvp);


      nat_affinity_enable ();

      nat_ha_enable (nat_ha_sadd_ed_cb, nat_ha_sdel_ed_cb, nat_ha_sref_ed_cb);
    }
  else
    {
      sm->worker_out2in_cb = snat_get_worker_out2in_cb;
      sm->worker_in2out_cb = snat_get_worker_in2out_cb;
      sm->out2in_node_index = sm->ei_out2in_node_index;
      sm->in2out_node_index = sm->ei_in2out_node_index;
      sm->in2out_output_node_index = sm->ei_in2out_output_node_index;
      sm->icmp_match_out2in_cb = icmp_match_out2in_slow;
      sm->icmp_match_in2out_cb = icmp_match_in2out_slow;

      nat_ha_enable (nat_ha_sadd_cb, nat_ha_sdel_cb, nat_ha_sref_cb);
    }

  // c.static_mapping & c.connection_tracking require
  // session database
  if (!c.static_mapping_only
      || (c.static_mapping_only && c.connection_tracking))
    {
      snat_main_per_thread_data_t *tsm;
      /* *INDENT-OFF* */
      vec_foreach (tsm, sm->per_thread_data)
        {
          nat44_db_init (tsm);
        }
      /* *INDENT-ON* */
    }
  else
    {
      /* Static mappings without connection tracking: no session database,
         so the "fast" ICMP matchers override the ones chosen above. */
      sm->icmp_match_in2out_cb = icmp_match_in2out_fast;
      sm->icmp_match_out2in_cb = icmp_match_out2in_fast;
    }

  clib_bihash_init_8_8 (&sm->static_mapping_by_local,
                        "static_mapping_by_local", static_mapping_buckets,
                        static_mapping_memory_size);
  clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_local,
                                     format_static_mapping_kvp);

  clib_bihash_init_8_8 (&sm->static_mapping_by_external,
                        "static_mapping_by_external",
                        static_mapping_buckets, static_mapping_memory_size);
  clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_external,
                                     format_static_mapping_kvp);

  // last: reset counters
  vlib_zero_simple_counter (&sm->total_users, 0);
  vlib_zero_simple_counter (&sm->total_sessions, 0);
  vlib_zero_simple_counter (&sm->user_limit_reached, 0);

  sm->enabled = 1;

  nat_log_info ("nat44 enable");

  return 0;
}
2985
2986 void
2987 nat44_addresses_free (snat_address_t ** addresses)
2988 {
2989   snat_address_t *ap;
2990   /* *INDENT-OFF* */
2991   vec_foreach (ap, *addresses)
2992     {
2993     #define _(N, i, n, s) \
2994       vec_free (ap->busy_##n##_ports_per_thread);
2995       foreach_nat_protocol
2996     #undef _
2997     }
2998   /* *INDENT-ON* */
2999   vec_free (*addresses);
3000   *addresses = 0;
3001 }
3002
/*
 * Disable the NAT44 feature and release what nat44_plugin_enable () and
 * later configuration set up: interface features, the static mapping
 * hashes, the per-thread session databases (when they were created), the
 * static mapping pool, both address vectors and the auto-add/resolve
 * vectors.
 *
 * @return 1 when NAT44 is not enabled, 0 otherwise. Failures while
 *         removing individual interfaces are logged but do not change the
 *         return value.
 */
int
nat44_plugin_disable ()
{
  snat_main_t *sm = &snat_main;
  snat_interface_t *i, *vec;
  int error = 0;

  if (!sm->enabled)
    {
      nat_log_err ("nat44 is disabled");
      return 1;
    }

  // first unregister all nodes from interfaces
  /* Iterate over a copy of the interface list rather than the list itself.
     NOTE(review): presumably because snat_interface_add_del () modifies
     sm->interfaces - confirm. */
  vec = vec_dup (sm->interfaces);
  /* *INDENT-OFF* */
  vec_foreach (i, vec)
    {
      if (nat_interface_is_inside(i))
        error = snat_interface_add_del (i->sw_if_index, 1, 1);
      if (nat_interface_is_outside(i))
        error = snat_interface_add_del (i->sw_if_index, 0, 1);

      if (error)
        {
          nat_log_err ("error occurred while removing interface %u",
                       i->sw_if_index);
        }
    }
  /* *INDENT-ON* */
  vec_free (vec);
  sm->interfaces = 0;

  /* Same procedure for the output-feature interfaces. */
  vec = vec_dup (sm->output_feature_interfaces);
  /* *INDENT-OFF* */
  vec_foreach (i, vec)
    {
      if (nat_interface_is_inside(i))
        error = snat_interface_add_del_output_feature (i->sw_if_index, 1, 1);
      if (nat_interface_is_outside(i))
        error = snat_interface_add_del_output_feature (i->sw_if_index, 0, 1);

      if (error)
        {
          nat_log_err ("error occurred while removing interface %u",
                       i->sw_if_index);
        }
    }
  /* *INDENT-ON* */
  vec_free (vec);
  sm->output_feature_interfaces = 0;

  vec_free (sm->max_translations_per_fib);

  /* The ED hash and affinity are only set up in endpoint-dependent mode. */
  if (sm->endpoint_dependent)
    {
      nat_affinity_disable ();
      clib_bihash_free_16_8 (&sm->out2in_ed);
    }

  clib_bihash_free_8_8 (&sm->static_mapping_by_local);
  clib_bihash_free_8_8 (&sm->static_mapping_by_external);

  /* Mirrors the condition under which nat44_plugin_enable () created the
     per-thread session databases. */
  if (!sm->static_mapping_only ||
      (sm->static_mapping_only && sm->static_mapping_connection_tracking))
    {
      snat_main_per_thread_data_t *tsm;
      /* *INDENT-OFF* */
      vec_foreach (tsm, sm->per_thread_data)
        {
          nat44_db_free (tsm);
        }
      /* *INDENT-ON* */
    }

  pool_free (sm->static_mappings);

  nat44_addresses_free (&sm->addresses);
  nat44_addresses_free (&sm->twice_nat_addresses);


  vec_free (sm->to_resolve);
  vec_free (sm->auto_add_sw_if_indices);
  vec_free (sm->auto_add_sw_if_indices_twice_nat);

  sm->to_resolve = 0;
  sm->auto_add_sw_if_indices = 0;
  sm->auto_add_sw_if_indices_twice_nat = 0;

  sm->forwarding_enabled = 0;

  sm->enabled = 0;

  return 0;
}
3098
/*
 * Release one port of an outside address for the given protocol.
 *
 * @param addresses    address vector that is searched for *addr
 * @param thread_index index into the per-thread busy-port counters
 * @param addr         address whose port is released
 * @param port         port in network byte order (converted to host order
 *                     before it is used as an index)
 * @param protocol     NAT protocol selecting which counters are updated
 *
 * The address is expected to be present: a miss is only caught by ASSERT.
 * For an unknown protocol nothing is changed and a message is logged.
 */
void
snat_free_outside_address_and_port (snat_address_t * addresses,
                                    u32 thread_index,
                                    ip4_address_t * addr,
                                    u16 port, nat_protocol_t protocol)
{
  snat_address_t *a;
  u32 address_index;
  u16 port_host_byte_order = clib_net_to_host_u16 (port);

  /* Linear search for the entry holding *addr. */
  for (address_index = 0; address_index < vec_len (addresses);
       address_index++)
    {
      if (addresses[address_index].addr.as_u32 == addr->as_u32)
        break;
    }

  ASSERT (address_index < vec_len (addresses));

  a = addresses + address_index;

  /* One generated case per protocol: drop the port refcount and both the
     total and the per-thread busy counters. */
  switch (protocol)
    {
#define _(N, i, n, s) \
    case NAT_PROTOCOL_##N: \
      ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \
      --a->busy_##n##_port_refcounts[port_host_byte_order]; \
      a->busy_##n##_ports--; \
      a->busy_##n##_ports_per_thread[thread_index]--; \
      break;
      foreach_nat_protocol
#undef _
    default:
      nat_elog_info ("unknown protocol");
      return;
    }
}
3136
/*
 * Mark a specific port of a specific outside address as in use.
 *
 * @param addresses    address vector that is searched for addr
 * @param thread_index index into the per-thread busy-port counters
 * @param addr         address to reserve the port on
 * @param port         port in network byte order
 * @param protocol     NAT protocol selecting which counters are updated
 *
 * @return 0 on success,
 *         VNET_API_ERROR_INSTANCE_IN_USE when the port already has a
 *         non-zero refcount,
 *         VNET_API_ERROR_NO_SUCH_ENTRY when addr is not in the vector,
 *         1 for an unknown protocol.
 */
static int
nat_set_outside_address_and_port (snat_address_t * addresses,
                                  u32 thread_index, ip4_address_t addr,
                                  u16 port, nat_protocol_t protocol)
{
  snat_address_t *a = 0;
  u32 address_index;
  u16 port_host_byte_order = clib_net_to_host_u16 (port);

  for (address_index = 0; address_index < vec_len (addresses);
       address_index++)
    {
      if (addresses[address_index].addr.as_u32 != addr.as_u32)
        continue;

      /* Matching address found; every switch branch below returns, so at
         most one entry is ever examined. */
      a = addresses + address_index;
      switch (protocol)
        {
#define _(N, j, n, s) \
        case NAT_PROTOCOL_##N: \
          if (a->busy_##n##_port_refcounts[port_host_byte_order]) \
            return VNET_API_ERROR_INSTANCE_IN_USE; \
          ++a->busy_##n##_port_refcounts[port_host_byte_order]; \
          a->busy_##n##_ports_per_thread[thread_index]++; \
          a->busy_##n##_ports++; \
          return 0;
          foreach_nat_protocol
#undef _
        default:
          nat_elog_info ("unknown protocol");
          return 1;
        }
    }

  return VNET_API_ERROR_NO_SUCH_ENTRY;
}
3173
3174 int
3175 snat_static_mapping_match (snat_main_t * sm,
3176                            ip4_address_t match_addr,
3177                            u16 match_port,
3178                            u32 match_fib_index,
3179                            nat_protocol_t match_protocol,
3180                            ip4_address_t * mapping_addr,
3181                            u16 * mapping_port,
3182                            u32 * mapping_fib_index,
3183                            u8 by_external,
3184                            u8 * is_addr_only,
3185                            twice_nat_type_t * twice_nat,
3186                            lb_nat_type_t * lb, ip4_address_t * ext_host_addr,
3187                            u8 * is_identity_nat, snat_static_mapping_t ** out)
3188 {
3189   clib_bihash_kv_8_8_t kv, value;
3190   clib_bihash_8_8_t *mapping_hash;
3191   snat_static_mapping_t *m;
3192   u32 rand, lo = 0, hi, mid, *tmp = 0, i;
3193   nat44_lb_addr_port_t *local;
3194   u8 backend_index;
3195
3196   if (!by_external)
3197     {
3198       mapping_hash = &sm->static_mapping_by_local;
3199       init_nat_k (&kv, match_addr, match_port, match_fib_index,
3200                   match_protocol);
3201       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
3202         {
3203           /* Try address only mapping */
3204           init_nat_k (&kv, match_addr, 0, match_fib_index, 0);
3205           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
3206             return 1;
3207         }
3208     }
3209   else
3210     {
3211       mapping_hash = &sm->static_mapping_by_external;
3212       init_nat_k (&kv, match_addr, match_port, 0, match_protocol);
3213       if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
3214         {
3215           /* Try address only mapping */
3216           init_nat_k (&kv, match_addr, 0, 0, 0);
3217           if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
3218             return 1;
3219         }
3220     }
3221
3222   m = pool_elt_at_index (sm->static_mappings, value.value);
3223
3224   if (by_external)
3225     {
3226       if (is_lb_static_mapping (m))
3227         {
3228           if (PREDICT_FALSE (lb != 0))
3229             *lb = m->affinity ? AFFINITY_LB_NAT : LB_NAT;
3230           if (m->affinity && !nat_affinity_find_and_lock (ext_host_addr[0],
3231                                                           match_addr,
3232                                                           match_protocol,
3233                                                           match_port,
3234                                                           &backend_index))
3235             {
3236               local = pool_elt_at_index (m->locals, backend_index);
3237               *mapping_addr = local->addr;
3238               *mapping_port = local->port;
3239               *mapping_fib_index = local->fib_index;
3240               goto end;
3241             }
3242           // pick locals matching this worker
3243           if (PREDICT_FALSE (sm->num_workers > 1))
3244             {
3245               u32 thread_index = vlib_get_thread_index ();
3246               /* *INDENT-OFF* */
3247               pool_foreach_index (i, m->locals,
3248               ({
3249                 local = pool_elt_at_index (m->locals, i);
3250
3251                 ip4_header_t ip = {
3252                   .src_address = local->addr,
3253                 };
3254
3255                 if (sm->worker_in2out_cb (&ip, m->fib_index, 0) ==
3256                     thread_index)
3257                   {
3258                     vec_add1 (tmp, i);
3259                   }
3260               }));
3261               /* *INDENT-ON* */
3262               ASSERT (vec_len (tmp) != 0);
3263             }
3264           else
3265             {
3266               /* *INDENT-OFF* */
3267               pool_foreach_index (i, m->locals,
3268               ({
3269                 vec_add1 (tmp, i);
3270               }));
3271               /* *INDENT-ON* */
3272             }
3273           hi = vec_len (tmp) - 1;
3274           local = pool_elt_at_index (m->locals, tmp[hi]);
3275           rand = 1 + (random_u32 (&sm->random_seed) % local->prefix);
3276           while (lo < hi)
3277             {
3278               mid = ((hi - lo) >> 1) + lo;
3279               local = pool_elt_at_index (m->locals, tmp[mid]);
3280               (rand > local->prefix) ? (lo = mid + 1) : (hi = mid);
3281             }
3282           local = pool_elt_at_index (m->locals, tmp[lo]);
3283           if (!(local->prefix >= rand))
3284             return 1;
3285           *mapping_addr = local->addr;
3286           *mapping_port = local->port;
3287           *mapping_fib_index = local->fib_index;
3288           if (m->affinity)
3289             {
3290               if (nat_affinity_create_and_lock (ext_host_addr[0], match_addr,
3291                                                 match_protocol, match_port,
3292                                                 tmp[lo], m->affinity,
3293                                                 m->affinity_per_service_list_head_index))
3294                 nat_elog_info ("create affinity record failed");
3295             }
3296           vec_free (tmp);
3297         }
3298       else
3299         {
3300           if (PREDICT_FALSE (lb != 0))
3301             *lb = NO_LB_NAT;
3302           *mapping_fib_index = m->fib_index;
3303           *mapping_addr = m->local_addr;
3304           /* Address only mapping doesn't change port */
3305           *mapping_port = is_addr_only_static_mapping (m) ? match_port
3306             : m->local_port;
3307         }
3308     }
3309   else
3310     {
3311       *mapping_addr = m->external_addr;
3312       /* Address only mapping doesn't change port */
3313       *mapping_port = is_addr_only_static_mapping (m) ? match_port
3314         : m->external_port;
3315       *mapping_fib_index = sm->outside_fib_index;
3316     }
3317
3318 end:
3319   if (PREDICT_FALSE (is_addr_only != 0))
3320     *is_addr_only = is_addr_only_static_mapping (m);
3321
3322   if (PREDICT_FALSE (twice_nat != 0))
3323     *twice_nat = m->twice_nat;
3324
3325   if (PREDICT_FALSE (is_identity_nat != 0))
3326     *is_identity_nat = is_identity_static_mapping (m);
3327
3328   if (out != 0)
3329     *out = m;
3330
3331   return 0;
3332 }
3333
3334 int
3335 snat_alloc_outside_address_and_port (snat_address_t * addresses,
3336                                      u32 fib_index,
3337                                      u32 thread_index,
3338                                      nat_protocol_t proto,
3339                                      ip4_address_t * addr,
3340                                      u16 * port,
3341                                      u16 port_per_thread,
3342                                      u32 snat_thread_index)
3343 {
3344   snat_main_t *sm = &snat_main;
3345
3346   return sm->alloc_addr_and_port (addresses, fib_index, thread_index, proto,
3347                                   addr, port, port_per_thread,
3348                                   snat_thread_index);
3349 }
3350
3351 static int
3352 nat_alloc_addr_and_port_default (snat_address_t * addresses,
3353                                  u32 fib_index,
3354                                  u32 thread_index,
3355                                  nat_protocol_t proto,
3356                                  ip4_address_t * addr,
3357                                  u16 * port,
3358                                  u16 port_per_thread, u32 snat_thread_index)
3359 {
3360   int i;
3361   snat_address_t *a, *ga = 0;
3362   u32 portnum;
3363
3364   for (i = 0; i < vec_len (addresses); i++)
3365     {
3366       a = addresses + i;
3367       switch (proto)
3368         {
3369 #define _(N, j, n, s) \
3370         case NAT_PROTOCOL_##N: \
3371           if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \
3372             { \
3373               if (a->fib_index == fib_index) \
3374                 { \
3375                   while (1) \
3376                     { \
3377                       portnum = (port_per_thread * \
3378                         snat_thread_index) + \
3379                         snat_random_port(0, port_per_thread - 1) + 1024; \
3380                       if (a->busy_##n##_port_refcounts[portnum]) \
3381                         continue; \
3382                       --a->busy_##n##_port_refcounts[portnum]; \
3383                       a->busy_##n##_ports_per_thread[thread_index]++; \
3384                       a->busy_##n##_ports++; \
3385                       *addr = a->addr; \
3386                       *port = clib_host_to_net_u16(portnum); \
3387                       return 0; \
3388                     } \
3389                 } \
3390               else if (a->fib_index == ~0) \
3391                 { \
3392                   ga = a; \
3393                 } \
3394             } \
3395           break;
3396           foreach_nat_protocol
3397 #undef _
3398         default:
3399           nat_elog_info ("unknown protocol");
3400           return 1;
3401         }
3402
3403     }
3404
3405   if (ga)
3406     {
3407       a = ga;
3408       switch (proto)
3409         {
3410 #define _(N, j, n, s) \
3411         case NAT_PROTOCOL_##N: \
3412           while (1) \
3413             { \
3414               portnum = (port_per_thread * \
3415                 snat_thread_index) + \
3416                 snat_random_port(0, port_per_thread - 1) + 1024; \
3417               if (a->busy_##n##_port_refcounts[portnum]) \
3418                 continue; \
3419               ++a->busy_##n##_port_refcounts[portnum]; \
3420               a->busy_##n##_ports_per_thread[thread_index]++; \
3421               a->busy_##n##_ports++; \
3422               *addr = a->addr; \
3423               *port = clib_host_to_net_u16(portnum); \
3424               return 0; \
3425             }
3426           break;
3427           foreach_nat_protocol
3428 #undef _
3429         default:
3430           nat_elog_info ("unknown protocol");
3431           return 1;
3432         }
3433     }
3434
3435   /* Totally out of translations to use... */
3436   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
3437   return 1;
3438 }
3439
/*
 * Address/port allocator that builds the port from the PSID settings in
 * snat_main (psid, psid_offset, psid_length).
 *
 * Only the first entry of addresses is ever used. fib_index,
 * port_per_thread and snat_thread_index are not read, and the per-thread
 * busy counters are not updated; thread_index is used only for the IPFIX
 * "addresses exhausted" event.
 *
 * @param addr [out] allocated address
 * @param port [out] allocated port, network byte order
 *
 * @return 0 on success, 1 for an unknown protocol, an empty address vector
 *         or when the address already has `ports` busy ports.
 */
static int
nat_alloc_addr_and_port_mape (snat_address_t * addresses, u32 fib_index,
                              u32 thread_index, nat_protocol_t proto,
                              ip4_address_t * addr, u16 * port,
                              u16 port_per_thread, u32 snat_thread_index)
{
  snat_main_t *sm = &snat_main;
  snat_address_t *a = addresses;
  u16 m, ports, portnum, A, j;
  /* m: bits of the 16-bit port left after the offset and PSID fields. */
  m = 16 - (sm->psid_offset + sm->psid_length);
  /* Upper bound on busy ports checked before trying to allocate. */
  ports = (1 << (16 - sm->psid_length)) - (1 << m);

  if (!vec_len (addresses))
    goto exhausted;

  switch (proto)
    {
      /* Draw random A and j parts, combine them with the PSID and retry
         until the resulting port has a zero refcount. */
#define _(N, i, n, s) \
    case NAT_PROTOCOL_##N: \
      if (a->busy_##n##_ports < ports) \
        { \
          while (1) \
            { \
              A = snat_random_port(1, pow2_mask(sm->psid_offset)); \
              j = snat_random_port(0, pow2_mask(m)); \
              portnum = A | (sm->psid << sm->psid_offset) | (j << (16 - m)); \
              if (a->busy_##n##_port_refcounts[portnum]) \
                continue; \
              ++a->busy_##n##_port_refcounts[portnum]; \
              a->busy_##n##_ports++; \
              *addr = a->addr; \
              *port = clib_host_to_net_u16 (portnum); \
              return 0; \
            } \
        } \
      break;
      foreach_nat_protocol
#undef _
    default:
      nat_elog_info ("unknown protocol");
      return 1;
    }

exhausted:
  /* Totally out of translations to use... */
  nat_ipfix_logging_addresses_exhausted (thread_index, 0);
  return 1;
}
3488
3489 static int
3490 nat_alloc_addr_and_port_range (snat_address_t * addresses, u32 fib_index,
3491                                u32 thread_index, nat_protocol_t proto,
3492                                ip4_address_t * addr, u16 * port,
3493                                u16 port_per_thread, u32 snat_thread_index)
3494 {
3495   snat_main_t *sm = &snat_main;
3496   snat_address_t *a = addresses;
3497   u16 portnum, ports;
3498
3499   ports = sm->end_port - sm->start_port + 1;
3500
3501   if (!vec_len (addresses))
3502     goto exhausted;
3503
3504   switch (proto)
3505     {
3506 #define _(N, i, n, s) \
3507     case NAT_PROTOCOL_##N: \
3508       if (a->busy_##n##_ports < ports) \
3509         { \
3510           while (1) \
3511             { \
3512               portnum = snat_random_port(sm->start_port, sm->end_port); \
3513               if (a->busy_##n##_port_refcounts[portnum]) \
3514                 continue; \
3515               ++a->busy_##n##_port_refcounts[portnum]; \
3516               a->busy_##n##_ports++; \
3517               *addr = a->addr; \
3518               *port = clib_host_to_net_u16 (portnum); \
3519               return 0; \
3520             } \
3521         } \
3522       break;
3523       foreach_nat_protocol
3524 #undef _
3525     default:
3526       nat_elog_info ("unknown protocol");
3527       return 1;
3528     }
3529
3530 exhausted:
3531   /* Totally out of translations to use... */
3532   nat_ipfix_logging_addresses_exhausted (thread_index, 0);
3533   return 1;
3534 }
3535
3536 void
3537 nat44_add_del_address_dpo (ip4_address_t addr, u8 is_add)
3538 {
3539   dpo_id_t dpo_v4 = DPO_INVALID;
3540   fib_prefix_t pfx = {
3541     .fp_proto = FIB_PROTOCOL_IP4,
3542     .fp_len = 32,
3543     .fp_addr.ip4.as_u32 = addr.as_u32,
3544   };
3545
3546   if (is_add)
3547     {
3548       nat_dpo_create (DPO_PROTO_IP4, 0, &dpo_v4);
3549       fib_table_entry_special_dpo_add (0, &pfx, nat_fib_src_hi,
3550                                        FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v4);
3551       dpo_reset (&dpo_v4);
3552     }
3553   else
3554     {
3555       fib_table_entry_special_remove (0, &pfx, nat_fib_src_hi);
3556     }
3557 }
3558
3559 u8 *
3560 format_session_kvp (u8 * s, va_list * args)
3561 {
3562   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
3563
3564   s = format (s, "%U session-index %llu", format_snat_key, v->key, v->value);
3565
3566   return s;
3567 }
3568
3569 u8 *
3570 format_static_mapping_kvp (u8 * s, va_list * args)
3571 {
3572   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
3573
3574   s = format (s, "%U static-mapping-index %llu",
3575               format_snat_key, v->key, v->value);
3576
3577   return s;
3578 }
3579
3580 u8 *
3581 format_user_kvp (u8 * s, va_list * args)
3582 {
3583   clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
3584   snat_user_key_t k;
3585
3586   k.as_u64 = v->key;
3587
3588   s = format (s, "%U fib %d user-index %llu", format_ip4_address, &k.addr,
3589               k.fib_index, v->value);
3590
3591   return s;
3592 }
3593
3594 u8 *
3595 format_ed_session_kvp (u8 * s, va_list * args)
3596 {
3597   clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
3598
3599   u8 proto;
3600   u16 r_port, l_port;
3601   ip4_address_t l_addr, r_addr;
3602   u32 fib_index;
3603
3604   split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port);
3605   s =
3606     format (s,
3607             "local %U:%d remote %U:%d proto %U fib %d thread-index %u session-index %u",
3608             format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port),
3609             format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port),
3610             format_ip_protocol, proto, fib_index,
3611             ed_value_get_session_index (v), ed_value_get_thread_index (v));
3612
3613   return s;
3614 }
3615
3616 static u32
3617 snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0,
3618                            u8 is_output)
3619 {
3620   snat_main_t *sm = &snat_main;
3621   u32 next_worker_index = 0;
3622   u32 hash;
3623
3624   next_worker_index = sm->first_worker_index;
3625   hash = ip0->src_address.as_u32 + (ip0->src_address.as_u32 >> 8) +
3626     (ip0->src_address.as_u32 >> 16) + (ip0->src_address.as_u32 >> 24);
3627
3628   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
3629     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
3630   else
3631     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
3632
3633   return next_worker_index;
3634 }
3635
3636 static u32
3637 snat_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip0,
3638                            u32 rx_fib_index0, u8 is_output)
3639 {
3640   snat_main_t *sm = &snat_main;
3641   udp_header_t *udp;
3642   u16 port;
3643   clib_bihash_kv_8_8_t kv, value;
3644   snat_static_mapping_t *m;
3645   u32 proto;
3646   u32 next_worker_index = 0;
3647
3648   /* first try static mappings without port */
3649   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3650     {
3651       init_nat_k (&kv, ip0->dst_address, 0, rx_fib_index0, 0);
3652       if (!clib_bihash_search_8_8
3653           (&sm->static_mapping_by_external, &kv, &value))
3654         {
3655           m = pool_elt_at_index (sm->static_mappings, value.value);
3656           return m->workers[0];
3657         }
3658     }
3659
3660   proto = ip_proto_to_nat_proto (ip0->protocol);
3661   udp = ip4_next_header (ip0);
3662   port = udp->dst_port;
3663
3664   /* unknown protocol */
3665   if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
3666     {
3667       /* use current thread */
3668       return vlib_get_thread_index ();
3669     }
3670
3671   if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_ICMP))
3672     {
3673       icmp46_header_t *icmp = (icmp46_header_t *) udp;
3674       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3675       if (!icmp_type_is_error_message
3676           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
3677         port = vnet_buffer (b)->ip.reass.l4_src_port;
3678       else
3679         {
3680           /* if error message, then it's not fragmented and we can access it */
3681           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3682           proto = ip_proto_to_nat_proto (inner_ip->protocol);
3683           void *l4_header = ip4_next_header (inner_ip);
3684           switch (proto)
3685             {
3686             case NAT_PROTOCOL_ICMP:
3687               icmp = (icmp46_header_t *) l4_header;
3688               echo = (icmp_echo_header_t *) (icmp + 1);
3689               port = echo->identifier;
3690               break;
3691             case NAT_PROTOCOL_UDP:
3692             case NAT_PROTOCOL_TCP:
3693               port = ((tcp_udp_header_t *) l4_header)->src_port;
3694               break;
3695             default:
3696               return vlib_get_thread_index ();
3697             }
3698         }
3699     }
3700
3701   /* try static mappings with port */
3702   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3703     {
3704       init_nat_k (&kv, ip0->dst_address, port, rx_fib_index0, proto);
3705       if (!clib_bihash_search_8_8
3706           (&sm->static_mapping_by_external, &kv, &value))
3707         {
3708           m = pool_elt_at_index (sm->static_mappings, value.value);
3709           return m->workers[0];
3710         }
3711     }
3712
3713   /* worker by outside port */
3714   next_worker_index = sm->first_worker_index;
3715   next_worker_index +=
3716     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
3717   return next_worker_index;
3718 }
3719
3720 static u32
3721 nat44_ed_get_worker_in2out_cb (ip4_header_t * ip, u32 rx_fib_index,
3722                                u8 is_output)
3723 {
3724   snat_main_t *sm = &snat_main;
3725   u32 next_worker_index = sm->first_worker_index;
3726   u32 hash;
3727
3728   clib_bihash_kv_16_8_t kv16, value16;
3729   snat_main_per_thread_data_t *tsm;
3730   udp_header_t *udp;
3731
3732   if (PREDICT_FALSE (is_output))
3733     {
3734       u32 fib_index = sm->outside_fib_index;
3735       nat_outside_fib_t *outside_fib;
3736       fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
3737       fib_prefix_t pfx = {
3738         .fp_proto = FIB_PROTOCOL_IP4,
3739         .fp_len = 32,
3740         .fp_addr = {
3741                     .ip4.as_u32 = ip->dst_address.as_u32,
3742                     }
3743         ,
3744       };
3745
3746       udp = ip4_next_header (ip);
3747
3748       switch (vec_len (sm->outside_fibs))
3749         {
3750         case 0:
3751           fib_index = sm->outside_fib_index;
3752           break;
3753         case 1:
3754           fib_index = sm->outside_fibs[0].fib_index;
3755           break;
3756         default:
3757             /* *INDENT-OFF* */
3758             vec_foreach (outside_fib, sm->outside_fibs)
3759               {
3760                 fei = fib_table_lookup (outside_fib->fib_index, &pfx);
3761                 if (FIB_NODE_INDEX_INVALID != fei)
3762                   {
3763                     if (fib_entry_get_resolving_interface (fei) != ~0)
3764                       {
3765                         fib_index = outside_fib->fib_index;
3766                         break;
3767                       }
3768                   }
3769               }
3770             /* *INDENT-ON* */
3771           break;
3772         }
3773
3774       init_ed_k (&kv16, ip->src_address, udp->src_port, ip->dst_address,
3775                  udp->dst_port, fib_index, ip->protocol);
3776
3777       if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed,
3778                                                   &kv16, &value16)))
3779         {
3780           tsm =
3781             vec_elt_at_index (sm->per_thread_data,
3782                               ed_value_get_thread_index (&value16));
3783           next_worker_index += tsm->thread_index;
3784
3785           nat_elog_debug_handoff ("HANDOFF IN2OUT-OUTPUT-FEATURE (session)",
3786                                   next_worker_index, fib_index,
3787                                   clib_net_to_host_u32 (ip->
3788                                                         src_address.as_u32),
3789                                   clib_net_to_host_u32 (ip->
3790                                                         dst_address.as_u32));
3791
3792           return next_worker_index;
3793         }
3794     }
3795
3796   hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
3797     (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
3798
3799   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
3800     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
3801   else
3802     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
3803
3804   if (PREDICT_TRUE (!is_output))
3805     {
3806       nat_elog_debug_handoff ("HANDOFF IN2OUT",
3807                               next_worker_index, rx_fib_index,
3808                               clib_net_to_host_u32 (ip->src_address.as_u32),
3809                               clib_net_to_host_u32 (ip->dst_address.as_u32));
3810     }
3811   else
3812     {
3813       nat_elog_debug_handoff ("HANDOFF IN2OUT-OUTPUT-FEATURE",
3814                               next_worker_index, rx_fib_index,
3815                               clib_net_to_host_u32 (ip->src_address.as_u32),
3816                               clib_net_to_host_u32 (ip->dst_address.as_u32));
3817     }
3818
3819   return next_worker_index;
3820 }
3821
3822 static u32
3823 nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip,
3824                                u32 rx_fib_index, u8 is_output)
3825 {
3826   snat_main_t *sm = &snat_main;
3827   clib_bihash_kv_8_8_t kv, value;
3828   clib_bihash_kv_16_8_t kv16, value16;
3829   snat_main_per_thread_data_t *tsm;
3830
3831   u32 proto, next_worker_index = 0;
3832   udp_header_t *udp;
3833   u16 port;
3834   snat_static_mapping_t *m;
3835   u32 hash;
3836
3837   proto = ip_proto_to_nat_proto (ip->protocol);
3838
3839   if (PREDICT_TRUE (proto == NAT_PROTOCOL_UDP || proto == NAT_PROTOCOL_TCP))
3840     {
3841       udp = ip4_next_header (ip);
3842
3843       init_ed_k (&kv16, ip->dst_address, udp->dst_port, ip->src_address,
3844                  udp->src_port, rx_fib_index, ip->protocol);
3845
3846       if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed,
3847                                                   &kv16, &value16)))
3848         {
3849           tsm =
3850             vec_elt_at_index (sm->per_thread_data,
3851                               ed_value_get_thread_index (&value16));
3852           vnet_buffer2 (b)->nat.ed_out2in_nat_session_index =
3853             ed_value_get_session_index (&value16);
3854           next_worker_index = sm->first_worker_index + tsm->thread_index;
3855           nat_elog_debug_handoff ("HANDOFF OUT2IN (session)",
3856                                   next_worker_index, rx_fib_index,
3857                                   clib_net_to_host_u32 (ip->
3858                                                         src_address.as_u32),
3859                                   clib_net_to_host_u32 (ip->
3860                                                         dst_address.as_u32));
3861           return next_worker_index;
3862         }
3863     }
3864   else if (proto == NAT_PROTOCOL_ICMP)
3865     {
3866       if (!get_icmp_o2i_ed_key (b, ip, rx_fib_index, ~0, ~0, 0, 0, 0, &kv16))
3867         {
3868           if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed,
3869                                                       &kv16, &value16)))
3870             {
3871               tsm =
3872                 vec_elt_at_index (sm->per_thread_data,
3873                                   ed_value_get_thread_index (&value16));
3874               next_worker_index = sm->first_worker_index + tsm->thread_index;
3875               nat_elog_debug_handoff ("HANDOFF OUT2IN (session)",
3876                                       next_worker_index, rx_fib_index,
3877                                       clib_net_to_host_u32 (ip->
3878                                                             src_address.as_u32),
3879                                       clib_net_to_host_u32 (ip->
3880                                                             dst_address.as_u32));
3881               return next_worker_index;
3882             }
3883         }
3884     }
3885
3886   /* first try static mappings without port */
3887   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3888     {
3889       init_nat_k (&kv, ip->dst_address, 0, 0, 0);
3890       if (!clib_bihash_search_8_8
3891           (&sm->static_mapping_by_external, &kv, &value))
3892         {
3893           m = pool_elt_at_index (sm->static_mappings, value.value);
3894           next_worker_index = m->workers[0];
3895           goto done;
3896         }
3897     }
3898
3899   /* unknown protocol */
3900   if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
3901     {
3902       /* use current thread */
3903       next_worker_index = vlib_get_thread_index ();
3904       goto done;
3905     }
3906
3907   udp = ip4_next_header (ip);
3908   port = udp->dst_port;
3909
3910   if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
3911     {
3912       icmp46_header_t *icmp = (icmp46_header_t *) udp;
3913       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
3914       if (!icmp_type_is_error_message
3915           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
3916         port = vnet_buffer (b)->ip.reass.l4_src_port;
3917       else
3918         {
3919           /* if error message, then it's not fragmented and we can access it */
3920           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
3921           proto = ip_proto_to_nat_proto (inner_ip->protocol);
3922           void *l4_header = ip4_next_header (inner_ip);
3923           switch (proto)
3924             {
3925             case NAT_PROTOCOL_ICMP:
3926               icmp = (icmp46_header_t *) l4_header;
3927               echo = (icmp_echo_header_t *) (icmp + 1);
3928               port = echo->identifier;
3929               break;
3930             case NAT_PROTOCOL_UDP:
3931             case NAT_PROTOCOL_TCP:
3932               port = ((tcp_udp_header_t *) l4_header)->src_port;
3933               break;
3934             default:
3935               next_worker_index = vlib_get_thread_index ();
3936               goto done;
3937             }
3938         }
3939     }
3940
3941   /* try static mappings with port */
3942   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
3943     {
3944       init_nat_k (&kv, ip->dst_address, port, 0, proto);
3945       if (!clib_bihash_search_8_8
3946           (&sm->static_mapping_by_external, &kv, &value))
3947         {
3948           m = pool_elt_at_index (sm->static_mappings, value.value);
3949           if (!is_lb_static_mapping (m))
3950             {
3951               next_worker_index = m->workers[0];
3952               goto done;
3953             }
3954
3955           hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
3956             (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
3957
3958           if (PREDICT_TRUE (is_pow2 (_vec_len (m->workers))))
3959             next_worker_index =
3960               m->workers[hash & (_vec_len (m->workers) - 1)];
3961           else
3962             next_worker_index = m->workers[hash % _vec_len (m->workers)];
3963           goto done;
3964         }
3965     }
3966
3967   /* worker by outside port */
3968   next_worker_index = sm->first_worker_index;
3969   next_worker_index +=
3970     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
3971
3972 done:
3973   nat_elog_debug_handoff ("HANDOFF OUT2IN", next_worker_index, rx_fib_index,
3974                           clib_net_to_host_u32 (ip->src_address.as_u32),
3975                           clib_net_to_host_u32 (ip->dst_address.as_u32));
3976   return next_worker_index;
3977 }
3978
3979 void
3980 nat_ha_sadd_cb (ip4_address_t * in_addr, u16 in_port,
3981                 ip4_address_t * out_addr, u16 out_port,
3982                 ip4_address_t * eh_addr, u16 eh_port,
3983                 ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
3984                 u32 fib_index, u16 flags, u32 thread_index)
3985 {
3986   snat_main_t *sm = &snat_main;
3987   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
3988   snat_user_t *u;
3989   snat_session_t *s;
3990   clib_bihash_kv_8_8_t kv;
3991   vlib_main_t *vm = vlib_get_main ();
3992   f64 now = vlib_time_now (vm);
3993   nat_outside_fib_t *outside_fib;
3994   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
3995   fib_prefix_t pfx = {
3996     .fp_proto = FIB_PROTOCOL_IP4,
3997     .fp_len = 32,
3998     .fp_addr = {
3999                 .ip4.as_u32 = eh_addr->as_u32,
4000                 },
4001   };
4002
4003   if (!(flags & SNAT_SESSION_FLAG_STATIC_MAPPING))
4004     {
4005       if (nat_set_outside_address_and_port
4006           (sm->addresses, thread_index, *out_addr, out_port, proto))
4007         return;
4008     }
4009
4010   u = nat_user_get_or_create (sm, in_addr, fib_index, thread_index);
4011   if (!u)
4012     return;
4013
4014   s = nat_session_alloc_or_recycle (sm, u, thread_index, now);
4015   if (!s)
4016     return;
4017
4018   if (sm->endpoint_dependent)
4019     {
4020       nat_ed_lru_insert (tsm, s, now, nat_proto_to_ip_proto (proto));
4021     }
4022
4023   s->out2in.addr.as_u32 = out_addr->as_u32;
4024   s->out2in.port = out_port;
4025   s->nat_proto = proto;
4026   s->last_heard = now;
4027   s->flags = flags;
4028   s->ext_host_addr.as_u32 = eh_addr->as_u32;
4029   s->ext_host_port = eh_port;
4030   user_session_increment (sm, u, snat_is_session_static (s));
4031   switch (vec_len (sm->outside_fibs))
4032     {
4033     case 0:
4034       s->out2in.fib_index = sm->outside_fib_index;
4035       break;
4036     case 1:
4037       s->out2in.fib_index = sm->outside_fibs[0].fib_index;
4038       break;
4039     default:
4040       /* *INDENT-OFF* */
4041       vec_foreach (outside_fib, sm->outside_fibs)
4042         {
4043           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
4044           if (FIB_NODE_INDEX_INVALID != fei)
4045             {
4046               if (fib_entry_get_resolving_interface (fei) != ~0)
4047                 {
4048                   s->out2in.fib_index = outside_fib->fib_index;
4049                   break;
4050                 }
4051             }
4052         }
4053       /* *INDENT-ON* */
4054       break;
4055     }
4056   init_nat_o2i_kv (&kv, s, s - tsm->sessions);
4057   if (clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 1))
4058     nat_elog_warn ("out2in key add failed");
4059
4060   s->in2out.addr.as_u32 = in_addr->as_u32;
4061   s->in2out.port = in_port;
4062   s->in2out.fib_index = fib_index;
4063   init_nat_i2o_kv (&kv, s, s - tsm->sessions);
4064   if (clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 1))
4065     nat_elog_warn ("in2out key add failed");
4066 }
4067
4068 void
4069 nat_ha_sdel_cb (ip4_address_t * out_addr, u16 out_port,
4070                 ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
4071                 u32 ti)
4072 {
4073   snat_main_t *sm = &snat_main;
4074   clib_bihash_kv_8_8_t kv, value;
4075   u32 thread_index;
4076   snat_session_t *s;
4077   snat_main_per_thread_data_t *tsm;
4078
4079   if (sm->num_workers > 1)
4080     thread_index =
4081       sm->first_worker_index +
4082       (sm->workers[(clib_net_to_host_u16 (out_port) -
4083                     1024) / sm->port_per_thread]);
4084   else
4085     thread_index = sm->num_workers;
4086   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
4087
4088   init_nat_k (&kv, *out_addr, out_port, fib_index, proto);
4089   if (clib_bihash_search_8_8 (&tsm->out2in, &kv, &value))
4090     return;
4091
4092   s = pool_elt_at_index (tsm->sessions, value.value);
4093   nat_free_session_data (sm, s, thread_index, 1);
4094   nat44_delete_session (sm, s, thread_index);
4095 }
4096
4097 void
4098 nat_ha_sref_cb (ip4_address_t * out_addr, u16 out_port,
4099                 ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
4100                 u32 total_pkts, u64 total_bytes, u32 thread_index)
4101 {
4102   snat_main_t *sm = &snat_main;
4103   clib_bihash_kv_8_8_t kv, value;
4104   snat_session_t *s;
4105   snat_main_per_thread_data_t *tsm;
4106
4107   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
4108
4109   init_nat_k (&kv, *out_addr, out_port, fib_index, proto);
4110   if (clib_bihash_search_8_8 (&tsm->out2in, &kv, &value))
4111     return;
4112
4113   s = pool_elt_at_index (tsm->sessions, value.value);
4114   s->total_pkts = total_pkts;
4115   s->total_bytes = total_bytes;
4116 }
4117
4118 void
4119 nat_ha_sadd_ed_cb (ip4_address_t * in_addr, u16 in_port,
4120                    ip4_address_t * out_addr, u16 out_port,
4121                    ip4_address_t * eh_addr, u16 eh_port,
4122                    ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
4123                    u32 fib_index, u16 flags, u32 thread_index)
4124 {
4125   snat_main_t *sm = &snat_main;
4126   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
4127   snat_session_t *s;
4128   clib_bihash_kv_16_8_t kv;
4129   vlib_main_t *vm = vlib_get_main ();
4130   f64 now = vlib_time_now (vm);
4131   nat_outside_fib_t *outside_fib;
4132   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
4133   fib_prefix_t pfx = {
4134     .fp_proto = FIB_PROTOCOL_IP4,
4135     .fp_len = 32,
4136     .fp_addr = {
4137                 .ip4.as_u32 = eh_addr->as_u32,
4138                 },
4139   };
4140
4141
4142   if (!(flags & SNAT_SESSION_FLAG_STATIC_MAPPING))
4143     {
4144       if (nat_set_outside_address_and_port
4145           (sm->addresses, thread_index, *out_addr, out_port, proto))
4146         return;
4147     }
4148
4149   if (flags & SNAT_SESSION_FLAG_TWICE_NAT)
4150     {
4151       if (nat_set_outside_address_and_port
4152           (sm->addresses, thread_index, *ehn_addr, ehn_port, proto))
4153         return;
4154     }
4155
4156   s = nat_ed_session_alloc (sm, thread_index, now, proto);
4157   if (!s)
4158     return;
4159
4160   s->last_heard = now;
4161   s->flags = flags;
4162   s->ext_host_nat_addr.as_u32 = s->ext_host_addr.as_u32 = eh_addr->as_u32;
4163   s->ext_host_nat_port = s->ext_host_port = eh_port;
4164   if (is_twice_nat_session (s))
4165     {
4166       s->ext_host_nat_addr.as_u32 = ehn_addr->as_u32;
4167       s->ext_host_nat_port = ehn_port;
4168     }
4169   switch (vec_len (sm->outside_fibs))
4170     {
4171     case 0:
4172       s->out2in.fib_index = sm->outside_fib_index;
4173       break;
4174     case 1:
4175       s->out2in.fib_index = sm->outside_fibs[0].fib_index;
4176       break;
4177     default:
4178       /* *INDENT-OFF* */
4179       vec_foreach (outside_fib, sm->outside_fibs)
4180         {
4181           fei = fib_table_lookup (outside_fib->fib_index, &pfx);
4182           if (FIB_NODE_INDEX_INVALID != fei)
4183             {
4184               if (fib_entry_get_resolving_interface (fei) != ~0)
4185                 {
4186                   s->out2in.fib_index = outside_fib->fib_index;
4187                   break;
4188                 }
4189             }
4190         }
4191       /* *INDENT-ON* */
4192       break;
4193     }
4194   s->nat_proto = proto;
4195   s->out2in.addr.as_u32 = out_addr->as_u32;
4196   s->out2in.port = out_port;
4197
4198   s->in2out.addr.as_u32 = in_addr->as_u32;
4199   s->in2out.port = in_port;
4200   s->in2out.fib_index = fib_index;
4201
4202   init_ed_kv (&kv, *in_addr, in_port, s->ext_host_nat_addr,
4203               s->ext_host_nat_port, fib_index, nat_proto_to_ip_proto (proto),
4204               thread_index, s - tsm->sessions);
4205   if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &kv, 1))
4206     nat_elog_warn ("in2out key add failed");
4207
4208   init_ed_kv (&kv, *out_addr, out_port, *eh_addr, eh_port,
4209               s->out2in.fib_index, nat_proto_to_ip_proto (proto),
4210               thread_index, s - tsm->sessions);
4211   if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &kv, 1))
4212     nat_elog_warn ("out2in key add failed");
4213 }
4214
4215 void
4216 nat_ha_sdel_ed_cb (ip4_address_t * out_addr, u16 out_port,
4217                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
4218                    u32 fib_index, u32 ti)
4219 {
4220   snat_main_t *sm = &snat_main;
4221   clib_bihash_kv_16_8_t kv, value;
4222   u32 thread_index;
4223   snat_session_t *s;
4224   snat_main_per_thread_data_t *tsm;
4225
4226   if (sm->num_workers > 1)
4227     thread_index =
4228       sm->first_worker_index +
4229       (sm->workers[(clib_net_to_host_u16 (out_port) -
4230                     1024) / sm->port_per_thread]);
4231   else
4232     thread_index = sm->num_workers;
4233   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
4234
4235   init_ed_k (&kv, *out_addr, out_port, *eh_addr, eh_port, fib_index, proto);
4236   if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
4237     return;
4238
4239   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
4240   nat_free_session_data (sm, s, thread_index, 1);
4241   nat44_delete_session (sm, s, thread_index);
4242 }
4243
4244 void
4245 nat_ha_sref_ed_cb (ip4_address_t * out_addr, u16 out_port,
4246                    ip4_address_t * eh_addr, u16 eh_port, u8 proto,
4247                    u32 fib_index, u32 total_pkts, u64 total_bytes,
4248                    u32 thread_index)
4249 {
4250   snat_main_t *sm = &snat_main;
4251   clib_bihash_kv_16_8_t kv, value;
4252   snat_session_t *s;
4253   snat_main_per_thread_data_t *tsm;
4254
4255   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
4256
4257   init_ed_k (&kv, *out_addr, out_port, *eh_addr, eh_port, fib_index, proto);
4258   if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
4259     return;
4260
4261   s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
4262   s->total_pkts = total_pkts;
4263   s->total_bytes = total_bytes;
4264 }
4265
4266 static u32
4267 nat_calc_bihash_buckets (u32 n_elts)
4268 {
4269   n_elts = n_elts / 2.5;
4270   u64 lower_pow2 = 1;
4271   while (lower_pow2 * 2 < n_elts)
4272     {
4273       lower_pow2 = 2 * lower_pow2;
4274     }
4275   u64 upper_pow2 = 2 * lower_pow2;
4276   if ((upper_pow2 - n_elts) < (n_elts - lower_pow2))
4277     {
4278       if (upper_pow2 <= UINT32_MAX)
4279         {
4280           return upper_pow2;
4281         }
4282     }
4283   return lower_pow2;
4284 }
4285
4286 u32
4287 nat44_get_max_session_limit ()
4288 {
4289   snat_main_t *sm = &snat_main;
4290   u32 max_limit = 0, len = 0;
4291
4292   for (; len < vec_len (sm->max_translations_per_fib); len++)
4293     {
4294       if (max_limit < sm->max_translations_per_fib[len])
4295         max_limit = sm->max_translations_per_fib[len];
4296     }
4297   return max_limit;
4298 }
4299
4300 int
4301 nat44_set_session_limit (u32 session_limit, u32 vrf_id)
4302 {
4303   snat_main_t *sm = &snat_main;
4304   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
4305   u32 len = vec_len (sm->max_translations_per_fib);
4306
4307   if (len <= fib_index)
4308     {
4309       vec_validate (sm->max_translations_per_fib, fib_index + 1);
4310
4311       for (; len < vec_len (sm->max_translations_per_fib); len++)
4312         sm->max_translations_per_fib[len] = sm->max_translations_per_thread;
4313     }
4314
4315   sm->max_translations_per_fib[fib_index] = session_limit;
4316   return 0;
4317 }
4318
4319 int
4320 nat44_update_session_limit (u32 session_limit, u32 vrf_id)
4321 {
4322   snat_main_t *sm = &snat_main;
4323
4324   if (nat44_set_session_limit (session_limit, vrf_id))
4325     return 1;
4326   sm->max_translations_per_thread = nat44_get_max_session_limit ();
4327
4328   sm->translation_buckets =
4329     nat_calc_bihash_buckets (sm->max_translations_per_thread);
4330
4331   nat44_sessions_clear ();
4332   return 0;
4333 }
4334
4335 void
4336 nat44_db_init (snat_main_per_thread_data_t * tsm)
4337 {
4338   snat_main_t *sm = &snat_main;
4339
4340   pool_alloc (tsm->sessions, sm->max_translations_per_thread);
4341   pool_alloc (tsm->lru_pool, sm->max_translations_per_thread);
4342
4343   dlist_elt_t *head;
4344
4345   pool_get (tsm->lru_pool, head);
4346   tsm->tcp_trans_lru_head_index = head - tsm->lru_pool;
4347   clib_dlist_init (tsm->lru_pool, tsm->tcp_trans_lru_head_index);
4348
4349   pool_get (tsm->lru_pool, head);
4350   tsm->tcp_estab_lru_head_index = head - tsm->lru_pool;
4351   clib_dlist_init (tsm->lru_pool, tsm->tcp_estab_lru_head_index);
4352
4353   pool_get (tsm->lru_pool, head);
4354   tsm->udp_lru_head_index = head - tsm->lru_pool;
4355   clib_dlist_init (tsm->lru_pool, tsm->udp_lru_head_index);
4356
4357   pool_get (tsm->lru_pool, head);
4358   tsm->icmp_lru_head_index = head - tsm->lru_pool;
4359   clib_dlist_init (tsm->lru_pool, tsm->icmp_lru_head_index);
4360
4361   pool_get (tsm->lru_pool, head);
4362   tsm->unk_proto_lru_head_index = head - tsm->lru_pool;
4363   clib_dlist_init (tsm->lru_pool, tsm->unk_proto_lru_head_index);
4364
4365   if (sm->endpoint_dependent)
4366     {
4367       clib_bihash_init_16_8 (&tsm->in2out_ed, "in2out-ed",
4368                              sm->translation_buckets, 0);
4369       clib_bihash_set_kvp_format_fn_16_8 (&tsm->in2out_ed,
4370                                           format_ed_session_kvp);
4371
4372     }
4373   else
4374     {
4375       clib_bihash_init_8_8 (&tsm->in2out, "in2out", sm->translation_buckets,
4376                             0);
4377       clib_bihash_set_kvp_format_fn_8_8 (&tsm->in2out, format_session_kvp);
4378       clib_bihash_init_8_8 (&tsm->out2in, "out2in", sm->translation_buckets,
4379                             0);
4380       clib_bihash_set_kvp_format_fn_8_8 (&tsm->out2in, format_session_kvp);
4381     }
4382
4383   // TODO: ED nat is not using these
4384   // before removal large refactor required
4385   pool_alloc (tsm->list_pool, sm->max_translations_per_thread);
4386   clib_bihash_init_8_8 (&tsm->user_hash, "users", sm->user_buckets, 0);
4387   clib_bihash_set_kvp_format_fn_8_8 (&tsm->user_hash, format_user_kvp);
4388 }
4389
4390 void
4391 nat44_db_free (snat_main_per_thread_data_t * tsm)
4392 {
4393   snat_main_t *sm = &snat_main;
4394
4395   pool_free (tsm->sessions);
4396   pool_free (tsm->lru_pool);
4397
4398   if (sm->endpoint_dependent)
4399     {
4400       clib_bihash_free_16_8 (&tsm->in2out_ed);
4401       vec_free (tsm->per_vrf_sessions_vec);
4402     }
4403   else
4404     {
4405       clib_bihash_free_8_8 (&tsm->in2out);
4406       clib_bihash_free_8_8 (&tsm->out2in);
4407     }
4408
4409   // TODO: resolve static mappings (put only to !ED)
4410   pool_free (tsm->users);
4411   pool_free (tsm->list_pool);
4412   clib_bihash_free_8_8 (&tsm->user_hash);
4413 }
4414
4415 void
4416 nat44_sessions_clear ()
4417 {
4418   snat_main_t *sm = &snat_main;
4419   snat_main_per_thread_data_t *tsm;
4420
4421   if (sm->endpoint_dependent)
4422     {
4423       clib_bihash_free_16_8 (&sm->out2in_ed);
4424       clib_bihash_init_16_8 (&sm->out2in_ed, "out2in-ed",
4425                              clib_max (1,
4426                                        sm->num_workers) *
4427                              sm->translation_buckets, 0);
4428       clib_bihash_set_kvp_format_fn_16_8 (&sm->out2in_ed,
4429                                           format_ed_session_kvp);
4430     }
4431
4432   /* *INDENT-OFF* */
4433   vec_foreach (tsm, sm->per_thread_data)
4434     {
4435       u32 ti;
4436
4437       nat44_db_free (tsm);
4438       nat44_db_init (tsm);
4439
4440       ti = tsm->snat_thread_index;
4441       vlib_set_simple_counter (&sm->total_users, ti, 0, 0);
4442       vlib_set_simple_counter (&sm->total_sessions, ti, 0, 0);
4443     }
4444   /* *INDENT-ON* */
4445 }
4446
4447 static void
4448 nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
4449                                  uword opaque,
4450                                  u32 sw_if_index,
4451                                  ip4_address_t * address,
4452                                  u32 address_length,
4453                                  u32 if_address_index, u32 is_delete)
4454 {
4455   snat_main_t *sm = &snat_main;
4456   snat_static_map_resolve_t *rp;
4457   snat_static_mapping_t *m;
4458   clib_bihash_kv_8_8_t kv, value;
4459   int i, rv;
4460   ip4_address_t l_addr;
4461
4462   if (!sm->enabled)
4463     return;
4464
4465   for (i = 0; i < vec_len (sm->to_resolve); i++)
4466     {
4467       rp = sm->to_resolve + i;
4468       if (rp->addr_only == 0)
4469         continue;
4470       if (rp->sw_if_index == sw_if_index)
4471         goto match;
4472     }
4473
4474   return;
4475
4476 match:
4477   init_nat_k (&kv, *address, rp->addr_only ? 0 : rp->e_port,
4478               sm->outside_fib_index, rp->addr_only ? 0 : rp->proto);
4479   if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
4480     m = 0;
4481   else
4482     m = pool_elt_at_index (sm->static_mappings, value.value);
4483
4484   if (!is_delete)
4485     {
4486       /* Don't trip over lease renewal, static config */
4487       if (m)
4488         return;
4489     }
4490   else
4491     {
4492       if (!m)
4493         return;
4494     }
4495
4496   /* Indetity mapping? */
4497   if (rp->l_addr.as_u32 == 0)
4498     l_addr.as_u32 = address[0].as_u32;
4499   else
4500     l_addr.as_u32 = rp->l_addr.as_u32;
4501   /* Add the static mapping */
4502   rv = snat_add_static_mapping (l_addr,
4503                                 address[0],
4504                                 rp->l_port,
4505                                 rp->e_port,
4506                                 rp->vrf_id,
4507                                 rp->addr_only, ~0 /* sw_if_index */ ,
4508                                 rp->proto, !is_delete, rp->twice_nat,
4509                                 rp->out2in_only, rp->tag, rp->identity_nat,
4510                                 rp->pool_addr, rp->exact);
4511   if (rv)
4512     nat_elog_notice_X1 ("snat_add_static_mapping returned %d", "i4", rv);
4513 }
4514
4515 static void
4516 snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
4517                                        uword opaque,
4518                                        u32 sw_if_index,
4519                                        ip4_address_t * address,
4520                                        u32 address_length,
4521                                        u32 if_address_index, u32 is_delete)
4522 {
4523   snat_main_t *sm = &snat_main;
4524   snat_static_map_resolve_t *rp;
4525   ip4_address_t l_addr;
4526   int i, j;
4527   int rv;
4528   u8 twice_nat = 0;
4529   snat_address_t *addresses = sm->addresses;
4530
4531   if (!sm->enabled)
4532     return;
4533
4534   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices); i++)
4535     {
4536       if (sw_if_index == sm->auto_add_sw_if_indices[i])
4537         goto match;
4538     }
4539
4540   for (i = 0; i < vec_len (sm->auto_add_sw_if_indices_twice_nat); i++)
4541     {
4542       twice_nat = 1;
4543       addresses = sm->twice_nat_addresses;
4544       if (sw_if_index == sm->auto_add_sw_if_indices_twice_nat[i])
4545         goto match;
4546     }
4547
4548   return;
4549
4550 match:
4551   if (!is_delete)
4552     {
4553       /* Don't trip over lease renewal, static config */
4554       for (j = 0; j < vec_len (addresses); j++)
4555         if (addresses[j].addr.as_u32 == address->as_u32)
4556           return;
4557
4558       (void) snat_add_address (sm, address, ~0, twice_nat);
4559       /* Scan static map resolution vector */
4560       for (j = 0; j < vec_len (sm->to_resolve); j++)
4561         {
4562           rp = sm->to_resolve + j;
4563           if (rp->addr_only)
4564             continue;
4565           /* On this interface? */
4566           if (rp->sw_if_index == sw_if_index)
4567             {
4568               /* Indetity mapping? */
4569               if (rp->l_addr.as_u32 == 0)
4570                 l_addr.as_u32 = address[0].as_u32;
4571               else
4572                 l_addr.as_u32 = rp->l_addr.as_u32;
4573               /* Add the static mapping */
4574               rv = snat_add_static_mapping (l_addr,
4575                                             address[0],
4576                                             rp->l_port,
4577                                             rp->e_port,
4578                                             rp->vrf_id,
4579                                             rp->addr_only,
4580                                             ~0 /* sw_if_index */ ,
4581                                             rp->proto,
4582                                             rp->is_add, rp->twice_nat,
4583                                             rp->out2in_only, rp->tag,
4584                                             rp->identity_nat,
4585                                             rp->pool_addr, rp->exact);
4586               if (rv)
4587                 nat_elog_notice_X1 ("snat_add_static_mapping returned %d",
4588                                     "i4", rv);
4589             }
4590         }
4591       return;
4592     }
4593   else
4594     {
4595       (void) snat_del_address (sm, address[0], 1, twice_nat);
4596       return;
4597     }
4598 }
4599
4600
4601 int
4602 snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del,
4603                             u8 twice_nat)
4604 {
4605   ip4_main_t *ip4_main = sm->ip4_main;
4606   ip4_address_t *first_int_addr;
4607   snat_static_map_resolve_t *rp;
4608   u32 *indices_to_delete = 0;
4609   int i, j;
4610   u32 *auto_add_sw_if_indices =
4611     twice_nat ? sm->
4612     auto_add_sw_if_indices_twice_nat : sm->auto_add_sw_if_indices;
4613
4614   first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0        /* just want the address */
4615     );
4616
4617   for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
4618     {
4619       if (auto_add_sw_if_indices[i] == sw_if_index)
4620         {
4621           if (is_del)
4622             {
4623               /* if have address remove it */
4624               if (first_int_addr)
4625                 (void) snat_del_address (sm, first_int_addr[0], 1, twice_nat);
4626               else
4627                 {
4628                   for (j = 0; j < vec_len (sm->to_resolve); j++)
4629                     {
4630                       rp = sm->to_resolve + j;
4631                       if (rp->sw_if_index == sw_if_index)
4632                         vec_add1 (indices_to_delete, j);
4633                     }
4634                   if (vec_len (indices_to_delete))
4635                     {
4636                       for (j = vec_len (indices_to_delete) - 1; j >= 0; j--)
4637                         vec_del1 (sm->to_resolve, j);
4638                       vec_free (indices_to_delete);
4639                     }
4640                 }
4641               if (twice_nat)
4642                 vec_del1 (sm->auto_add_sw_if_indices_twice_nat, i);
4643               else
4644                 vec_del1 (sm->auto_add_sw_if_indices, i);
4645             }
4646           else
4647             return VNET_API_ERROR_VALUE_EXIST;
4648
4649           return 0;
4650         }
4651     }
4652
4653   if (is_del)
4654     return VNET_API_ERROR_NO_SUCH_ENTRY;
4655
4656   /* add to the auto-address list */
4657   if (twice_nat)
4658     vec_add1 (sm->auto_add_sw_if_indices_twice_nat, sw_if_index);
4659   else
4660     vec_add1 (sm->auto_add_sw_if_indices, sw_if_index);
4661
4662   /* If the address is already bound - or static - add it now */
4663   if (first_int_addr)
4664     (void) snat_add_address (sm, first_int_addr, ~0, twice_nat);
4665
4666   return 0;
4667 }
4668
4669 int
4670 nat44_del_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
4671                    nat_protocol_t proto, u32 vrf_id, int is_in)
4672 {
4673   snat_main_per_thread_data_t *tsm;
4674   clib_bihash_kv_8_8_t kv, value;
4675   ip4_header_t ip;
4676   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
4677   snat_session_t *s;
4678   clib_bihash_8_8_t *t;
4679
4680   if (sm->endpoint_dependent)
4681     return VNET_API_ERROR_UNSUPPORTED;
4682
4683   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
4684   if (sm->num_workers > 1)
4685     tsm =
4686       vec_elt_at_index (sm->per_thread_data,
4687                         sm->worker_in2out_cb (&ip, fib_index, 0));
4688   else
4689     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
4690
4691   init_nat_k (&kv, *addr, port, fib_index, proto);
4692   t = is_in ? &tsm->in2out : &tsm->out2in;
4693   if (!clib_bihash_search_8_8 (t, &kv, &value))
4694     {
4695       if (pool_is_free_index (tsm->sessions, value.value))
4696         return VNET_API_ERROR_UNSPECIFIED;
4697
4698       s = pool_elt_at_index (tsm->sessions, value.value);
4699       nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
4700       nat44_delete_session (sm, s, tsm - sm->per_thread_data);
4701       return 0;
4702     }
4703
4704   return VNET_API_ERROR_NO_SUCH_ENTRY;
4705 }
4706
4707 int
4708 nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
4709                       ip4_address_t * eh_addr, u16 eh_port, u8 proto,
4710                       u32 vrf_id, int is_in)
4711 {
4712   ip4_header_t ip;
4713   clib_bihash_16_8_t *t;
4714   clib_bihash_kv_16_8_t kv, value;
4715   u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
4716   snat_session_t *s;
4717   snat_main_per_thread_data_t *tsm;
4718
4719   if (!sm->endpoint_dependent)
4720     return VNET_API_ERROR_FEATURE_DISABLED;
4721
4722   ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
4723   if (sm->num_workers > 1)
4724     tsm =
4725       vec_elt_at_index (sm->per_thread_data,
4726                         sm->worker_in2out_cb (&ip, fib_index, 0));
4727   else
4728     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
4729
4730   t = is_in ? &tsm->in2out_ed : &sm->out2in_ed;
4731   init_ed_k (&kv, *addr, port, *eh_addr, eh_port, fib_index, proto);
4732   if (clib_bihash_search_16_8 (t, &kv, &value))
4733     {
4734       return VNET_API_ERROR_NO_SUCH_ENTRY;
4735     }
4736
4737   if (pool_is_free_index (tsm->sessions, value.value))
4738     return VNET_API_ERROR_UNSPECIFIED;
4739   s = pool_elt_at_index (tsm->sessions, value.value);
4740   nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
4741   nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
4742   return 0;
4743 }
4744
4745 void
4746 nat_set_alloc_addr_and_port_mape (u16 psid, u16 psid_offset, u16 psid_length)
4747 {
4748   snat_main_t *sm = &snat_main;
4749
4750   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_MAPE;
4751   sm->alloc_addr_and_port = nat_alloc_addr_and_port_mape;
4752   sm->psid = psid;
4753   sm->psid_offset = psid_offset;
4754   sm->psid_length = psid_length;
4755 }
4756
4757 void
4758 nat_set_alloc_addr_and_port_range (u16 start_port, u16 end_port)
4759 {
4760   snat_main_t *sm = &snat_main;
4761
4762   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_RANGE;
4763   sm->alloc_addr_and_port = nat_alloc_addr_and_port_range;
4764   sm->start_port = start_port;
4765   sm->end_port = end_port;
4766 }
4767
4768 void
4769 nat_set_alloc_addr_and_port_default (void)
4770 {
4771   snat_main_t *sm = &snat_main;
4772
4773   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_DEFAULT;
4774   sm->alloc_addr_and_port = nat_alloc_addr_and_port_default;
4775 }
4776
4777 VLIB_NODE_FN (nat_default_node) (vlib_main_t * vm,
4778                                  vlib_node_runtime_t * node,
4779                                  vlib_frame_t * frame)
4780 {
4781   return 0;
4782 }
4783
4784 /* *INDENT-OFF* */
4785 VLIB_REGISTER_NODE (nat_default_node) = {
4786   .name = "nat-default",
4787   .vector_size = sizeof (u32),
4788   .format_trace = 0,
4789   .type = VLIB_NODE_TYPE_INTERNAL,
4790   .n_errors = 0,
4791   .n_next_nodes = NAT_N_NEXT,
4792   .next_nodes = {
4793     [NAT_NEXT_DROP] = "error-drop",
4794     [NAT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
4795     [NAT_NEXT_IN2OUT_ED_FAST_PATH] = "nat44-ed-in2out",
4796     [NAT_NEXT_IN2OUT_ED_SLOW_PATH] = "nat44-ed-in2out-slowpath",
4797     [NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH] = "nat44-ed-in2out-output",
4798     [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
4799     [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in",
4800     [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath",
4801     [NAT_NEXT_IN2OUT_CLASSIFY] = "nat44-in2out-worker-handoff",
4802     [NAT_NEXT_OUT2IN_CLASSIFY] = "nat44-out2in-worker-handoff",
4803   },
4804 };
4805 /* *INDENT-ON* */
4806
4807 /*
4808  * fd.io coding-style-patch-verification: ON
4809  *
4810  * Local Variables:
4811  * eval: (c-set-style "gnu")
4812  * End:
4813  */